From a9678182355f9673c11563f4c9063002f87644b9 Mon Sep 17 00:00:00 2001 From: Glenn Randers-Pehrson Date: Tue, 19 Apr 2016 08:23:10 -0500 Subject: [PATCH] [libpng16] Moved INTEL-SSE code from pngpriv.h to contrib/intel/intel_sse.patch. --- ANNOUNCE | 5 +- CHANGES | 3 +- contrib/intel/INSTALL | 61 +++++++++++++++++++++++++ contrib/intel/intel_sse.patch | 86 +++++++++++++++++++++++++++++++++++ pngpriv.h | 49 -------------------- 5 files changed, 152 insertions(+), 52 deletions(-) diff --git a/ANNOUNCE b/ANNOUNCE index 0fa046af2..19cc36885 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -1,4 +1,4 @@ -Libpng 1.6.22beta05 - April 15, 2016 +Libpng 1.6.22beta05 - April 18, 2016 This is not intended to be a public release. It will be replaced within a few weeks by a public version or by another test version. @@ -83,10 +83,11 @@ Version 1.6.22beta04 [April 5, 2016] memcpy-free implementations of load3() / store3(). call load3() only when needed at the end of a scanline. -Version 1.6.22beta05 [April 15, 2016] +Version 1.6.22beta05 [April 18, 2016] Added PNG_FAST_FILTERS macro (defined as PNG_FILTER_NONE|PNG_FILTER_SUB|PNG_FILTER_UP). Various fixes for contrib/libtests/timepng.c + Moved INTEL-SSE code from pngpriv.h into contrib/intel/intel_sse.patch. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/CHANGES b/CHANGES index e1df18534..d29e39c5b 100644 --- a/CHANGES +++ b/CHANGES @@ -5543,10 +5543,11 @@ Version 1.6.22beta04 [April 5, 2016] memcpy-free implementations of load3() / store3(). call load3() only when needed at the end of a scanline. -Version 1.6.22beta05 [April 15, 2016] +Version 1.6.22beta05 [April 18, 2016] Added PNG_FAST_FILTERS macro (defined as PNG_FILTER_NONE|PNG_FILTER_SUB|PNG_FILTER_UP). Various fixes for contrib/libtests/timepng.c + Moved INTEL-SSE code from pngpriv.h into contrib/intel/intel_sse.patch. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/contrib/intel/INSTALL b/contrib/intel/INSTALL index f57467fe2..bed34a9cc 100644 --- a/contrib/intel/INSTALL +++ b/contrib/intel/INSTALL @@ -112,3 +112,64 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\ contrib/intel/filter_sse2_intrinsics.c endif -----------------cut---------------- + +5. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT +code: + +-----------------cut---------------- +#ifndef PNG_INTEL_SSE_OPT +# ifdef PNG_INTEL_SSE + /* Only check for SSE if the build configuration has been modified to + * enable SSE optimizations. This means that these optimizations will + * be off by default. See contrib/intel for more details. + */ +# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ + defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_OPT 1 +# endif +# endif +#endif + +#if PNG_INTEL_SSE_OPT > 0 +# ifndef PNG_INTEL_SSE_IMPLEMENTATION +# if defined(__SSE4_1__) || defined(__AVX__) + /* We are not actually using AVX, but checking for AVX is the best + way we can detect SSE4.1 and SSSE3 on MSVC. + */ +# define PNG_INTEL_SSE_IMPLEMENTATION 3 +# elif defined(__SSSE3__) +# define PNG_INTEL_SSE_IMPLEMENTATION 2 +# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP >= 2) +# define PNG_INTEL_SSE_IMPLEMENTATION 1 +# else +# define PNG_INTEL_SSE_IMPLEMENTATION 0 +# endif +# endif + +# if PNG_INTEL_SSE_IMPLEMENTATION > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 +# endif +#endif + +-----------------cut---------------- + +6. Add the following lines to pngpriv.h, following the prototype for +png_read_filter_row_paeth4_neon: + +-----------------cut---------------- +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); + +-----------------cut---------------- diff --git a/contrib/intel/intel_sse.patch b/contrib/intel/intel_sse.patch index ab1a73577..13472dc12 100644 --- a/contrib/intel/intel_sse.patch +++ b/contrib/intel/intel_sse.patch @@ -104,3 +104,89 @@ diff --git a/Makefile.am b/Makefile.am if HAVE_LD_VERSION_SCRIPT # Versioned symbols and restricted exports if HAVE_SOLARIS_LD +diff --git a/pngpriv.h b/pngpriv.h +--- a/pngpriv.h 2016-04-18 09:34:43.670248185 -0500 ++++ b/pngpriv.h 2016-04-18 09:31:01.697943803 -0500 +@@ -177,16 +177,52 @@ + # endif /* !PNG_ARM_NEON_IMPLEMENTATION */ + + # ifndef PNG_ARM_NEON_IMPLEMENTATION + /* Use the intrinsics code by default. */ + # define PNG_ARM_NEON_IMPLEMENTATION 1 + # endif + #endif /* PNG_ARM_NEON_OPT > 0 */ + ++#ifndef PNG_INTEL_SSE_OPT ++# ifdef PNG_INTEL_SSE ++ /* Only check for SSE if the build configuration has been modified to ++ * enable SSE optimizations. This means that these optimizations will ++ * be off by default. See contrib/intel for more details. ++ */ ++# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ ++ defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ ++ (defined(_M_IX86_FP) && _M_IX86_FP >= 2) ++# define PNG_INTEL_SSE_OPT 1 ++# endif ++# endif ++#endif ++ ++#if PNG_INTEL_SSE_OPT > 0 ++# ifndef PNG_INTEL_SSE_IMPLEMENTATION ++# if defined(__SSE4_1__) || defined(__AVX__) ++ /* We are not actually using AVX, but checking for AVX is the best ++ way we can detect SSE4.1 and SSSE3 on MSVC. ++ */ ++# define PNG_INTEL_SSE_IMPLEMENTATION 3 ++# elif defined(__SSSE3__) ++# define PNG_INTEL_SSE_IMPLEMENTATION 2 ++# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ ++ (defined(_M_IX86_FP) && _M_IX86_FP >= 2) ++# define PNG_INTEL_SSE_IMPLEMENTATION 1 ++# else ++# define PNG_INTEL_SSE_IMPLEMENTATION 0 ++# endif ++# endif ++ ++# if PNG_INTEL_SSE_IMPLEMENTATION > 0 ++# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 ++# endif ++#endif ++ + /* Is this a build of a DLL where compilation of the object modules requires + * different preprocessor settings to those required for a simple library? If + * so PNG_BUILD_DLL must be set. + * + * If libpng is used inside a DLL but that DLL does not export the libpng APIs + * PNG_BUILD_DLL must not be set. To avoid the code below kicking in build a + * static library of libpng then link the DLL against that. + */ +@@ -1184,16 +1220,29 @@ PNG_INTERNAL_FUNCTION(void,png_read_filt + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); + PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); + PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); + PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); + ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop ++ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); ++ + /* Choose the best filter to use and filter the row data */ + PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, + png_row_infop row_info),PNG_EMPTY); + + #ifdef PNG_SEQUENTIAL_READ_SUPPORTED + PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr, + png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY); + /* Read 'avail_out' bytes of data from the IDAT stream. If the output buffer diff --git a/pngpriv.h b/pngpriv.h index 8893f18b9..5727f601c 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -182,42 +182,6 @@ # endif #endif /* PNG_ARM_NEON_OPT > 0 */ -#ifndef PNG_INTEL_SSE_OPT -# ifdef PNG_INTEL_SSE - /* Only check for SSE if the build configuration has been modified to - * enable SSE optimizations. This means that these optimizations will - * be off by default. See contrib/intel for more details. - */ -# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \ - defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -# define PNG_INTEL_SSE_OPT 1 -# endif -# endif -#endif - -#if PNG_INTEL_SSE_OPT > 0 -# ifndef PNG_INTEL_SSE_IMPLEMENTATION -# if defined(__SSE4_1__) || defined(__AVX__) - /* We are not actually using AVX, but checking for AVX is the best - way we can detect SSE4.1 and SSSE3 on MSVC. - */ -# define PNG_INTEL_SSE_IMPLEMENTATION 3 -# elif defined(__SSSE3__) -# define PNG_INTEL_SSE_IMPLEMENTATION 2 -# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \ - (defined(_M_IX86_FP) && _M_IX86_FP >= 2) -# define PNG_INTEL_SSE_IMPLEMENTATION 1 -# else -# define PNG_INTEL_SSE_IMPLEMENTATION 0 -# endif -# endif - -# if PNG_INTEL_SSE_IMPLEMENTATION > 0 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2 -# endif -#endif - /* Is this a build of a DLL where compilation of the object modules requires * different preprocessor settings to those required for a simple library? If * so PNG_BUILD_DLL must be set. @@ -1225,19 +1189,6 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); - /* Choose the best filter to use and filter the row data */ PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, png_row_infop row_info),PNG_EMPTY);