[master] Imported from libpng-1.6.22.tar

This commit is contained in:
Glenn Randers-Pehrson
2016-05-25 22:41:46 -05:00
parent 5756fcab2f
commit 87c14c528a
54 changed files with 3023 additions and 526 deletions

View File

@@ -54,7 +54,7 @@
---------------------------------------------------------------------------
Changelog:
%RDATE% - Check return value of png_get_bKGD() (Glenn R-P)
2015-11-12 - Check return value of png_get_bKGD() (Glenn R-P)
---------------------------------------------------------------------------*/

158
contrib/intel/INSTALL Normal file
View File

@@ -0,0 +1,158 @@
Enabling SSE support
Copyright (c) 2016 Google, Inc.
Written by Mike Klein, Matt Sarett
This INSTALL file written by Glenn Randers-Pehrson, 2016.
If you have moved intel_init.c and filter_sse2_intrinsics.c to a different
directory, be sure to update the '#include "../../pngpriv.h"' line in both
files if necessary to point to the correct relative location of pngpriv.h
with respect to the new location of those files.
To enable SSE support in libpng, follow the instructions in I, II, or III,
below:
I. Using patched "configure" scripts:
First, apply intel_sse.patch in your build directory.
patch -i contrib/intel/intel_sse.patch -p1
Then, if you are not building in a new GIT clone, e.g., in a tar
distribution, remove any existing pre-built configure scripts:
./configure --enable-maintainer-mode
make maintainer-clean
./autogen.sh --maintainer --clean
Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS:
./autogen.sh --maintainer
CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options]
make CPPFLAGS="-DPNG_INTEL_SSE" [options]
make
II. Using a custom makefile:
If you are using a custom makefile makefile, you will have to update it
manually to include contrib/intel/*.o in the dependencies, and to define
PNG_INTEL_SSE.
III. Using manually updated "configure" scripts:
If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am,
following the instructions below, then follow the instructions in
section II of INSTALL in the main libpng directory, then configure libpng
with -DPNG_INTEL_SSE in CPPFLAGS.
1. Add the following code to configure.ac under HOST SPECIFIC OPTIONS
directly beneath the section for ARM:
-----------------cut----------------
# INTEL
# =====
#
# INTEL SSE (SIMD) support.
AC_ARG_ENABLE([intel-sse],
AS_HELP_STRING([[[--enable-intel-sse]]],
[Enable Intel SSE optimizations: =no/off, yes/on:]
[no/off: disable the optimizations;]
[yes/on: enable the optimizations.]
[If not specified: determined by the compiler.]),
[case "$enableval" in
no|off)
# disable the default enabling:
AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
[Disable Intel SSE optimizations])
# Prevent inclusion of the assembler files below:
enable_intel_sse=no;;
yes|on)
AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
[Enable Intel SSE optimizations]);;
*)
AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
esac])
# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
# or where Intel optimizations were explicitly requested (this allows a
# fallback if a future host CPU does not match 'x86*')
AM_CONDITIONAL([PNG_INTEL_SSE],
[test "$enable_intel_sse" != 'no' &&
case "$host_cpu" in
i?86|x86_64) :;;
*) test "$enable_intel_sse" != '';;
esac])
-----------------cut----------------
2. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS
directly beneath the "if PNG_ARM_NEON ... endif" statement:
-----------------cut----------------
if PNG_INTEL_SSE
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
contrib/intel/filter_sse2_intrinsics.c
endif
-----------------cut----------------
3. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT
code:
-----------------cut----------------
#ifndef PNG_INTEL_SSE_OPT
# ifdef PNG_INTEL_SSE
/* Only check for SSE if the build configuration has been modified to
* enable SSE optimizations. This means that these optimizations will
* be off by default. See contrib/intel for more details.
*/
# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
(defined(_M_IX86_FP) && _M_IX86_FP >= 2)
# define PNG_INTEL_SSE_OPT 1
# endif
# endif
#endif
#if PNG_INTEL_SSE_OPT > 0
# ifndef PNG_INTEL_SSE_IMPLEMENTATION
# if defined(__SSE4_1__) || defined(__AVX__)
/* We are not actually using AVX, but checking for AVX is the best
way we can detect SSE4.1 and SSSE3 on MSVC.
*/
# define PNG_INTEL_SSE_IMPLEMENTATION 3
# elif defined(__SSSE3__)
# define PNG_INTEL_SSE_IMPLEMENTATION 2
# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
(defined(_M_IX86_FP) && _M_IX86_FP >= 2)
# define PNG_INTEL_SSE_IMPLEMENTATION 1
# else
# define PNG_INTEL_SSE_IMPLEMENTATION 0
# endif
# endif
# if PNG_INTEL_SSE_IMPLEMENTATION > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
# endif
#endif
-----------------cut----------------
4. Add the following lines to pngpriv.h, following the prototype for
png_read_filter_row_paeth4_neon:
-----------------cut----------------
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-----------------cut----------------

View File

@@ -0,0 +1,379 @@
/* filter_sse2_intrinsics.c - SSE2 optimized filter functions
*
* Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett
* Derived from arm/filter_neon_intrinsics.c, which was
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
*
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
#include <immintrin.h>
/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
* They're positioned like this:
* prev: c b
* row: a d
* The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
* whichever of a, b, or c is closest to p=a+b-c.
*/
static __m128i load4(const void* p) {
return _mm_cvtsi32_si128(*(const int*)p);
}
static void store4(void* p, __m128i v) {
*(int*)p = _mm_cvtsi128_si32(v);
}
static __m128i load3(const void* p) {
/* We'll load 2 bytes, then 1 byte,
* then mask them together, and finally load into SSE.
*/
const png_uint_16* p01 = p;
const png_byte* p2 = (const png_byte*)(p01+1);
png_uint_32 v012 = (png_uint_32)(*p01)
| (png_uint_32)(*p2) << 16;
return load4(&v012);
}
static void store3(void* p, __m128i v) {
/* We'll pull from SSE as a 32-bit int, then write
* its bottom two bytes, then its third byte.
*/
png_uint_32 v012;
store4(&v012, v);
png_uint_16* p01 = p;
png_byte* p2 = (png_byte*)(p01+1);
*p01 = v012;
*p2 = v012 >> 16;
}
void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Sub filter predicts each pixel as the previous pixel, a.
* There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero.
*/
png_debug(1, "in png_read_filter_row_sub3_sse2");
__m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes;
while (rb >= 4) {
a = d; d = load4(row);
d = _mm_add_epi8(d, a);
store3(row, d);
row += 3;
rb -= 3;
}
if (rb > 0) {
a = d; d = load3(row);
d = _mm_add_epi8(d, a);
store3(row, d);
row += 3;
rb -= 3;
}
}
void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Sub filter predicts each pixel as the previous pixel, a.
* There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero.
*/
png_debug(1, "in png_read_filter_row_sub4_sse2");
__m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes;
while (rb > 0) {
a = d; d = load4(row);
d = _mm_add_epi8(d, a);
store4(row, d);
row += 4;
rb -= 4;
}
}
void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Avg filter predicts each pixel as the (truncated) average of a and b.
* There's no pixel to the left of the first pixel. Luckily, it's
* predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero.
*/
png_debug(1, "in png_read_filter_row_avg3_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i b;
__m128i a, d = zero;
int rb = row_info->rowbytes;
while (rb >= 4) {
b = load4(prev);
a = d; d = load4(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store3(row, d);
prev += 3;
row += 3;
rb -= 3;
}
if (rb > 0) {
b = load3(prev);
a = d; d = load3(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store3(row, d);
prev += 3;
row += 3;
rb -= 3;
}
}
void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Avg filter predicts each pixel as the (truncated) average of a and b.
* There's no pixel to the left of the first pixel. Luckily, it's
* predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero.
*/
png_debug(1, "in png_read_filter_row_avg4_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i b;
__m128i a, d = zero;
int rb = row_info->rowbytes;
while (rb > 0) {
b = load4(prev);
a = d; d = load4(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store4(row, d);
prev += 4;
row += 4;
rb -= 4;
}
}
/* Returns |x| for 16-bit lanes. */
static __m128i abs_i16(__m128i x) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 2
return _mm_abs_epi16(x);
#else
/* Read this all as, return x<0 ? -x : x.
* To negate two's complement, you flip all the bits then add 1.
*/
__m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
/* Flip negative lanes. */
x = _mm_xor_si128(x, is_negative);
/* +1 to negative lanes, else +0. */
x = _mm_add_epi16(x, _mm_srli_epi16(is_negative, 15));
return x;
#endif
}
/* Bytewise c ? t : e. */
static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 3
return _mm_blendv_epi8(e,t,c);
#else
return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
#endif
}
void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
* and two pixels from the previous row, b and c:
* prev: c b
* row: a d
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
* p=a+b-c.
*
* The first pixel has no left context, and so uses an Up filter, p = b.
* This works naturally with our main loop's p = a+b-c if we force a and c
* to zero.
* Here we zero b and d, which become c and a respectively at the start of
* the loop.
*/
png_debug(1, "in png_read_filter_row_paeth3_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero,
a, d = zero;
int rb = row_info->rowbytes;
while (rb >= 4) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 255. */
d = _mm_add_epi8(d, nearest);
store3(row, _mm_packus_epi16(d,d));
prev += 3;
row += 3;
rb -= 3;
}
if (rb > 0) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 255. */
d = _mm_add_epi8(d, nearest);
store3(row, _mm_packus_epi16(d,d));
prev += 3;
row += 3;
rb -= 3;
}
}
void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
* and two pixels from the previous row, b and c:
* prev: c b
* row: a d
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
* p=a+b-c.
*
* The first pixel has no left context, and so uses an Up filter, p = b.
* This works naturally with our main loop's p = a+b-c if we force a and c
* to zero.
* Here we zero b and d, which become c and a respectively at the start of
* the loop.
*/
png_debug(1, "in png_read_filter_row_paeth4_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero,
a, d = zero;
int rb = row_info->rowbytes;
while (rb > 0) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 255. */
d = _mm_add_epi8(d, nearest);
store4(row, _mm_packus_epi16(d,d));
prev += 4;
row += 4;
rb -= 4;
}
}
#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
#endif /* READ */

View File

@@ -0,0 +1,54 @@
/* intel_init.c - SSE2 optimized filter functions
*
* Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett
* Derived from arm/arm_init.c, which was
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
*
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
void
png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
{
/* The techniques used to implement each of these filters in SSE operate on
* one pixel at a time.
* So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
* They can scale up to 6 and 8 bpp images and down to 2 bpp images,
* but they'd not likely have any benefit for 1bpp images.
* Most of these can be implemented using only MMX and 64-bit registers,
* but they end up a bit slower than using the equally-ubiquitous SSE2.
*/
png_debug(1, "in png_init_filter_functions_sse2");
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth3_sse2;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth4_sse2;
}
/* No need optimize PNG_FILTER_VALUE_UP. The compiler should
* autovectorize.
*/
}
#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
#endif /* PNG_READ_SUPPORTED */

View File

@@ -0,0 +1,164 @@
diff --git libpng-1.6.22-orig/configure.ac libpng-1.6.22/configure.ac
--- libpng-1.6.22-orig/configure.ac 2016-05-25 18:59:10.000000000 -0400
+++ libpng-1.6.22/configure.ac 2016-05-25 19:48:10.631751170 -0400
@@ -341,16 +341,50 @@ AC_ARG_ENABLE([arm-neon],
AM_CONDITIONAL([PNG_ARM_NEON],
[test "$enable_arm_neon" != 'no' &&
case "$host_cpu" in
arm*|aarch64*) :;;
*) test "$enable_arm_neon" != '';;
esac])
+# INTEL
+# =====
+#
+# INTEL SSE (SIMD) support.
+
+AC_ARG_ENABLE([intel-sse],
+ AS_HELP_STRING([[[--enable-intel-sse]]],
+ [Enable Intel SSE optimizations: =no/off, yes/on:]
+ [no/off: disable the optimizations;]
+ [yes/on: enable the optimizations.]
+ [If not specified: determined by the compiler.]),
+ [case "$enableval" in
+ no|off)
+ # disable the default enabling:
+ AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
+ [Disable Intel SSE optimizations])
+ # Prevent inclusion of the assembler files below:
+ enable_intel_sse=no;;
+ yes|on)
+ AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
+ [Enable Intel SSE optimizations]);;
+ *)
+ AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
+ esac])
+
+# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
+# or where Intel optimizations were explicitly requested (this allows a
+# fallback if a future host CPU does not match 'x86*')
+AM_CONDITIONAL([PNG_INTEL_SSE],
+ [test "$enable_intel_sse" != 'no' &&
+ case "$host_cpu" in
+ i?86|x86_64) :;;
+ *) test "$enable_intel_sse" != '';;
+ esac])
AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
# Config files, substituting as above
AC_CONFIG_FILES([Makefile libpng.pc:libpng.pc.in])
AC_CONFIG_FILES([libpng-config:libpng-config.in],
[chmod +x libpng-config])
AC_OUTPUT
diff --git libpng-1.6.22-orig/Makefile.am libpng-1.6.22/Makefile.am
--- libpng-1.6.22-orig/Makefile.am 2016-05-17 18:15:12.000000000 -0400
+++ libpng-1.6.22/Makefile.am 2016-05-25 19:48:10.631751170 -0400
@@ -89,16 +89,20 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SO
pngset.c pngtrans.c pngwio.c pngwrite.c pngwtran.c pngwutil.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
if PNG_ARM_NEON
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
arm/filter_neon.S arm/filter_neon_intrinsics.c
endif
+if PNG_INTEL_SSE
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
+ contrib/intel/filter_sse2_intrinsics.c
+endif
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \
-version-number @PNGLIB_MAJOR@@PNGLIB_MINOR@:@PNGLIB_RELEASE@:0
if HAVE_LD_VERSION_SCRIPT
# Versioned symbols and restricted exports
if HAVE_SOLARIS_LD
diff --git libpng-1.6.22-orig/pngpriv.h libpng-1.6.22/pngpriv.h
--- libpng-1.6.22-orig/pngpriv.h 2016-05-25 18:59:10.000000000 -0400
+++ libpng-1.6.22/pngpriv.h 2016-05-25 19:48:10.635751171 -0400
@@ -177,16 +177,52 @@
# endif /* !PNG_ARM_NEON_IMPLEMENTATION */
# ifndef PNG_ARM_NEON_IMPLEMENTATION
/* Use the intrinsics code by default. */
# define PNG_ARM_NEON_IMPLEMENTATION 1
# endif
#endif /* PNG_ARM_NEON_OPT > 0 */
+#ifndef PNG_INTEL_SSE_OPT
+# ifdef PNG_INTEL_SSE
+ /* Only check for SSE if the build configuration has been modified to
+ * enable SSE optimizations. This means that these optimizations will
+ * be off by default. See contrib/intel for more details.
+ */
+# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
+ defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+# define PNG_INTEL_SSE_OPT 1
+# endif
+# endif
+#endif
+
+#if PNG_INTEL_SSE_OPT > 0
+# ifndef PNG_INTEL_SSE_IMPLEMENTATION
+# if defined(__SSE4_1__) || defined(__AVX__)
+ /* We are not actually using AVX, but checking for AVX is the best
+ way we can detect SSE4.1 and SSSE3 on MSVC.
+ */
+# define PNG_INTEL_SSE_IMPLEMENTATION 3
+# elif defined(__SSSE3__)
+# define PNG_INTEL_SSE_IMPLEMENTATION 2
+# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+# define PNG_INTEL_SSE_IMPLEMENTATION 1
+# else
+# define PNG_INTEL_SSE_IMPLEMENTATION 0
+# endif
+# endif
+
+# if PNG_INTEL_SSE_IMPLEMENTATION > 0
+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
+# endif
+#endif
+
/* Is this a build of a DLL where compilation of the object modules requires
* different preprocessor settings to those required for a simple library? If
* so PNG_BUILD_DLL must be set.
*
* If libpng is used inside a DLL but that DLL does not export the libpng APIs
* PNG_BUILD_DLL must not be set. To avoid the code below kicking in build a
* static library of libpng then link the DLL against that.
*/
@@ -1184,16 +1220,29 @@ PNG_INTERNAL_FUNCTION(void,png_read_filt
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_neon,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_neon,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_neon,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+
/* Choose the best filter to use and filter the row data */
PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
png_row_infop row_info),PNG_EMPTY);
#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr,
png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY);
/* Read 'avail_out' bytes of data from the IDAT stream. If the output buffer

View File

@@ -1,8 +1,8 @@
/* pngimage.c
*
* Copyright (c) 2015 John Cunningham Bowler
* Copyright (c) 2015,2016 John Cunningham Bowler
*
* Last changed in libpng 1.6.20 [December 3, 2015]
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -36,6 +36,15 @@
# include <setjmp.h> /* because png.h did *not* include this */
#endif
/* 1.6.1 added support for the configure test harness, which uses 77 to indicate
* a skipped test, in earlier versions we need to succeed on a skipped test, so:
*/
#if PNG_LIBPNG_VER >= 10601 && defined(HAVE_CONFIG_H)
# define SKIP 77
#else
# define SKIP 0
#endif
#if defined(PNG_INFO_IMAGE_SUPPORTED) && defined(PNG_SEQUENTIAL_READ_SUPPORTED)\
&& (defined(PNG_READ_PNG_SUPPORTED) || PNG_LIBPNG_VER < 10700)
/* If a transform is valid on both read and write this implies that if the
@@ -1682,6 +1691,6 @@ int
main(void)
{
fprintf(stderr, "pngimage: no support for png_read/write_image\n");
return 77;
return SKIP;
}
#endif

View File

@@ -1,9 +1,9 @@
/*-
* pngstest.c
*
* Copyright (c) 2013-2015 John Cunningham Bowler
* Copyright (c) 2013-2016 John Cunningham Bowler
*
* Last changed in libpng 1.6.19 [November 12, 2015]
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -26,6 +26,15 @@
# include <config.h>
#endif
/* 1.6.1 added support for the configure test harness, which uses 77 to indicate
* a skipped test, in earlier versions we need to succeed on a skipped test, so:
*/
#if PNG_LIBPNG_VER >= 10601 && defined(HAVE_CONFIG_H)
# define SKIP 77
#else
# define SKIP 0
#endif
/* Define the following to use this test against your installed libpng, rather
* than the one being built here:
*/
@@ -99,10 +108,18 @@ make_random_bytes(png_uint_32* seed, void* pv, size_t size)
seed[1] = u1;
}
static png_uint_32 color_seed[2];
static void
reseed(void)
{
color_seed[0] = 0x12345678U;
color_seed[1] = 0x9abcdefU;
}
static void
random_color(png_colorp color)
{
static png_uint_32 color_seed[2] = { 0x12345678, 0x9abcdef };
make_random_bytes(color_seed, color, sizeof *color);
}
@@ -307,7 +324,7 @@ compare_16bit(int v1, int v2, int error_limit, int multiple_algorithms)
}
#endif /* unused */
#define READ_FILE 1 /* else memory */
#define USE_FILE 1 /* else memory */
#define USE_STDIO 2 /* else use file name */
#define STRICT 4 /* fail on warnings too */
#define VERBOSE 8
@@ -316,16 +333,19 @@ compare_16bit(int v1, int v2, int error_limit, int multiple_algorithms)
#define ACCUMULATE 64
#define FAST_WRITE 128
#define sRGB_16BIT 256
#define NO_RESEED 512 /* do not reseed on each new file */
#define GBG_ERROR 1024 /* do not ignore the gamma+background_rgb_to_gray
* libpng warning. */
static void
print_opts(png_uint_32 opts)
{
if (opts & READ_FILE)
if (opts & USE_FILE)
printf(" --file");
if (opts & USE_STDIO)
printf(" --stdio");
if (opts & STRICT)
printf(" --strict");
if (!(opts & STRICT))
printf(" --nostrict");
if (opts & VERBOSE)
printf(" --verbose");
if (opts & KEEP_TMPFILES)
@@ -338,6 +358,12 @@ print_opts(png_uint_32 opts)
printf(" --slow");
if (opts & sRGB_16BIT)
printf(" --sRGB-16bit");
if (opts & NO_RESEED)
printf(" --noreseed");
#if PNG_LIBPNG_VER < 10700 /* else on by default */
if (opts & GBG_ERROR)
printf(" --fault-gbg-warning");
#endif
}
#define FORMAT_NO_CHANGE 0x80000000 /* additional flag */
@@ -741,8 +767,15 @@ checkopaque(Image *image)
return logerror(image, image->file_name, ": opaque not NULL", "");
}
else if (image->image.warning_or_error != 0 && (image->opts & STRICT) != 0)
return logerror(image, image->file_name, " --strict", "");
/* Separate out the gamma+background_rgb_to_gray warning because it may
* produce opaque component errors:
*/
else if (image->image.warning_or_error != 0 &&
(strcmp(image->image.message,
"libpng does not support gamma+background+rgb_to_gray") == 0 ?
(image->opts & GBG_ERROR) != 0 : (image->opts & STRICT) != 0))
return logerror(image, image->file_name, (image->opts & GBG_ERROR) != 0 ?
" --fault-gbg-warning" : " --strict", "");
else
return 1;
@@ -3008,14 +3041,14 @@ read_file(Image *image, png_uint_32 format, png_const_colorp background)
static int
read_one_file(Image *image)
{
if (!(image->opts & READ_FILE) || (image->opts & USE_STDIO))
if (!(image->opts & USE_FILE) || (image->opts & USE_STDIO))
{
/* memory or stdio. */
FILE *f = fopen(image->file_name, "rb");
if (f != NULL)
{
if (image->opts & READ_FILE)
if (image->opts & USE_FILE)
image->input_file = f;
else /* memory */
@@ -3096,7 +3129,8 @@ write_one_file(Image *output, Image *image, int convert_to_8bit)
if (image->opts & USE_STDIO)
{
#ifndef PNG_USE_MKSTEMP
#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
#ifndef __COVERITY__
FILE *f = tmpfile();
#else
/* Experimental. Coverity says tmpfile() is insecure because it
@@ -3158,10 +3192,14 @@ write_one_file(Image *output, Image *image, int convert_to_8bit)
else
return logerror(image, "tmpfile", ": open: ", strerror(errno));
#else /* SIMPLIFIED_WRITE_STDIO */
return logerror(image, "tmpfile", ": open: unsupported", "");
#endif /* SIMPLIFIED_WRITE_STDIO */
}
else
else if (image->opts & USE_FILE)
{
#ifdef PNG_SIMPLIFIED_WRITE_STDIO_SUPPORTED
static int counter = 0;
char name[32];
@@ -3181,6 +3219,51 @@ write_one_file(Image *output, Image *image, int convert_to_8bit)
else
return logerror(image, name, ": write failed", "");
#else /* SIMPLIFIED_WRITE_STDIO */
return logerror(image, "stdio", ": open: unsupported", "");
#endif /* SIMPLIFIED_WRITE_STDIO */
}
else /* use memory */
{
png_alloc_size_t size;
if (png_image_write_get_memory_size(image->image, size, convert_to_8bit,
image->buffer+16, (png_int_32)image->stride, image->colormap))
{
/* This is non-fatal but ignoring it was causing serious problems in
* the macro to be ignored:
*/
if (size > PNG_IMAGE_PNG_SIZE_MAX(image->image))
return logerror(image, "memory", ": PNG_IMAGE_SIZE_MAX wrong", "");
initimage(output, image->opts, "memory", image->stride_extra);
output->input_memory = malloc(size);
if (output->input_memory != NULL)
{
output->input_memory_size = size;
if (png_image_write_to_memory(&image->image, output->input_memory,
&output->input_memory_size, convert_to_8bit, image->buffer+16,
(png_int_32)image->stride, image->colormap))
{
/* This is also non-fatal but it safes safer to error out anyway:
*/
if (size != output->input_memory_size)
return logerror(image, "memory", ": memory size wrong", "");
}
else
return logerror(image, "memory", ": write failed", "");
}
else
return logerror(image, "memory", ": out of memory", "");
}
else
return logerror(image, "memory", ": get size:", "");
}
/* 'output' has an initialized temporary image, read this back in and compare
@@ -3356,6 +3439,8 @@ test_one_file(const char *file_name, format_list *formats, png_uint_32 opts,
int result;
Image image;
if (!(opts & NO_RESEED))
reseed(); /* ensure that the random numbers don't depend on file order */
newimage(&image);
initimage(&image, opts, file_name, stride_extra);
result = read_one_file(&image);
@@ -3393,7 +3478,7 @@ test_one_file(const char *file_name, format_list *formats, png_uint_32 opts,
int
main(int argc, char **argv)
{
png_uint_32 opts = FAST_WRITE;
png_uint_32 opts = FAST_WRITE | STRICT;
format_list formats;
const char *touch = NULL;
int log_pass = 0;
@@ -3402,11 +3487,17 @@ main(int argc, char **argv)
int retval = 0;
int c;
#if PNG_LIBPNG_VER >= 10700
/* This error should not exist in 1.7 or later: */
opts |= GBG_ERROR;
#endif
init_sRGB_to_d();
#if 0
init_error_via_linear();
#endif
format_init(&formats);
reseed(); /* initialize random number seeds */
for (c=1; c<argc; ++c)
{
@@ -3421,17 +3512,17 @@ main(int argc, char **argv)
}
else if (strcmp(arg, "--file") == 0)
# ifdef PNG_STDIO_SUPPORTED
opts |= READ_FILE;
opts |= USE_FILE;
# else
return 77; /* skipped: no support */
return SKIP; /* skipped: no support */
# endif
else if (strcmp(arg, "--memory") == 0)
opts &= ~READ_FILE;
opts &= ~USE_FILE;
else if (strcmp(arg, "--stdio") == 0)
# ifdef PNG_STDIO_SUPPORTED
opts |= USE_STDIO;
# else
return 77; /* skipped: no support */
return SKIP; /* skipped: no support */
# endif
else if (strcmp(arg, "--name") == 0)
opts &= ~USE_STDIO;
@@ -3457,10 +3548,16 @@ main(int argc, char **argv)
opts &= ~KEEP_GOING;
else if (strcmp(arg, "--strict") == 0)
opts |= STRICT;
else if (strcmp(arg, "--nostrict") == 0)
opts &= ~STRICT;
else if (strcmp(arg, "--sRGB-16bit") == 0)
opts |= sRGB_16BIT;
else if (strcmp(arg, "--linear-16bit") == 0)
opts &= ~sRGB_16BIT;
else if (strcmp(arg, "--noreseed") == 0)
opts |= NO_RESEED;
else if (strcmp(arg, "--fault-gbg-warning") == 0)
opts |= GBG_ERROR;
else if (strcmp(arg, "--tmpfile") == 0)
{
if (c+1 < argc)
@@ -3717,6 +3814,6 @@ int main(void)
{
fprintf(stderr, "pngstest: no read support in libpng, test skipped\n");
/* So the test is skipped: */
return 77;
return SKIP;
}
#endif /* PNG_SIMPLIFIED_READ_SUPPORTED */

View File

@@ -1,8 +1,8 @@
/* pngunknown.c - test the read side unknown chunk handling
*
* Last changed in libpng 1.6.10 [March 6, 2014]
* Copyright (c) 2014 Glenn Randers-Pehrson
* Last changed in libpng 1.6.22 [May 26, 2016]
* Copyright (c) 2015,2016 Glenn Randers-Pehrson
* Written by John Cunningham Bowler
*
* This code is released under the libpng license.
@@ -30,10 +30,21 @@
# include "../../png.h"
#endif
/* 1.6.1 added support for the configure test harness, which uses 77 to indicate
* a skipped test, in earlier versions we need to succeed on a skipped test, so:
*/
#if PNG_LIBPNG_VER >= 10601 && defined(HAVE_CONFIG_H)
# define SKIP 77
#else
# define SKIP 0
#endif
/* Since this program tests the ability to change the unknown chunk handling
* these must be defined:
*/
#if defined(PNG_SET_UNKNOWN_CHUNKS_SUPPORTED) &&\
defined(PNG_STDIO_SUPPORTED) &&\
defined(PNG_READ_SUPPORTED)
/* One of these must be defined to allow us to find out what happened. It is
@@ -615,7 +626,7 @@ get_unknown(display *d, png_infop info_ptr, int after_IDAT)
return flags;
}
#else
#else /* SAVE_UNKNOWN_CHUNKS */
static png_uint_32
get_unknown(display *d, png_infop info_ptr, int after_IDAT)
/* Otherwise this will return the cached values set by any user callback */
@@ -634,8 +645,8 @@ get_unknown(display *d, png_infop info_ptr, int after_IDAT)
* a check to ensure the logic is correct.
*/
# error No store support and no user chunk support, this will not work
# endif
#endif
# endif /* READ_USER_CHUNKS */
#endif /* SAVE_UNKNOWN_CHUNKS */
static int
check(FILE *fp, int argc, const char **argv, png_uint_32p flags/*out*/,
@@ -1001,6 +1012,20 @@ perform_one_test(FILE *fp, int argc, const char **argv,
def = check(fp, argc, argv, flags[1], d, set_callback);
/* If IDAT is being handled as unknown the image read is skipped and all the
* IDATs after the first end up in the end info struct, so in this case add
* IDAT to the list of unknowns. (Do this after 'check' above sets the
* chunk_info 'keep' fields.)
*
* Note that the flag setting has to be in the 'known' field to avoid
* triggering the consistency check below and the flag must only be set if
* there are multiple IDATs, so if the check above did find an unknown IDAT
* after IDAT.
*/
if (chunk_info[0/*IDAT*/].keep != PNG_HANDLE_CHUNK_AS_DEFAULT &&
(flags[1][3] & PNG_INFO_IDAT) != 0)
flags[0][2] |= PNG_INFO_IDAT;
/* Chunks should either be known or unknown, never both and this should apply
* whether the chunk is before or after the IDAT (actually, the app can
* probably change this by swapping the handling after the image, but this
@@ -1245,7 +1270,7 @@ main(void)
fprintf(stderr,
" test ignored: no support to find out about unknown chunks\n");
/* So the test is skipped: */
return 77;
return SKIP;
}
#endif /* READ_USER_CHUNKS || SAVE_UNKNOWN_CHUNKS */
@@ -1256,6 +1281,6 @@ main(void)
fprintf(stderr,
" test ignored: no support to modify unknown chunk handling\n");
/* So the test is skipped: */
return 77;
return SKIP;
}
#endif /* SET_UNKNOWN_CHUNKS && READ*/

View File

@@ -1,7 +1,7 @@
/* pngvalid.c - validate libpng by constructing then reading png files.
*
* Last changed in libpng 1.6.21 [January 15, 2016]
* Last changed in libpng 1.6.22 [May 26, 2016]
* Copyright (c) 2014-2016 Glenn Randers-Pehrson
* Written by John Cunningham Bowler
*
@@ -62,10 +62,10 @@
/* 1.6.1 added support for the configure test harness, which uses 77 to indicate
* a skipped test, in earlier versions we need to succeed on a skipped test, so:
*/
#if PNG_LIBPNG_VER < 10601
# define SKIP 0
#else
#if PNG_LIBPNG_VER >= 10601 && defined(HAVE_CONFIG_H)
# define SKIP 77
#else
# define SKIP 0
#endif
/* pngvalid requires write support and one of the fixed or floating point APIs.
@@ -309,13 +309,8 @@ static void r16(png_uint_16p p16, size_t count)
}
}
#ifdef __COVERITY__
# define R16(this)\
r16(&(this), (sizeof (this))/2U/*(sizeof (png_uint_16))*/)
#else
# define R16(this)\
r16(&(this), (sizeof (this))/(sizeof (png_uint_16)))
#endif
#define R16(this) r16(&(this), (sizeof (this))/(sizeof (png_uint_16)))
#define R16_1(this) r16(&(this), (size_t) 1U)
#if defined PNG_READ_RGB_TO_GRAY_SUPPORTED ||\
defined PNG_READ_FILLER_SUPPORTED
@@ -331,13 +326,8 @@ static void r32(png_uint_32p p32, size_t count)
}
}
#ifdef __COVERITY__
# define R32(this)\
r32(&(this), (sizeof (this))/4U/*(sizeof (png_uint_32))*/)
#else
# define R32(this)\
r32(&(this), (sizeof (this))/(sizeof (png_uint_32)))
#endif
#define R32(this) r32(&(this), (sizeof (this))/(sizeof (png_uint_32)))
#define R32_1(this) r32(&(this), (size_t) 1U)
#endif /* READ_FILLER || READ_RGB_TO_GRAY */
@@ -350,7 +340,7 @@ random_mod(unsigned int max)
{
png_uint_16 x;
R16(x);
R16_1(x);
return x % max; /* 0 .. max-1 */
}
@@ -3317,10 +3307,10 @@ init_standard_palette(png_store *ps, png_structp pp, png_infop pi, int npalette,
for (; i<256; ++i)
tRNS[i] = 24;
# ifdef PNG_WRITE_tRNS_SUPPORTED
if (j > 0)
png_set_tRNS(pp, pi, tRNS, j, 0/*color*/);
# endif
#ifdef PNG_WRITE_tRNS_SUPPORTED
if (j > 0)
png_set_tRNS(pp, pi, tRNS, j, 0/*color*/);
#endif
}
}
@@ -7313,7 +7303,7 @@ image_transform_png_set_rgb_to_gray_ini(const image_transform *this,
png_uint_32 ru;
double total;
R32(ru);
R32_1(ru);
data.green_coefficient = total = (ru & 0xffff) / 65535.;
ru >>= 16;
data.red_coefficient = (1 - total) * (ru & 0xffff) / 65535.;
@@ -7947,11 +7937,11 @@ image_transform_png_set_background_set(const image_transform *this,
else
back.gray = (png_uint_16)data.red;
# ifdef PNG_FLOATING_POINT_SUPPORTED
png_set_background(pp, &back, PNG_BACKGROUND_GAMMA_FILE, expand, 0);
# else
png_set_background_fixed(pp, &back, PNG_BACKGROUND_GAMMA_FILE, expand, 0);
# endif
#ifdef PNG_FLOATING_POINT_SUPPORTED
png_set_background(pp, &back, PNG_BACKGROUND_GAMMA_FILE, expand, 0);
#else
png_set_background_fixed(pp, &back, PNG_BACKGROUND_GAMMA_FILE, expand, 0);
#endif
this->next->set(this->next, that, pp, pi);
}

View File

@@ -1,8 +1,8 @@
/* timepng.c
*
* Copyright (c) 2013 John Cunningham Bowler
* Copyright (c) 2013,2016 John Cunningham Bowler
*
* Last changed in libpng 1.6.1 [March 28, 2013]
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -10,15 +10,17 @@
*
* Load an arbitrary number of PNG files (from the command line, or, if there
* are no arguments on the command line, from stdin) then run a time test by
* reading each file by row. The test does nothing with the read result and
* does no transforms. The only output is a time as a floating point number of
* seconds with 9 decimal digits.
* reading each file by row or by image (possibly with transforms in the latter
* case). The only output is a time as a floating point number of seconds with
* 9 decimal digits.
*/
#define _POSIX_C_SOURCE 199309L /* for clock_gettime */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <time.h>
@@ -35,36 +37,73 @@
# include "../../png.h"
#endif
static int read_png(FILE *fp)
/* The following is to support direct compilation of this file as C++ */
#ifdef __cplusplus
# define voidcast(type, value) static_cast<type>(value)
#else
# define voidcast(type, value) (value)
#endif /* __cplusplus */
/* 'CLOCK_PROCESS_CPUTIME_ID' is one of the clock timers for clock_gettime. It
* need not be supported even when clock_gettime is available. It returns the
* 'CPU' time the process has consumed. 'CPU' time is assumed to include time
* when the CPU is actually blocked by a pending cache fill but not time
* waiting for page faults. The attempt is to get a measure of the actual time
* the implementation takes to read a PNG ignoring the potentially very large IO
* overhead.
*/
#if defined (CLOCK_PROCESS_CPUTIME_ID) && defined(PNG_STDIO_SUPPORTED) &&\
defined(PNG_EASY_ACCESS_SUPPORTED) &&\
(PNG_LIBPNG_VER >= 10700 ? defined(PNG_READ_PNG_SUPPORTED) :\
defined (PNG_SEQUENTIAL_READ_SUPPORTED) &&\
defined(PNG_INFO_IMAGE_SUPPORTED))
typedef struct
{
png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING,0,0,0);
png_infop info_ptr = NULL;
png_bytep row = NULL, display = NULL;
FILE *input;
FILE *output;
} io_data;
if (png_ptr == NULL)
return 0;
static PNG_CALLBACK(void, read_and_copy,
(png_structp png_ptr, png_bytep buffer, png_size_t cb))
{
io_data *io = (io_data*)png_get_io_ptr(png_ptr);
if (setjmp(png_jmpbuf(png_ptr)))
if (fread(buffer, cb, 1, io->input) != 1)
png_error(png_ptr, strerror(errno));
if (fwrite(buffer, cb, 1, io->output) != 1)
{
png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
if (row != NULL) free(row);
if (display != NULL) free(display);
return 0;
perror("temporary file");
fprintf(stderr, "temporary file PNG write failed\n");
exit(1);
}
}
png_init_io(png_ptr, fp);
static void read_by_row(png_structp png_ptr, png_infop info_ptr,
FILE *write_ptr, FILE *read_ptr)
{
/* These don't get freed on error, this is fine; the program immediately
* exits.
*/
png_bytep row = NULL, display = NULL;
io_data io_copy;
info_ptr = png_create_info_struct(png_ptr);
if (info_ptr == NULL)
png_error(png_ptr, "OOM allocating info structure");
if (write_ptr != NULL)
{
/* Set up for a copy to the temporary file: */
io_copy.input = read_ptr;
io_copy.output = write_ptr;
png_set_read_fn(png_ptr, &io_copy, read_and_copy);
}
png_read_info(png_ptr, info_ptr);
{
png_size_t rowbytes = png_get_rowbytes(png_ptr, info_ptr);
row = malloc(rowbytes);
display = malloc(rowbytes);
row = voidcast(png_bytep,malloc(rowbytes));
display = voidcast(png_bytep,malloc(rowbytes));
if (row == NULL || display == NULL)
png_error(png_ptr, "OOM allocating row buffers");
@@ -81,7 +120,8 @@ static int read_png(FILE *fp)
png_uint_32 y = height;
/* NOTE: this trashes the row each time; interlace handling won't
* work, but this avoids memory thrashing for speed testing.
* work, but this avoids memory thrashing for speed testing and is
* somewhat representative of an application that works row-by-row.
*/
while (y-- > 0)
png_read_row(png_ptr, row, display);
@@ -91,9 +131,51 @@ static int read_png(FILE *fp)
/* Make sure to read to the end of the file: */
png_read_end(png_ptr, info_ptr);
png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
/* Free this up: */
free(row);
free(display);
}
static PNG_CALLBACK(void, no_warnings, (png_structp png_ptr,
png_const_charp warning))
{
(void)png_ptr;
(void)warning;
}
static int read_png(FILE *fp, png_int_32 transforms, FILE *write_file)
{
png_structp png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING,0,0,
no_warnings);
png_infop info_ptr = NULL;
if (png_ptr == NULL)
return 0;
if (setjmp(png_jmpbuf(png_ptr)))
{
png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
return 0;
}
# ifdef PNG_BENIGN_ERRORS_SUPPORTED
png_set_benign_errors(png_ptr, 1/*allowed*/);
# endif
png_init_io(png_ptr, fp);
info_ptr = png_create_info_struct(png_ptr);
if (info_ptr == NULL)
png_error(png_ptr, "OOM allocating info structure");
if (transforms < 0)
read_by_row(png_ptr, info_ptr, write_file, fp);
else
png_read_png(png_ptr, info_ptr, transforms, NULL/*params*/);
png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
return 1;
}
@@ -108,7 +190,7 @@ static int mytime(struct timespec *t)
return 0;
}
static int perform_one_test(FILE *fp, int nfiles)
static int perform_one_test(FILE *fp, int nfiles, png_int_32 transforms)
{
int i;
struct timespec before, after;
@@ -120,7 +202,7 @@ static int perform_one_test(FILE *fp, int nfiles)
{
for (i=0; i<nfiles; ++i)
{
if (read_png(fp))
if (read_png(fp, transforms, NULL/*write*/))
{
if (ferror(fp))
{
@@ -184,120 +266,343 @@ static int add_one_file(FILE *fp, char *name)
if (ip != NULL)
{
int ch;
for (;;)
/* Read the file using libpng; this detects errors and also deals with
* files which contain data beyond the end of the file.
*/
int ok = 0;
fpos_t pos;
if (fgetpos(fp, &pos))
{
ch = getc(ip);
if (ch == EOF) break;
putc(ch, fp);
/* Fatal error reading the start: */
perror("temporary file");
fprintf(stderr, "temporary file fgetpos error\n");
exit(1);
}
if (ferror(ip))
if (read_png(ip, -1/*by row*/, fp/*output*/))
{
perror(name);
fprintf(stderr, "%s: read error\n", name);
return 0;
if (ferror(ip))
{
perror(name);
fprintf(stderr, "%s: read error\n", name);
}
else
ok = 1; /* read ok */
}
else
fprintf(stderr, "%s: file not added\n", name);
(void)fclose(ip);
/* An error in the output is fatal; exit immediately: */
if (ferror(fp))
{
perror("temporary file");
fprintf(stderr, "temporary file write error\n");
return 0;
exit(1);
}
if (ok)
return 1;
/* Did not read the file successfully, simply rewind the temporary
* file. This must happen after the ferror check above to avoid clearing
* the error.
*/
if (fsetpos(fp, &pos))
{
perror("temporary file");
fprintf(stderr, "temporary file fsetpos error\n");
exit(1);
}
}
else
{
/* file open error: */
perror(name);
fprintf(stderr, "%s: open failed\n", name);
return 0;
}
return 1;
return 0; /* file not added */
}
static void
usage(FILE *fp)
{
if (fp != NULL) fclose(fp);
fprintf(stderr,
"Usage:\n"
" timepng --assemble <assembly> {files}\n"
" Read the files into <assembly>, output the count. Options are ignored.\n"
" timepng --dissemble <assembly> <count> [options]\n"
" Time <count> files from <assembly>, additional files may not be given.\n"
" Otherwise:\n"
" Read the files into a temporary file and time the decode\n"
"Transforms:\n"
" --by-image: read by image with png_read_png\n"
" --<transform>: implies by-image, use PNG_TRANSFORM_<transform>\n"
" Otherwise: read by row using png_read_row (to a single row buffer)\n"
/* ISO C90 string length max 509 */);fprintf(stderr,
"{files}:\n"
" PNG files to copy into the assembly and time. Invalid files are skipped\n"
" with appropriate error messages. If no files are given the list of files\n"
" is read from stdin with each file name terminated by a newline\n"
"Output:\n"
" For --assemble the output is the name of the assembly file followed by the\n"
" count of the files it contains; the arguments for --dissemble. Otherwise\n"
" the output is the total decode time in seconds.\n");
exit(99);
}
int main(int argc, char **argv)
{
int ok = 0;
FILE *fp = tmpfile();
int err = 0;
int nfiles = 0;
int transforms = -1; /* by row */
const char *assembly = NULL;
FILE *fp;
if (fp != NULL)
if (argc > 2 && strcmp(argv[1], "--assemble") == 0)
{
int err = 0;
int nfiles = 0;
if (argc > 1)
/* Just build the test file, argv[2] is the file name. */
assembly = argv[2];
fp = fopen(assembly, "wb");
if (fp == NULL)
{
int i;
perror(assembly);
fprintf(stderr, "timepng --assemble %s: could not open for write\n",
assembly);
usage(NULL);
}
for (i=1; i<argc; ++i)
argv += 2;
argc -= 2;
}
else if (argc > 3 && strcmp(argv[1], "--dissemble") == 0)
{
fp = fopen(argv[2], "rb");
if (fp == NULL)
{
perror(argv[2]);
fprintf(stderr, "timepng --dissemble %s: could not open for read\n",
argv[2]);
usage(NULL);
}
nfiles = atoi(argv[3]);
if (nfiles <= 0)
{
fprintf(stderr,
"timepng --dissemble <file> <count>: %s is not a count\n",
argv[3]);
exit(99);
}
#ifdef __COVERITY__
else
{
nfiles &= PNG_UINT_31_MAX;
}
#endif
argv += 3;
argc -= 3;
}
else /* Else use a temporary file */
{
#ifndef __COVERITY__
fp = tmpfile();
#else
/* Experimental. Coverity says tmpfile() is insecure because it
* generates predictable names.
*
* It is possible to satisfy Coverity by using mkstemp(); however,
* any platform supporting mkstemp() undoubtedly has a secure tmpfile()
* implementation as well, and doesn't need the fix. Note that
* the fix won't work on platforms that don't support mkstemp().
*
* https://www.securecoding.cert.org/confluence/display/c/
* FIO21-C.+Do+not+create+temporary+files+in+shared+directories
* says that most historic implementations of tmpfile() provide
* only a limited number of possible temporary file names
* (usually 26) before file names are recycled. That article also
* provides a secure solution that unfortunately depends upon mkstemp().
*/
char tmpfile[] = "timepng-XXXXXX";
int filedes;
umask(0177);
filedes = mkstemp(tmpfile);
if (filedes < 0)
fp = NULL;
else
{
fp = fdopen(filedes,"w+");
/* Hide the filename immediately and ensure that the file does
* not exist after the program ends
*/
(void) unlink(tmpfile);
}
#endif
if (fp == NULL)
{
perror("tmpfile");
fprintf(stderr, "timepng: could not open the temporary file\n");
exit(1); /* not a user error */
}
}
/* Handle the transforms: */
while (argc > 1 && argv[1][0] == '-' && argv[1][1] == '-')
{
const char *opt = *++argv + 2;
--argc;
/* Transforms turn on the by-image processing and maybe set some
* transforms:
*/
if (transforms == -1)
transforms = PNG_TRANSFORM_IDENTITY;
if (strcmp(opt, "by-image") == 0)
{
/* handled above */
}
# define OPT(name) else if (strcmp(opt, #name) == 0)\
transforms |= PNG_TRANSFORM_ ## name
OPT(STRIP_16);
OPT(STRIP_ALPHA);
OPT(PACKING);
OPT(PACKSWAP);
OPT(EXPAND);
OPT(INVERT_MONO);
OPT(SHIFT);
OPT(BGR);
OPT(SWAP_ALPHA);
OPT(SWAP_ENDIAN);
OPT(INVERT_ALPHA);
OPT(STRIP_FILLER);
OPT(STRIP_FILLER_BEFORE);
OPT(STRIP_FILLER_AFTER);
OPT(GRAY_TO_RGB);
OPT(EXPAND_16);
OPT(SCALE_16);
else
{
fprintf(stderr, "timepng %s: unrecognized transform\n", opt);
usage(fp);
}
}
/* Handle the files: */
if (argc > 1 && nfiles > 0)
usage(fp); /* Additional files not valid with --dissemble */
else if (argc > 1)
{
int i;
for (i=1; i<argc; ++i)
{
if (nfiles == INT_MAX)
{
if (add_one_file(fp, argv[i]))
fprintf(stderr, "%s: skipped, too many files\n", argv[i]);
break;
}
else if (add_one_file(fp, argv[i]))
++nfiles;
}
}
else if (nfiles == 0) /* Read from stdin withoout --dissemble */
{
char filename[FILENAME_MAX+1];
while (fgets(filename, FILENAME_MAX+1, stdin))
{
size_t len = strlen(filename);
if (filename[len-1] == '\n')
{
filename[len-1] = 0;
if (nfiles == INT_MAX)
{
fprintf(stderr, "%s: skipped, too many files\n", filename);
break;
}
else if (add_one_file(fp, filename))
++nfiles;
}
else
{
fprintf(stderr, "timepng: file name too long: ...%s\n",
filename+len-32);
err = 1;
break;
}
}
if (ferror(stdin))
{
fprintf(stderr, "timepng: stdin: read error\n");
err = 1;
}
}
/* Perform the test, or produce the --assemble output: */
if (!err)
{
if (nfiles > 0)
{
if (assembly != NULL)
{
if (fflush(fp) && !ferror(fp) && fclose(fp))
{
perror(assembly);
fprintf(stderr, "%s: close failed\n", assembly);
}
else
{
err = 1;
break;
printf("%s %d\n", assembly, nfiles);
fflush(stdout);
ok = !ferror(stdout);
}
}
else
{
ok = perform_one_test(fp, nfiles, transforms);
(void)fclose(fp);
}
}
else
{
char filename[FILENAME_MAX+1];
while (fgets(filename, FILENAME_MAX+1, stdin))
{
size_t len = strlen(filename);
if (filename[len-1] == '\n')
{
filename[len-1] = 0;
if (add_one_file(fp, filename))
++nfiles;
else
{
err = 1;
break;
}
}
else
{
fprintf(stderr, "timepng: truncated file name ...%s\n",
filename+len-32);
err = 1;
break;
}
}
if (ferror(stdin))
{
fprintf(stderr, "timepng: stdin: read error\n");
err = 1;
}
}
if (!err)
{
if (nfiles > 0)
ok = perform_one_test(fp, nfiles);
else
fprintf(stderr, "usage: timepng {files} or ls files | timepng\n");
}
(void)fclose(fp);
usage(fp);
}
else
fprintf(stderr, "timepng: could not open temporary file\n");
(void)fclose(fp);
/* Exit code 0 on success. */
return ok == 0;
}
#else /* !sufficient support */
int main(void) { return 77; }
#endif /* !sufficient support */