[master] Imported from libpng-1.6.29.tar

This commit is contained in:
Glenn Randers-Pehrson
2017-03-16 07:21:03 -05:00
parent 0a181e1665
commit b363e01e6b
46 changed files with 1487 additions and 477 deletions

View File

@@ -1,10 +1,12 @@
/*- pngtopng
*
* COPYRIGHT: Written by John Cunningham Bowler, 2011.
* COPYRIGHT: Written by John Cunningham Bowler, 2011, 2017.
* To the extent possible under law, the author has waived all copyright and
* related or neighboring rights to this work. This work is published from:
* United States.
*
* Last changed in libpng 1.6.29 [%RDATE%]
*
* Read a PNG and write it out in a fixed format, using the 'simplified API'
* that was introduced in libpng-1.6.0.
*
@@ -59,26 +61,27 @@ int main(int argc, const char **argv)
else
fprintf(stderr, "pngtopng: write %s: %s\n", argv[2],
image.message);
free(buffer);
}
else
{
fprintf(stderr, "pngtopng: read %s: %s\n", argv[1],
image.message);
/* This is the only place where a 'free' is required; libpng does
* the cleanup on error and success, but in this case we couldn't
* complete the read because of running out of memory.
*/
png_image_free(&image);
}
free(buffer);
}
else
{
fprintf(stderr, "pngtopng: out of memory: %lu bytes\n",
(unsigned long)PNG_IMAGE_SIZE(image));
/* This is the only place where a 'free' is required; libpng does
* the cleanup on error and success, but in this case we couldn't
* complete the read because of running out of memory and so libpng
* has not got to the point where it can do cleanup.
*/
png_image_free(&image);
}
}
else
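For context, the hunk above shows only pngtopng's error handling. A minimal sketch of the full simplified-API flow the program follows is given below; the helper name copy_png and the choice of PNG_FORMAT_RGBA are illustrative, while the png_image_* calls are the documented libpng 1.6 simplified API.

/* Illustrative sketch only; not part of the diff above. */
#include <stdlib.h>
#include <string.h>
#include <png.h>

static int copy_png(const char *in, const char *out)
{
   png_image image;                     /* simplified-API control structure */
   png_bytep buffer;

   memset(&image, 0, sizeof image);
   image.version = PNG_IMAGE_VERSION;

   if (png_image_begin_read_from_file(&image, in) == 0)
      return 0;                         /* image.message holds the error text */

   image.format = PNG_FORMAT_RGBA;      /* request one fixed output format */
   buffer = malloc(PNG_IMAGE_SIZE(image));

   if (buffer == NULL)
   {
      /* libpng has not reached the point where it cleans up for us. */
      png_image_free(&image);
      return 0;
   }

   if (png_image_finish_read(&image, NULL/*background*/, buffer,
          0/*row_stride*/, NULL/*colormap*/) == 0 ||
       png_image_write_to_file(&image, out, 0/*convert_to_8bit*/, buffer,
          0/*row_stride*/, NULL/*colormap*/) == 0)
   {
      free(buffer);
      return 0;
   }

   free(buffer);
   return 1;
}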

View File

@@ -1,158 +0,0 @@
Enabling SSE support
Copyright (c) 2016 Google, Inc.
Written by Mike Klein, Matt Sarett
This INSTALL file written by Glenn Randers-Pehrson, 2016.
If you have moved intel_init.c and filter_sse2_intrinsics.c to a different
directory, be sure to update the '#include "../../pngpriv.h"' line in both
files if necessary to point to the correct relative location of pngpriv.h
with respect to the new location of those files.
To enable SSE support in libpng, follow the instructions in I, II, or III,
below:
I. Using patched "configure" scripts:
First, apply intel_sse.patch in your build directory.
patch -i contrib/intel/intel_sse.patch -p1
Then, if you are not building in a new GIT clone, e.g., in a tar
distribution, remove any existing pre-built configure scripts:
./configure --enable-maintainer-mode
make maintainer-clean
./autogen.sh --maintainer --clean
Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS:
./autogen.sh --maintainer
CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options]
make CPPFLAGS="-DPNG_INTEL_SSE" [options]
make
II. Using a custom makefile:
If you are using a custom makefile, you will have to update it
manually to include contrib/intel/*.o in the dependencies, and to define
PNG_INTEL_SSE.
III. Using manually updated "configure" scripts:
If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am,
following the instructions below, then follow the instructions in
section II of INSTALL in the main libpng directory, then configure libpng
with -DPNG_INTEL_SSE in CPPFLAGS.
1. Add the following code to configure.ac under HOST SPECIFIC OPTIONS
directly beneath the section for ARM:
-----------------cut----------------
# INTEL
# =====
#
# INTEL SSE (SIMD) support.
AC_ARG_ENABLE([intel-sse],
AS_HELP_STRING([[[--enable-intel-sse]]],
[Enable Intel SSE optimizations: =no/off, yes/on:]
[no/off: disable the optimizations;]
[yes/on: enable the optimizations.]
[If not specified: determined by the compiler.]),
[case "$enableval" in
no|off)
# disable the default enabling:
AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
[Disable Intel SSE optimizations])
# Prevent inclusion of the assembler files below:
enable_intel_sse=no;;
yes|on)
AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
[Enable Intel SSE optimizations]);;
*)
AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
esac])
# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
# or where Intel optimizations were explicitly requested (this allows a
# fallback if a future host CPU does not match 'x86*')
AM_CONDITIONAL([PNG_INTEL_SSE],
[test "$enable_intel_sse" != 'no' &&
case "$host_cpu" in
i?86|x86_64) :;;
*) test "$enable_intel_sse" != '';;
esac])
-----------------cut----------------
2. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS
directly beneath the "if PNG_ARM_NEON ... endif" statement:
-----------------cut----------------
if PNG_INTEL_SSE
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
contrib/intel/filter_sse2_intrinsics.c
endif
-----------------cut----------------
3. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT
code:
-----------------cut----------------
#ifndef PNG_INTEL_SSE_OPT
# ifdef PNG_INTEL_SSE
/* Only check for SSE if the build configuration has been modified to
* enable SSE optimizations. This means that these optimizations will
* be off by default. See contrib/intel for more details.
*/
# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
(defined(_M_IX86_FP) && _M_IX86_FP >= 2)
# define PNG_INTEL_SSE_OPT 1
# endif
# endif
#endif
#if PNG_INTEL_SSE_OPT > 0
# ifndef PNG_INTEL_SSE_IMPLEMENTATION
# if defined(__SSE4_1__) || defined(__AVX__)
/* We are not actually using AVX, but checking for AVX is the best
way we can detect SSE4.1 and SSSE3 on MSVC.
*/
# define PNG_INTEL_SSE_IMPLEMENTATION 3
# elif defined(__SSSE3__)
# define PNG_INTEL_SSE_IMPLEMENTATION 2
# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
(defined(_M_IX86_FP) && _M_IX86_FP >= 2)
# define PNG_INTEL_SSE_IMPLEMENTATION 1
# else
# define PNG_INTEL_SSE_IMPLEMENTATION 0
# endif
# endif
# if PNG_INTEL_SSE_IMPLEMENTATION > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
# endif
#endif
-----------------cut----------------
4. Add the following lines to pngpriv.h, following the prototype for
png_read_filter_row_paeth4_neon:
-----------------cut----------------
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-----------------cut----------------

View File

@@ -1,379 +0,0 @@
/* filter_sse2_intrinsics.c - SSE2 optimized filter functions
*
* Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett
* Derived from arm/filter_neon_intrinsics.c, which was
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
*
* Last changed in libpng 1.6.24 [August 4, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
#include <immintrin.h>
/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
* They're positioned like this:
* prev: c b
* row: a d
* The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
* whichever of a, b, or c is closest to p=a+b-c.
*/
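For reference, a scalar sketch (not part of the imported file) of the Paeth predictor that the SSE2 functions below vectorize; the helper name is illustrative and abs() comes from <stdlib.h>:

static unsigned int paeth_predictor(unsigned int a, unsigned int b,
   unsigned int c)
{
   int p  = (int)a + (int)b - (int)c;   /* initial estimate p = a+b-c   */
   int pa = abs(p - (int)a);            /* distance to the left pixel   */
   int pb = abs(p - (int)b);            /* distance to the above pixel  */
   int pc = abs(p - (int)c);            /* distance to the upper-left   */

   /* Ties break in the order a, b, c, the same order enforced by the
    * nested if_then_else() calls in the SSE2 code further below.
    */
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}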
static __m128i load4(const void* p) {
return _mm_cvtsi32_si128(*(const int*)p);
}
static void store4(void* p, __m128i v) {
*(int*)p = _mm_cvtsi128_si32(v);
}
static __m128i load3(const void* p) {
/* We'll load 2 bytes, then 1 byte,
* then mask them together, and finally load into SSE.
*/
const png_uint_16* p01 = p;
const png_byte* p2 = (const png_byte*)(p01+1);
png_uint_32 v012 = (png_uint_32)(*p01)
| (png_uint_32)(*p2) << 16;
return load4(&v012);
}
static void store3(void* p, __m128i v) {
/* We'll pull from SSE as a 32-bit int, then write
* its bottom two bytes, then its third byte.
*/
png_uint_32 v012;
store4(&v012, v);
png_uint_16* p01 = p;
png_byte* p2 = (png_byte*)(p01+1);
*p01 = v012;
*p2 = v012 >> 16;
}
void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Sub filter predicts each pixel as the previous pixel, a.
* There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero.
*/
png_debug(1, "in png_read_filter_row_sub3_sse2");
__m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes;
while (rb >= 4) {
a = d; d = load4(row);
d = _mm_add_epi8(d, a);
store3(row, d);
row += 3;
rb -= 3;
}
if (rb > 0) {
a = d; d = load3(row);
d = _mm_add_epi8(d, a);
store3(row, d);
row += 3;
rb -= 3;
}
}
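For comparison, the scalar recurrence that the Sub functions implement is simply the following (an illustrative sketch, not from the imported file; it assumes the usual libpng headers for png_bytep and size_t):

static void undo_sub_filter(png_bytep row, size_t rowbytes, unsigned int bpp)
{
   size_t i;

   /* d = d + a, wrapping modulo 256, one byte at a time; the first pixel
    * (bytes 0..bpp-1) is stored unfiltered, i.e. its left pixel is zero.
    */
   for (i = bpp; i < rowbytes; i++)
      row[i] = (png_byte)(row[i] + row[i - bpp]);
}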
void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Sub filter predicts each pixel as the previous pixel, a.
* There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero.
*/
png_debug(1, "in png_read_filter_row_sub4_sse2");
__m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes;
while (rb > 0) {
a = d; d = load4(row);
d = _mm_add_epi8(d, a);
store4(row, d);
row += 4;
rb -= 4;
}
}
void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Avg filter predicts each pixel as the (truncated) average of a and b.
* There's no pixel to the left of the first pixel. Luckily, it's
* predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero.
*/
png_debug(1, "in png_read_filter_row_avg3_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i b;
__m128i a, d = zero;
int rb = row_info->rowbytes;
while (rb >= 4) {
b = load4(prev);
a = d; d = load4(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store3(row, d);
prev += 3;
row += 3;
rb -= 3;
}
if (rb > 0) {
b = load3(prev);
a = d; d = load3(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store3(row, d);
prev += 3;
row += 3;
rb -= 3;
}
}
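The fix-up above rests on a small scalar identity (sketch only, not from the imported file): the rounded-up average that _mm_avg_epu8 computes exceeds the truncated average PNG requires by exactly one when the two inputs differ in their low bit.

static png_byte avg_truncated(png_byte a, png_byte b)
{
   png_byte rounded_up = (png_byte)((a + b + 1) >> 1); /* what avg_epu8 gives */

   /* (a ^ b) & 1 is 1 exactly when a + b is odd, i.e. when rounding occurred */
   return (png_byte)(rounded_up - ((a ^ b) & 1));      /* equals (a + b) >> 1 */
}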
void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Avg filter predicts each pixel as the (truncated) average of a and b.
* There's no pixel to the left of the first pixel. Luckily, it's
* predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero.
*/
png_debug(1, "in png_read_filter_row_avg4_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i b;
__m128i a, d = zero;
int rb = row_info->rowbytes;
while (rb > 0) {
b = load4(prev);
a = d; d = load4(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store4(row, d);
prev += 4;
row += 4;
rb -= 4;
}
}
/* Returns |x| for 16-bit lanes. */
static __m128i abs_i16(__m128i x) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 2
return _mm_abs_epi16(x);
#else
/* Read this all as, return x<0 ? -x : x.
* To negate two's complement, you flip all the bits then add 1.
*/
__m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
/* Flip negative lanes. */
x = _mm_xor_si128(x, is_negative);
/* +1 to negative lanes, else +0. */
x = _mm_sub_epi16(x, is_negative);
return x;
#endif
}
/* Bytewise c ? t : e. */
static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 3
return _mm_blendv_epi8(e,t,c);
#else
return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
#endif
}
void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
* and two pixels from the previous row, b and c:
* prev: c b
* row: a d
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
* p=a+b-c.
*
* The first pixel has no left context, and so uses an Up filter, p = b.
* This works naturally with our main loop's p = a+b-c if we force a and c
* to zero.
* Here we zero b and d, which become c and a respectively at the start of
* the loop.
*/
png_debug(1, "in png_read_filter_row_paeth3_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero,
a, d = zero;
int rb = row_info->rowbytes;
while (rb >= 4) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 256. */
d = _mm_add_epi8(d, nearest);
store3(row, _mm_packus_epi16(d,d));
prev += 3;
row += 3;
rb -= 3;
}
if (rb > 0) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 256. */
d = _mm_add_epi8(d, nearest);
store3(row, _mm_packus_epi16(d,d));
prev += 3;
row += 3;
rb -= 3;
}
}
void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
* and two pixels from the previous row, b and c:
* prev: c b
* row: a d
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
* p=a+b-c.
*
* The first pixel has no left context, and so uses an Up filter, p = b.
* This works naturally with our main loop's p = a+b-c if we force a and c
* to zero.
* Here we zero b and d, which become c and a respectively at the start of
* the loop.
*/
png_debug(1, "in png_read_filter_row_paeth4_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero,
a, d = zero;
int rb = row_info->rowbytes;
while (rb > 0) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 256. */
d = _mm_add_epi8(d, nearest);
store4(row, _mm_packus_epi16(d,d));
prev += 4;
row += 4;
rb -= 4;
}
}
#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
#endif /* READ */

View File

@@ -1,54 +0,0 @@
/* intel_init.c - SSE2 optimized filter functions
*
* Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett
* Derived from arm/arm_init.c, which was
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
*
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
void
png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
{
/* The techniques used to implement each of these filters in SSE operate on
* one pixel at a time.
* So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
* They can scale up to 6 and 8 bpp images and down to 2 bpp images,
* but they are unlikely to have any benefit for 1bpp images.
* Most of these can be implemented using only MMX and 64-bit registers,
* but they end up a bit slower than using the equally-ubiquitous SSE2.
*/
png_debug(1, "in png_init_filter_functions_sse2");
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth3_sse2;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth4_sse2;
}
/* No need to optimize PNG_FILTER_VALUE_UP.  The compiler should
* autovectorize.
*/
}
#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
#endif /* PNG_READ_SUPPORTED */

View File

@@ -1,190 +0,0 @@
diff --git a/configure.ac b/configure.ac
--- a/configure.ac 2016-08-29 11:46:27.000000000 -0400
+++ b/configure.ac 2016-08-29 16:57:03.866355018 -0400
@@ -386,16 +386,51 @@ AC_ARG_ENABLE([mips-msa],
# future host CPU does not match 'mips*')
AM_CONDITIONAL([PNG_MIPS_MSA],
[test "$enable_mips_msa" != 'no' &&
case "$host_cpu" in
mipsel*|mips64el*) :;;
esac])
+# INTEL
+# =====
+#
+# INTEL SSE (SIMD) support.
+
+AC_ARG_ENABLE([intel-sse],
+ AS_HELP_STRING([[[--enable-intel-sse]]],
+ [Enable Intel SSE optimizations: =no/off, yes/on:]
+ [no/off: disable the optimizations;]
+ [yes/on: enable the optimizations.]
+ [If not specified: determined by the compiler.]),
+ [case "$enableval" in
+ no|off)
+ # disable the default enabling:
+ AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
+ [Disable Intel SSE optimizations])
+ # Prevent inclusion of the assembler files below:
+ enable_intel_sse=no;;
+ yes|on)
+ AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
+ [Enable Intel SSE optimizations]);;
+ *)
+ AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
+ esac])
+
+# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
+# or where Intel optimizations were explicitly requested (this allows a
+# fallback if a future host CPU does not match 'x86*')
+AM_CONDITIONAL([PNG_INTEL_SSE],
+ [test "$enable_intel_sse" != 'no' &&
+ case "$host_cpu" in
+ i?86|x86_64) :;;
+ *) test "$enable_intel_sse" != '';;
+ esac])
+
AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
# Config files, substituting as above
AC_CONFIG_FILES([Makefile libpng.pc:libpng.pc.in])
AC_CONFIG_FILES([libpng-config:libpng-config.in],
[chmod +x libpng-config])
AC_OUTPUT
diff --git a/Makefile.am b/Makefile.am
--- a/Makefile.am 2016-08-29 11:46:27.000000000 -0400
+++ b/Makefile.am 2016-08-29 16:57:45.955528215 -0400
@@ -97,16 +97,21 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SO
arm/filter_neon.S arm/filter_neon_intrinsics.c
endif
if PNG_MIPS_MSA
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\
mips/filter_msa_intrinsics.c
endif
+if PNG_INTEL_SSE
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
+ contrib/intel/filter_sse2_intrinsics.c
+endif
+
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \
-version-number @PNGLIB_MAJOR@@PNGLIB_MINOR@:@PNGLIB_RELEASE@:0
if HAVE_LD_VERSION_SCRIPT
# Versioned symbols and restricted exports
if HAVE_SOLARIS_LD
diff --git a/pngpriv.h b/pngpriv.h
--- debug16/pngpriv.h 2016-08-30 10:46:36.000000000 -0400
+++ libpng16/pngpriv.h 2016-08-30 11:57:25.672280202 -0400
@@ -185,16 +185,52 @@
#ifndef PNG_MIPS_MSA_OPT
# if defined(__mips_msa) && (__mips_isa_rev >= 5) && defined(PNG_ALIGNED_MEMORY_SUPPORTED)
# define PNG_MIPS_MSA_OPT 2
# else
# define PNG_MIPS_MSA_OPT 0
# endif
#endif
+#ifndef PNG_INTEL_SSE_OPT
+# ifdef PNG_INTEL_SSE
+ /* Only check for SSE if the build configuration has been modified to
+ * enable SSE optimizations. This means that these optimizations will
+ * be off by default. See contrib/intel for more details.
+ */
+# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
+ defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+# define PNG_INTEL_SSE_OPT 1
+# endif
+# endif
+#endif
+
+#if PNG_INTEL_SSE_OPT > 0
+# ifndef PNG_INTEL_SSE_IMPLEMENTATION
+# if defined(__SSE4_1__) || defined(__AVX__)
+ /* We are not actually using AVX, but checking for AVX is the best
+ way we can detect SSE4.1 and SSSE3 on MSVC.
+ */
+# define PNG_INTEL_SSE_IMPLEMENTATION 3
+# elif defined(__SSSE3__)
+# define PNG_INTEL_SSE_IMPLEMENTATION 2
+# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+# define PNG_INTEL_SSE_IMPLEMENTATION 1
+# else
+# define PNG_INTEL_SSE_IMPLEMENTATION 0
+# endif
+# endif
+
+# if PNG_INTEL_SSE_IMPLEMENTATION > 0
+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
+# endif
+#endif
+
#if PNG_MIPS_MSA_OPT > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_msa
# ifndef PNG_MIPS_MSA_IMPLEMENTATION
# if defined(__mips_msa)
# if defined(__clang__)
# elif defined(__GNUC__)
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
# define PNG_MIPS_MSA_IMPLEMENTATION 2
@@ -1251,16 +1287,31 @@ PNG_INTERNAL_FUNCTION(void,png_read_filt
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
+#if PNG_INTEL_SSE_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
/* Choose the best filter to use and filter the row data */
PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
png_row_infop row_info),PNG_EMPTY);
#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr,
png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY);
/* Read 'avail_out' bytes of data from the IDAT stream. If the output buffer
@@ -1986,16 +2037,21 @@ PNG_INTERNAL_FUNCTION(void, PNG_FILTER_O
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
#if PNG_MIPS_MSA_OPT > 0
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_msa,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
+
+# if PNG_INTEL_SSE_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
+ (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+# endif
#endif
PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
png_const_charp key, png_bytep new_key), PNG_EMPTY);
/* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h"

View File

@@ -1,8 +1,8 @@
/* pngvalid.c - validate libpng by constructing then reading png files.
*
* Last changed in libpng 1.6.27 [December 29, 2016]
* Copyright (c) 2014-2016 John Cunningham Bowler
* Last changed in libpng 1.6.29 [March 16, 2017]
* Copyright (c) 2014-2017 John Cunningham Bowler
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -7742,13 +7742,11 @@ image_transform_png_set_rgb_to_gray_ini(const image_transform *this,
* NOTE: this number only affects the internal limit check in pngvalid,
* it has no effect on the limits applied to the libpng values.
*/
that->pm->limit += pow(
# if DIGITIZE
2.0
# else
1.0
# endif
/255, data.gamma);
#if DIGITIZE
that->pm->limit += pow( 2.0/255, data.gamma);
#else
that->pm->limit += pow( 1.0/255, data.gamma);
#endif
}
}

View File

@@ -0,0 +1,81 @@
OPERATING SYSTEM SPECIFIC POWERPC DETECTION
--------------------------------------------
Detection of the ability to execute POWERPC VSX instructions on the processor
requires operating system support.  (The information is not available in user mode.)
Currently this feature is supported only on the Linux platform.
HOW TO USE THIS
---------------
This directory contains C code fragments that can be included in powerpc/powerpc_init.c
by setting the macro PNG_POWERPC_VSX_FILE to the file name in "" or <> at build
time. This setting is not recorded in pnglibconf.h and can be changed simply by
rebuilding powerpc/powerpc_init.o with the required macro definition.
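As an illustration (the exact wiring lives in powerpc/powerpc_init.c), the fragment is expected to be pulled in through the macro roughly as sketched below, for example with -DPNG_POWERPC_VSX_FILE='"contrib/powerpc-vsx/linux_aux.c"' on the compiler command line:

/* Hedged sketch; guard names match the checks quoted just below. */
#if defined(PNG_POWERPC_VSX_CHECK_SUPPORTED) && defined(PNG_POWERPC_VSX_FILE)
#  include PNG_POWERPC_VSX_FILE   /* must define static int png_have_vsx() */
#endif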
For any of this code to be used, the POWERPC VSX code must be enabled and run-time
checks must be supported.  I.e.:
#if PNG_POWERPC_VSX_OPT > 0
#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
This is done in a 'configure' build by passing configure the argument:
--enable-powerpc-vsx=check
FILE FORMAT
-----------
Each file documents its testing status as of the last time it was tested (which
may have been a long time ago):
STATUS: one of:
SUPPORTED: This indicates that the file is included in the regularly
performed test builds and bugs are fixed when discovered.
COMPILED: This indicates that the code did compile at least once. See the
more detailed description for the extent to which the result was
successful.
TESTED: This means the code was fully compiled into the libpng test programs
and these were run at least once.
BUG REPORTS: an email address to which to send reports of problems
The file is a fragment of C code. It should not define any 'extern' symbols;
everything should be static. It must define the function:
static int png_have_vsx(png_structp png_ptr);
That function must return 1 if POWERPC VSX instructions are supported, 0 if not.
It must not execute png_error unless it detects a bug. A png_error will prevent
the reading of the PNG and in the future, writing too.
BUG REPORTS
-----------
If you mail a bug report for any file that is not SUPPORTED there may be only a
limited response.  Consider fixing it and sending a patch to fix the problem -
this is more likely to result in action.
CONTRIBUTIONS
-------------
You may send contributions of new implementations to
png-mng-implement@sourceforge.net. Please write code in strict C90 C where
possible. Obviously OS dependencies are to be expected. If you submit code you
must have the author's permission and it must have a license that is acceptable
to the current maintainer; in particular that license must permit modification
and redistribution.
Please try to make the contribution a single file and give the file a clear and
unambiguous name that identifies the target OS. If multiple files really are
required put them all in a sub-directory.
You must also be prepared to handle bug reports from users of the code, either
by joining the png-mng-implement mailing list or by providing an email for the
"BUG REPORTS" entry or both. Please make sure that the header of the file
contains the STATUS and BUG REPORTS fields as above.
Please list the OS requirements as precisely as possible. Ideally you should
also list the environment in which the code has been tested and certainly list
any environments where you suspect it might not work.

View File

@@ -0,0 +1,57 @@
/* contrib/powerpc-vsx/linux.c
*
* Copyright (c) 2017 Glenn Randers-Pehrson
* Written by Vadim Barkov, 2017.
* Last changed in libpng 1.6.29 [March 16, 2017]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* STATUS: TESTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_vsx implemented for Linux by reading the widely available
* pseudo-file /proc/cpuinfo.
*
* This code is strict ANSI-C and is probably moderately portable; it does
* however use <stdio.h> and it assumes that /proc/cpuinfo is never localized.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "png.h"
#ifndef MAXLINE
# define MAXLINE 1024
#endif
static int
png_have_vsx(png_structp png_ptr)
{
FILE *f;
const char *string = "altivec supported";
char input[MAXLINE];
char *token = NULL;
PNG_UNUSED(png_ptr)
f = fopen("/proc/cpuinfo", "r");
if (f != NULL)
{
memset(input,0,MAXLINE);
while(fgets(input,MAXLINE,f) != NULL)
{
token = strstr(input,string);
if(token != NULL)
{
/* Close the file before returning so the handle is not leaked. */
fclose(f);
return 1;
}
}
fclose(f);
}
#ifdef PNG_WARNINGS_SUPPORTED
else
png_warning(png_ptr, "/proc/cpuinfo open failed");
#endif
return 0;
}

View File

@@ -0,0 +1,36 @@
/* contrib/powerpc-vsx/linux_aux.c
*
* Copyright (c) 2017 Glenn Randers-Pehrson
* Written by Vadim Barkov, 2017.
* Last changed in libpng 1.6.29 [March 16, 2017]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* STATUS: TESTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_vsx implemented for Linux by using the auxiliary vector mechanism.
*
* This code relies on getauxval(), a Linux-specific (glibc) interface declared
* in <sys/auxv.h>, so it is not strict ANSI-C and is not portable beyond Linux.
*/
#include "sys/auxv.h"
#include "png.h"
static int
png_have_vsx(png_structp png_ptr)
{
const unsigned long auxv = getauxval( AT_HWCAP );
PNG_UNUSED(png_ptr)
if(auxv & (PPC_FEATURE_HAS_ALTIVEC|PPC_FEATURE_HAS_VSX ))
return 1;
else
return 0;
}