[master] Imported from libpng-1.6.29.tar

This commit is contained in:
Glenn Randers-Pehrson
2017-03-16 07:21:03 -05:00
parent 0a181e1665
commit b363e01e6b
46 changed files with 1487 additions and 477 deletions

View File

@@ -1,10 +1,12 @@
/*- pngtopng
*
* COPYRIGHT: Written by John Cunningham Bowler, 2011.
* COPYRIGHT: Written by John Cunningham Bowler, 2011, 2017.
* To the extent possible under law, the author has waived all copyright and
* related or neighboring rights to this work. This work is published from:
* United States.
*
* Last changed in libpng 1.6.29 [%RDATE%]
*
* Read a PNG and write it out in a fixed format, using the 'simplified API'
* that was introduced in libpng-1.6.0.
*
@@ -59,26 +61,27 @@ int main(int argc, const char **argv)
else
fprintf(stderr, "pngtopng: write %s: %s\n", argv[2],
image.message);
free(buffer);
}
else
{
fprintf(stderr, "pngtopng: read %s: %s\n", argv[1],
image.message);
/* This is the only place where a 'free' is required; libpng does
* the cleanup on error and success, but in this case we couldn't
* complete the read because of running out of memory.
*/
png_image_free(&image);
}
free(buffer);
}
else
{
fprintf(stderr, "pngtopng: out of memory: %lu bytes\n",
(unsigned long)PNG_IMAGE_SIZE(image));
/* This is the only place where a 'free' is required; libpng does
* the cleanup on error and success, but in this case we couldn't
* complete the read because of running out of memory and so libpng
* has not got to the point where it can do cleanup.
*/
png_image_free(&image);
}
}
else
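For context, the hunk above shows only pngtopng's error handling. A minimal sketch of the full simplified-API flow the program follows is given below; the helper name copy_png and the choice of PNG_FORMAT_RGBA are illustrative, while the png_image_* calls are the documented libpng 1.6 simplified API.

/* Illustrative sketch only; not part of the diff above. */
#include <stdlib.h>
#include <string.h>
#include <png.h>

static int copy_png(const char *in, const char *out)
{
   png_image image;                     /* simplified-API control structure */
   png_bytep buffer;

   memset(&image, 0, sizeof image);
   image.version = PNG_IMAGE_VERSION;

   if (png_image_begin_read_from_file(&image, in) == 0)
      return 0;                         /* image.message holds the error text */

   image.format = PNG_FORMAT_RGBA;      /* request one fixed output format */
   buffer = malloc(PNG_IMAGE_SIZE(image));

   if (buffer == NULL)
   {
      /* libpng has not reached the point where it cleans up for us. */
      png_image_free(&image);
      return 0;
   }

   if (png_image_finish_read(&image, NULL/*background*/, buffer,
          0/*row_stride*/, NULL/*colormap*/) == 0 ||
       png_image_write_to_file(&image, out, 0/*convert_to_8bit*/, buffer,
          0/*row_stride*/, NULL/*colormap*/) == 0)
   {
      free(buffer);
      return 0;
   }

   free(buffer);
   return 1;
}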

View File

@@ -1,158 +0,0 @@
Enabling SSE support
Copyright (c) 2016 Google, Inc.
Written by Mike Klein, Matt Sarett
This INSTALL file written by Glenn Randers-Pehrson, 2016.
If you have moved intel_init.c and filter_sse2_intrinsics.c to a different
directory, be sure to update the '#include "../../pngpriv.h"' line in both
files if necessary to point to the correct relative location of pngpriv.h
with respect to the new location of those files.
To enable SSE support in libpng, follow the instructions in I, II, or III,
below:
I. Using patched "configure" scripts:
First, apply intel_sse.patch in your build directory.
patch -i contrib/intel/intel_sse.patch -p1
Then, if you are not building in a new GIT clone, e.g., in a tar
distribution, remove any existing pre-built configure scripts:
./configure --enable-maintainer-mode
make maintainer-clean
./autogen.sh --maintainer --clean
Finally, configure libpng with -DPNG_INTEL_SSE in CPPFLAGS:
./autogen.sh --maintainer
CPPFLAGS="-DPNG_INTEL_SSE" ./configure [options]
make CPPFLAGS="-DPNG_INTEL_SSE" [options]
make
II. Using a custom makefile:
If you are using a custom makefile, you will have to update it
manually to include contrib/intel/*.o in the dependencies, and to define
PNG_INTEL_SSE.
III. Using manually updated "configure" scripts:
If you prefer, manually edit pngpriv.h, configure.ac, and Makefile.am,
following the instructions below, then follow the instructions in
section II of INSTALL in the main libpng directory, then configure libpng
with -DPNG_INTEL_SSE in CPPFLAGS.
1. Add the following code to configure.ac under HOST SPECIFIC OPTIONS
directly beneath the section for ARM:
-----------------cut----------------
# INTEL
# =====
#
# INTEL SSE (SIMD) support.
AC_ARG_ENABLE([intel-sse],
AS_HELP_STRING([[[--enable-intel-sse]]],
[Enable Intel SSE optimizations: =no/off, yes/on:]
[no/off: disable the optimizations;]
[yes/on: enable the optimizations.]
[If not specified: determined by the compiler.]),
[case "$enableval" in
no|off)
# disable the default enabling:
AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
[Disable Intel SSE optimizations])
# Prevent inclusion of the assembler files below:
enable_intel_sse=no;;
yes|on)
AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
[Enable Intel SSE optimizations]);;
*)
AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
esac])
# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
# or where Intel optimizations were explicitly requested (this allows a
# fallback if a future host CPU does not match 'x86*')
AM_CONDITIONAL([PNG_INTEL_SSE],
[test "$enable_intel_sse" != 'no' &&
case "$host_cpu" in
i?86|x86_64) :;;
*) test "$enable_intel_sse" != '';;
esac])
-----------------cut----------------
2. Add the following code to Makefile.am under HOST SPECIFIC OPTIONS
directly beneath the "if PNG_ARM_NEON ... endif" statement:
-----------------cut----------------
if PNG_INTEL_SSE
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
contrib/intel/filter_sse2_intrinsics.c
endif
-----------------cut----------------
3. Add the following lines to pngpriv.h, following the PNG_ARM_NEON_OPT
code:
-----------------cut----------------
#ifndef PNG_INTEL_SSE_OPT
# ifdef PNG_INTEL_SSE
/* Only check for SSE if the build configuration has been modified to
* enable SSE optimizations. This means that these optimizations will
* be off by default. See contrib/intel for more details.
*/
# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
(defined(_M_IX86_FP) && _M_IX86_FP >= 2)
# define PNG_INTEL_SSE_OPT 1
# endif
# endif
#endif
#if PNG_INTEL_SSE_OPT > 0
# ifndef PNG_INTEL_SSE_IMPLEMENTATION
# if defined(__SSE4_1__) || defined(__AVX__)
/* We are not actually using AVX, but checking for AVX is the best
way we can detect SSE4.1 and SSSE3 on MSVC.
*/
# define PNG_INTEL_SSE_IMPLEMENTATION 3
# elif defined(__SSSE3__)
# define PNG_INTEL_SSE_IMPLEMENTATION 2
# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
(defined(_M_IX86_FP) && _M_IX86_FP >= 2)
# define PNG_INTEL_SSE_IMPLEMENTATION 1
# else
# define PNG_INTEL_SSE_IMPLEMENTATION 0
# endif
# endif
# if PNG_INTEL_SSE_IMPLEMENTATION > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
# endif
#endif
-----------------cut----------------
4. Add the following lines to pngpriv.h, following the prototype for
png_read_filter_row_paeth4_neon:
-----------------cut----------------
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
-----------------cut----------------

View File

@@ -1,379 +0,0 @@
/* filter_sse2_intrinsics.c - SSE2 optimized filter functions
*
* Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett
* Derived from arm/filter_neon_intrinsics.c, which was
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
*
* Last changed in libpng 1.6.24 [August 4, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
#include <immintrin.h>
/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
* They're positioned like this:
* prev: c b
* row: a d
* The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
* whichever of a, b, or c is closest to p=a+b-c.
*/
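For reference, a scalar sketch (not part of the imported file) of the Paeth predictor that the SSE2 functions below vectorize; the helper name is illustrative and abs() comes from <stdlib.h>:

static unsigned int paeth_predictor(unsigned int a, unsigned int b,
   unsigned int c)
{
   int p  = (int)a + (int)b - (int)c;   /* initial estimate p = a+b-c   */
   int pa = abs(p - (int)a);            /* distance to the left pixel   */
   int pb = abs(p - (int)b);            /* distance to the above pixel  */
   int pc = abs(p - (int)c);            /* distance to the upper-left   */

   /* Ties break in the order a, b, c, the same order enforced by the
    * nested if_then_else() calls in the SSE2 code further below.
    */
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}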
static __m128i load4(const void* p) {
return _mm_cvtsi32_si128(*(const int*)p);
}
static void store4(void* p, __m128i v) {
*(int*)p = _mm_cvtsi128_si32(v);
}
static __m128i load3(const void* p) {
/* We'll load 2 bytes, then 1 byte,
* then mask them together, and finally load into SSE.
*/
const png_uint_16* p01 = p;
const png_byte* p2 = (const png_byte*)(p01+1);
png_uint_32 v012 = (png_uint_32)(*p01)
| (png_uint_32)(*p2) << 16;
return load4(&v012);
}
static void store3(void* p, __m128i v) {
/* We'll pull from SSE as a 32-bit int, then write
* its bottom two bytes, then its third byte.
*/
png_uint_32 v012;
store4(&v012, v);
png_uint_16* p01 = p;
png_byte* p2 = (png_byte*)(p01+1);
*p01 = v012;
*p2 = v012 >> 16;
}
void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Sub filter predicts each pixel as the previous pixel, a.
* There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero.
*/
png_debug(1, "in png_read_filter_row_sub3_sse2");
__m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes;
while (rb >= 4) {
a = d; d = load4(row);
d = _mm_add_epi8(d, a);
store3(row, d);
row += 3;
rb -= 3;
}
if (rb > 0) {
a = d; d = load3(row);
d = _mm_add_epi8(d, a);
store3(row, d);
row += 3;
rb -= 3;
}
}
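For comparison, the scalar recurrence that the Sub functions implement is simply the following (an illustrative sketch, not from the imported file; it assumes the usual libpng headers for png_bytep and size_t):

static void undo_sub_filter(png_bytep row, size_t rowbytes, unsigned int bpp)
{
   size_t i;

   /* d = d + a, wrapping modulo 256, one byte at a time; the first pixel
    * (bytes 0..bpp-1) is stored unfiltered, i.e. its left pixel is zero.
    */
   for (i = bpp; i < rowbytes; i++)
      row[i] = (png_byte)(row[i] + row[i - bpp]);
}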
void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Sub filter predicts each pixel as the previous pixel, a.
* There is no pixel to the left of the first pixel. It's encoded directly.
* That works with our main loop if we just say that left pixel was zero.
*/
png_debug(1, "in png_read_filter_row_sub4_sse2");
__m128i a, d = _mm_setzero_si128();
int rb = row_info->rowbytes;
while (rb > 0) {
a = d; d = load4(row);
d = _mm_add_epi8(d, a);
store4(row, d);
row += 4;
rb -= 4;
}
}
void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Avg filter predicts each pixel as the (truncated) average of a and b.
* There's no pixel to the left of the first pixel. Luckily, it's
* predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero.
*/
png_debug(1, "in png_read_filter_row_avg3_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i b;
__m128i a, d = zero;
int rb = row_info->rowbytes;
while (rb >= 4) {
b = load4(prev);
a = d; d = load4(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store3(row, d);
prev += 3;
row += 3;
rb -= 3;
}
if (rb > 0) {
b = load3(prev);
a = d; d = load3(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store3(row, d);
prev += 3;
row += 3;
rb -= 3;
}
}
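The fix-up above rests on a small scalar identity (sketch only, not from the imported file): the rounded-up average that _mm_avg_epu8 computes exceeds the truncated average PNG requires by exactly one when the two inputs differ in their low bit.

static png_byte avg_truncated(png_byte a, png_byte b)
{
   png_byte rounded_up = (png_byte)((a + b + 1) >> 1); /* what avg_epu8 gives */

   /* (a ^ b) & 1 is 1 exactly when a + b is odd, i.e. when rounding occurred */
   return (png_byte)(rounded_up - ((a ^ b) & 1));      /* equals (a + b) >> 1 */
}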
void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* The Avg filter predicts each pixel as the (truncated) average of a and b.
* There's no pixel to the left of the first pixel. Luckily, it's
* predicted to be half of the pixel above it. So again, this works
* perfectly with our loop if we make sure a starts at zero.
*/
png_debug(1, "in png_read_filter_row_avg4_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i b;
__m128i a, d = zero;
int rb = row_info->rowbytes;
while (rb > 0) {
b = load4(prev);
a = d; d = load4(row );
/* PNG requires a truncating average, so we can't just use _mm_avg_epu8 */
__m128i avg = _mm_avg_epu8(a,b);
/* ...but we can fix it up by subtracting off 1 if it rounded up. */
avg = _mm_sub_epi8(avg, _mm_and_si128(_mm_xor_si128(a,b),
_mm_set1_epi8(1)));
d = _mm_add_epi8(d, avg);
store4(row, d);
prev += 4;
row += 4;
rb -= 4;
}
}
/* Returns |x| for 16-bit lanes. */
static __m128i abs_i16(__m128i x) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 2
return _mm_abs_epi16(x);
#else
/* Read this all as, return x<0 ? -x : x.
* To negate two's complement, you flip all the bits then add 1.
*/
__m128i is_negative = _mm_cmplt_epi16(x, _mm_setzero_si128());
/* Flip negative lanes. */
x = _mm_xor_si128(x, is_negative);
/* +1 to negative lanes, else +0. */
x = _mm_sub_epi16(x, is_negative);
return x;
#endif
}
/* Bytewise c ? t : e. */
static __m128i if_then_else(__m128i c, __m128i t, __m128i e) {
#if PNG_INTEL_SSE_IMPLEMENTATION >= 3
return _mm_blendv_epi8(e,t,c);
#else
return _mm_or_si128(_mm_and_si128(c, t), _mm_andnot_si128(c, e));
#endif
}
void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
* and two pixels from the previous row, b and c:
* prev: c b
* row: a d
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
* p=a+b-c.
*
* The first pixel has no left context, and so uses an Up filter, p = b.
* This works naturally with our main loop's p = a+b-c if we force a and c
* to zero.
* Here we zero b and d, which become c and a respectively at the start of
* the loop.
*/
png_debug(1, "in png_read_filter_row_paeth3_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero,
a, d = zero;
int rb = row_info->rowbytes;
while (rb >= 4) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 256. */
d = _mm_add_epi8(d, nearest);
store3(row, _mm_packus_epi16(d,d));
prev += 3;
row += 3;
rb -= 3;
}
if (rb > 0) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load3(prev), zero);
a = d; d = _mm_unpacklo_epi8(load3(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 256. */
d = _mm_add_epi8(d, nearest);
store3(row, _mm_packus_epi16(d,d));
prev += 3;
row += 3;
rb -= 3;
}
}
void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
png_const_bytep prev)
{
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
* and two pixels from the previous row, b and c:
* prev: c b
* row: a d
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
* p=a+b-c.
*
* The first pixel has no left context, and so uses an Up filter, p = b.
* This works naturally with our main loop's p = a+b-c if we force a and c
* to zero.
* Here we zero b and d, which become c and a respectively at the start of
* the loop.
*/
png_debug(1, "in png_read_filter_row_paeth4_sse2");
const __m128i zero = _mm_setzero_si128();
__m128i c, b = zero,
a, d = zero;
int rb = row_info->rowbytes;
while (rb > 0) {
/* It's easiest to do this math (particularly, deal with pc) with 16-bit
* intermediates.
*/
c = b; b = _mm_unpacklo_epi8(load4(prev), zero);
a = d; d = _mm_unpacklo_epi8(load4(row ), zero);
/* (p-a) == (a+b-c - a) == (b-c) */
__m128i pa = _mm_sub_epi16(b,c);
/* (p-b) == (a+b-c - b) == (a-c) */
__m128i pb = _mm_sub_epi16(a,c);
/* (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c) */
__m128i pc = _mm_add_epi16(pa,pb);
pa = abs_i16(pa); /* |p-a| */
pb = abs_i16(pb); /* |p-b| */
pc = abs_i16(pc); /* |p-c| */
__m128i smallest = _mm_min_epi16(pc, _mm_min_epi16(pa, pb));
/* Paeth breaks ties favoring a over b over c. */
__m128i nearest = if_then_else(_mm_cmpeq_epi16(smallest, pa), a,
if_then_else(_mm_cmpeq_epi16(smallest, pb), b,
c));
/* Note `_epi8`: we need addition to wrap modulo 256. */
d = _mm_add_epi8(d, nearest);
store4(row, _mm_packus_epi16(d,d));
prev += 4;
row += 4;
rb -= 4;
}
}
#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
#endif /* READ */

View File

@@ -1,54 +0,0 @@
/* intel_init.c - SSE2 optimized filter functions
*
* Copyright (c) 2016 Google, Inc.
* Written by Mike Klein and Matt Sarett
* Derived from arm/arm_init.c, which was
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
*
* Last changed in libpng 1.6.22 [May 26, 2016]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_INTEL_SSE_IMPLEMENTATION > 0
void
png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
{
/* The techniques used to implement each of these filters in SSE operate on
* one pixel at a time.
* So they generally speed up 3bpp images about 3x, 4bpp images about 4x.
* They can scale up to 6 and 8 bpp images and down to 2 bpp images,
* but they are unlikely to have any benefit for 1bpp images.
* Most of these can be implemented using only MMX and 64-bit registers,
* but they end up a bit slower than using the equally-ubiquitous SSE2.
*/
png_debug(1, "in png_init_filter_functions_sse2");
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth3_sse2;
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth4_sse2;
}
/* No need to optimize PNG_FILTER_VALUE_UP.  The compiler should
* autovectorize.
*/
}
#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
#endif /* PNG_READ_SUPPORTED */

View File

@@ -1,190 +0,0 @@
diff --git a/configure.ac b/configure.ac
--- a/configure.ac 2016-08-29 11:46:27.000000000 -0400
+++ b/configure.ac 2016-08-29 16:57:03.866355018 -0400
@@ -386,16 +386,51 @@ AC_ARG_ENABLE([mips-msa],
# future host CPU does not match 'mips*')
AM_CONDITIONAL([PNG_MIPS_MSA],
[test "$enable_mips_msa" != 'no' &&
case "$host_cpu" in
mipsel*|mips64el*) :;;
esac])
+# INTEL
+# =====
+#
+# INTEL SSE (SIMD) support.
+
+AC_ARG_ENABLE([intel-sse],
+ AS_HELP_STRING([[[--enable-intel-sse]]],
+ [Enable Intel SSE optimizations: =no/off, yes/on:]
+ [no/off: disable the optimizations;]
+ [yes/on: enable the optimizations.]
+ [If not specified: determined by the compiler.]),
+ [case "$enableval" in
+ no|off)
+ # disable the default enabling:
+ AC_DEFINE([PNG_INTEL_SSE_OPT], [0],
+ [Disable Intel SSE optimizations])
+ # Prevent inclusion of the assembler files below:
+ enable_intel_sse=no;;
+ yes|on)
+ AC_DEFINE([PNG_INTEL_SSE_OPT], [1],
+ [Enable Intel SSE optimizations]);;
+ *)
+ AC_MSG_ERROR([--enable-intel-sse=${enable_intel_sse}: invalid value])
+ esac])
+
+# Add Intel specific files to all builds where the host_cpu is Intel ('x86*')
+# or where Intel optimizations were explicitly requested (this allows a
+# fallback if a future host CPU does not match 'x86*')
+AM_CONDITIONAL([PNG_INTEL_SSE],
+ [test "$enable_intel_sse" != 'no' &&
+ case "$host_cpu" in
+ i?86|x86_64) :;;
+ *) test "$enable_intel_sse" != '';;
+ esac])
+
AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
# Config files, substituting as above
AC_CONFIG_FILES([Makefile libpng.pc:libpng.pc.in])
AC_CONFIG_FILES([libpng-config:libpng-config.in],
[chmod +x libpng-config])
AC_OUTPUT
diff --git a/Makefile.am b/Makefile.am
--- a/Makefile.am 2016-08-29 11:46:27.000000000 -0400
+++ b/Makefile.am 2016-08-29 16:57:45.955528215 -0400
@@ -97,16 +97,21 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SO
arm/filter_neon.S arm/filter_neon_intrinsics.c
endif
if PNG_MIPS_MSA
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\
mips/filter_msa_intrinsics.c
endif
+if PNG_INTEL_SSE
+libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += contrib/intel/intel_init.c\
+ contrib/intel/filter_sse2_intrinsics.c
+endif
+
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \
-version-number @PNGLIB_MAJOR@@PNGLIB_MINOR@:@PNGLIB_RELEASE@:0
if HAVE_LD_VERSION_SCRIPT
# Versioned symbols and restricted exports
if HAVE_SOLARIS_LD
diff --git a/pngpriv.h b/pngpriv.h
--- debug16/pngpriv.h 2016-08-30 10:46:36.000000000 -0400
+++ libpng16/pngpriv.h 2016-08-30 11:57:25.672280202 -0400
@@ -185,16 +185,52 @@
#ifndef PNG_MIPS_MSA_OPT
# if defined(__mips_msa) && (__mips_isa_rev >= 5) && defined(PNG_ALIGNED_MEMORY_SUPPORTED)
# define PNG_MIPS_MSA_OPT 2
# else
# define PNG_MIPS_MSA_OPT 0
# endif
#endif
+#ifndef PNG_INTEL_SSE_OPT
+# ifdef PNG_INTEL_SSE
+ /* Only check for SSE if the build configuration has been modified to
+ * enable SSE optimizations. This means that these optimizations will
+ * be off by default. See contrib/intel for more details.
+ */
+# if defined(__SSE4_1__) || defined(__AVX__) || defined(__SSSE3__) || \
+ defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+# define PNG_INTEL_SSE_OPT 1
+# endif
+# endif
+#endif
+
+#if PNG_INTEL_SSE_OPT > 0
+# ifndef PNG_INTEL_SSE_IMPLEMENTATION
+# if defined(__SSE4_1__) || defined(__AVX__)
+ /* We are not actually using AVX, but checking for AVX is the best
+ way we can detect SSE4.1 and SSSE3 on MSVC.
+ */
+# define PNG_INTEL_SSE_IMPLEMENTATION 3
+# elif defined(__SSSE3__)
+# define PNG_INTEL_SSE_IMPLEMENTATION 2
+# elif defined(__SSE2__) || defined(_M_X64) || defined(_M_AMD64) || \
+ (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+# define PNG_INTEL_SSE_IMPLEMENTATION 1
+# else
+# define PNG_INTEL_SSE_IMPLEMENTATION 0
+# endif
+# endif
+
+# if PNG_INTEL_SSE_IMPLEMENTATION > 0
+# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_sse2
+# endif
+#endif
+
#if PNG_MIPS_MSA_OPT > 0
# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_msa
# ifndef PNG_MIPS_MSA_IMPLEMENTATION
# if defined(__mips_msa)
# if defined(__clang__)
# elif defined(__GNUC__)
# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
# define PNG_MIPS_MSA_IMPLEMENTATION 2
@@ -1251,16 +1287,31 @@ PNG_INTERNAL_FUNCTION(void,png_read_filt
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
+#if PNG_INTEL_SSE_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_sse2,(png_row_infop
+ row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+#endif
+
/* Choose the best filter to use and filter the row data */
PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
png_row_infop row_info),PNG_EMPTY);
#ifdef PNG_SEQUENTIAL_READ_SUPPORTED
PNG_INTERNAL_FUNCTION(void,png_read_IDAT_data,(png_structrp png_ptr,
png_bytep output, png_alloc_size_t avail_out),PNG_EMPTY);
/* Read 'avail_out' bytes of data from the IDAT stream. If the output buffer
@@ -1986,16 +2037,21 @@ PNG_INTERNAL_FUNCTION(void, PNG_FILTER_O
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
#if PNG_MIPS_MSA_OPT > 0
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_msa,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
+
+# if PNG_INTEL_SSE_IMPLEMENTATION > 0
+PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2,
+ (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
+# endif
#endif
PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
png_const_charp key, png_bytep new_key), PNG_EMPTY);
/* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h"

View File

@@ -1,8 +1,8 @@
/* pngvalid.c - validate libpng by constructing then reading png files.
*
* Last changed in libpng 1.6.27 [December 29, 2016]
* Copyright (c) 2014-2016 John Cunningham Bowler
* Last changed in libpng 1.6.29 [March 16, 2017]
* Copyright (c) 2014-2017 John Cunningham Bowler
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
@@ -7742,13 +7742,11 @@ image_transform_png_set_rgb_to_gray_ini(const image_transform *this,
* NOTE: this number only affects the internal limit check in pngvalid,
* it has no effect on the limits applied to the libpng values.
*/
that->pm->limit += pow(
# if DIGITIZE
2.0
# else
1.0
# endif
/255, data.gamma);
#if DIGITIZE
that->pm->limit += pow( 2.0/255, data.gamma);
#else
that->pm->limit += pow( 1.0/255, data.gamma);
#endif
}
}

View File

@@ -0,0 +1,81 @@
OPERATING SYSTEM SPECIFIC POWERPC DETECTION
--------------------------------------------
Detection of the ability to execute POWERPC VSX instructions on the processor
requires operating system support.  (The information is not available in user mode.)
Currently this feature is supported only on the Linux platform.
HOW TO USE THIS
---------------
This directory contains C code fragments that can be included in powerpc/powerpc_init.c
by setting the macro PNG_POWERPC_VSX_FILE to the file name in "" or <> at build
time. This setting is not recorded in pnglibconf.h and can be changed simply by
rebuilding powerpc/powerpc_init.o with the required macro definition.
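As an illustration (the exact wiring lives in powerpc/powerpc_init.c), the fragment is expected to be pulled in through the macro roughly as sketched below, for example with -DPNG_POWERPC_VSX_FILE='"contrib/powerpc-vsx/linux_aux.c"' on the compiler command line:

/* Hedged sketch; guard names match the checks quoted just below. */
#if defined(PNG_POWERPC_VSX_CHECK_SUPPORTED) && defined(PNG_POWERPC_VSX_FILE)
#  include PNG_POWERPC_VSX_FILE   /* must define static int png_have_vsx() */
#endif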
For any of this code to be used, the POWERPC VSX code must be enabled and run-time
checks must be supported.  I.e.:
#if PNG_POWERPC_VSX_OPT > 0
#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
This is done in a 'configure' build by passing configure the argument:
--enable-powerpc-vsx=check
FILE FORMAT
-----------
Each file documents its testing status as of the last time it was tested (which
may have been a long time ago):
STATUS: one of:
SUPPORTED: This indicates that the file is included in the regularly
performed test builds and bugs are fixed when discovered.
COMPILED: This indicates that the code did compile at least once. See the
more detailed description for the extent to which the result was
successful.
TESTED: This means the code was fully compiled into the libpng test programs
and these were run at least once.
BUG REPORTS: an email address to which to send reports of problems
The file is a fragment of C code. It should not define any 'extern' symbols;
everything should be static. It must define the function:
static int png_have_vsx(png_structp png_ptr);
That function must return 1 if POWERPC VSX instructions are supported, 0 if not.
It must not execute png_error unless it detects a bug. A png_error will prevent
the reading of the PNG and in the future, writing too.
BUG REPORTS
-----------
If you mail a bug report for any file that is not SUPPORTED there may be only a
limited response.  Consider fixing it and sending a patch to fix the problem -
this is more likely to result in action.
CONTRIBUTIONS
-------------
You may send contributions of new implementations to
png-mng-implement@sourceforge.net. Please write code in strict C90 C where
possible. Obviously OS dependencies are to be expected. If you submit code you
must have the author's permission and it must have a license that is acceptable
to the current maintainer; in particular that license must permit modification
and redistribution.
Please try to make the contribution a single file and give the file a clear and
unambiguous name that identifies the target OS. If multiple files really are
required put them all in a sub-directory.
You must also be prepared to handle bug reports from users of the code, either
by joining the png-mng-implement mailing list or by providing an email for the
"BUG REPORTS" entry or both. Please make sure that the header of the file
contains the STATUS and BUG REPORTS fields as above.
Please list the OS requirements as precisely as possible. Ideally you should
also list the environment in which the code has been tested and certainly list
any environments where you suspect it might not work.

View File

@@ -0,0 +1,57 @@
/* contrib/powerpc-vsx/linux.c
*
* Copyright (c) 2017 Glenn Randers-Pehrson
* Written by Vadim Barkov, 2017.
* Last changed in libpng 1.6.29 [March 16, 2017]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* STATUS: TESTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_vsx implemented for Linux by reading the widely available
* pseudo-file /proc/cpuinfo.
*
* This code is strict ANSI-C and is probably moderately portable; it does
* however use <stdio.h> and it assumes that /proc/cpuinfo is never localized.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "png.h"
#ifndef MAXLINE
# define MAXLINE 1024
#endif
static int
png_have_vsx(png_structp png_ptr)
{
FILE *f;
const char *string = "altivec supported";
char input[MAXLINE];
char *token = NULL;
PNG_UNUSED(png_ptr)
f = fopen("/proc/cpuinfo", "r");
if (f != NULL)
{
memset(input,0,MAXLINE);
while(fgets(input,MAXLINE,f) != NULL)
{
token = strstr(input,string);
if(token != NULL)
{
/* Close the file before returning so the handle is not leaked. */
fclose(f);
return 1;
}
}
fclose(f);
}
#ifdef PNG_WARNINGS_SUPPORTED
else
png_warning(png_ptr, "/proc/cpuinfo open failed");
#endif
return 0;
}

View File

@@ -0,0 +1,36 @@
/* contrib/powerpc-vsx/linux_aux.c
*
* Copyright (c) 2017 Glenn Randers-Pehrson
* Written by Vadim Barkov, 2017.
* Last changed in libpng 1.6.29 [March 16, 2017]
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* STATUS: TESTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_vsx implemented for Linux by using the auxiliary vector mechanism.
*
* This code relies on getauxval(), a Linux-specific (glibc) interface declared
* in <sys/auxv.h>, so it is not strict ANSI-C and is not portable beyond Linux.
*/
#include "sys/auxv.h"
#include "png.h"
static int
png_have_vsx(png_structp png_ptr)
{
const unsigned long auxv = getauxval( AT_HWCAP );
PNG_UNUSED(png_ptr)
if(auxv & (PPC_FEATURE_HAS_ALTIVEC|PPC_FEATURE_HAS_VSX ))
return 1;
else
return 0;
}