Merge branch 'libpng16' of git://github.com/barkovv/libpng into libpng16

2025-07-10 18:04:09 +02:00 · 2017-02-21 20:41:27 -06:00
parent 5e8c50cc8d 3644db298b
commit 7980c79d69
13 changed files with 1214 additions and 4 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -44,7 +44,7 @@ include(GNUInstallDirs)
 # needed packages
-#Allow users to specify location of Zlib, 
+#Allow users to specify location of Zlib,
 # Useful if zlib is being built alongside this as a sub-project
 option(PNG_BUILD_ZLIB "Custom zlib Location, else find_package is used" OFF)
@@ -98,7 +98,7 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm" OR
      arm/arm_init.c
      arm/filter_neon.S
      arm/filter_neon_intrinsics.c)
-      
+
    if(${PNG_ARM_NEON} STREQUAL "on")
      add_definitions(-DPNG_ARM_NEON_OPT=2)
    elseif(${PNG_ARM_NEON} STREQUAL "check")
@@ -109,6 +109,38 @@ if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm" OR
  endif()
 endif()
 # Set definitions and sources for PowerPC (VSX SIMD filter optimizations).
 # PNG_POWERPC_VSX selects the mode:
 #   check - (default) build the optimized code, use internal checking code
 #   on    - build the optimized code and turn it on unconditionally
 #   off   - do not build the optimized code (PNG_POWERPC_VSX_OPT=0)
 # Variables are named, not expanded, inside if() so that an empty value
 # cannot produce a malformed condition.
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "^powerpc*" OR
    CMAKE_SYSTEM_PROCESSOR MATCHES "^ppc64*")
  set(PNG_POWERPC_VSX_POSSIBLE_VALUES check on off)
  set(PNG_POWERPC_VSX "check" CACHE STRING "Enable POWERPC VSX optimizations:
     check: (default) use internal checking code;
     off: disable the optimizations;
     on: turn on unconditionally.")
  set_property(CACHE PNG_POWERPC_VSX PROPERTY STRINGS
     ${PNG_POWERPC_VSX_POSSIBLE_VALUES})
  list(FIND PNG_POWERPC_VSX_POSSIBLE_VALUES "${PNG_POWERPC_VSX}" index)
  if(index EQUAL -1)
    message(FATAL_ERROR
      " PNG_POWERPC_VSX must be one of [${PNG_POWERPC_VSX_POSSIBLE_VALUES}]")
  # The disabling value is "off" (see the list above); comparing against "no"
  # would never match and the optimized sources would always be compiled.
  elseif(NOT PNG_POWERPC_VSX STREQUAL "off")
    set(libpng_powerpc_sources
      powerpc/powerpc_init.c
      powerpc/filter_vsx_intrinsics.c)
    if(PNG_POWERPC_VSX STREQUAL "on")
      add_definitions(-DPNG_POWERPC_VSX_OPT=2)
    elseif(PNG_POWERPC_VSX STREQUAL "check")
      add_definitions(-DPNG_POWERPC_VSX_CHECK_SUPPORTED)
      message(WARNING
      "[PNG_POWERPC_VSX==check] Please check contrib/powerpc-vsx/README file for the list of supported OSes.")
    endif()
  else()
    add_definitions(-DPNG_POWERPC_VSX_OPT=0)
  endif()
 endif()
 # SET LIBNAME
 set(PNG_LIB_NAME png${PNGLIB_MAJOR}${PNGLIB_MINOR})
@@ -400,6 +432,7 @@ set(libpng_sources
  pngwtran.c
  pngwutil.c
  ${libpng_arm_sources}
  ${libpng_powerpc_sources}
 )
 set(pngtest_sources
  pngtest.c
@@ -842,4 +875,3 @@ endif()
 # to create msvc import lib for mingw compiled shared lib
 # pexports libpng.dll > libpng.def
 # lib /def:libpng.def /machine:x86
--- a/Makefile.am
+++ b/Makefile.am
@@ -107,6 +107,11 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += intel/intel_init.c\
 	intel/filter_sse2_intrinsics.c
 endif
 if PNG_POWERPC_VSX
 libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\
        powerpc/filter_vsx_intrinsics.c
 endif
 nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
 libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LDFLAGS = -no-undefined -export-dynamic \
--- a/2
+++ b/2
@@ -179,8 +179,10 @@ Files in this distribution:
      pngwtran.c    =>  Write data transformations
      pngwutil.c    =>  Write utility functions
      arm           =>  Contains optimized code for the ARM platform
      powerpc       =>  Contains optimized code for the PowerPC platform
      contrib       =>  Contributions
       arm-neon         =>  Optimized code for ARM-NEON platform
       powerpc          =>  Optimized code for POWERPC-VSX platform
       examples         =>  Example programs
       gregbook         =>  source code for PNG reading and writing, from
                            Greg Roelofs' "PNG: The Definitive Guide",
--- a/configure.ac
+++ b/configure.ac
@@ -426,6 +426,54 @@ AM_CONDITIONAL([PNG_INTEL_SSE],
      *)    test "$enable_intel_sse" != '';;
    esac])
 # PowerPC
 # ===
 #
 # PowerPC VSX (SIMD) support.
 #
 # --enable-powerpc-vsx accepts:
 #   no|off  define PNG_POWERPC_VSX_OPT=0 and suppress the platform files
 #   check   define PNG_POWERPC_VSX_CHECK_SUPPORTED (run-time detection)
 #   api     define PNG_POWERPC_VSX_API_SUPPORTED (enabled via png_set_option)
 #   yes|on  define PNG_POWERPC_VSX_OPT=2 (unconditionally on)
 # Any other value is a configuration error.
 AC_ARG_ENABLE([powerpc-vsx],
 AS_HELP_STRING([[[--enable-powerpc-vsx]]],
      [Enable POWERPC VSX optimizations: =no/off, check, api, yes/on:]
      [no/off: disable the optimizations; check: use internal checking code]
      [api: disable by default, enable by a call to png_set_option]
      [yes/on: turn on unconditionally.]
      [If not specified: determined by the compiler.]),
   [case "$enableval" in
      no|off)
         # disable the default enabling on __ppc64__ systems:
         AC_DEFINE([PNG_POWERPC_VSX_OPT], [0],
                   [Disable POWERPC VSX optimizations])
         # Prevent inclusion of the platform specific files below:
         enable_powerpc_vsx=no;;
      check)
         AC_DEFINE([PNG_POWERPC_VSX_CHECK_SUPPORTED], [],
                   [Check for POWERPC VSX support at run-time])
         AC_MSG_WARN([--enable-powerpc-vsx Please check contrib/powerpc-vsx/README file]
            [for the list of supported OSes.]);;
      api)
         AC_DEFINE([PNG_POWERPC_VSX_API_SUPPORTED], [],
                   [Turn on POWERPC VSX optimizations at run-time]);;
      yes|on)
         AC_DEFINE([PNG_POWERPC_VSX_OPT], [2],
                   [Enable POWERPC VSX optimizations])
         AC_MSG_WARN([--enable-powerpc-vsx: please specify 'check' or 'api', if]
            [you want the optimizations unconditionally pass '-maltivec -mvsx']
            [or '-mcpu=power8' to the compiler.]);;
      *)
         AC_MSG_ERROR([--enable-powerpc-vsx=${enable_powerpc_vsx}: invalid value]);;
   esac])
 # Add PowerPC specific files to all builds where the host_cpu is powerpc('powerpc*') or
 # where POWERPC optimizations were explicitly requested (this allows a fallback if a
 # future host CPU does not match 'powerpc*')
 #
 # The '*)' arm is required: a shell 'case' that matches no pattern exits with
 # status 0, which would otherwise make this conditional true on every
 # non-PowerPC host that did not pass --disable-powerpc-vsx.
 AM_CONDITIONAL([PNG_POWERPC_VSX],
   [test "$enable_powerpc_vsx" != 'no' &&
    case "$host_cpu" in
      powerpc*|ppc64*) :;;
      *)    test "$enable_powerpc_vsx" != '';;
    esac])
 AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
 # Config files, substituting as above
--- a/contrib/powerpc-vsx/README
+++ b/contrib/powerpc-vsx/README
@@ -0,0 +1,81 @@
 OPERATING SYSTEM SPECIFIC POWERPC DETECTION
 --------------------------------------------
 Detection of the ability to execute POWERPC VSX instructions on a processor
 requires operating system support.  (The information is not available in user
 mode.)  Currently this feature is supported only on the Linux platform.
 HOW TO USE THIS
 ---------------
 This directory contains C code fragments that can be included in powerpc/powerpc_init.c
 by setting the macro PNG_POWERPC_VSX_FILE to the file name in "" or <> at build
 time.  This setting is not recorded in pnglibconf.h and can be changed simply by
 rebuilding powerpc/powerpc_init.o with the required macro definition.
 For any of this code to be used the POWERPC code must be enabled and run time
 checks must be supported.  I.e.:
 #if PNG_POWERPC_VSX_OPT > 0
 #ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
 This is done in a 'configure' build by passing configure the argument:
   --enable-powerpc-vsx=check
 FILE FORMAT
 -----------
 Each file documents its testing status as of the last time it was tested (which
 may have been a long time ago):
 STATUS: one of:
   SUPPORTED: This indicates that the file is included in the regularly
         performed test builds and bugs are fixed when discovered.
   COMPILED: This indicates that the code did compile at least once.  See the
         more detailed description for the extent to which the result was
         successful.
   TESTED: This means the code was fully compiled into the libpng test programs
         and these were run at least once.
 BUG REPORTS: an email address to which to send reports of problems
 The file is a fragment of C code. It should not define any 'extern' symbols;
 everything should be static.  It must define the function:
 static int png_have_vsx(png_structp png_ptr);
 That function must return 1 if POWERPC VSX instructions are supported, 0 if not.
 It must not execute png_error unless it detects a bug.  A png_error will prevent
 the reading of the PNG and in the future, writing too.
 BUG REPORTS
 -----------
 If you mail a bug report for any file that is not SUPPORTED there may only be
 limited response.  Consider fixing it and sending a patch to fix the problem -
 this is more likely to result in action.
 CONTRIBUTIONS
 -------------
 You may send contributions of new implementations to
 png-mng-implement@sourceforge.net.  Please write code in strict C90 C where
 possible.  Obviously OS dependencies are to be expected.  If you submit code you
 must have the author's permission and it must have a license that is acceptable
 to the current maintainer; in particular that license must permit modification
 and redistribution.
 Please try to make the contribution a single file and give the file a clear and
 unambiguous name that identifies the target OS.  If multiple files really are
 required put them all in a sub-directory.
 You must also be prepared to handle bug reports from users of the code, either
 by joining the png-mng-implement mailing list or by providing an email for the
 "BUG REPORTS" entry or both.  Please make sure that the header of the file
 contains the STATUS and BUG REPORTS fields as above.
 Please list the OS requirements as precisely as possible.  Ideally you should
 also list the environment in which the code has been tested and certainly list
 any environments where you suspect it might not work.
--- a/contrib/powerpc-vsx/linux.c
+++ b/contrib/powerpc-vsx/linux.c
@@ -0,0 +1,56 @@
 /* contrib/powerpc-vsx/linux.c
 *
 * Copyright (c) 2016 Glenn Randers-Pehrson
 * Written by Vadim Barkov, 2017.
 *
 * This code is released under the libpng license.
 * For conditions of distribution and use, see the disclaimer
 * and license in png.h
 *
 * STATUS: TESTED
 * BUG REPORTS: png-mng-implement@sourceforge.net
 *
 * png_have_vsx implemented for Linux by reading the widely available
 * pseudo-file /proc/cpuinfo. 
 *
 * This code is strict ANSI-C and is probably moderately portable; it does
 * however use <stdio.h> and it assumes that /proc/cpuinfo is never localized.
 */
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include "png.h"
 #ifndef MAXLINE
 #  define MAXLINE 1024
 #endif
 /* Run-time probe based on /proc/cpuinfo.
  *
  * Reads /proc/cpuinfo line by line (at most MAXLINE-1 characters per read)
  * and looks for the text "altivec supported".  Returns 1 as soon as a line
  * contains it and 0 otherwise.  If the file cannot be opened the result is 0
  * and, when PNG_WARNINGS_SUPPORTED is defined, a warning is issued.
  * The stream is closed on every path before returning.
  */
 static int
 png_have_vsx(png_structp png_ptr)
 {
    FILE *f;
    const char *string = "altivec supported";
    char input[MAXLINE];
    int found = 0;
    PNG_UNUSED(png_ptr)
    f = fopen("/proc/cpuinfo", "r");
    if (f != NULL)
    {
       memset(input,0,MAXLINE);
       while (fgets(input,MAXLINE,f) != NULL)
       {
          if (strstr(input,string) != NULL)
          {
             found = 1;
             break;
          }
       }
       /* Release the stream; previously it leaked on both return paths. */
       fclose(f);
    }
 #ifdef PNG_WARNINGS_SUPPORTED
    else
       png_warning(png_ptr, "/proc/cpuinfo open failed");
 #endif
    return found;
 }
--- a/contrib/powerpc-vsx/linux_aux.c
+++ b/contrib/powerpc-vsx/linux_aux.c
@@ -0,0 +1,35 @@
 /* contrib/powerpc-vsx/linux_aux.c
 *
 * Copyright (c) 2016 Glenn Randers-Pehrson
 * Written by Vadim Barkov, 2017.
 *
 * This code is released under the libpng license.
 * For conditions of distribution and use, see the disclaimer
 * and license in png.h
 *
 * STATUS: TESTED
 * BUG REPORTS: png-mng-implement@sourceforge.net
 *
 * png_have_vsx implemented for Linux by using the auxiliary vector mechanism.
 *
 * This code relies on getauxval() from <sys/auxv.h>, so it can only be used on
 * systems that provide the auxiliary vector interface.
 */
 #include "sys/auxv.h"
 #include "png.h"
 /* Run-time probe based on the auxiliary vector.
  *
  * Reads the AT_HWCAP word with getauxval() and returns 1 only when both the
  * PPC_FEATURE_HAS_ALTIVEC and PPC_FEATURE_HAS_VSX bits are set, 0 otherwise.
  * Testing "any bit of the mask" would also accept AltiVec-only processors,
  * on which the VSX loads/stores used by the optimized filters are not
  * available.
  */
 static int
 png_have_vsx(png_structp png_ptr)
 {
    const unsigned long required =
       PPC_FEATURE_HAS_ALTIVEC|PPC_FEATURE_HAS_VSX;
    const unsigned long auxv = getauxval( AT_HWCAP );
    PNG_UNUSED(png_ptr)
    if ((auxv & required) == required)
       return 1;
    else
       return 0;
 }
--- a/png.h
+++ b/png.h
@@ -3225,7 +3225,10 @@ PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory,
 #  define PNG_MIPS_MSA   6 /* HARDWARE: MIPS Msa SIMD instructions supported */
 #endif
 #define PNG_IGNORE_ADLER32 8
-#define PNG_OPTION_NEXT  10 /* Next option - numbers must be even */
+#ifdef PNG_POWERPC_VSX_API_SUPPORTED
 #  define PNG_POWERPC_VSX   10 /* HARDWARE: PowerPC VSX SIMD instructions supported */
 #endif
 #define PNG_OPTION_NEXT  12 /* Next option - numbers must be even */
 /* Return values: NOTE: there are four values and 'off' is *not* zero */
 #define PNG_OPTION_UNSET   0 /* Unset - defaults to off */
--- a/pngpriv.h
+++ b/pngpriv.h
@@ -190,6 +190,14 @@
 #  endif
 #endif
 /* Default for PNG_POWERPC_VSX_OPT when the build has not already set it:
  * 2 when the compiler defines __PPC64__, __ALTIVEC__ and __VSX__ together,
  * otherwise 0.
  */
 #ifndef PNG_POWERPC_VSX_OPT
 #  if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__)
 #     define PNG_POWERPC_VSX_OPT 2
 #  else
 #     define PNG_POWERPC_VSX_OPT 0
 #  endif
 #endif
 #ifndef PNG_INTEL_SSE_OPT
 #   ifdef PNG_INTEL_SSE
      /* Only check for SSE if the build configuration has been modified to
@@ -246,6 +254,11 @@
 #  endif
 #endif /* PNG_MIPS_MSA_OPT > 0 */
 /* When VSX optimizations are enabled, name png_init_filter_functions_vsx as
  * the PNG_FILTER_OPTIMIZATIONS hook and select implementation 1 (the
  * intrinsics code in powerpc/filter_vsx_intrinsics.c tests for this value).
  */
 #if PNG_POWERPC_VSX_OPT > 0
 #  define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx
 #  define PNG_POWERPC_VSX_IMPLEMENTATION 1
 #endif
 /* Is this a build of a DLL where compilation of the object modules requires
 * different preprocessor settings to those required for a simple library?  If
@@ -1292,6 +1305,23 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 #endif
 /* Internal prototypes of the PowerPC VSX row-filter functions; declared only
  * when PNG_POWERPC_VSX_OPT is greater than zero.  Each takes the row
  * description, the row to modify in place and the previous row.
  */
 #if PNG_POWERPC_VSX_OPT > 0
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info,
    png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vsx,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vsx,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vsx,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vsx,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vsx,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vsx,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
 #endif
 #if PNG_INTEL_SSE_IMPLEMENTATION > 0
 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_sse2,(png_row_infop
    row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
--- a/powerpc/filter_vsx_intrinsics.c
+++ b/powerpc/filter_vsx_intrinsics.c
@@ -0,0 +1,767 @@
 /* filter_vsx_intrinsics.c - PowerPC optimised filter functions
 *
 * Copyright (c) 2016 Glenn Randers-Pehrson
 * Written by Vadim Barkov, 2017.
 * Last changed in libpng 1.6.25 [September 1, 2016]
 *
 * This code is released under the libpng license.
 * For conditions of distribution and use, see the disclaimer
 * and license in png.h
 */
 #include <stdio.h>
 #include <stdint.h>
 #include "../pngpriv.h"
 #ifdef PNG_READ_SUPPORTED
 /* This code requires -maltivec and -mvsx on the command line: */
 #if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
 #include <altivec.h>
 #if PNG_POWERPC_VSX_OPT > 0
 #ifndef __VSX__
 #  error "This code requires VSX support (POWER7 and later). Please provide -mvsx compiler flag."
 #endif
 #define vec_ld_unaligned(vec,data) vec = vec_vsx_ld(0,data)
 #define vec_st_unaligned(vec,data) vec_vsx_st(vec,0,data)
 /* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
 * They're positioned like this:
 *    prev:  c b
 *    row:   a d
 * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
 * whichever of a, b, or c is closest to p=a+b-c.
 * ( this is taken from ../intel/filter_sse2_intrinsics.c )
 */
 /* Declares and initialises the locals shared by the filter functions:
  *   i             - scratch loop counter (a png_byte)
  *   rp            - write pointer, initialised to row + offset
  *   pp            - read pointer, initialised to prev_row
  *   unaligned_top - number of bytes from rp up to the next 16-byte boundary
  *                   (0 when rp is already aligned), limited to rowbytes
  *   istop         - row_info->rowbytes minus unaligned_top (0 when the whole
  *                   row is shorter than the unaligned prefix)
  * The expansion is a run of declarations followed by statements, so it is
  * placed after the other local declarations of the calling function.
  * Note that istop is derived from the full rowbytes even when offset is
  * non-zero; callers that start at row + offset must account for that.
  */
 #define vsx_declare_common_vars(row_info,row,prev_row,offset) \
   png_byte i;\
   png_bytep rp = row + offset;\
   png_const_bytep pp = prev_row;\
   png_size_t unaligned_top = 16 - (((png_size_t)rp % 16));\
   png_size_t istop;\
   if(unaligned_top == 16)\
      unaligned_top = 0;\
   istop = row_info->rowbytes;\
   if((unaligned_top < istop))\
      istop -= unaligned_top;\
   else{\
      unaligned_top = istop;\
      istop = 0;\
   }
 /* 'Up' filter: every byte of row becomes (row byte + prev_row byte) mod 256.
  *
  * The row is handled in three phases: a scalar prologue that advances rp to
  * a 16-byte boundary, a vector loop that adds 16 bytes per iteration (aligned
  * load/store on row, unaligned load on prev_row), and a scalar epilogue for
  * the bytes that do not fill a whole vector.
  */
 void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
                                 png_const_bytep prev_row)
 {
    vector unsigned char rp_vec;
    vector unsigned char pp_vec;
    vsx_declare_common_vars(row_info,row,prev_row,0)
    /* Prologue: bytes in front of the first 16-byte boundary. */
    for (i = 0; i < unaligned_top; i++, rp++, pp++)
       *rp = (png_byte)((*rp + *pp) & 0xff);
    /* Vector body: rp is aligned here, pp may not be. */
    for ( ; istop >= 16; istop -= 16, rp += 16, pp += 16)
    {
       vec_ld_unaligned(pp_vec,pp);
       rp_vec = vec_add(vec_ld(0,rp),pp_vec);
       vec_st(rp_vec,0,rp);
    }
    /* Epilogue: fewer than 16 bytes are left. */
    for (i = 0; i < istop; i++, rp++, pp++)
       *rp = (png_byte)((*rp + *pp) & 0xff);
 }
 /* Permutation masks used as the third argument of
  * vec_perm(data, VSX_CHAR_ZERO, mask).  An entry in the range 0..15 picks
  * that byte of 'data'; the entry 16 picks byte 0 of the second operand, i.e.
  * zero when that operand is VSX_CHAR_ZERO.
  *
  * VSX_LEFTSHIFTED<n>_<bpp>: the bytes of pixel n-1 are moved into the slots
  *    of pixel n (the "pixel to the left"), every other slot is zero.
  * VSX_NOT_SHIFTED<n>_<bpp>: the bytes of pixel n stay in their own slots,
  *    every other slot is zero.
  * <bpp> is the number of bytes per pixel (3 or 4).
  */
 static const vector unsigned char VSX_LEFTSHIFTED1_4 = {16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_LEFTSHIFTED2_4 = {16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16};
 static const vector unsigned char VSX_LEFTSHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11};
 static const vector unsigned char VSX_LEFTSHIFTED1_3 = {16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_LEFTSHIFTED2_3 = {16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_LEFTSHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16};
 static const vector unsigned char VSX_LEFTSHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16};
 static const vector unsigned char VSX_NOT_SHIFTED1_4 = {16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_NOT_SHIFTED2_4 = {16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16};
 static const vector unsigned char VSX_NOT_SHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15};
 static const vector unsigned char VSX_NOT_SHIFTED1_3 = {16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_NOT_SHIFTED2_3 = {16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_NOT_SHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16};
 static const vector unsigned char VSX_NOT_SHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16};
 static const vector unsigned char VSX_CHAR_ZERO = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
 #ifdef __LITTLE_ENDIAN__
 static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = { 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = { 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6};
 static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = { 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = { 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = { 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16};
 #elif defined(__BIG_ENDIAN__)
 static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = {16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = {16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7};
 static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = {16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = {16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = {16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16};
 static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16};
 #endif
 /* vsx_char_to_short(vec,offset,bpp): permutes 'vec' with the
  * VSX_CHAR_TO_SHORT<offset>_<bpp> table (zero filling from VSX_CHAR_ZERO) and
  * reinterprets the result as a vector of unsigned shorts.
  * vsx_short_to_char(vec,offset,bpp): reinterprets 'vec' as bytes and permutes
  * it with the VSX_SHORT_TO_CHAR<offset>_<bpp> table, again zero filling.
  * 'offset' and 'bpp' are pasted into the table name, so they must be literal
  * tokens for which a table exists.
  */
 #define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VSX_CHAR_ZERO,VSX_CHAR_TO_SHORT##offset##_##bpp)
 #define vsx_short_to_char(vec,offset,bpp) vec_perm(((vector unsigned char)(vec)),VSX_CHAR_ZERO,VSX_SHORT_TO_CHAR##offset##_##bpp)
 /* vsx_abs(number): absolute value of an integer expression.  Uses abs() when
  * PNG_USE_ABS is defined, otherwise a conditional expression.  The fallback
  * is fully parenthesised so that it can be used inside a larger expression
  * and with an argument that itself contains operators; note that it still
  * evaluates its argument more than once.
  */
 #ifdef PNG_USE_ABS
 #  define vsx_abs(number) abs(number)
 #else
 #  define vsx_abs(number) (((number) > 0) ? (number) : -(number))
 #endif
 /* 'Sub' filter for 4 bytes per pixel: every byte from row[4] up to the end of
  * the row becomes (byte + byte 4 positions to its left) mod 256.  prev_row is
  * not used.
  *
  * The work is split into a scalar prologue up to the first 16-byte boundary,
  * a vector loop handling 16 bytes per iteration and a scalar epilogue.
  */
 void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
                                   png_const_bytep prev_row)
 {
    const png_byte bpp = 4;
    vector unsigned char rp_vec;
    vector unsigned char part_vec;
    vsx_declare_common_vars(row_info,row,prev_row,bpp)
    PNG_UNUSED(pp)
    /* rp starts bpp bytes into the row, but unaligned_top + istop was sized
     * for the full rowbytes.  Drop bpp bytes from the end of the work list so
     * that nothing past row + rowbytes is read or written.  The vector loop
     * only runs when istop >= 16, in which case unaligned_top is untouched
     * and rp is still aligned when the loop starts.
     */
    if (istop >= bpp)
       istop -= bpp;
    else if (unaligned_top + istop >= bpp)
    {
       unaligned_top -= bpp - istop;
       istop = 0;
    }
    else
    {
       unaligned_top = 0;
       istop = 0;
    }
    /* Altivec operations require 16-byte aligned data
     * but input can be unaligned. So we calculate
     * unaligned part as usual.
     */
    for (i = 0; i < unaligned_top; i++)
    {
       *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
       rp++;
    }
    /* Using SIMD while we can */
    while( istop >= 16 )
    {
       /* The first pixel of the vector depends on bytes stored before rp,
        * so it is resolved with scalar code before the vector is loaded.
        */
       for(i=0;i < bpp ; i++)
       {
          *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
          rp++;
       }
       rp -= bpp;
       rp_vec = vec_ld(0,rp);
       /* Propagate pixel 0 into pixel 1, then 1 into 2, then 2 into 3. */
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
       rp_vec = vec_add(rp_vec,part_vec);
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
       rp_vec = vec_add(rp_vec,part_vec);
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
       rp_vec = vec_add(rp_vec,part_vec);
       vec_st(rp_vec,0,rp);
       rp += 16;
       istop -= 16;
    }
    if(istop > 0)
       for (i = 0; i < istop % 16; i++)
       {
          *rp = (png_byte)(((int)(*rp) + (int)(*(rp - bpp))) & 0xff);
          rp++;
       }
 }
 /* 'Sub' filter for 3 bytes per pixel: every byte from row[3] up to the end of
  * the row becomes (byte + byte 3 positions to its left) mod 256.  prev_row is
  * not used.
  *
  * The work is split into a scalar prologue up to the first 16-byte boundary,
  * a vector loop handling 16 bytes per iteration and a scalar epilogue.
  */
 void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
                                   png_const_bytep prev_row)
 {
    const png_byte bpp = 3;
    vector unsigned char rp_vec;
    vector unsigned char part_vec;
    vsx_declare_common_vars(row_info,row,prev_row,bpp)
    PNG_UNUSED(pp)
    /* rp starts bpp bytes into the row, but unaligned_top + istop was sized
     * for the full rowbytes.  Drop bpp bytes from the end of the work list so
     * that nothing past row + rowbytes is read or written.  The vector loop
     * only runs when istop >= 16, in which case unaligned_top is untouched
     * and rp is still aligned when the loop starts.
     */
    if (istop >= bpp)
       istop -= bpp;
    else if (unaligned_top + istop >= bpp)
    {
       unaligned_top -= bpp - istop;
       istop = 0;
    }
    else
    {
       unaligned_top = 0;
       istop = 0;
    }
    /* Altivec operations require 16-byte aligned data
     * but input can be unaligned. So we calculate
     * unaligned part as usual.
     */
    for (i = 0; i < unaligned_top; i++)
    {
       *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
       rp++;
    }
    /* Using SIMD while we can */
    while( istop >= 16 )
    {
       /* The first three bytes of the vector depend on bytes stored before
        * rp, so they are resolved with scalar code before the vector load.
        */
       for(i=0;i < bpp ; i++)
       {
          *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
          rp++;
       }
       rp -= bpp;
       rp_vec = vec_ld(0,rp);
       /* Propagate each 3-byte group into the group to its right. */
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
       rp_vec = vec_add(rp_vec,part_vec);
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
       rp_vec = vec_add(rp_vec,part_vec);
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
       rp_vec = vec_add(rp_vec,part_vec);
       part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
       rp_vec = vec_add(rp_vec,part_vec);
       vec_st(rp_vec,0,rp);
       rp += 15;
       istop -= 16;
       /* Since 16 % bpp = 16 % 3 = 1, last element of array must
        * be proceeded manually
        */
       *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
       rp++;
    }
    if(istop > 0)
       for (i = 0; i < istop % 16; i++)
       {
          *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
          rp++;
       }
 }
 /* 'Average' filter for 4 bytes per pixel.  The first pixel of the row adds
  * half of the byte above it; every later byte adds the truncated average of
  * the byte 4 positions to its left and the byte above it, all mod 256.
  *
  * After the first pixel the work is split into a scalar prologue up to the
  * first 16-byte boundary of row, a vector loop handling 16 bytes per
  * iteration and a scalar epilogue.
  */
 void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
                                   png_const_bytep prev_row)
 {
    const png_byte bpp = 4;
    vector unsigned char rp_vec;
    vector unsigned char pp_vec;
    vector unsigned char pp_part_vec;
    vector unsigned char rp_part_vec;
    vector unsigned char avg_vec;
    vsx_declare_common_vars(row_info,row,prev_row,bpp)
    rp -= bpp;
    /* unaligned_top + istop was sized for the full rowbytes, but bpp of those
     * bytes are consumed by the first-pixel loop below.  Remove them from the
     * end of the work list; on short rows they may have to come out of
     * unaligned_top, which the plain "istop -= bpp" adjustment missed and so
     * ran past the end of both row and prev_row.
     */
    if (istop >= bpp)
       istop -= bpp;
    else if (unaligned_top + istop >= bpp)
    {
       unaligned_top -= bpp - istop;
       istop = 0;
    }
    else
    {
       unaligned_top = 0;
       istop = 0;
    }
    /* First pixel: there is no pixel to the left, only the one above. */
    for (i = 0; i < bpp; i++)
    {
       *rp = (png_byte)(((int)(*rp) +
          ((int)(*pp++) / 2 )) & 0xff);
       rp++;
    }
    /* Altivec operations require 16-byte aligned data
     * but input can be unaligned. So we calculate
     * unaligned part as usual.
     */
    for (i = 0; i < unaligned_top; i++)
    {
       *rp = (png_byte)(((int)(*rp) +
          (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
       rp++;
    }
    /* Using SIMD while we can */
    while( istop >= 16 )
    {
       /* The first pixel of the vector depends on bytes stored before rp,
        * so it is resolved with scalar code before the vector is loaded.
        */
       for(i=0;i < bpp ; i++)
       {
          *rp = (png_byte)(((int)(*rp) +
             (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
          rp++;
       }
       rp -= bpp;
       pp -= bpp;
       vec_ld_unaligned(pp_vec,pp);
       rp_vec = vec_ld(0,rp);
       /* For each following pixel: vec_avg rounds upwards, so subtract
        * ((left ^ above) & 1) to obtain the truncated average, then add it.
        */
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       vec_st(rp_vec,0,rp);
       rp += 16;
       pp += 16;
       istop -= 16;
    }
    if(istop  > 0)
       for (i = 0; i < istop % 16; i++)
       {
          *rp = (png_byte)(((int)(*rp) +
             (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
          rp++;
       }
 }
 /* 'Average' filter for 3 bytes per pixel.  The first pixel of the row adds
  * half of the byte above it; every later byte adds the truncated average of
  * the byte 3 positions to its left and the byte above it, all mod 256.
  *
  * After the first pixel the work is split into a scalar prologue up to the
  * first 16-byte boundary of row, a vector loop handling 16 bytes per
  * iteration and a scalar epilogue.
  */
 void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
                                   png_const_bytep prev_row)
 {
    const png_byte bpp = 3;
    vector unsigned char rp_vec;
    vector unsigned char pp_vec;
    vector unsigned char pp_part_vec;
    vector unsigned char rp_part_vec;
    vector unsigned char avg_vec;
    vsx_declare_common_vars(row_info,row,prev_row,bpp)
    rp -= bpp;
    /* unaligned_top + istop was sized for the full rowbytes, but bpp of those
     * bytes are consumed by the first-pixel loop below.  Remove them from the
     * end of the work list; on short rows they may have to come out of
     * unaligned_top, which the plain "istop -= bpp" adjustment missed and so
     * ran past the end of both row and prev_row.
     */
    if (istop >= bpp)
       istop -= bpp;
    else if (unaligned_top + istop >= bpp)
    {
       unaligned_top -= bpp - istop;
       istop = 0;
    }
    else
    {
       unaligned_top = 0;
       istop = 0;
    }
    /* First pixel: there is no pixel to the left, only the one above. */
    for (i = 0; i < bpp; i++)
    {
       *rp = (png_byte)(((int)(*rp) +
          ((int)(*pp++) / 2 )) & 0xff);
       rp++;
    }
    /* Altivec operations require 16-byte aligned data
     * but input can be unaligned. So we calculate
     * unaligned part as usual.
     */
    for (i = 0; i < unaligned_top; i++)
    {
       *rp = (png_byte)(((int)(*rp) +
          (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
       rp++;
    }
    /* Using SIMD while we can */
    while( istop >= 16 )
    {
       /* The first three bytes of the vector depend on bytes stored before
        * rp, so they are resolved with scalar code before the vector load.
        */
       for(i=0;i < bpp ; i++)
       {
          *rp = (png_byte)(((int)(*rp) +
             (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
          rp++;
       }
       rp -= bpp;
       pp -= bpp;
       vec_ld_unaligned(pp_vec,pp);
       rp_vec = vec_ld(0,rp);
       /* For each following 3-byte group: vec_avg rounds upwards, so subtract
        * ((left ^ above) & 1) to obtain the truncated average, then add it.
        */
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
       pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3);
       avg_vec = vec_avg(rp_part_vec,pp_part_vec);
       avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
       rp_vec = vec_add(rp_vec,avg_vec);
       vec_st(rp_vec,0,rp);
       rp += 15;
       pp += 15;
       istop -= 16;
       /* Since 16 % bpp = 16 % 3 = 1, last element of array must
        * be proceeded manually
        */
       *rp = (png_byte)(((int)(*rp) +
          (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
       rp++;
    }
    if(istop  > 0)
       for (i = 0; i < istop % 16; i++)
       {
          *rp = (png_byte)(((int)(*rp) +
             (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
          rp++;
       }
 }
 /* Bytewise c ? t : e. */
 #define if_then_else(c,t,e) vec_sel(e,t,c)
 /* Scalar Paeth step for one byte.  Reads the byte to the left (a, from
  * rp - bpp), the byte above (b, from pp) and the byte above-left (c, from
  * pp - bpp), picks whichever is closest to a + b - c (ties resolved in the
  * order a, b, c), adds it to *rp and advances both rp and pp by one.
  * pa, pb and pc are scratch variables supplied by the caller.  The macro also
  * uses a variable named 'p' that is NOT a macro parameter, so the caller must
  * have an int 'p' in scope.
  */
 #define vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) {\
      c = *(pp - bpp);\
      a = *(rp - bpp);\
      b = *pp++;\
      p = b - c;\
      pc = a - c;\
      pa = vsx_abs(p);\
      pb = vsx_abs(pc);\
      pc = vsx_abs(p + pc);\
      if (pb < pa) pa = pb, a = b;\
      if (pc < pa) a = c;\
      a += *rp;\
      *rp++ = (png_byte)a;\
      }
 /* Reverse the Paeth filter in place on a row with 4 bytes per pixel.
  *
  * row_info, row and prev_row are handed to vsx_declare_common_vars, which is
  * defined outside this excerpt.  NOTE(review): that macro presumably declares
  * and initialises i, rp, pp, istop and unaligned_top (rp starting bpp bytes
  * into 'row', istop being the number of bytes left for the vector loop) -
  * confirm against its definition.
  *
  * The row is handled in four phases: the first pixel (plain 'up'), a scalar
  * run of unaligned_top bytes, a loop consuming 16 bytes per iteration with
  * vector arithmetic, and a scalar tail for what is left.
  */
 void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
   png_const_bytep prev_row)
 {
   const png_byte bpp = 4;
   /* Scalar scratch values used by vsx_paeth_process ('p' is referenced by
    * the macro body even though it is not one of its parameters).
    */
   int a, b, c, pa, pb, pc, p;
   vector unsigned char rp_vec;
   vector unsigned char pp_vec;
   vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
   vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
   vsx_declare_common_vars(row_info,row,prev_row,bpp)
   /* Step back one pixel so the first pixel can be handled here, and take its
    * bytes off the count that remains for the loops below.
    */
   rp -= bpp;
   if(istop >= bpp)
      istop -= bpp;
   /* Process the first pixel in the row completely (this is the same as 'up'
    * because there is only one candidate predictor for the first row).
    */
   for(i = 0; i < bpp ; i++)
   {
      *rp = (png_byte)( *rp + *pp);
      rp++;
      pp++;
   }
   /* Scalar run of unaligned_top bytes ahead of the vector loop
    * (NOTE(review): presumably this brings rp to a 16-byte boundary for the
    * vec_ld/vec_st below - confirm against vsx_declare_common_vars).
    */
   for(i = 0; i < unaligned_top ; i++)
   {
      vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
   }
   while( istop >= 16)
   {
      /* The first pixel of the 16-byte block depends on the previous block,
       * so it is reconstructed with the scalar macro; the pointers are then
       * rewound so the vector load picks up the already-reconstructed pixel.
       */
      for(i = 0; i < bpp ; i++)
      {
         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
      }
      rp -= bpp;
      pp -= bpp;
      rp_vec = vec_ld(0,rp);
      vec_ld_unaligned(pp_vec,pp);
      /* Step 1 of 3: a comes from the current row, b and c from the previous
       * row.  pa = |b - c|, pb = |a - c|, pc = |(b - c) + (a - c)|; the
       * predictor is a if pa is the minimum, else b if pb is, else c.
       */
      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_4),1,4);
      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4);
      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
      pc_vec = vec_add(pa_vec,pb_vec);
      pa_vec = vec_abs(pa_vec);
      pb_vec = vec_abs(pb_vec);
      pc_vec = vec_abs(pc_vec);
      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
      nearest_vec =  if_then_else(
            vec_cmpeq(pa_vec,smallest_vec),
            a_vec,
            if_then_else(
              vec_cmpeq(pb_vec,smallest_vec),
              b_vec,
              c_vec
              )
            );
      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
      /* Step 2 of 3: same computation with the *2_4 permute tables; a is taken
       * from rp_vec as updated by step 1.
       */
      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_4),2,4);
      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4);
      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
      pc_vec = vec_add(pa_vec,pb_vec);
      pa_vec = vec_abs(pa_vec);
      pb_vec = vec_abs(pb_vec);
      pc_vec = vec_abs(pc_vec);
      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
      nearest_vec =  if_then_else(
            vec_cmpeq(pa_vec,smallest_vec),
            a_vec,
            if_then_else(
              vec_cmpeq(pb_vec,smallest_vec),
              b_vec,
              c_vec
              )
            );
      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
      /* Step 3 of 3: same computation with the *3_4 permute tables; a is taken
       * from rp_vec as updated by step 2.
       */
      a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
      b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_4),3,4);
      c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4);
      pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
      pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
      pc_vec = vec_add(pa_vec,pb_vec);
      pa_vec = vec_abs(pa_vec);
      pb_vec = vec_abs(pb_vec);
      pc_vec = vec_abs(pc_vec);
      smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
      nearest_vec =  if_then_else(
            vec_cmpeq(pa_vec,smallest_vec),
            a_vec,
            if_then_else(
              vec_cmpeq(pb_vec,smallest_vec),
              b_vec,
              c_vec
              )
            );
      rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
      /* Write the reconstructed 16 bytes back and move on to the next block. */
      vec_st(rp_vec,0,rp);
      rp += 16;
      pp += 16;
      istop -= 16;
   }
   /* Fewer than 16 bytes remain here, so istop % 16 equals istop; finish them
    * one byte at a time.
    */
   if(istop > 0)
      for (i = 0; i < istop % 16; i++)
      {
         vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
      }
 }
 /* Reverse the Paeth filter in place on a row with 3 bytes per pixel.
  *
  * row_info, row and prev_row are handed to vsx_declare_common_vars, which is
  * defined outside this excerpt.  NOTE(review): that macro presumably declares
  * and initialises i, rp, pp, istop and unaligned_top (rp starting bpp bytes
  * into 'row', istop being the number of bytes left for the vector loop) -
  * confirm against its definition.
  *
  * The row is handled in four phases: the first pixel (plain 'up'), a scalar
  * run of unaligned_top bytes, a loop consuming 16 bytes per iteration (mostly
  * with vector arithmetic), and a scalar tail for what is left.
  */
 void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
   png_const_bytep prev_row)
 {
  const png_byte bpp = 3;
  /* Scalar scratch values used by vsx_paeth_process ('p' is referenced by
   * the macro body even though it is not one of its parameters).
   */
  int a, b, c, pa, pb, pc, p;
  vector unsigned char rp_vec;
  vector unsigned char pp_vec;
  vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
  vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
  vsx_declare_common_vars(row_info,row,prev_row,bpp)
  /* Step back one pixel so the first pixel can be handled here, and take its
   * bytes off the count that remains for the loops below.
   */
  rp -= bpp;
  if(istop >= bpp)
     istop -= bpp;
  /* Process the first pixel in the row completely (this is the same as 'up'
   * because there is only one candidate predictor for the first row).
   */
  for(i = 0; i < bpp ; i++)
  {
     *rp = (png_byte)( *rp + *pp);
     rp++;
     pp++;
  }
  /* Scalar run of unaligned_top bytes ahead of the vector loop
   * (NOTE(review): presumably this brings rp to a 16-byte boundary for the
   * vec_ld/vec_st below - confirm against vsx_declare_common_vars).
   */
  for(i = 0; i < unaligned_top ; i++)
  {
     vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
  }
  while( istop >= 16)
  {
     /* The first pixel of the 16-byte block depends on the previous block,
      * so it is reconstructed with the scalar macro; the pointers are then
      * rewound so the vector load picks up the already-reconstructed pixel.
      */
     for(i = 0; i < bpp ; i++)
     {
        vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
     }
     rp -= bpp;
     pp -= bpp;
     rp_vec = vec_ld(0,rp);
     vec_ld_unaligned(pp_vec,pp);
     /* Step 1 of 4: a comes from the current row, b and c from the previous
      * row.  pa = |b - c|, pb = |a - c|, pc = |(b - c) + (a - c)|; the
      * predictor is a if pa is the minimum, else b if pb is, else c.
      */
     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_3),1,3);
     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3);
     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
     pc_vec = vec_add(pa_vec,pb_vec);
     pa_vec = vec_abs(pa_vec);
     pb_vec = vec_abs(pb_vec);
     pc_vec = vec_abs(pc_vec);
     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
     nearest_vec =  if_then_else(
           vec_cmpeq(pa_vec,smallest_vec),
           a_vec,
           if_then_else(
             vec_cmpeq(pb_vec,smallest_vec),
             b_vec,
             c_vec
             )
           );
     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
     /* Step 2 of 4: same computation with the *2_3 permute tables; a is taken
      * from rp_vec as updated by step 1.
      */
     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_3),2,3);
     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3);
     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
     pc_vec = vec_add(pa_vec,pb_vec);
     pa_vec = vec_abs(pa_vec);
     pb_vec = vec_abs(pb_vec);
     pc_vec = vec_abs(pc_vec);
     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
     nearest_vec =  if_then_else(
           vec_cmpeq(pa_vec,smallest_vec),
           a_vec,
           if_then_else(
             vec_cmpeq(pb_vec,smallest_vec),
             b_vec,
             c_vec
             )
           );
     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
     /* Step 3 of 4: same computation with the *3_3 permute tables; a is taken
      * from rp_vec as updated by step 2.
      */
     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_3),3,3);
     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3);
     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
     pc_vec = vec_add(pa_vec,pb_vec);
     pa_vec = vec_abs(pa_vec);
     pb_vec = vec_abs(pb_vec);
     pc_vec = vec_abs(pc_vec);
     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
     nearest_vec =  if_then_else(
           vec_cmpeq(pa_vec,smallest_vec),
           a_vec,
           if_then_else(
             vec_cmpeq(pb_vec,smallest_vec),
             b_vec,
             c_vec
             )
           );
     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
     /* Step 4 of 4: same computation with the *4_3 permute tables; a is taken
      * from rp_vec as updated by step 3.
      */
     a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
     b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED4_3),4,3);
     c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3);
     pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
     pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
     pc_vec = vec_add(pa_vec,pb_vec);
     pa_vec = vec_abs(pa_vec);
     pb_vec = vec_abs(pb_vec);
     pc_vec = vec_abs(pc_vec);
     smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
     nearest_vec =  if_then_else(
           vec_cmpeq(pa_vec,smallest_vec),
           a_vec,
           if_then_else(
             vec_cmpeq(pb_vec,smallest_vec),
             b_vec,
             c_vec
             )
           );
     rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
     /* Write the block back, then advance by 15 only: the 16th byte is
      * finished by the scalar call below, which advances rp and pp once more.
      */
     vec_st(rp_vec,0,rp);
     rp += 15;
     pp += 15;
     istop -= 16;
     /* Since 16 % bpp = 16 % 3 = 1, the last element of the array must
      * be processed manually
      */
     vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
  }
  /* Fewer than 16 bytes remain here, so istop % 16 equals istop; finish them
   * one byte at a time.
   */
  if(istop > 0)
     for (i = 0; i < istop % 16; i++)
     {
        vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp)
     }
 }
 #endif /* PNG_POWERPC_VSX_OPT > 0 */
 #endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */
 #endif /* READ */
--- a/powerpc/powerpc_init.c
+++ b/powerpc/powerpc_init.c
@@ -0,0 +1,122 @@
 /* powerpc_init.c - POWERPC optimised filter functions
 *
 *
 * This code is released under the libpng license.
 * For conditions of distribution and use, see the disclaimer
 * and license in png.h
 */
 /* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are
 * called.
 */
 #define _POSIX_SOURCE 1
 #include <stdio.h>
 #include "../pngpriv.h"
 #ifdef PNG_READ_SUPPORTED
 #if PNG_POWERPC_VSX_OPT > 0
 #ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */
 /* WARNING: it is strongly recommended that you do not build libpng with
 * run-time checks for CPU features if at all possible.  In the case of the PowerPC
 * VSX instructions there is no processor-specific way of detecting the
 * presence of the required support, therefore run-time detection is extremely
 * OS specific.
 *
  * You may set the macro PNG_POWERPC_VSX_FILE to the name of a file containing
  * a fragment of C source code which defines the png_have_vsx function.  There
  * are a number of implementations in contrib/powerpc-vsx, but the only one that
  * has partial support is contrib/powerpc-vsx/linux.c - a generic Linux
  * implementation which reads /proc/cpuinfo.
 */
 #ifndef PNG_POWERPC_VSX_FILE
 #  ifdef __linux__
 #     define  PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux_aux.c"
 #  endif
 #endif
 #ifdef PNG_POWERPC_VSX_FILE
 #include <signal.h> /* for sig_atomic_t */
 static int png_have_vsx(png_structp png_ptr);
 #include PNG_POWERPC_VSX_FILE
 #else  /* PNG_POWERPC_VSX_FILE */
 #  error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks"
 #endif /* PNG_POWERPC_VSX_FILE */
 #endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
 /* Install the VSX implementations of the row un-filtering routines into
  * pp->read_filter.
  *
  * pp  - the read structure whose filter table is updated (and whose options
  *       word is consulted when PNG_POWERPC_VSX_API_SUPPORTED is defined)
  * bpp - bytes per pixel; only 3 and 4 get VSX sub/avg/paeth routines, the
  *       'up' routine is installed for every value
  *
  * The function returns without touching the table when VSX is switched off
  * through the API or when the run-time probe reports that VSX is missing.
  */
 void
 png_init_filter_functions_vsx(png_structp pp, unsigned int bpp)
 {
    /* Two independent build options shape the code below:
     *   POWERPC_VSX_API   compiles in the switch on the png_set_option state;
     *   POWERPC_VSX_CHECK compiles in the call to png_have_vsx.
     * With both enabled the probe only runs while the option is still unset,
     * and its result then decides whether the VSX routines are installed.
     */
 #ifdef PNG_POWERPC_VSX_API_SUPPORTED
    switch ((pp->options >> PNG_POWERPC_VSX) & 3)
    {
       case PNG_OPTION_UNSET:
          /* Run the probe if it was compiled in.  Without it, control falls
           * straight through to 'default' below and the function returns.
           */
 #endif /* PNG_POWERPC_VSX_API_SUPPORTED */
 #ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED
          {
             /* -1: not probed yet, 0: VSX present, 1: VSX missing */
             static volatile sig_atomic_t vsx_missing = -1;

             if (vsx_missing < 0)
                vsx_missing = !png_have_vsx(pp);

             if (vsx_missing != 0)
                return;
          }
 #  ifdef PNG_POWERPC_VSX_API_SUPPORTED
          break;
 #  endif
 #endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */
 #ifdef PNG_POWERPC_VSX_API_SUPPORTED
       /* 'default' must stay directly after the UNSET case: the fall-through
        * described above relies on it.
        */
       default: /* OFF or INVALID */
          return;

       case PNG_OPTION_ON:
          /* Explicitly enabled by the application */
          break;
    }
 #endif /* PNG_POWERPC_VSX_API_SUPPORTED */

    /* IMPORTANT: every new internal function referenced here has to be
     * declared with PNG_INTERNAL_FUNCTION in ../pngpriv.h, otherwise the
     * 'prefix' option of configure breaks:
     *
     *    ./configure --with-libpng-prefix=foobar_
     *
     * To verify, run the command above, build, and inspect pngprefix.h: it
     * must hold a #define for each external function that was added.  (The
     * initialization function itself is covered automatically.)
     */
    pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx;

    switch (bpp)
    {
       case 3:
          pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
             png_read_filter_row_sub3_vsx;
          pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
             png_read_filter_row_avg3_vsx;
          pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
             png_read_filter_row_paeth3_vsx;
          break;

       case 4:
          pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
             png_read_filter_row_sub4_vsx;
          pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
             png_read_filter_row_avg4_vsx;
          pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
             png_read_filter_row_paeth4_vsx;
          break;

       default:
          /* No VSX sub/avg/paeth routines for other pixel sizes */
          break;
    }
 }
 #endif /* PNG_POWERPC_VSX_OPT > 0 */
 #endif /* READ */
--- a/scripts/pnglibconf.dfa
+++ b/scripts/pnglibconf.dfa
@@ -229,6 +229,33 @@ option ARM_NEON_API disabled requires ALIGNED_MEMORY enables SET_OPTION,
 option ARM_NEON_CHECK disabled requires ALIGNED_MEMORY,
   sets ARM_NEON_OPT 1
 # These options are specific to the PowerPC VSX hardware optimizations.
 #
 # POWERPC_VSX_OPT: unset: check at compile time (__PPC64__,__ALTIVEC__,__VSX__
 #                      must be defined by the compiler, typically as a result
 #                      of specifying
 #                      "-mvsx -maltivec" compiler flags)
 #                   0: disable (even if the CPU supports VSX.)
 #                   1: check at run time (via POWERPC_VSX_{API,CHECK})
 #                   2: switch on unconditionally (inadvisable - instead pass
 #                      -mvsx -maltivec to compiler options)
 #           When building libpng avoid using any setting other than '0'; '1' is
 #           set automatically when either 'API' or 'CHECK' are configured in,
 #           '2' should not be necessary as "-mvsx -maltivec" will achieve the same
 #           effect as well as applying VSX optimizations to the rest of the
 #           libpng code.
 # POWERPC_VSX_API:   (PNG_POWERPC_VSX == 1) allow the optimization to be switched on
 #                 with png_set_option
 # POWERPC_VSX_CHECK: (PNG_POWERPC_VSX == 1) compile a run-time check to see if VSX
 #                 extensions are supported. This is not supported on all OSes
 #                 (see contrib/powerpc/README)
 setting POWERPC_VSX_OPT
 option POWERPC_VSX_API disabled enables SET_OPTION,
  sets POWERPC_VSX_OPT 1
 option POWERPC_VSX_CHECK disabled,
  sets POWERPC_VSX_OPT 1
 # These settings configure the default compression level (0-9) and 'strategy';
 # strategy is as defined by the implementors of zlib. It describes the input
 # data and modifies the zlib parameters in an attempt to optimize the balance
--- a/scripts/pnglibconf.h.prebuilt
+++ b/scripts/pnglibconf.h.prebuilt
@@ -20,6 +20,8 @@
 #define PNG_ALIGNED_MEMORY_SUPPORTED
 /*#undef PNG_ARM_NEON_API_SUPPORTED*/
 /*#undef PNG_ARM_NEON_CHECK_SUPPORTED*/
 /*#undef PNG_POWERPC_VSX_API_SUPPORTED*/
 /*#undef PNG_POWERPC_VSX_CHECK_SUPPORTED*/
 #define PNG_BENIGN_ERRORS_SUPPORTED
 #define PNG_BENIGN_READ_ERRORS_SUPPORTED
 /*#undef PNG_BENIGN_WRITE_ERRORS_SUPPORTED*/