mirror of
				https://git.code.sf.net/p/libpng/code.git
				synced 2025-07-10 18:04:09 +02:00 
			
		
		
		
	[libpng16] Rearranged the ARM-NEON optimizations: Isolated the machine specific
code to the hardware subdirectory and added comments to pngrutil.c so that implementors of other optimizations know what to do.
This commit is contained in:
		
							parent
							
								
									5c2d76fdef
								
							
						
					
					
						commit
						f758d6c813
					
				
							
								
								
									
										7
									
								
								ANNOUNCE
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								ANNOUNCE
									
									
									
									
									
								
							@ -1,5 +1,5 @@
 | 
			
		||||
 | 
			
		||||
Libpng 1.6.0beta33 - December 10, 2012
 | 
			
		||||
Libpng 1.6.0beta33 - December 13, 2012
 | 
			
		||||
 | 
			
		||||
This is not intended to be a public release.  It will be replaced
 | 
			
		||||
within a few weeks by a public version or by another test version.
 | 
			
		||||
@ -552,12 +552,15 @@ Version 1.6.0beta32 [November 25, 2012]
 | 
			
		||||
  Fixed error checking in the simplified write API (Olaf van der Spek)
 | 
			
		||||
  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
 | 
			
		||||
 | 
			
		||||
Version 1.6.0beta33 [December 10, 2012]
 | 
			
		||||
Version 1.6.0beta33 [December 13, 2012]
 | 
			
		||||
  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
 | 
			
		||||
    that causes the MALLOC_MAX limit not to work (John Bowler)
 | 
			
		||||
  Change png_warning() to png_app_error() in pngwrite.c and comment the
 | 
			
		||||
    fall-through condition.
 | 
			
		||||
  Change png_warning() to png_app_warning() in png_write_tRNS().
 | 
			
		||||
  Rearranged the ARM-NEON optimizations: Isolated the machine specific code
 | 
			
		||||
    to the hardware subdirectory and added comments to pngrutil.c so that
 | 
			
		||||
    implementors of other optimizations know what to do.
 | 
			
		||||
 | 
			
		||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 | 
			
		||||
(subscription required; visit
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										5
									
								
								CHANGES
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								CHANGES
									
									
									
									
									
								
							@ -4304,12 +4304,15 @@ Version 1.6.0beta32 [November 25, 2012]
 | 
			
		||||
  Fixed error checking in the simplified write API (Olaf van der Spek)
 | 
			
		||||
  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
 | 
			
		||||
 | 
			
		||||
Version 1.6.0beta33 [December 10, 2012]
 | 
			
		||||
Version 1.6.0beta33 [December 13, 2012]
 | 
			
		||||
  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
 | 
			
		||||
    that causes the MALLOC_MAX limit not to work (John Bowler)
 | 
			
		||||
  Change png_warning() to png_app_error() in pngwrite.c and comment the
 | 
			
		||||
    fall-through condition.
 | 
			
		||||
  Change png_warning() to png_app_warning() in png_write_tRNS().
 | 
			
		||||
  Rearranged the ARM-NEON optimizations: Isolated the machine specific code
 | 
			
		||||
    to the hardware subdirectory and added comments to pngrutil.c so that
 | 
			
		||||
    implementors of other optimizations know what to do.
 | 
			
		||||
 | 
			
		||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 | 
			
		||||
(subscription required; visit
 | 
			
		||||
 | 
			
		||||
@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
 | 
			
		||||
	png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
 | 
			
		||||
 | 
			
		||||
if PNG_ARM_NEON
 | 
			
		||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S
 | 
			
		||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
 | 
			
		||||
	arm/filter_neon.S
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										74
									
								
								arm/arm_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								arm/arm_init.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,74 @@
 | 
			
		||||
 | 
			
		||||
/* arm_init.c - NEON optimised filter functions
 | 
			
		||||
 *
 | 
			
		||||
 * Copyright (c) 2011 Glenn Randers-Pehrson
 | 
			
		||||
 * Written by Mans Rullgard, 2011.
 | 
			
		||||
 *
 | 
			
		||||
 * This code is released under the libpng license.
 | 
			
		||||
 * For conditions of distribution and use, see the disclaimer
 | 
			
		||||
 * and license in png.h
 | 
			
		||||
 */
 | 
			
		||||
#include "../pngpriv.h"
 | 
			
		||||
 | 
			
		||||
/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */
 | 
			
		||||
#if defined __linux__ && defined __arm__
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <elf.h>
 | 
			
		||||
#include <asm/hwcap.h>
 | 
			
		||||
 | 
			
		||||
static int png_have_hwcap(unsigned cap)
 | 
			
		||||
{
 | 
			
		||||
   FILE *f = fopen("/proc/self/auxv", "r");
 | 
			
		||||
   Elf32_auxv_t aux;
 | 
			
		||||
   int have_cap = 0;
 | 
			
		||||
 | 
			
		||||
   if (!f)
 | 
			
		||||
      return 0;
 | 
			
		||||
 | 
			
		||||
   while (fread(&aux, sizeof(aux), 1, f) > 0)
 | 
			
		||||
   {
 | 
			
		||||
      if (aux.a_type == AT_HWCAP &&
 | 
			
		||||
          aux.a_un.a_val & cap)
 | 
			
		||||
      {
 | 
			
		||||
         have_cap = 1;
 | 
			
		||||
         break;
 | 
			
		||||
      }
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   fclose(f);
 | 
			
		||||
 | 
			
		||||
   return have_cap;
 | 
			
		||||
}
 | 
			
		||||
#endif /* __linux__ && __arm__ */
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __arm__
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
   if (!png_have_hwcap(HWCAP_NEON))
 | 
			
		||||
      return;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
 | 
			
		||||
 | 
			
		||||
   if (bpp == 3)
 | 
			
		||||
   {
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
			
		||||
         png_read_filter_row_paeth3_neon;
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   else if (bpp == 4)
 | 
			
		||||
   {
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
			
		||||
          png_read_filter_row_paeth4_neon;
 | 
			
		||||
   }
 | 
			
		||||
#else
 | 
			
		||||
   PNG_UNUSED(pp)
 | 
			
		||||
   PNG_UNUSED(bpp)
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
@ -9,6 +9,7 @@
 | 
			
		||||
 * and license in png.h
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#ifdef __arm__
 | 
			
		||||
#if defined(__linux__) && defined(__ELF__)
 | 
			
		||||
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
 | 
			
		||||
#endif
 | 
			
		||||
@ -223,3 +224,4 @@ func    png_read_filter_row_paeth3_neon, export=1
 | 
			
		||||
 | 
			
		||||
        pop             {r4,pc}
 | 
			
		||||
endfunc
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
 | 
			
		||||
AC_ARG_ENABLE([arm-neon],
 | 
			
		||||
   AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
 | 
			
		||||
   [if test "${enableval}" = "yes"; then
 | 
			
		||||
      AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations])
 | 
			
		||||
      AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers])
 | 
			
		||||
      AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
 | 
			
		||||
                [png_init_filter_functions_neon],
 | 
			
		||||
                [ARM NEON filter initialization function])
 | 
			
		||||
      AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
 | 
			
		||||
                [Align row buffers])
 | 
			
		||||
    fi])
 | 
			
		||||
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1862,6 +1862,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
 | 
			
		||||
 | 
			
		||||
#endif /* SIMPLIFIED READ/WRITE */
 | 
			
		||||
 | 
			
		||||
#ifdef PNG_FILTER_OPTIMIZATIONS
 | 
			
		||||
PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
 | 
			
		||||
   /* This is the initialization function for hardware specific optimizations,
 | 
			
		||||
    * one implementation (for ARM NEON machines) is contained in
 | 
			
		||||
    * arm/arm_init.c.  It need not be defined - the generic code will be used
 | 
			
		||||
    * if not.
 | 
			
		||||
    */
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* Maintainer: Put new private prototypes here ^ */
 | 
			
		||||
 | 
			
		||||
#include "pngdebug.h"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										72
									
								
								pngrutil.c
									
									
									
									
									
								
							
							
						
						
									
										72
									
								
								pngrutil.c
									
									
									
									
									
								
							@ -3863,66 +3863,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
 | 
			
		||||
   }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifdef PNG_ARM_NEON
 | 
			
		||||
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
#include <stdio.h>
 | 
			
		||||
#include <elf.h>
 | 
			
		||||
#include <asm/hwcap.h>
 | 
			
		||||
 | 
			
		||||
static int png_have_hwcap(unsigned cap)
 | 
			
		||||
{
 | 
			
		||||
   FILE *f = fopen("/proc/self/auxv", "r");
 | 
			
		||||
   Elf32_auxv_t aux;
 | 
			
		||||
   int have_cap = 0;
 | 
			
		||||
 | 
			
		||||
   if (!f)
 | 
			
		||||
      return 0;
 | 
			
		||||
 | 
			
		||||
   while (fread(&aux, (sizeof aux), 1, f) > 0)
 | 
			
		||||
   {
 | 
			
		||||
      if (aux.a_type == AT_HWCAP &&
 | 
			
		||||
          aux.a_un.a_val & cap)
 | 
			
		||||
      {
 | 
			
		||||
         have_cap = 1;
 | 
			
		||||
         break;
 | 
			
		||||
      }
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   fclose(f);
 | 
			
		||||
 | 
			
		||||
   return have_cap;
 | 
			
		||||
}
 | 
			
		||||
#endif /* __linux__ */
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
 | 
			
		||||
{
 | 
			
		||||
#ifdef __linux__
 | 
			
		||||
   if (!png_have_hwcap(HWCAP_NEON))
 | 
			
		||||
      return;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
 | 
			
		||||
 | 
			
		||||
   if (bpp == 3)
 | 
			
		||||
   {
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
			
		||||
         png_read_filter_row_paeth3_neon;
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   else if (bpp == 4)
 | 
			
		||||
   {
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
			
		||||
          png_read_filter_row_paeth4_neon;
 | 
			
		||||
   }
 | 
			
		||||
}
 | 
			
		||||
#endif /* PNG_ARM_NEON */
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
png_init_filter_functions(png_structrp pp)
 | 
			
		||||
{
 | 
			
		||||
@ -3938,8 +3878,16 @@ png_init_filter_functions(png_structrp pp)
 | 
			
		||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
			
		||||
         png_read_filter_row_paeth_multibyte_pixel;
 | 
			
		||||
 | 
			
		||||
#ifdef PNG_ARM_NEON
 | 
			
		||||
   png_init_filter_functions_neon(pp, bpp);
 | 
			
		||||
#ifdef PNG_FILTER_OPTIMIZATIONS
 | 
			
		||||
   /* To use this, define PNG_FILTER_OPTIMIZATIONS as the name of a function to
 | 
			
		||||
    * call to install hardware optimizations for the above functions; simply
 | 
			
		||||
    * replace any elements of the pp->read_filter[] array with a hardware
 | 
			
		||||
    * specific (or, for that matter, generic) optimization.
 | 
			
		||||
    *
 | 
			
		||||
    * To see an example of this examine what configure.ac does when
 | 
			
		||||
    * --enable-arm-neon is specified on the command line.
 | 
			
		||||
    */
 | 
			
		||||
   PNG_FILTER_OPTIMIZATIONS(pp, bpp);
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user