mirror of
				https://git.code.sf.net/p/libpng/code.git
				synced 2025-07-10 18:04:09 +02:00 
			
		
		
		
	[libpng16] Rearranged the ARM-NEON optimizations: Isolated the machine specific
code to the hardware subdirectory and added comments to pngrutil.c so that implementors of other optimizations know what to do.
This commit is contained in:
		
							parent
							
								
									5c2d76fdef
								
							
						
					
					
						commit
						f758d6c813
					
				
							
								
								
									
										7
									
								
								ANNOUNCE
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								ANNOUNCE
									
									
									
									
									
								
							@ -1,5 +1,5 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
Libpng 1.6.0beta33 - December 10, 2012
 | 
					Libpng 1.6.0beta33 - December 13, 2012
 | 
				
			||||||
 | 
					
 | 
				
			||||||
This is not intended to be a public release.  It will be replaced
 | 
					This is not intended to be a public release.  It will be replaced
 | 
				
			||||||
within a few weeks by a public version or by another test version.
 | 
					within a few weeks by a public version or by another test version.
 | 
				
			||||||
@ -552,12 +552,15 @@ Version 1.6.0beta32 [November 25, 2012]
 | 
				
			|||||||
  Fixed error checking in the simplified write API (Olaf van der Spek)
 | 
					  Fixed error checking in the simplified write API (Olaf van der Spek)
 | 
				
			||||||
  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
 | 
					  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Version 1.6.0beta33 [December 10, 2012]
 | 
					Version 1.6.0beta33 [December 13, 2012]
 | 
				
			||||||
  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
 | 
					  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
 | 
				
			||||||
    that causes the MALLOC_MAX limit not to work (John Bowler)
 | 
					    that causes the MALLOC_MAX limit not to work (John Bowler)
 | 
				
			||||||
  Change png_warning() to png_app_error() in pngwrite.c and comment the
 | 
					  Change png_warning() to png_app_error() in pngwrite.c and comment the
 | 
				
			||||||
    fall-through condition.
 | 
					    fall-through condition.
 | 
				
			||||||
  Change png_warning() to png_app_warning() in png_write_tRNS().
 | 
					  Change png_warning() to png_app_warning() in png_write_tRNS().
 | 
				
			||||||
 | 
					  Rearranged the ARM-NEON optimizations: Isolated the machine specific code
 | 
				
			||||||
 | 
					    to the hardware subdirectory and added comments to pngrutil.c so that
 | 
				
			||||||
 | 
					    implementors of other optimizations know what to do.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 | 
					Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 | 
				
			||||||
(subscription required; visit
 | 
					(subscription required; visit
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										5
									
								
								CHANGES
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								CHANGES
									
									
									
									
									
								
							@ -4304,12 +4304,15 @@ Version 1.6.0beta32 [November 25, 2012]
 | 
				
			|||||||
  Fixed error checking in the simplified write API (Olaf van der Spek)
 | 
					  Fixed error checking in the simplified write API (Olaf van der Spek)
 | 
				
			||||||
  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
 | 
					  Made png_user_version_check() ok to use with libpng version 1.10.x and later.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Version 1.6.0beta33 [December 10, 2012]
 | 
					Version 1.6.0beta33 [December 13, 2012]
 | 
				
			||||||
  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
 | 
					  Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
 | 
				
			||||||
    that causes the MALLOC_MAX limit not to work (John Bowler)
 | 
					    that causes the MALLOC_MAX limit not to work (John Bowler)
 | 
				
			||||||
  Change png_warning() to png_app_error() in pngwrite.c and comment the
 | 
					  Change png_warning() to png_app_error() in pngwrite.c and comment the
 | 
				
			||||||
    fall-through condition.
 | 
					    fall-through condition.
 | 
				
			||||||
  Change png_warning() to png_app_warning() in png_write_tRNS().
 | 
					  Change png_warning() to png_app_warning() in png_write_tRNS().
 | 
				
			||||||
 | 
					  Rearranged the ARM-NEON optimizations: Isolated the machine specific code
 | 
				
			||||||
 | 
					    to the hardware subdirectory and added comments to pngrutil.c so that
 | 
				
			||||||
 | 
					    implementors of other optimizations know what to do.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 | 
					Send comments/corrections/commendations to png-mng-implement at lists.sf.net
 | 
				
			||||||
(subscription required; visit
 | 
					(subscription required; visit
 | 
				
			||||||
 | 
				
			|||||||
@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
 | 
				
			|||||||
	png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
 | 
						png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if PNG_ARM_NEON
 | 
					if PNG_ARM_NEON
 | 
				
			||||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S
 | 
					libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
 | 
				
			||||||
 | 
						arm/filter_neon.S
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
 | 
					nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										74
									
								
								arm/arm_init.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								arm/arm_init.c
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,74 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					/* arm_init.c - NEON optimised filter functions
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * Copyright (c) 2011 Glenn Randers-Pehrson
 | 
				
			||||||
 | 
					 * Written by Mans Rullgard, 2011.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This code is released under the libpng license.
 | 
				
			||||||
 | 
					 * For conditions of distribution and use, see the disclaimer
 | 
				
			||||||
 | 
					 * and license in png.h
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					#include "../pngpriv.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* __arm__ is defined by GCC; MSVC defines _M_ARM to the ARM version number */
 | 
				
			||||||
 | 
					#if defined __linux__ && defined __arm__
 | 
				
			||||||
 | 
					#include <stdio.h>
 | 
				
			||||||
 | 
					#include <elf.h>
 | 
				
			||||||
 | 
					#include <asm/hwcap.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int png_have_hwcap(unsigned cap)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					   FILE *f = fopen("/proc/self/auxv", "r");
 | 
				
			||||||
 | 
					   Elf32_auxv_t aux;
 | 
				
			||||||
 | 
					   int have_cap = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   if (!f)
 | 
				
			||||||
 | 
					      return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   while (fread(&aux, sizeof(aux), 1, f) > 0)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					      if (aux.a_type == AT_HWCAP &&
 | 
				
			||||||
 | 
					          aux.a_un.a_val & cap)
 | 
				
			||||||
 | 
					      {
 | 
				
			||||||
 | 
					         have_cap = 1;
 | 
				
			||||||
 | 
					         break;
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   fclose(f);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   return have_cap;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#endif /* __linux__ && __arm__ */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					#ifdef __arm__
 | 
				
			||||||
 | 
					#ifdef __linux__
 | 
				
			||||||
 | 
					   if (!png_have_hwcap(HWCAP_NEON))
 | 
				
			||||||
 | 
					      return;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   if (bpp == 3)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
 | 
				
			||||||
 | 
					      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
 | 
				
			||||||
 | 
					      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
				
			||||||
 | 
					         png_read_filter_row_paeth3_neon;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   else if (bpp == 4)
 | 
				
			||||||
 | 
					   {
 | 
				
			||||||
 | 
					      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
 | 
				
			||||||
 | 
					      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
 | 
				
			||||||
 | 
					      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
				
			||||||
 | 
					          png_read_filter_row_paeth4_neon;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
					   PNG_UNUSED(pp)
 | 
				
			||||||
 | 
					   PNG_UNUSED(bpp)
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
@ -9,6 +9,7 @@
 | 
				
			|||||||
 * and license in png.h
 | 
					 * and license in png.h
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef __arm__
 | 
				
			||||||
#if defined(__linux__) && defined(__ELF__)
 | 
					#if defined(__linux__) && defined(__ELF__)
 | 
				
			||||||
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
 | 
					.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
@ -223,3 +224,4 @@ func    png_read_filter_row_paeth3_neon, export=1
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        pop             {r4,pc}
 | 
					        pop             {r4,pc}
 | 
				
			||||||
endfunc
 | 
					endfunc
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
				
			|||||||
@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
 | 
				
			|||||||
AC_ARG_ENABLE([arm-neon],
 | 
					AC_ARG_ENABLE([arm-neon],
 | 
				
			||||||
   AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
 | 
					   AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
 | 
				
			||||||
   [if test "${enableval}" = "yes"; then
 | 
					   [if test "${enableval}" = "yes"; then
 | 
				
			||||||
      AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations])
 | 
					      AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
 | 
				
			||||||
      AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers])
 | 
					                [png_init_filter_functions_neon],
 | 
				
			||||||
 | 
					                [ARM NEON filter initialization function])
 | 
				
			||||||
 | 
					      AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
 | 
				
			||||||
 | 
					                [Align row buffers])
 | 
				
			||||||
    fi])
 | 
					    fi])
 | 
				
			||||||
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
 | 
					AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -1862,6 +1862,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#endif /* SIMPLIFIED READ/WRITE */
 | 
					#endif /* SIMPLIFIED READ/WRITE */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef PNG_FILTER_OPTIMIZATIONS
 | 
				
			||||||
 | 
					PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
 | 
				
			||||||
 | 
					   /* This is the initialization function for hardware specific optimizations,
 | 
				
			||||||
 | 
					    * one implementation (for ARM NEON machines) is contained in
 | 
				
			||||||
 | 
					    * arm/arm_init.c.  It need not be defined - the generic code will be used
 | 
				
			||||||
 | 
					    * if not.
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Maintainer: Put new private prototypes here ^ */
 | 
					/* Maintainer: Put new private prototypes here ^ */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "pngdebug.h"
 | 
					#include "pngdebug.h"
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										72
									
								
								pngrutil.c
									
									
									
									
									
								
							
							
						
						
									
										72
									
								
								pngrutil.c
									
									
									
									
									
								
							@ -3863,66 +3863,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
 | 
				
			|||||||
   }
 | 
					   }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef PNG_ARM_NEON
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#ifdef __linux__
 | 
					 | 
				
			||||||
#include <stdio.h>
 | 
					 | 
				
			||||||
#include <elf.h>
 | 
					 | 
				
			||||||
#include <asm/hwcap.h>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static int png_have_hwcap(unsigned cap)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
   FILE *f = fopen("/proc/self/auxv", "r");
 | 
					 | 
				
			||||||
   Elf32_auxv_t aux;
 | 
					 | 
				
			||||||
   int have_cap = 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   if (!f)
 | 
					 | 
				
			||||||
      return 0;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   while (fread(&aux, (sizeof aux), 1, f) > 0)
 | 
					 | 
				
			||||||
   {
 | 
					 | 
				
			||||||
      if (aux.a_type == AT_HWCAP &&
 | 
					 | 
				
			||||||
          aux.a_un.a_val & cap)
 | 
					 | 
				
			||||||
      {
 | 
					 | 
				
			||||||
         have_cap = 1;
 | 
					 | 
				
			||||||
         break;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
   }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   fclose(f);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   return have_cap;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif /* __linux__ */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void
 | 
					 | 
				
			||||||
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
#ifdef __linux__
 | 
					 | 
				
			||||||
   if (!png_have_hwcap(HWCAP_NEON))
 | 
					 | 
				
			||||||
      return;
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   if (bpp == 3)
 | 
					 | 
				
			||||||
   {
 | 
					 | 
				
			||||||
      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
 | 
					 | 
				
			||||||
      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
 | 
					 | 
				
			||||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
					 | 
				
			||||||
         png_read_filter_row_paeth3_neon;
 | 
					 | 
				
			||||||
   }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   else if (bpp == 4)
 | 
					 | 
				
			||||||
   {
 | 
					 | 
				
			||||||
      pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
 | 
					 | 
				
			||||||
      pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
 | 
					 | 
				
			||||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
					 | 
				
			||||||
          png_read_filter_row_paeth4_neon;
 | 
					 | 
				
			||||||
   }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
#endif /* PNG_ARM_NEON */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
png_init_filter_functions(png_structrp pp)
 | 
					png_init_filter_functions(png_structrp pp)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@ -3938,8 +3878,16 @@ png_init_filter_functions(png_structrp pp)
 | 
				
			|||||||
      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
					      pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
 | 
				
			||||||
         png_read_filter_row_paeth_multibyte_pixel;
 | 
					         png_read_filter_row_paeth_multibyte_pixel;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifdef PNG_ARM_NEON
 | 
					#ifdef PNG_FILTER_OPTIMIZATIONS
 | 
				
			||||||
   png_init_filter_functions_neon(pp, bpp);
 | 
					   /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
 | 
				
			||||||
 | 
					    * call to install hardware optimizations for the above functions; simply
 | 
				
			||||||
 | 
					    * replace the appropriate elements of the pp->read_filter[] array with a hardware
 | 
				
			||||||
 | 
					    * specific (or, for that matter, generic) optimization.
 | 
				
			||||||
 | 
					    *
 | 
				
			||||||
 | 
					    * To see an example of this examine what configure.ac does when
 | 
				
			||||||
 | 
					    * --enable-arm-neon is specified on the command line.
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					   PNG_FILTER_OPTIMIZATIONS(pp, bpp);
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user