[libpng17] Rearranged ARM-NEON optimizations to isolate the machine specific

code to the hardware subdirectory, and add comments to pngrutil.c so that
implementors of other optimizations will know what to do.
This commit is contained in:
John Bowler 2012-12-14 23:12:16 -06:00 committed by Glenn Randers-Pehrson
parent eac85878bf
commit 0f2a5bac64
8 changed files with 110 additions and 67 deletions

View File

@ -53,8 +53,11 @@ Version 1.7.0alpha01 [December 15, 2012]
in png.h have been made more clear. Minor fixes to in png.h have been made more clear. Minor fixes to
contrib/libtests/timepng.c and some of the png_*_tRNS logic, including contrib/libtests/timepng.c and some of the png_*_tRNS logic, including
more error detection in png_set_tRNS. more error detection in png_set_tRNS.
Clean up USER_LIMITS feature, removing members from png_struct when not Cleaned up USER_LIMITS feature, removing members from png_struct when not
required. required.
Rearranged ARM-NEON optimizations to isolate the machine specific code to
the hardware subdirectory, and add comments to pngrutil.c so that
implementors of other optimizations will know what to do.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit (subscription required; visit

View File

@ -4338,8 +4338,11 @@ Version 1.7.0alpha01 [December 15, 2012]
in png.h have been made more clear. Minor fixes to in png.h have been made more clear. Minor fixes to
contrib/libtests/timepng.c and some of the png_*_tRNS logic, including contrib/libtests/timepng.c and some of the png_*_tRNS logic, including
more error detection in png_set_tRNS. more error detection in png_set_tRNS.
Clean up USER_LIMITS feature, removing members from png_struct when not Cleaned up USER_LIMITS feature, removing members from png_struct when not
required. required.
Rearranged ARM-NEON optimizations to isolate the machine specific code to
the hardware subdirectory, and add comments to pngrutil.c so that
implementors of other optimizations will know what to do.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit (subscription required; visit

View File

@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
if PNG_ARM_NEON if PNG_ARM_NEON
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
arm/filter_neon.S
endif endif
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h

74
arm/arm_init.c Normal file
View File

@ -0,0 +1,74 @@
/* arm_init.c - initialization of the NEON optimised filter functions
*
* Copyright (c) 2011 Glenn Randers-Pehrson
* Written by Mans Rullgard, 2011.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */
#if defined __linux__ && defined __arm__
#include <stdio.h>
#include <elf.h>
#include <asm/hwcap.h>
/* Check the AT_HWCAP entry of /proc/self/auxv for the capability bit(s)
 * given in 'cap'.
 *
 * Returns 1 if an AT_HWCAP record is found whose value has at least one of
 * the bits in 'cap' set.  Returns 0 otherwise, including when the file cannot
 * be opened.
 */
static int png_have_hwcap(unsigned cap)
{
   Elf32_auxv_t entry;
   int found = 0;
   FILE *auxv = fopen("/proc/self/auxv", "r");

   if (auxv == NULL)
      return 0;

   /* Read one record at a time; stop on a match or when no complete record
    * could be read.
    */
   while (!found && fread(&entry, sizeof entry, 1, auxv) == 1)
      found = (entry.a_type == AT_HWCAP && (entry.a_un.a_val & cap) != 0);

   fclose(auxv);
   return found;
}
#endif /* __linux__ && __arm__ */
/* Install the NEON row-filter implementations into pp->read_filter[].
 *
 * On ARM builds the 'up' filter is always replaced; the 'sub', 'avg' and
 * 'paeth' filters are replaced only when bpp is 3 or 4 (bpp is presumably the
 * number of bytes per pixel - confirm against the caller).  On Linux nothing
 * is changed unless png_have_hwcap(HWCAP_NEON) reports the capability.
 *
 * On non-ARM builds the function does nothing.
 */
void
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
{
#ifdef __arm__
#ifdef __linux__
   /* Leave the generic filters in place when NEON is not reported. */
   if (!png_have_hwcap(HWCAP_NEON))
      return;
#endif

   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;

   switch (bpp)
   {
      case 3:
         pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
            png_read_filter_row_sub3_neon;
         pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
            png_read_filter_row_avg3_neon;
         pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
            png_read_filter_row_paeth3_neon;
         break;

      case 4:
         pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
            png_read_filter_row_sub4_neon;
         pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
            png_read_filter_row_avg4_neon;
         pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
            png_read_filter_row_paeth4_neon;
         break;

      default:
         /* Only the 'up' filter has a NEON version for other sizes. */
         break;
   }
#else
   PNG_UNUSED(pp)
   PNG_UNUSED(bpp)
#endif
}

View File

@ -9,6 +9,7 @@
* and license in png.h * and license in png.h
*/ */
#ifdef __arm__
#if defined(__linux__) && defined(__ELF__) #if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ .section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
#endif #endif
@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1
pop {r4,pc} pop {r4,pc}
endfunc endfunc
#endif

View File

@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
AC_ARG_ENABLE([arm-neon], AC_ARG_ENABLE([arm-neon],
AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]), AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
[if test "${enableval}" = "yes"; then [if test "${enableval}" = "yes"; then
AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations]) AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers]) [png_init_filter_functions_neon],
[ARM NEON filter initialization function])
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
[Align row buffers])
fi]) fi])
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes]) AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])

View File

@ -1872,6 +1872,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
#endif /* SIMPLIFIED READ/WRITE */ #endif /* SIMPLIFIED READ/WRITE */
#ifdef PNG_FILTER_OPTIMIZATIONS
PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
/* This is the initialization function for hardware specific optimizations;
* one implementation (for ARM NEON machines) is contained in
* arm/arm_init.c. It need not be defined - the generic code will be used
* if not.
*/
#endif
/* Maintainer: Put new private prototypes here ^ */ /* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h" #include "pngdebug.h"

View File

@ -3866,66 +3866,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
} }
} }
#ifdef PNG_ARM_NEON
#ifdef __linux__
#include <stdio.h>
#include <elf.h>
#include <asm/hwcap.h>
/* Look up the AT_HWCAP record in /proc/self/auxv and test it against 'cap'.
 *
 * Returns 1 when an AT_HWCAP record with any bit of 'cap' set is read, and 0
 * if no such record is read or the file cannot be opened.
 */
static int png_have_hwcap(unsigned cap)
{
   FILE *fp;
   Elf32_auxv_t record;
   int result = 0;

   fp = fopen("/proc/self/auxv", "r");
   if (fp == NULL)
      return result;

   for (;;)
   {
      /* No complete record read: end of data (or error), give up. */
      if (fread(&record, (sizeof record), 1, fp) == 0)
         break;

      if (record.a_type != AT_HWCAP)
         continue;

      if ((record.a_un.a_val & cap) != 0)
      {
         result = 1;
         break;
      }
   }

   fclose(fp);
   return result;
}
#endif /* __linux__ */
/* Replace entries of pp->read_filter[] with the NEON implementations.
 *
 * The 'up' filter is always replaced.  The 'sub', 'avg' and 'paeth' filters
 * are replaced only for bpp values of 3 and 4; any other bpp leaves them as
 * they were.  On Linux the whole function is a no-op unless
 * png_have_hwcap(HWCAP_NEON) is non-zero.
 */
static void
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
{
#ifdef __linux__
   /* NEON not reported by the system: keep whatever is already installed. */
   if (png_have_hwcap(HWCAP_NEON) == 0)
      return;
#endif

   pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;

   switch (bpp)
   {
      case 3:
         pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
            png_read_filter_row_sub3_neon;
         pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
            png_read_filter_row_avg3_neon;
         pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
            png_read_filter_row_paeth3_neon;
         break;

      case 4:
         pp->read_filter[PNG_FILTER_VALUE_SUB-1] =
            png_read_filter_row_sub4_neon;
         pp->read_filter[PNG_FILTER_VALUE_AVG-1] =
            png_read_filter_row_avg4_neon;
         pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
            png_read_filter_row_paeth4_neon;
         break;

      default:
         break;
   }
}
#endif /* PNG_ARM_NEON */
static void static void
png_init_filter_functions(png_structrp pp) png_init_filter_functions(png_structrp pp)
{ {
@ -3941,8 +3881,16 @@ png_init_filter_functions(png_structrp pp)
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
png_read_filter_row_paeth_multibyte_pixel; png_read_filter_row_paeth_multibyte_pixel;
#ifdef PNG_ARM_NEON #ifdef PNG_FILTER_OPTIMIZATIONS
png_init_filter_functions_neon(pp, bpp); /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
* call to install hardware optimizations for the above functions; simply
* replace whatever elements of the pp->read_filter[] array with a hardware
* specific (or, for that matter, generic) optimization.
*
* To see an example of this examine what configure.ac does when
* --enable-arm-neon is specified on the command line.
*/
PNG_FILTER_OPTIMIZATIONS(pp, bpp);
#endif #endif
} }