From 0f2a5bac645016cb5d2f5c3b9816767ee45d6143 Mon Sep 17 00:00:00 2001 From: John Bowler Date: Fri, 14 Dec 2012 23:12:16 -0600 Subject: [PATCH] [libpng17] Rearranged ARM-NEON optimizations to isolate the machine specific code to the hardware subdirectory, and add comments to pngrutil.c so that implementors of other optimizations will know what to do. --- ANNOUNCE | 5 +++- CHANGES | 5 +++- Makefile.am | 3 +- arm/arm_init.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ arm/filter_neon.S | 2 ++ configure.ac | 7 +++-- pngpriv.h | 9 ++++++ pngrutil.c | 72 +++++++-------------------------------------- 8 files changed, 110 insertions(+), 67 deletions(-) create mode 100644 arm/arm_init.c diff --git a/ANNOUNCE b/ANNOUNCE index cb0d9f316..556d18ed4 100644 --- a/ANNOUNCE +++ b/ANNOUNCE @@ -53,8 +53,11 @@ Version 1.7.0alpha01 [December 15, 2012] in png.h have been made more clear. Minor fixes to contrib/libtests/timepng.c and some of the png_*_tRNS logic, including more error detection in png_set_tRNS. - Clean up USER_LIMITS feature, removing members from png_struct when not + Cleaned up USER_LIMITS feature, removing members from png_struct when not required. + Rearranged ARM-NEON optimizations to isolate the machine specific code to + the hardware subdirectory, and add comments to pngrutil.c so that + implementors of other optimizations will know what to do. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/CHANGES b/CHANGES index 4c8042fa3..9dc61daba 100644 --- a/CHANGES +++ b/CHANGES @@ -4338,8 +4338,11 @@ Version 1.7.0alpha01 [December 15, 2012] in png.h have been made more clear. Minor fixes to contrib/libtests/timepng.c and some of the png_*_tRNS logic, including more error detection in png_set_tRNS. - Clean up USER_LIMITS feature, removing members from png_struct when not + Cleaned up USER_LIMITS feature, removing members from png_struct when not required. 
+ Rearranged ARM-NEON optimizations to isolate the machine specific code to + the hardware subdirectory, and add comments to pngrutil.c so that + implementors of other optimizations will know what to do. Send comments/corrections/commendations to png-mng-implement at lists.sf.net (subscription required; visit diff --git a/Makefile.am b/Makefile.am index dfa5da82e..98c01ae66 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\ png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa if PNG_ARM_NEON -libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S +libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\ + arm/filter_neon.S endif nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h diff --git a/arm/arm_init.c b/arm/arm_init.c new file mode 100644 index 000000000..6b0a925f2 --- /dev/null +++ b/arm/arm_init.c @@ -0,0 +1,74 @@ + +/* arm_init.c - NEON optimised filter functions + * + * Copyright (c) 2011 Glenn Randers-Pehrson + * Written by Mans Rullgard, 2011. + * + * This code is released under the libpng license. 
+ * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ +#include "../pngpriv.h" + +/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */ +#if defined __linux__ && defined __arm__ +#include <stdio.h> +#include <elf.h> +#include <asm/hwcap.h> + +static int png_have_hwcap(unsigned cap) +{ + FILE *f = fopen("/proc/self/auxv", "r"); + Elf32_auxv_t aux; + int have_cap = 0; + + if (!f) + return 0; + + while (fread(&aux, sizeof(aux), 1, f) > 0) + { + if (aux.a_type == AT_HWCAP && + aux.a_un.a_val & cap) + { + have_cap = 1; + break; + } + } + + fclose(f); + + return have_cap; +} +#endif /* __linux__ && __arm__ */ + +void +png_init_filter_functions_neon(png_structp pp, unsigned int bpp) +{ +#ifdef __arm__ +#ifdef __linux__ + if (!png_have_hwcap(HWCAP_NEON)) + return; +#endif + + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; + + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_neon; + } + + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_neon; + } +#else + PNG_UNUSED(pp) + PNG_UNUSED(bpp) +#endif +} diff --git a/arm/filter_neon.S b/arm/filter_neon.S index 63a5d8c17..9ce04d3be 100644 --- a/arm/filter_neon.S +++ b/arm/filter_neon.S @@ -9,6 +9,7 @@ * and license in png.h */ +#ifdef __arm__ #if defined(__linux__) && defined(__ELF__) .section .note.GNU-stack,"",%progbits /* mark stack as non-executable */ #endif @@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1 pop {r4,pc} endfunc +#endif diff --git a/configure.ac b/configure.ac index b9e8a94dc..613ec1ca9 100644 --- a/configure.ac +++ b/configure.ac @@ -241,8 +241,11 @@ 
AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack]) AC_ARG_ENABLE([arm-neon], AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]), [if test "${enableval}" = "yes"; then - AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations]) - AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers]) + AC_DEFINE([PNG_FILTER_OPTIMIZATIONS], + [png_init_filter_functions_neon], + [ARM NEON filter initialization function]) + AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], + [Align row buffers]) fi]) AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes]) diff --git a/pngpriv.h b/pngpriv.h index 6b0809e33..5d42f47f0 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -1872,6 +1872,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY); #endif /* SIMPLIFIED READ/WRITE */ +#ifdef PNG_FILTER_OPTIMIZATIONS +PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); + /* This is the initialization function for hardware specific optimizations, + * one implementation (for ARM NEON machines) is contained in + * arm/arm_init.c. It need not be defined - the generic code will be used + * if not. 
+ */ +#endif + /* Maintainer: Put new private prototypes here ^ */ #include "pngdebug.h" diff --git a/pngrutil.c b/pngrutil.c index 172add2df..f46793ec7 100644 --- a/pngrutil.c +++ b/pngrutil.c @@ -3866,66 +3866,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row, } } -#ifdef PNG_ARM_NEON - -#ifdef __linux__ -#include <stdio.h> -#include <elf.h> -#include <asm/hwcap.h> - -static int png_have_hwcap(unsigned cap) -{ - FILE *f = fopen("/proc/self/auxv", "r"); - Elf32_auxv_t aux; - int have_cap = 0; - - if (!f) - return 0; - - while (fread(&aux, (sizeof aux), 1, f) > 0) - { - if (aux.a_type == AT_HWCAP && - aux.a_un.a_val & cap) - { - have_cap = 1; - break; - } - } - - fclose(f); - - return have_cap; -} -#endif /* __linux__ */ - -static void -png_init_filter_functions_neon(png_structrp pp, unsigned int bpp) -{ -#ifdef __linux__ - if (!png_have_hwcap(HWCAP_NEON)) - return; -#endif - - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon; - - if (bpp == 3) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth3_neon; - } - - else if (bpp == 4) - { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon; - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = - png_read_filter_row_paeth4_neon; - } -} -#endif /* PNG_ARM_NEON */ - static void png_init_filter_functions(png_structrp pp) { @@ -3941,8 +3881,16 @@ png_init_filter_functions(png_structrp pp) pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth_multibyte_pixel; -#ifdef PNG_ARM_NEON - png_init_filter_functions_neon(pp, bpp); +#ifdef PNG_FILTER_OPTIMIZATIONS + /* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to + * call to install hardware optimizations for the above functions; simply + * replace whatever 
elements of the pp->read_filter[] array with a hardware + * specific (or, for that matter, generic) optimization. + * + * To see an example of this examine what configure.ac does when + * --enable-arm-neon is specified on the command line. + */ + PNG_FILTER_OPTIMIZATIONS(pp, bpp); #endif }