mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
[libpng17] Rearranged ARM-NEON optimizations to isolate the machine specific
code to the hardware subdirectory, and add comments to pngrutil.c so that implementors of other optimizations will know what to do.
This commit is contained in:
parent
eac85878bf
commit
0f2a5bac64
5
ANNOUNCE
5
ANNOUNCE
@ -53,8 +53,11 @@ Version 1.7.0alpha01 [December 15, 2012]
|
||||
in png.h have been made more clear. Minor fixes to
|
||||
contrib/libtests/timepng.c and some of the png_*_tRNS logic, including
|
||||
more error detection in png_set_tRNS.
|
||||
Clean up USER_LIMITS feature, removing members from png_struct when not
|
||||
Cleaned up USER_LIMITS feature, removing members from png_struct when not
|
||||
required.
|
||||
Rearranged ARM-NEON optimizations to isolate the machine specific code to
|
||||
the hardware subdirectory, and add comments to pngrutil.c so that
|
||||
implementors of other optimizations will know what to do.
|
||||
|
||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
||||
(subscription required; visit
|
||||
|
||||
5
CHANGES
5
CHANGES
@ -4338,8 +4338,11 @@ Version 1.7.0alpha01 [December 15, 2012]
|
||||
in png.h have been made more clear. Minor fixes to
|
||||
contrib/libtests/timepng.c and some of the png_*_tRNS logic, including
|
||||
more error detection in png_set_tRNS.
|
||||
Clean up USER_LIMITS feature, removing members from png_struct when not
|
||||
Cleaned up USER_LIMITS feature, removing members from png_struct when not
|
||||
required.
|
||||
Rearranged ARM-NEON optimizations to isolate the machine specific code to
|
||||
the hardware subdirectory, and add comments to pngrutil.c so that
|
||||
implementors of other optimizations will know what to do.
|
||||
|
||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
||||
(subscription required; visit
|
||||
|
||||
@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
|
||||
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
|
||||
|
||||
if PNG_ARM_NEON
|
||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S
|
||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
|
||||
arm/filter_neon.S
|
||||
endif
|
||||
|
||||
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
|
||||
|
||||
74
arm/arm_init.c
Normal file
74
arm/arm_init.c
Normal file
@ -0,0 +1,74 @@
|
||||
|
||||
/* filter_neon.S - NEON optimised filter functions
|
||||
*
|
||||
* Copyright (c) 2011 Glenn Randers-Pehrson
|
||||
* Written by Mans Rullgard, 2011.
|
||||
*
|
||||
* This code is released under the libpng license.
|
||||
* For conditions of distribution and use, see the disclaimer
|
||||
* and license in png.h
|
||||
*/
|
||||
#include "../pngpriv.h"
|
||||
|
||||
/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */
|
||||
#if defined __linux__ && defined __arm__
|
||||
#include <stdio.h>
|
||||
#include <elf.h>
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
static int png_have_hwcap(unsigned cap)
|
||||
{
|
||||
FILE *f = fopen("/proc/self/auxv", "r");
|
||||
Elf32_auxv_t aux;
|
||||
int have_cap = 0;
|
||||
|
||||
if (!f)
|
||||
return 0;
|
||||
|
||||
while (fread(&aux, sizeof(aux), 1, f) > 0)
|
||||
{
|
||||
if (aux.a_type == AT_HWCAP &&
|
||||
aux.a_un.a_val & cap)
|
||||
{
|
||||
have_cap = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
return have_cap;
|
||||
}
|
||||
#endif /* __linux__ && __arm__ */
|
||||
|
||||
void
|
||||
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
|
||||
{
|
||||
#ifdef __arm__
|
||||
#ifdef __linux__
|
||||
if (!png_have_hwcap(HWCAP_NEON))
|
||||
return;
|
||||
#endif
|
||||
|
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
|
||||
|
||||
if (bpp == 3)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth3_neon;
|
||||
}
|
||||
|
||||
else if (bpp == 4)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth4_neon;
|
||||
}
|
||||
#else
|
||||
PNG_UNUSED(pp)
|
||||
PNG_UNUSED(bpp)
|
||||
#endif
|
||||
}
|
||||
@ -9,6 +9,7 @@
|
||||
* and license in png.h
|
||||
*/
|
||||
|
||||
#ifdef __arm__
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
|
||||
#endif
|
||||
@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1
|
||||
|
||||
pop {r4,pc}
|
||||
endfunc
|
||||
#endif
|
||||
|
||||
@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
|
||||
AC_ARG_ENABLE([arm-neon],
|
||||
AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
|
||||
[if test "${enableval}" = "yes"; then
|
||||
AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations])
|
||||
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers])
|
||||
AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
|
||||
[png_init_filter_functions_neon],
|
||||
[ARM NEON filter initialization function])
|
||||
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
|
||||
[Align row buffers])
|
||||
fi])
|
||||
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
|
||||
|
||||
|
||||
@ -1872,6 +1872,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
|
||||
|
||||
#endif /* SIMPLIFIED READ/WRITE */
|
||||
|
||||
#ifdef PNG_FILTER_OPTIMIZATIONS
|
||||
PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
|
||||
/* This is the initialization function for hardware specific optimizations,
|
||||
* one implementation (for ARM NEON machines) is contained in
|
||||
* arm/filter_neon.c. It need not be defined - the generic code will be used
|
||||
* if not.
|
||||
*/
|
||||
#endif
|
||||
|
||||
/* Maintainer: Put new private prototypes here ^ */
|
||||
|
||||
#include "pngdebug.h"
|
||||
|
||||
72
pngrutil.c
72
pngrutil.c
@ -3866,66 +3866,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef PNG_ARM_NEON
|
||||
|
||||
#ifdef __linux__
|
||||
#include <stdio.h>
|
||||
#include <elf.h>
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
static int png_have_hwcap(unsigned cap)
|
||||
{
|
||||
FILE *f = fopen("/proc/self/auxv", "r");
|
||||
Elf32_auxv_t aux;
|
||||
int have_cap = 0;
|
||||
|
||||
if (!f)
|
||||
return 0;
|
||||
|
||||
while (fread(&aux, (sizeof aux), 1, f) > 0)
|
||||
{
|
||||
if (aux.a_type == AT_HWCAP &&
|
||||
aux.a_un.a_val & cap)
|
||||
{
|
||||
have_cap = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
||||
return have_cap;
|
||||
}
|
||||
#endif /* __linux__ */
|
||||
|
||||
static void
|
||||
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
|
||||
{
|
||||
#ifdef __linux__
|
||||
if (!png_have_hwcap(HWCAP_NEON))
|
||||
return;
|
||||
#endif
|
||||
|
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
|
||||
|
||||
if (bpp == 3)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth3_neon;
|
||||
}
|
||||
|
||||
else if (bpp == 4)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth4_neon;
|
||||
}
|
||||
}
|
||||
#endif /* PNG_ARM_NEON */
|
||||
|
||||
static void
|
||||
png_init_filter_functions(png_structrp pp)
|
||||
{
|
||||
@ -3941,8 +3881,16 @@ png_init_filter_functions(png_structrp pp)
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth_multibyte_pixel;
|
||||
|
||||
#ifdef PNG_ARM_NEON
|
||||
png_init_filter_functions_neon(pp, bpp);
|
||||
#ifdef PNG_FILTER_OPTIMIZATIONS
|
||||
/* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
|
||||
* call to install hardware optimizations for the above functions; simply
|
||||
* replace whatever elements of the pp->read_filter[] array with a hardware
|
||||
* specific (or, for that matter, generic) optimization.
|
||||
*
|
||||
* To see an example of this examine what configure.ac does when
|
||||
* --enable-arm-neon is specified on the command line.
|
||||
*/
|
||||
PNG_FILTER_OPTIMIZATIONS(pp, bpp);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user