mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
[libpng16] Rearranged the ARM-NEON optimizations: Isolated the machine specific
code to the hardware subdirectory and added comments to pngrutil.c so that implementors of other optimizations know what to do.
This commit is contained in:
parent
5c2d76fdef
commit
f758d6c813
7
ANNOUNCE
7
ANNOUNCE
@ -1,5 +1,5 @@
|
|||||||
|
|
||||||
Libpng 1.6.0beta33 - December 10, 2012
|
Libpng 1.6.0beta33 - December 13, 2012
|
||||||
|
|
||||||
This is not intended to be a public release. It will be replaced
|
This is not intended to be a public release. It will be replaced
|
||||||
within a few weeks by a public version or by another test version.
|
within a few weeks by a public version or by another test version.
|
||||||
@ -552,12 +552,15 @@ Version 1.6.0beta32 [November 25, 2012]
|
|||||||
Fixed error checking in the simplified write API (Olaf van der Spek)
|
Fixed error checking in the simplified write API (Olaf van der Spek)
|
||||||
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
|
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
|
||||||
|
|
||||||
Version 1.6.0beta33 [December 10, 2012]
|
Version 1.6.0beta33 [December 13, 2012]
|
||||||
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
|
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
|
||||||
that causes the MALLOC_MAX limit not to work (John Bowler)
|
that causes the MALLOC_MAX limit not to work (John Bowler)
|
||||||
Change png_warning() to png_app_error() in pngwrite.c and comment the
|
Change png_warning() to png_app_error() in pngwrite.c and comment the
|
||||||
fall-through condition.
|
fall-through condition.
|
||||||
Change png_warning() to png_app_warning() in png_write_tRNS().
|
Change png_warning() to png_app_warning() in png_write_tRNS().
|
||||||
|
Rearranged the ARM-NEON optimizations: Isolated the machine specific code
|
||||||
|
to the hardware subdirectory and added comments to pngrutil.c so that
|
||||||
|
implementors of other optimizations know what to do.
|
||||||
|
|
||||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
||||||
(subscription required; visit
|
(subscription required; visit
|
||||||
|
|||||||
5
CHANGES
5
CHANGES
@ -4304,12 +4304,15 @@ Version 1.6.0beta32 [November 25, 2012]
|
|||||||
Fixed error checking in the simplified write API (Olaf van der Spek)
|
Fixed error checking in the simplified write API (Olaf van der Spek)
|
||||||
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
|
Made png_user_version_check() ok to use with libpng version 1.10.x and later.
|
||||||
|
|
||||||
Version 1.6.0beta33 [December 10, 2012]
|
Version 1.6.0beta33 [December 13, 2012]
|
||||||
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
|
Fixed typo in png.c (PNG_SET_CHUNK_MALLOC_MAX should be PNG_CHUNK_MALLOC_MAX)
|
||||||
that causes the MALLOC_MAX limit not to work (John Bowler)
|
that causes the MALLOC_MAX limit not to work (John Bowler)
|
||||||
Change png_warning() to png_app_error() in pngwrite.c and comment the
|
Change png_warning() to png_app_error() in pngwrite.c and comment the
|
||||||
fall-through condition.
|
fall-through condition.
|
||||||
Change png_warning() to png_app_warning() in png_write_tRNS().
|
Change png_warning() to png_app_warning() in png_write_tRNS().
|
||||||
|
Rearranged the ARM-NEON optimizations: Isolated the machine specific code
|
||||||
|
to the hardware subdirectory and added comments to pngrutil.c so that
|
||||||
|
implementors of other optimizations know what to do.
|
||||||
|
|
||||||
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
Send comments/corrections/commendations to png-mng-implement at lists.sf.net
|
||||||
(subscription required; visit
|
(subscription required; visit
|
||||||
|
|||||||
@ -43,7 +43,8 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c\
|
|||||||
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
|
png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h pngstruct.h pngusr.dfa
|
||||||
|
|
||||||
if PNG_ARM_NEON
|
if PNG_ARM_NEON
|
||||||
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/filter_neon.S
|
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += arm/arm_init.c\
|
||||||
|
arm/filter_neon.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
|
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = pnglibconf.h
|
||||||
|
|||||||
74
arm/arm_init.c
Normal file
74
arm/arm_init.c
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
|
||||||
|
/* arm_init.c - NEON optimised filter functions
|
||||||
|
*
|
||||||
|
* Copyright (c) 2011 Glenn Randers-Pehrson
|
||||||
|
* Written by Mans Rullgard, 2011.
|
||||||
|
*
|
||||||
|
* This code is released under the libpng license.
|
||||||
|
* For conditions of distribution and use, see the disclaimer
|
||||||
|
* and license in png.h
|
||||||
|
*/
|
||||||
|
#include "../pngpriv.h"
|
||||||
|
|
||||||
|
/* __arm__ is defined by GCC, MSVC defines _M_ARM to the ARM version number */
|
||||||
|
#if defined __linux__ && defined __arm__
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <elf.h>
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
|
||||||
|
/* Scan this process's ELF auxiliary vector (/proc/self/auxv) for an
 * AT_HWCAP record whose value has at least one of the bits in 'cap' set.
 *
 * Returns 1 when such a record is found, otherwise 0.  0 is also returned
 * when /proc/self/auxv cannot be opened.
 */
static int png_have_hwcap(unsigned cap)
{
   Elf32_auxv_t entry;
   int found = 0;
   FILE *auxv = fopen("/proc/self/auxv", "r");

   if (auxv == NULL)
      return found;

   /* Consume one fixed-size record per iteration; stop at the first match
    * or as soon as a complete record can no longer be read.
    */
   while (!found && fread(&entry, sizeof entry, 1, auxv) == 1)
   {
      if (entry.a_type != AT_HWCAP)
         continue;

      if ((entry.a_un.a_val & cap) != 0)
         found = 1;
   }

   fclose(auxv);

   return found;
}
|
||||||
|
#endif /* __linux__ && __arm__ */
|
||||||
|
|
||||||
|
void
|
||||||
|
png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
|
||||||
|
{
|
||||||
|
#ifdef __arm__
|
||||||
|
#ifdef __linux__
|
||||||
|
if (!png_have_hwcap(HWCAP_NEON))
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
|
||||||
|
|
||||||
|
if (bpp == 3)
|
||||||
|
{
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||||
|
png_read_filter_row_paeth3_neon;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (bpp == 4)
|
||||||
|
{
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
|
||||||
|
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||||
|
png_read_filter_row_paeth4_neon;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
PNG_UNUSED(pp)
|
||||||
|
PNG_UNUSED(bpp)
|
||||||
|
#endif
|
||||||
|
}
|
||||||
@ -9,6 +9,7 @@
|
|||||||
* and license in png.h
|
* and license in png.h
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef __arm__
|
||||||
#if defined(__linux__) && defined(__ELF__)
|
#if defined(__linux__) && defined(__ELF__)
|
||||||
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
|
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
|
||||||
#endif
|
#endif
|
||||||
@ -223,3 +224,4 @@ func png_read_filter_row_paeth3_neon, export=1
|
|||||||
|
|
||||||
pop {r4,pc}
|
pop {r4,pc}
|
||||||
endfunc
|
endfunc
|
||||||
|
#endif
|
||||||
|
|||||||
@ -241,8 +241,11 @@ AC_SUBST([AM_CCASFLAGS], [-Wa,--noexecstack])
|
|||||||
AC_ARG_ENABLE([arm-neon],
|
AC_ARG_ENABLE([arm-neon],
|
||||||
AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
|
AS_HELP_STRING([[[--enable-arm-neon]]], [Enable ARM NEON optimizations]),
|
||||||
[if test "${enableval}" = "yes"; then
|
[if test "${enableval}" = "yes"; then
|
||||||
AC_DEFINE([PNG_ARM_NEON], [1], [Enable ARM NEON optimizations])
|
AC_DEFINE([PNG_FILTER_OPTIMIZATIONS],
|
||||||
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1], [Align row buffers])
|
[png_init_filter_functions_neon],
|
||||||
|
[ARM NEON filter initialization function])
|
||||||
|
AC_DEFINE([PNG_ALIGNED_MEMORY_SUPPORTED], [1],
|
||||||
|
[Align row buffers])
|
||||||
fi])
|
fi])
|
||||||
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
|
AM_CONDITIONAL([PNG_ARM_NEON], [test "${enable_arm_neon:-no}" = yes])
|
||||||
|
|
||||||
|
|||||||
@ -1862,6 +1862,15 @@ PNG_INTERNAL_FUNCTION(void, png_image_free, (png_imagep image), PNG_EMPTY);
|
|||||||
|
|
||||||
#endif /* SIMPLIFIED READ/WRITE */
|
#endif /* SIMPLIFIED READ/WRITE */
|
||||||
|
|
||||||
|
#ifdef PNG_FILTER_OPTIMIZATIONS
|
||||||
|
PNG_INTERNAL_FUNCTION(void, PNG_FILTER_OPTIMIZATIONS, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
|
||||||
|
/* This is the initialization function for hardware specific optimizations,
|
||||||
|
* one implementation (for ARM NEON machines) is contained in
|
||||||
|
* arm/arm_init.c. It need not be defined - the generic code will be used
|
||||||
|
* if not.
|
||||||
|
*/
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Maintainer: Put new private prototypes here ^ */
|
/* Maintainer: Put new private prototypes here ^ */
|
||||||
|
|
||||||
#include "pngdebug.h"
|
#include "pngdebug.h"
|
||||||
|
|||||||
72
pngrutil.c
72
pngrutil.c
@ -3863,66 +3863,6 @@ png_read_filter_row_paeth_multibyte_pixel(png_row_infop row_info, png_bytep row,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef PNG_ARM_NEON
|
|
||||||
|
|
||||||
#ifdef __linux__
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <elf.h>
|
|
||||||
#include <asm/hwcap.h>
|
|
||||||
|
|
||||||
static int png_have_hwcap(unsigned cap)
|
|
||||||
{
|
|
||||||
FILE *f = fopen("/proc/self/auxv", "r");
|
|
||||||
Elf32_auxv_t aux;
|
|
||||||
int have_cap = 0;
|
|
||||||
|
|
||||||
if (!f)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
while (fread(&aux, (sizeof aux), 1, f) > 0)
|
|
||||||
{
|
|
||||||
if (aux.a_type == AT_HWCAP &&
|
|
||||||
aux.a_un.a_val & cap)
|
|
||||||
{
|
|
||||||
have_cap = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose(f);
|
|
||||||
|
|
||||||
return have_cap;
|
|
||||||
}
|
|
||||||
#endif /* __linux__ */
|
|
||||||
|
|
||||||
static void
|
|
||||||
png_init_filter_functions_neon(png_structrp pp, unsigned int bpp)
|
|
||||||
{
|
|
||||||
#ifdef __linux__
|
|
||||||
if (!png_have_hwcap(HWCAP_NEON))
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_neon;
|
|
||||||
|
|
||||||
if (bpp == 3)
|
|
||||||
{
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_neon;
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_neon;
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
|
||||||
png_read_filter_row_paeth3_neon;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (bpp == 4)
|
|
||||||
{
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_neon;
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_neon;
|
|
||||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
|
||||||
png_read_filter_row_paeth4_neon;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* PNG_ARM_NEON */
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
png_init_filter_functions(png_structrp pp)
|
png_init_filter_functions(png_structrp pp)
|
||||||
{
|
{
|
||||||
@ -3938,8 +3878,16 @@ png_init_filter_functions(png_structrp pp)
|
|||||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||||
png_read_filter_row_paeth_multibyte_pixel;
|
png_read_filter_row_paeth_multibyte_pixel;
|
||||||
|
|
||||||
#ifdef PNG_ARM_NEON
|
#ifdef PNG_FILTER_OPTIMIZATIONS
|
||||||
png_init_filter_functions_neon(pp, bpp);
|
/* To use this define PNG_FILTER_OPTIMIZATIONS as the name of a function to
|
||||||
|
* call to install hardware optimizations for the above functions; simply
|
||||||
|
* replace the relevant elements of the pp->read_filter[] array with a hardware
|
||||||
|
* specific (or, for that matter, generic) optimization.
|
||||||
|
*
|
||||||
|
* To see an example of this examine what configure.ac does when
|
||||||
|
* --enable-arm-neon is specified on the command line.
|
||||||
|
*/
|
||||||
|
PNG_FILTER_OPTIMIZATIONS(pp, bpp);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user