mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
[libpng16] Added png_debug() statements to the new Intel code and the ARM code.
This commit is contained in:
parent
52846504da
commit
da9d1d7aa6
@ -1,9 +1,9 @@
|
|||||||
|
|
||||||
/* arm_init.c - NEON optimised filter functions
|
/* arm_init.c - NEON optimised filter functions
|
||||||
*
|
*
|
||||||
* Copyright (c) 2014 Glenn Randers-Pehrson
|
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
|
||||||
* Written by Mans Rullgard, 2011.
|
* Written by Mans Rullgard, 2011.
|
||||||
* Last changed in libpng 1.6.16 [December 22, 2014]
|
* Last changed in libpng 1.6.22 [(PENDING RELEASE)]
|
||||||
*
|
*
|
||||||
* This code is released under the libpng license.
|
* This code is released under the libpng license.
|
||||||
* For conditions of distribution and use, see the disclaimer
|
* For conditions of distribution and use, see the disclaimer
|
||||||
@ -66,6 +66,7 @@ png_init_filter_functions_neon(png_structp pp, unsigned int bpp)
|
|||||||
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
|
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
|
||||||
* as documented in png.h
|
* as documented in png.h
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_init_filter_functions_neon");
|
||||||
#ifdef PNG_ARM_NEON_API_SUPPORTED
|
#ifdef PNG_ARM_NEON_API_SUPPORTED
|
||||||
switch ((pp->options >> PNG_ARM_NEON) & 3)
|
switch ((pp->options >> PNG_ARM_NEON) & 3)
|
||||||
{
|
{
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
|
|
||||||
/* filter_neon_intrinsics.c - NEON optimised filter functions
|
/* filter_neon_intrinsics.c - NEON optimised filter functions
|
||||||
*
|
*
|
||||||
* Copyright (c) 2014 Glenn Randers-Pehrson
|
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
|
||||||
* Written by James Yu <james.yu at linaro.org>, October 2013.
|
* Written by James Yu <james.yu at linaro.org>, October 2013.
|
||||||
* Based on filter_neon.S, written by Mans Rullgard, 2011.
|
* Based on filter_neon.S, written by Mans Rullgard, 2011.
|
||||||
*
|
*
|
||||||
* Last changed in libpng 1.6.16 [December 22, 2014]
|
* Last changed in libpng 1.6.22 [(PENDING RELEASE)]
|
||||||
*
|
*
|
||||||
* This code is released under the libpng license.
|
* This code is released under the libpng license.
|
||||||
* For conditions of distribution and use, see the disclaimer
|
* For conditions of distribution and use, see the disclaimer
|
||||||
@ -47,6 +47,8 @@ png_read_filter_row_up_neon(png_row_infop row_info, png_bytep row,
|
|||||||
png_bytep rp_stop = row + row_info->rowbytes;
|
png_bytep rp_stop = row + row_info->rowbytes;
|
||||||
png_const_bytep pp = prev_row;
|
png_const_bytep pp = prev_row;
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_up_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop; rp += 16, pp += 16)
|
for (; rp < rp_stop; rp += 16, pp += 16)
|
||||||
{
|
{
|
||||||
uint8x16_t qrp, qpp;
|
uint8x16_t qrp, qpp;
|
||||||
@ -72,6 +74,8 @@ png_read_filter_row_sub3_neon(png_row_infop row_info, png_bytep row,
|
|||||||
uint8x8x4_t vdest;
|
uint8x8x4_t vdest;
|
||||||
vdest.val[3] = vdup_n_u8(0);
|
vdest.val[3] = vdup_n_u8(0);
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_sub3_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop;)
|
for (; rp < rp_stop;)
|
||||||
{
|
{
|
||||||
uint8x8_t vtmp1, vtmp2;
|
uint8x8_t vtmp1, vtmp2;
|
||||||
@ -113,6 +117,8 @@ png_read_filter_row_sub4_neon(png_row_infop row_info, png_bytep row,
|
|||||||
uint8x8x4_t vdest;
|
uint8x8x4_t vdest;
|
||||||
vdest.val[3] = vdup_n_u8(0);
|
vdest.val[3] = vdup_n_u8(0);
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_sub4_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop; rp += 16)
|
for (; rp < rp_stop; rp += 16)
|
||||||
{
|
{
|
||||||
uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
|
uint32x2x4_t vtmp = vld4_u32(png_ptr(uint32_t,rp));
|
||||||
@ -148,6 +154,8 @@ png_read_filter_row_avg3_neon(png_row_infop row_info, png_bytep row,
|
|||||||
vrpt = png_ptr(uint8x8x2_t,&vtmp);
|
vrpt = png_ptr(uint8x8x2_t,&vtmp);
|
||||||
vrp = *vrpt;
|
vrp = *vrpt;
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_avg3_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop; pp += 12)
|
for (; rp < rp_stop; pp += 12)
|
||||||
{
|
{
|
||||||
uint8x8_t vtmp1, vtmp2, vtmp3;
|
uint8x8_t vtmp1, vtmp2, vtmp3;
|
||||||
@ -207,6 +215,8 @@ png_read_filter_row_avg4_neon(png_row_infop row_info, png_bytep row,
|
|||||||
uint8x8x4_t vdest;
|
uint8x8x4_t vdest;
|
||||||
vdest.val[3] = vdup_n_u8(0);
|
vdest.val[3] = vdup_n_u8(0);
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_avg4_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop; rp += 16, pp += 16)
|
for (; rp < rp_stop; rp += 16, pp += 16)
|
||||||
{
|
{
|
||||||
uint32x2x4_t vtmp;
|
uint32x2x4_t vtmp;
|
||||||
@ -280,6 +290,8 @@ png_read_filter_row_paeth3_neon(png_row_infop row_info, png_bytep row,
|
|||||||
vrpt = png_ptr(uint8x8x2_t,&vtmp);
|
vrpt = png_ptr(uint8x8x2_t,&vtmp);
|
||||||
vrp = *vrpt;
|
vrp = *vrpt;
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_paeth3_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop; pp += 12)
|
for (; rp < rp_stop; pp += 12)
|
||||||
{
|
{
|
||||||
uint8x8x2_t *vppt;
|
uint8x8x2_t *vppt;
|
||||||
@ -339,6 +351,8 @@ png_read_filter_row_paeth4_neon(png_row_infop row_info, png_bytep row,
|
|||||||
uint8x8x4_t vdest;
|
uint8x8x4_t vdest;
|
||||||
vdest.val[3] = vdup_n_u8(0);
|
vdest.val[3] = vdup_n_u8(0);
|
||||||
|
|
||||||
|
png_debug(1, "in png_read_filter_row_paeth4_neon");
|
||||||
|
|
||||||
for (; rp < rp_stop; rp += 16, pp += 16)
|
for (; rp < rp_stop; rp += 16, pp += 16)
|
||||||
{
|
{
|
||||||
uint32x2x4_t vtmp;
|
uint32x2x4_t vtmp;
|
||||||
|
@ -2,3 +2,7 @@
|
|||||||
To enable SSE support in libpng, manually edit configure.ac and Makefile.am,
|
To enable SSE support in libpng, manually edit configure.ac and Makefile.am,
|
||||||
following the instructions in the configure.ac.patch and Makefile.am.patch
|
following the instructions in the configure.ac.patch and Makefile.am.patch
|
||||||
files, then configure with -DPNG_INTEL_SSE in CPPFLAGS.
|
files, then configure with -DPNG_INTEL_SSE in CPPFLAGS.
|
||||||
|
|
||||||
|
If you have moved the *.c files to a different directory, be sure to update
|
||||||
|
the '#include "../../pngpriv.h"' line in both files if necessary to point
|
||||||
|
to the correct relative location of pngpriv.h.
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
* Copyright (c) 2016 Google, Inc.
|
* Copyright (c) 2016 Google, Inc.
|
||||||
* Written by Mike Klein and Matt Sarett
|
* Written by Mike Klein and Matt Sarett
|
||||||
* Derived from arm/filter_neon_intrinsics.c, which was
|
* Derived from arm/filter_neon_intrinsics.c, which was
|
||||||
* Copyright (c) 2014 Glenn Randers-Pehrson
|
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
|
||||||
*
|
*
|
||||||
* Last changed in libpng 1.6.22 [(PENDING RELEASE)]
|
* Last changed in libpng 1.6.22 [(PENDING RELEASE)]
|
||||||
*
|
*
|
||||||
@ -55,6 +55,7 @@ void png_read_filter_row_sub3_sse2(png_row_infop row_info, png_bytep row,
|
|||||||
* There is no pixel to the left of the first pixel. It's encoded directly.
|
* There is no pixel to the left of the first pixel. It's encoded directly.
|
||||||
* That works with our main loop if we just say that left pixel was zero.
|
* That works with our main loop if we just say that left pixel was zero.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_read_filter_row_sub3_sse2");
|
||||||
__m128i a, d = _mm_setzero_si128();
|
__m128i a, d = _mm_setzero_si128();
|
||||||
|
|
||||||
int rb = row_info->rowbytes;
|
int rb = row_info->rowbytes;
|
||||||
@ -75,6 +76,7 @@ void png_read_filter_row_sub4_sse2(png_row_infop row_info, png_bytep row,
|
|||||||
* There is no pixel to the left of the first pixel. It's encoded directly.
|
* There is no pixel to the left of the first pixel. It's encoded directly.
|
||||||
* That works with our main loop if we just say that left pixel was zero.
|
* That works with our main loop if we just say that left pixel was zero.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_read_filter_row_sub4_sse2");
|
||||||
__m128i a, d = _mm_setzero_si128();
|
__m128i a, d = _mm_setzero_si128();
|
||||||
|
|
||||||
int rb = row_info->rowbytes;
|
int rb = row_info->rowbytes;
|
||||||
@ -96,6 +98,7 @@ void png_read_filter_row_avg3_sse2(png_row_infop row_info, png_bytep row,
|
|||||||
* predicted to be half of the pixel above it. So again, this works
|
* predicted to be half of the pixel above it. So again, this works
|
||||||
* perfectly with our loop if we make sure a starts at zero.
|
* perfectly with our loop if we make sure a starts at zero.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_read_filter_row_avg3_sse2");
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
__m128i b;
|
__m128i b;
|
||||||
__m128i a, d = zero;
|
__m128i a, d = zero;
|
||||||
@ -128,6 +131,7 @@ void png_read_filter_row_avg4_sse2(png_row_infop row_info, png_bytep row,
|
|||||||
* predicted to be half of the pixel above it. So again, this works
|
* predicted to be half of the pixel above it. So again, this works
|
||||||
* perfectly with our loop if we make sure a starts at zero.
|
* perfectly with our loop if we make sure a starts at zero.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_read_filter_row_avg4_sse2");
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
__m128i b;
|
__m128i b;
|
||||||
__m128i a, d = zero;
|
__m128i a, d = zero;
|
||||||
@ -196,6 +200,7 @@ void png_read_filter_row_paeth3_sse2(png_row_infop row_info, png_bytep row,
|
|||||||
* Here we zero b and d, which become c and a respectively at the start of
|
* Here we zero b and d, which become c and a respectively at the start of
|
||||||
* the loop.
|
* the loop.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_read_filter_row_paeth3_sse2");
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
__m128i c, b = zero,
|
__m128i c, b = zero,
|
||||||
a, d = zero;
|
a, d = zero;
|
||||||
@ -254,6 +259,7 @@ void png_read_filter_row_paeth4_sse2(png_row_infop row_info, png_bytep row,
|
|||||||
* Here we zero b and d, which become c and a respectively at the start of
|
* Here we zero b and d, which become c and a respectively at the start of
|
||||||
* the loop.
|
* the loop.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_read_filter_row_paeth4_sse2");
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
__m128i c, b = zero,
|
__m128i c, b = zero,
|
||||||
a, d = zero;
|
a, d = zero;
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
* Copyright (c) 2016 Google, Inc.
|
* Copyright (c) 2016 Google, Inc.
|
||||||
* Written by Mike Klein and Matt Sarett
|
* Written by Mike Klein and Matt Sarett
|
||||||
* Derived from arm/arm_init.c, which was
|
* Derived from arm/arm_init.c, which was
|
||||||
* Copyright (c) 2014 Glenn Randers-Pehrson
|
* Copyright (c) 2014,2016 Glenn Randers-Pehrson
|
||||||
*
|
*
|
||||||
* Last changed in libpng 1.6.22 [(PENDING RELEASE)]
|
* Last changed in libpng 1.6.22 [(PENDING RELEASE)]
|
||||||
*
|
*
|
||||||
@ -29,6 +29,7 @@ png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
|
|||||||
* Most of these can be implemented using only MMX and 64-bit registers,
|
* Most of these can be implemented using only MMX and 64-bit registers,
|
||||||
* but they end up a bit slower than using the equally-ubiquitous SSE2.
|
* but they end up a bit slower than using the equally-ubiquitous SSE2.
|
||||||
*/
|
*/
|
||||||
|
png_debug(1, "in png_init_filter_functions_sse2");
|
||||||
if (bpp == 3)
|
if (bpp == 3)
|
||||||
{
|
{
|
||||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
|
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user