mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
Optimize png_do_expand_palette for ARM
The ARM-specific (NEON) optimization processes 8 or 4 pixels at once, improving performance by around 10-22% on a recent ARM Chromebook.
This commit is contained in:
committed by
Cosmin Truta
parent
2fc5ddff18
commit
7734cda20c
49
pngrtran.c
49
pngrtran.c
@@ -18,6 +18,13 @@
|
||||
|
||||
#include "pngpriv.h"
|
||||
|
||||
#ifdef PNG_ARM_NEON_IMPLEMENTATION
|
||||
#if PNG_ARM_NEON_IMPLEMENTATION == 1
|
||||
#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef PNG_READ_SUPPORTED
|
||||
|
||||
/* Set the action on getting a CRC error for an ancillary or critical chunk. */
|
||||
@@ -4195,8 +4202,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
|
||||
* upon whether you supply trans and num_trans.
|
||||
*/
|
||||
static void
|
||||
png_do_expand_palette(png_row_infop row_info, png_bytep row,
|
||||
png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
|
||||
png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
|
||||
png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
|
||||
int num_trans)
|
||||
{
|
||||
int shift, value;
|
||||
png_bytep sp, dp;
|
||||
@@ -4300,14 +4308,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
|
||||
sp = row + (size_t)row_width - 1;
|
||||
dp = row + ((size_t)row_width << 2) - 1;
|
||||
|
||||
for (i = 0; i < row_width; i++)
|
||||
i = 0;
|
||||
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
|
||||
if (png_ptr->riffled_palette != NULL) {
|
||||
/* The RGBA optimization works with png_ptr->bit_depth == 8
|
||||
but sometimes row_info->bit_depth has been changed to 8.
|
||||
In these cases, the palette hasn't been riffled. */
|
||||
i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (; i < row_width; i++)
|
||||
{
|
||||
if ((int)(*sp) >= num_trans)
|
||||
*dp-- = 0xff;
|
||||
|
||||
else
|
||||
*dp-- = trans_alpha[*sp];
|
||||
|
||||
*dp-- = palette[*sp].blue;
|
||||
*dp-- = palette[*sp].green;
|
||||
*dp-- = palette[*sp].red;
|
||||
@@ -4324,8 +4340,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
|
||||
{
|
||||
sp = row + (size_t)row_width - 1;
|
||||
dp = row + (size_t)(row_width * 3) - 1;
|
||||
i = 0;
|
||||
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
|
||||
i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < row_width; i++)
|
||||
for (; i < row_width; i++)
|
||||
{
|
||||
*dp-- = palette[*sp].blue;
|
||||
*dp-- = palette[*sp].green;
|
||||
@@ -4739,8 +4759,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
|
||||
{
|
||||
if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
|
||||
{
|
||||
png_do_expand_palette(row_info, png_ptr->row_buf + 1,
|
||||
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
|
||||
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
|
||||
if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
|
||||
/* Allocate space for the decompressed full palette. */
|
||||
if (png_ptr->riffled_palette == NULL) {
|
||||
png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
|
||||
if (png_ptr->riffled_palette == NULL) {
|
||||
png_error(png_ptr, "NULL row buffer");
|
||||
}
|
||||
/* Build the RGBA palette. */
|
||||
png_riffle_palette_rgba(png_ptr, row_info);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
|
||||
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
|
||||
}
|
||||
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user