Optimize png_do_expand_palette for ARM

The ARM-specific (NEON) optimization processes 8 or 4 pixels at once.
It improves performance by around 10-22% on a recent ARM Chromebook.
This commit is contained in:
Richard Townsend
2018-01-25 19:03:04 +00:00
committed by Cosmin Truta
parent 2fc5ddff18
commit 7734cda20c
7 changed files with 213 additions and 10 deletions

View File

@@ -18,6 +18,13 @@
#include "pngpriv.h"
#ifdef PNG_ARM_NEON_IMPLEMENTATION
#if PNG_ARM_NEON_IMPLEMENTATION == 1
#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
#include <arm_neon.h>
#endif
#endif
#ifdef PNG_READ_SUPPORTED
/* Set the action on getting a CRC error for an ancillary or critical chunk. */
@@ -4195,8 +4202,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
* upon whether you supply trans and num_trans.
*/
static void
png_do_expand_palette(png_row_infop row_info, png_bytep row,
png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
int num_trans)
{
int shift, value;
png_bytep sp, dp;
@@ -4300,14 +4308,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
sp = row + (size_t)row_width - 1;
dp = row + ((size_t)row_width << 2) - 1;
for (i = 0; i < row_width; i++)
i = 0;
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
if (png_ptr->riffled_palette != NULL) {
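/* The RGBA optimization requires the riffled palette, which is only
built when png_ptr->bit_depth == 8. Sometimes row_info->bit_depth
has been changed to 8 even though png_ptr->bit_depth is not 8; in
these cases the palette hasn't been riffled (riffled_palette is
NULL) and the scalar loop below expands the whole row. */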
i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
}
#endif
for (; i < row_width; i++)
{
if ((int)(*sp) >= num_trans)
*dp-- = 0xff;
else
*dp-- = trans_alpha[*sp];
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
@@ -4324,8 +4340,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
{
sp = row + (size_t)row_width - 1;
dp = row + (size_t)(row_width * 3) - 1;
i = 0;
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
#endif
for (i = 0; i < row_width; i++)
for (; i < row_width; i++)
{
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
@@ -4739,8 +4759,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
{
if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
{
png_do_expand_palette(row_info, png_ptr->row_buf + 1,
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
/* Allocate space for the full riffled RGBA palette (256 entries of 4 bytes). */
if (png_ptr->riffled_palette == NULL) {
png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
if (png_ptr->riffled_palette == NULL) {
png_error(png_ptr, "NULL row buffer");
}
/* Build the RGBA palette. */
png_riffle_palette_rgba(png_ptr, row_info);
}
}
#endif
png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
}
else