mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00

Largely based off of the ARM NEON implementation. Signed-off-by: Cosmin Truta <ctruta@gmail.com>
105 lines
2.7 KiB
C
105 lines
2.7 KiB
C
/* palette_neon_intrinsics.c - RISC-V Vector optimized palette expansion
|
|
* functions
|
|
*
|
|
* Copyright (c) 2023 Google LLC
|
|
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
|
|
*
|
|
* This code is released under the libpng license.
|
|
* For conditions of distribution and use, see the disclaimer
|
|
* and license in png.h
|
|
*/
|
|
|
|
#include "../pngpriv.h"
|
|
|
|
#if PNG_ARM_NEON_IMPLEMENTATION == 1
|
|
|
|
#include <riscv_vector.h>
|
|
|
|
void
|
|
png_riffle_palette_vector(png_structrp png_ptr)
|
|
{
|
|
png_const_bytep palette = (png_const_bytep)png_ptr->palette;
|
|
png_bytep riffled_palette = png_ptr->riffled_palette;
|
|
png_const_bytep trans_alpha = png_ptr->trans_alpha;
|
|
|
|
size_t len = 256;
|
|
|
|
vuint8m1_t r;
|
|
vuint8m1_t g;
|
|
vuint8m1_t b;
|
|
|
|
for (size_t vl; len > 0; len -= vl, palette += vl * 3, trans_alpha += vl, riffled_palette += vl * 4) {
|
|
vl = __riscv_vsetvl_e8m1(len);
|
|
|
|
__riscv_vlseg3e8_v_u8m1(&r, &g, &b, palette, vl);
|
|
vuint8m1_t a = __riscv_vle8_v_u8m1(trans_alpha, vl);
|
|
|
|
__riscv_vsseg4e8_v_u8m1(riffled_palette, r, g, b, a, vl);
|
|
}
|
|
}
|
|
|
|
int
|
|
png_do_expand_palette_rgba8_vector(png_structrp png_ptr, png_row_infop row_info,
|
|
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
|
|
{
|
|
size_t row_width = (size_t)row_info->width;
|
|
const png_uint_32* palette = (const png_uint_32*)png_ptr->riffled_palette;
|
|
|
|
size_t vl = __riscv_vsetvl_e8m1(row_width);
|
|
png_bytep sp = *ssp - vl;
|
|
png_bytep dp = *ddp - vl * 4;
|
|
|
|
for (; row_width > 0; row_width -= vl, dp -= vl * 4, sp -= vl) {
|
|
vl = __riscv_vsetvl_e8m1(row_width);
|
|
|
|
vuint8m1_t indices = __riscv_vle8_v_u8m1(sp, vl);
|
|
|
|
vuint32m4_t pixels = __riscv_vluxei8_v_u32m4(palette, indices, vl);
|
|
|
|
__riscv_vse32_v_u32m4((unsigned int *)dp, pixels, vl);
|
|
}
|
|
|
|
row_width = (size_t)row_info->width;
|
|
|
|
*ssp = *ssp - row_width;
|
|
*ddp = *ddp - row_width * 4;
|
|
|
|
return row_width;
|
|
}
|
|
|
|
int
|
|
png_do_expand_palette_rgb8_vector(png_structrp png_ptr, png_row_infop row_info,
|
|
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
|
|
{
|
|
size_t row_width = (size_t)row_info->width;
|
|
const png_const_bytep palette = (png_const_bytep)png_ptr->palette;
|
|
|
|
size_t vl = __riscv_vsetvl_e8m1(row_width);
|
|
png_bytep sp = *ssp - vl;
|
|
png_bytep dp = *ddp - vl * 3;
|
|
|
|
vuint8m1_t r;
|
|
vuint8m1_t g;
|
|
vuint8m1_t b;
|
|
|
|
|
|
for (; row_width > 0; row_width -= vl, dp -= vl * 3, sp -= vl) {
|
|
vl = __riscv_vsetvl_e8m1(row_width);
|
|
|
|
vuint16m2_t indices = __riscv_vwmulu_vx_u16m2(__riscv_vle8_v_u8m1(sp, vl), 3, vl);
|
|
|
|
__riscv_vluxseg3ei16_v_u8m1(&r, &g, &b, palette, indices, vl);
|
|
|
|
__riscv_vsseg3e8_v_u8m1(dp, r, g, b, vl);
|
|
}
|
|
|
|
row_width = (size_t)row_info->width;
|
|
|
|
*ssp = *ssp - row_width;
|
|
*ddp = *ddp - row_width * 3;
|
|
|
|
return row_width;
|
|
}
|
|
|
|
#endif /* PNG_ARM_NEON_IMPLEMENTATION */
|