Add optimized RISC-V Vector functions

Largely based off of the ARM NEON implementation.

Signed-off-by: Cosmin Truta <ctruta@gmail.com>
This commit is contained in:
Dragoș Tiselice 2023-05-02 10:36:08 +00:00 committed by Cosmin Truta
parent b4800bae33
commit cc5ee6b213
17 changed files with 886 additions and 11 deletions

View File

@ -40,8 +40,9 @@ Authors, for copyright and licensing purposes.
- Zixu Wang (王子旭)
* Arm Holdings
- Richard Townsend
* Google Inc.
* Google LLC
- Dan Field
- Dragoș Tiselice
- Leon Scroggins III
- Matt Sarett
- Mike Klein

View File

@ -312,6 +312,31 @@ if(PNG_HARDWARE_OPTIMIZATIONS)
endif()
endif()
# Set definitions and sources for RISC-V.
if(TARGET_ARCH MATCHES "riscv*")
set(PNG_RISCV_VECTOR_POSSIBLE_VALUES check on off)
set(PNG_RISCV_VECTOR "off"
CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; off is default")
set_property(CACHE PNG_RISCV_VECTOR
PROPERTY STRINGS ${PNG_RISCV_VECTOR_POSSIBLE_VALUES})
list(FIND PNG_RISCV_VECTOR_POSSIBLE_VALUES ${PNG_RISCV_VECTOR} index)
if(index EQUAL -1)
message(FATAL_ERROR "PNG_RISCV_VECTOR must be one of [${PNG_RISCV_VECTOR_POSSIBLE_VALUES}]")
elseif(NOT ${PNG_RISCV_VECTOR} STREQUAL "off")
set(libpng_riscv_sources
riscv/filter_vector_intrinsics.c
riscv/palette_vector_intrinsics.c
riscv/riscv_init.c)
if(${PNG_RISCV_VECTOR} STREQUAL "on")
add_definitions(-DPNG_RISCV_VECTOR_OPT=2)
elseif(${PNG_RISCV_VECTOR} STREQUAL "check")
add_definitions(-DPNG_RISCV_VECTOR_CHECK_SUPPORTED)
endif()
else()
add_definitions(-DPNG_RISCV_VECTOR_OPT=0)
endif()
endif()
else(PNG_HARDWARE_OPTIMIZATIONS)
# Set definitions and sources for ARM.
@ -339,6 +364,11 @@ else(PNG_HARDWARE_OPTIMIZATIONS)
add_definitions(-DPNG_LOONGARCH_LSX_OPT=0)
endif()
# Set definitions and sources for RISC-V.
if(TARGET_ARCH MATCHES "^riscv")
add_definitions(-DPNG_RISCV_VECTOR_OPT=0)
endif()
endif(PNG_HARDWARE_OPTIMIZATIONS)
option(ld-version-script "Enable linker version script" ON)
@ -613,7 +643,9 @@ set(libpng_sources
${libpng_mips_sources}
${libpng_powerpc_sources}
${libpng_loongarch_sources}
${libpng_riscv_sources}
)
set(pngtest_sources
pngtest.c
)

10
INSTALL
View File

@ -136,7 +136,7 @@ Your directory structure should look like this:
depcomp, install-sh, mkinstalldirs, test-pngtest.sh, etc.
contrib
arm-neon, conftest, examples, gregbook, libtests, pngminim,
pngminus, pngsuite, tools, visupng
pngminus, pngsuite, tools, visupng, riscv-vector
projects
owatcom, visualc71, vstudio
scripts
@ -289,6 +289,7 @@ such as one of
--enable-mips-msa=yes
--enable-intel-sse=yes
--enable-powerpc-vsx=yes
--enable-riscv-vector=yes
or enable them all at once with
@ -301,6 +302,7 @@ or more of
CPPFLAGS += "-DPNG_MIPS_MSA"
CPPFLAGS += "-DPNG_INTEL_SSE"
CPPFLAGS += "-DPNG_POWERPC_VSX"
CPPFLAGS += "-DPNG_RISCV_VECTOR"
See for example scripts/makefile.linux-opt
@ -317,13 +319,15 @@ to disable a particular one,
or via compiler-command options such as
CPPFLAGS += "-DPNG_ARM_NEON_OPT=0, -DPNG_MIPS_MSA_OPT=0,
-DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0"
-DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0,
-DPNG_RISCV_VECTOR_OPT=0"
If you are using cmake, hardware optimizations are "on"
by default. To disable them, use
cmake . -DPNG_ARM_NEON=no -DPNG_INTEL_SSE=no \
-DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no
-DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no \
-DPNG_RISCV_VECTOR=no
or disable them all at once with

View File

@ -134,6 +134,11 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\
powerpc/filter_vsx_intrinsics.c
endif
if PNG_RISCV_VECTOR
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += riscv/riscv_init.c\
riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c
endif
if PNG_LOONGARCH_LSX
noinst_LTLIBRARIES= libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_SOURCES = loongarch/loongarch_lsx_init.c\

View File

@ -132,6 +132,10 @@ host_triplet = @host@
@HAVE_LD_VERSION_SCRIPT_FALSE@am__append_10 = -export-symbols libpng.sym
@PNG_LOONGARCH_LSX_TRUE@am__append_11 = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
@DO_PNG_PREFIX_TRUE@am__append_12 = -DPNG_PREFIX='@PNG_PREFIX@'
@PNG_RISCV_VECTOR_TRUE@am__append_13 = riscv/riscv_init.c\
@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/scripts/autoconf/libtool.m4 \
@ -193,7 +197,9 @@ am__libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES_DIST = png.c \
mips/mips_init.c mips/filter_msa_intrinsics.c \
mips/filter_mmi_inline_assembly.c intel/intel_init.c \
intel/filter_sse2_intrinsics.c powerpc/powerpc_init.c \
powerpc/filter_vsx_intrinsics.c
powerpc/filter_vsx_intrinsics.c riscv/riscv_init.c \
riscv/filter_vector_intrinsics.c \
riscv/palette_vector_intrinsics.c
am__dirstamp = $(am__leading_dot)dirstamp
@PNG_ARM_NEON_TRUE@am__objects_1 = arm/arm_init.lo \
@PNG_ARM_NEON_TRUE@ arm/filter_neon_intrinsics.lo \
@ -207,12 +213,15 @@ am__dirstamp = $(am__leading_dot)dirstamp
@PNG_INTEL_SSE_TRUE@ intel/filter_sse2_intrinsics.lo
@PNG_POWERPC_VSX_TRUE@am__objects_6 = powerpc/powerpc_init.lo \
@PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.lo
@PNG_RISCV_VECTOR_TRUE@am__objects_7 = riscv/riscv_init.lo \
@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.lo \
@PNG_RISCV_VECTOR_TRUE@ riscv/palette_vector_intrinsics.lo
am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = png.lo pngerror.lo \
pngget.lo pngmem.lo pngpread.lo pngread.lo pngrio.lo \
pngrtran.lo pngrutil.lo pngset.lo pngtrans.lo pngwio.lo \
pngwrite.lo pngwtran.lo pngwutil.lo $(am__objects_1) \
$(am__objects_2) $(am__objects_3) $(am__objects_4) \
$(am__objects_5) $(am__objects_6)
$(am__objects_5) $(am__objects_6) $(am__objects_7)
nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS =
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = \
$(am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) \
@ -332,7 +341,10 @@ am__depfiles_remade = ./$(DEPDIR)/png.Plo ./$(DEPDIR)/pngerror.Plo \
mips/$(DEPDIR)/filter_msa_intrinsics.Plo \
mips/$(DEPDIR)/mips_init.Plo \
powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo \
powerpc/$(DEPDIR)/powerpc_init.Plo
powerpc/$(DEPDIR)/powerpc_init.Plo \
riscv/$(DEPDIR)/filter_vector_intrinsics.Plo \
riscv/$(DEPDIR)/palette_vector_intrinsics.Plo \
riscv/$(DEPDIR)/riscv_init.Plo
am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
@ -825,7 +837,7 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c \
pngwutil.c png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h \
pngstruct.h pngusr.dfa $(am__append_2) $(am__append_3) \
$(am__append_4) $(am__append_5) $(am__append_6) \
$(am__append_7)
$(am__append_7) $(am__append_13)
@PNG_LOONGARCH_LSX_TRUE@noinst_LTLIBRARIES = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
@PNG_LOONGARCH_LSX_TRUE@libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_SOURCES = loongarch/loongarch_lsx_init.c\
@PNG_LOONGARCH_LSX_TRUE@ loongarch/filter_lsx_intrinsics.c
@ -1071,6 +1083,18 @@ powerpc/powerpc_init.lo: powerpc/$(am__dirstamp) \
powerpc/$(DEPDIR)/$(am__dirstamp)
powerpc/filter_vsx_intrinsics.lo: powerpc/$(am__dirstamp) \
powerpc/$(DEPDIR)/$(am__dirstamp)
riscv/$(am__dirstamp):
@$(MKDIR_P) riscv
@: > riscv/$(am__dirstamp)
riscv/$(DEPDIR)/$(am__dirstamp):
@$(MKDIR_P) riscv/$(DEPDIR)
@: > riscv/$(DEPDIR)/$(am__dirstamp)
riscv/riscv_init.lo: riscv/$(am__dirstamp) \
riscv/$(DEPDIR)/$(am__dirstamp)
riscv/filter_vector_intrinsics.lo: riscv/$(am__dirstamp) \
riscv/$(DEPDIR)/$(am__dirstamp)
riscv/palette_vector_intrinsics.lo: riscv/$(am__dirstamp) \
riscv/$(DEPDIR)/$(am__dirstamp)
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la: $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(EXTRA_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES)
$(AM_V_CCLD)$(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LINK) -rpath $(libdir) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LIBADD) $(LIBS)
@ -1202,6 +1226,8 @@ mostlyclean-compile:
-rm -f mips/*.lo
-rm -f powerpc/*.$(OBJEXT)
-rm -f powerpc/*.lo
-rm -f riscv/*.$(OBJEXT)
-rm -f riscv/*.lo
distclean-compile:
-rm -f *.tab.c
@ -1297,6 +1323,7 @@ clean-libtool:
-rm -rf loongarch/.libs loongarch/_libs
-rm -rf mips/.libs mips/_libs
-rm -rf powerpc/.libs powerpc/_libs
-rm -rf riscv/.libs riscv/_libs
distclean-libtool:
-rm -f libtool config.lt
@ -2201,6 +2228,9 @@ distclean: distclean-am
-rm -f mips/$(DEPDIR)/mips_init.Plo
-rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo
-rm -f powerpc/$(DEPDIR)/powerpc_init.Plo
-rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/riscv_init.Plo
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
distclean-hdr distclean-libtool distclean-tags
@ -2288,6 +2318,9 @@ maintainer-clean: maintainer-clean-am
-rm -f mips/$(DEPDIR)/mips_init.Plo
-rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo
-rm -f powerpc/$(DEPDIR)/powerpc_init.Plo
-rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/riscv_init.Plo
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic

2
README
View File

@ -147,6 +147,7 @@ Files included in this distribution
loongarch/ => Optimized code for LoongArch LSX
mips/ => Optimized code for MIPS MSA and MIPS MMI
powerpc/ => Optimized code for PowerPC VSX
riscv/ => Optimized code for the RISC-V platform
ci/ => Scripts for continuous integration
contrib/ => External contributions
arm-neon/ => Optimized code for the ARM-NEON platform
@ -162,6 +163,7 @@ Files included in this distribution
programs demonstrating the use of pngusr.dfa
pngminus/ => Simple pnm2png and png2pnm programs
pngsuite/ => Test images
riscv-vector/ => Optimized code for the RISC-V Vector platform
testpngs/ => Test images
tools/ => Various tools
visupng/ => VisualPng, a Windows viewer for PNG images

View File

@ -108,7 +108,16 @@
/* Enable POWERPC VSX optimizations */
#undef PNG_POWERPC_VSX_OPT
/* Define to 1 if all of the C89 standard headers exist (not just the ones
/* Turn on RISC-V Vector optimizations at run-time */
#undef PNG_RISCV_VECTOR_API_SUPPORTED
/* Check for RISC-V Vector support at run-time */
#undef PNG_RISCV_VECTOR_CHECK_SUPPORTED
/* Enable RISC-V Vector optimizations */
#undef PNG_RISCV_VECTOR_OPT
/* Define to 1 if all of the C90 standard headers exist (not just the ones
required in a freestanding environment). This macro is provided for
backward compatibility; new code need not use it. */
#undef STDC_HEADERS

View File

@ -343,6 +343,9 @@ AC_ARG_ENABLE([hardware-optimizations],
enable_loongarch_lsx=no
AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0],
[Disable LOONGARCH_LSX optimizations])
enable_riscv_vector=no
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0],
[Disable RISC-V Vector optimizations])
;;
*)
# allow enabling hardware optimization on any system:
@ -374,6 +377,10 @@ AC_ARG_ENABLE([hardware-optimizations],
enable_loongarch_lsx=yes
AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1],
[Enable LOONGARCH_LSX optimizations])
riscv*)
enable_riscv_vector=yes
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2],
[Enable RISC-V Vector optimizations])
;;
esac
;;
@ -661,6 +668,53 @@ AM_CONDITIONAL([PNG_LOONGARCH_LSX],
*) test "$enable_loongarch_lsx" != '' ;;
esac])
# RISC-V
# ======
#
# RISC-V Vector support.
AC_ARG_ENABLE([riscv-vector],
AS_HELP_STRING([[[--enable-riscv-vector]]],
[Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:]
[no/off: disable the optimizations; check: use internal checking code]
[api: disable by default, enable by a call to png_set_option]
[yes/on: turn on unconditionally.]
[If not specified: determined by the compiler.]),
[case "$enableval" in
no|off)
# disable the default enabling on __ppc64__ systems:
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0],
[Disable RISC-V Vector optimizations])
# Prevent inclusion of the platform-specific files below:
enable_riscv_vector=no ;;
check)
AC_DEFINE([PNG_RISCV_VECTOR_CHECK_SUPPORTED], [],
[Check for RISC-V Vector support at run-time])
AC_MSG_WARN([--enable-riscv-vector Please check contrib/riscv-vector/README file]
[for the list of supported OSes.]);;
api)
AC_DEFINE([PNG_RISCV_VECTOR_API_SUPPORTED], [],
[Turn on RISC-V Vector optimizations at run-time]);;
yes|on)
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2],
[Enable RISC-V Vector optimizations])
AC_MSG_WARN([--enable-riscv-vector: please specify 'check' or 'api', if]
[you want the optimizations unconditionally pass e.g. '-march=rv64gcv']
[to the compiler.]);;
*)
AC_MSG_ERROR([--enable-riscv-vector=${enable_riscv_vector}: invalid value])
esac])
# Add RISC-V-specific files to all builds where $host_cpu is riscv ('riscv*')
# or where RISC-V optimizations were explicitly requested (this allows a fallback
# if a future host CPU does not match 'riscv*')
AM_CONDITIONAL([PNG_RISCV_VECTOR],
[test "$enable_riscv_vector" != 'no' &&
case "$host_cpu" in
riscv*) : ;;
esac])
AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]])
# Config files, substituting as above

View File

@ -0,0 +1,85 @@
OPERATING SYSTEM SPECIFIC ARM NEON DETECTION
--------------------------------------------
Detection of the ability to execute RISC-V Vector on a RISC-V processor
requires operating system support. (The information is not available in user
mode.)
HOW TO USE THIS
---------------
This directory contains C code fragments that can be included in
riscv/riscv_init.c by setting the macro PNG_RISCV_VECTOR_FILE to the file name
in "" or <> at build time. This setting is not recorded in pnglibconf.h and
can be changed simply by rebuilding riscv/riscv_init.o with the required macro
definition.
For any of this code to be used the RISC-V Vector code must be enabled and run
time checks must be supported. I.e.:
#if PNG_RISCV_VECTOR_OPT > 0
#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED
This is done in a 'configure' build by passing configure the argument:
--enable-riscv-vector=check
Apart from the basic Linux implementation in contrib/riscv-vector/linux.c this
code is unsupported. That means that it is not even compiled on a regular
basis and may be broken in any given minor release.
FILE FORMAT
-----------
Each file documents its testing status as of the last time it was tested (which
may have been a long time ago):
STATUS: one of:
SUPPORTED: This indicates that the file is included in the regularly
performed test builds and bugs are fixed when discovered.
COMPILED: This indicates that the code did compile at least once. See the
more detailed description for the extent to which the result was
successful.
TESTED: This means the code was fully compiled into the libpng test programs
and these were run at least once.
BUG REPORTS: an email address to which to send reports of problems
The file is a fragment of C code. It should not define any 'extern' symbols;
everything should be static. It must define the function:
static int png_have_vector(png_structp png_ptr);
That function must return 1 if RISC-V Vector instructions are supported, 0 if
not. It must not execute png_error unless it detects a bug. A png_error will
prevent the reading of the PNG and in the future, writing too.
BUG REPORTS
-----------
If you mail a bug report for any file that is not SUPPORTED there may only be
limited response. Consider fixing it and sending a patch to fix the problem -
this is more likely to result in action.
CONTRIBUTIONS
-------------
You may send contributions of new implementations to
png-mng-implement@sourceforge.net. Please write code in strict C90 C where
possible. Obviously OS dependencies are to be expected. If you submit code you
must have the authors permission and it must have a license that is acceptable
to the current maintainer; in particular that license must permit modification
and redistribution.
Please try to make the contribution a single file and give the file a clear and
unambiguous name that identifies the target OS. If multiple files really are
required put them all in a sub-directory.
You must also be prepared to handle bug reports from users of the code, either
by joining the png-mng-implement mailing list or by providing an email for the
"BUG REPORTS" entry or both. Please make sure that the header of the file
contains the STATUS and BUG REPORTS fields as above.
Please list the OS requirements as precisely as possible. Ideally you should
also list the environment in which the code has been tested and certainly list
any environments where you suspect it might not work.

View File

@ -0,0 +1,57 @@
/* contrib/riscv-vector/linux.c
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* SEE contrib/riscv-vector/README before reporting bugs
*
* STATUS: SUPPORTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_vector implemented for Linux by reading the widely available
* pseudo-file /proc/cpuinfo.
*
* This code is strict ANSI-C and is probably moderately portable; it does
* however use <stdio.h> and it assumes that /proc/cpuinfo is never localized.
*/
#include <stdio.h>
#include <string.h>
static int
png_have_vector(png_structp png_ptr) {
FILE* f = fopen("/proc/cpuinfo", "rb");
if (f == NULL) {
#ifdef PNG_WARNINGS_SUPPORTED
png_warning(png_ptr, "/proc/cpuinfo open failed");
#endif
return 0;
}
char line[256];
while (fgets(line, sizeof line, f)) {
if (strncmp(line, "isa", 3) != 0) {
continue;
}
char* isa = strstr(line, "rv");
if (isa == NULL) {
continue;
}
if (strchr(isa + 2, 'v') != NULL) {
return 1;
}
}
fclose(f);
return 0;
}

View File

@ -288,6 +288,10 @@
# define PNG_LOONGARCH_LSX_IMPLEMENTATION 0
#endif
#if PNG_RISCV_VECTOR_OPT > 0
# define PNG_RISCV_VECTOR_IMPLEMENTATION 1
#endif
/* Is this a build of a DLL where compilation of the object modules requires
* different preprocessor settings to those required for a simple library? If
* so PNG_BUILD_DLL must be set.
@ -1522,6 +1526,27 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
#if PNG_RISCV_VECTOR_OPT > 0
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_128,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_256,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_128,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_256,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif
/* Choose the best filter to use and filter the row data */
PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr,
png_row_infop row_info),PNG_EMPTY);
@ -2134,6 +2159,11 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
# if PNG_RISCV_VECTOR_OPT > 0
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_vector,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif
PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr,
png_const_charp key, png_bytep new_key), PNG_EMPTY);
@ -2160,6 +2190,29 @@ PNG_INTERNAL_FUNCTION(int,
PNG_EMPTY);
#endif
#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1
PNG_INTERNAL_FUNCTION(void,
png_riffle_palette_vector,
(png_structrp),
PNG_EMPTY);
PNG_INTERNAL_FUNCTION(int,
png_do_expand_palette_rgba8_vector,
(png_structrp,
png_row_infop,
png_const_bytep,
const png_bytepp,
const png_bytepp),
PNG_EMPTY);
PNG_INTERNAL_FUNCTION(int,
png_do_expand_palette_rgb8_vector,
(png_structrp,
png_row_infop,
png_const_bytep,
const png_bytepp,
const png_bytepp),
PNG_EMPTY);
#endif
/* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h"

View File

@ -809,7 +809,7 @@ png_read_destroy(png_structrp png_ptr)
#endif
#if defined(PNG_READ_EXPAND_SUPPORTED) && \
defined(PNG_ARM_NEON_IMPLEMENTATION)
(defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION))
png_free(png_ptr, png_ptr->riffled_palette);
png_ptr->riffled_palette = NULL;
#endif

View File

@ -28,6 +28,12 @@
# endif
#endif
#ifdef PNG_RISCV_VECTOR_IMPLEMENTATION
# if PNG_RISCV_VECTOR_IMPLEMENTATION == 1
# define PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
# endif
#endif
#ifdef PNG_READ_SUPPORTED
/* Set the action on getting a CRC error for an ancillary or critical chunk. */
@ -4397,6 +4403,12 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row,
&sp, &dp);
}
#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
if (png_ptr->riffled_palette != NULL)
{
i = png_do_expand_palette_rgba8_vector(png_ptr, row_info, row,
&sp, &dp);
}
#else
PNG_UNUSED(png_ptr)
#endif
@ -4427,6 +4439,9 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row,
&sp, &dp);
#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
i = png_do_expand_palette_rgb8_vector(png_ptr, row_info, row,
&sp, &dp);
#else
PNG_UNUSED(png_ptr)
#endif
@ -4856,6 +4871,17 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
png_riffle_palette_neon(png_ptr);
}
}
#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8))
{
if (png_ptr->riffled_palette == NULL)
{
/* Initialize the accelerated palette expansion. */
png_ptr->riffled_palette =
(png_bytep)png_malloc(png_ptr, 256 * 4);
png_riffle_palette_vector(png_ptr);
}
}
#endif
png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);

View File

@ -375,7 +375,7 @@ struct png_struct_def
/* New member added in libpng-1.6.36 */
#if defined(PNG_READ_EXPAND_SUPPORTED) && \
defined(PNG_ARM_NEON_IMPLEMENTATION)
(defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION))
png_bytep riffled_palette; /* buffer for accelerated palette expansion */
#endif

View File

@ -0,0 +1,280 @@
/* filter_neon_intrinsics.c - RISC-V Vector optimized filter functions
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
#include <riscv_vector.h>
void
png_read_filter_row_up_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
for (size_t vl; len > 0; len -= vl, row += vl, prev_row += vl) {
vl = __riscv_vsetvl_e8m8(len);
vuint8m8_t prev_vals = __riscv_vle8_v_u8m8(prev_row, vl);
vuint8m8_t row_vals = __riscv_vle8_v_u8m8(row, vl);
row_vals = __riscv_vadd_vv_u8m8(row_vals, prev_vals, vl);
__riscv_vse8_v_u8m8(row, row_vals, vl);
}
}
static inline void
png_read_filter_row_sub_vector_128(size_t len, size_t vl, unsigned char* row)
{
vuint8m1_t sum;
vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl);
for (; len > 0; len -= vl, row += vl) {
__riscv_vsetvl_e8m1(vl);
sum = chunk;
chunk = __riscv_vle8_v_u8m1(row, vl);
chunk = __riscv_vadd_vv_u8m1(chunk, sum, vl);
__riscv_vse8_v_u8m1(row, chunk, vl);
}
}
void
png_read_filter_row_sub3_vector_128(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_sub_vector_128(len, 3, row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_sub4_vector_128(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_sub_vector_128(len, 4, row);
PNG_UNUSED(prev_row)
}
static inline void
png_read_filter_row_avg_vector(size_t len, size_t vl, unsigned char* row,
const unsigned char* prev_row)
{
vuint8m1_t avg;
vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl);
for (; len > 0; len -= vl, row += vl) {
__riscv_vsetvl_e8m1(vl);
vuint8m1_t prev_chunk = __riscv_vle8_v_u8m1(prev_row, vl);
avg = chunk;
chunk = __riscv_vle8_v_u8m1(row, vl);
vuint8m1_t sum = __riscv_vadd_vv_u8m1(chunk, prev_chunk, vl);
vuint8m1_t avg = __riscv_vsrl_vx_u8m1(sum, 1, vl);
chunk = __riscv_vadd_vv_u8m1(chunk, avg, vl);
__riscv_vse8_v_u8m1(row, chunk, vl);
}
}
void
png_read_filter_row_avg3_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_avg_vector(len, 3, row, prev_row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_avg4_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_avg_vector(len, 4, row, prev_row);
PNG_UNUSED(prev_row)
}
#define MIN_CHUNK_LEN 256
#define MAX_CHUNK_LEN 2048
static inline vuint8m1_t
prefix_sum(vuint8m1_t chunk, unsigned char* carry, size_t vl,
size_t max_chunk_len)
{
size_t r;
for (r = 1; r < MIN_CHUNK_LEN; r <<= 1) {
vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl);
chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl);
}
for (r = MIN_CHUNK_LEN; r < MAX_CHUNK_LEN && r < max_chunk_len; r <<= 1) {
vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl);
chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl);
}
chunk = __riscv_vadd_vx_u8m1(chunk, *carry, vl);
*carry = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(chunk, vl - 1, vl));
return chunk;
}
void
png_read_filter_row_sub3_vector_256(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
const size_t max_chunk_len = __riscv_vsetvlmax_e8m1();
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
unsigned char r_carry = 0;
unsigned char g_carry = 0;
unsigned char b_carry = 0;
for (size_t vl; len > 0; len -= vl * 3, row += vl * 3) {
vl = __riscv_vsetvl_e8m1(len / 3);
__riscv_vlseg3e8_v_u8m1(&r, &g, &b, row, vl);
r = prefix_sum(r, &r_carry, vl, max_chunk_len);
g = prefix_sum(g, &g_carry, vl, max_chunk_len);
b = prefix_sum(b, &b_carry, vl, max_chunk_len);
__riscv_vsseg3e8_v_u8m1(row, r, g, b, vl);
}
}
void
png_read_filter_row_sub4_vector_256(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
const size_t max_chunk_len = __riscv_vsetvlmax_e8m1();
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
vuint8m1_t a;
unsigned char r_carry = 0;
unsigned char g_carry = 0;
unsigned char b_carry = 0;
unsigned char a_carry = 0;
for (size_t vl; len > 0; len -= vl * 4, row += vl * 4) {
vl = __riscv_vsetvl_e8m1(len / 4);
__riscv_vlseg4e8_v_u8m1(&r, &g, &b, &a, row, vl);
r = prefix_sum(r, &r_carry, vl, max_chunk_len);
g = prefix_sum(g, &g_carry, vl, max_chunk_len);
b = prefix_sum(b, &b_carry, vl, max_chunk_len);
a = prefix_sum(a, &a_carry, vl, max_chunk_len);
__riscv_vsseg4e8_v_u8m1(row, r, g, b, a, vl);
}
}
static inline vint16m1_t
abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl)
{
vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl));
vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl);
return __riscv_vmax_vv_i16m1(diff, neg, vl);
}
static inline vint16m1_t
abs_sum(vint16m1_t a, vint16m1_t b, size_t vl)
{
vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl);
vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl);
return __riscv_vmax_vv_i16m1(sum, neg, vl);
}
static inline void
png_read_filter_row_paeth_vector(size_t len, size_t vl, unsigned char* row,
const unsigned char* prev)
{
vuint16m1_t a;
vuint16m1_t b = __riscv_vmv_v_x_u16m1(0, vl);
vuint16m1_t c;
vuint16m1_t d = __riscv_vmv_v_x_u16m1(0, vl);
for (; len > 0; len -= vl, row += vl, prev += vl) {
__riscv_vsetvl_e16m1(vl);
c = b;
b = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(prev, vl), vl);
a = d;
d = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(row, vl), vl);
vint16m1_t pa = abs_diff(b, c, vl);
vint16m1_t pb = abs_diff(a, c, vl);
vint16m1_t pc = abs_sum(pa, pb, vl);
vint16m1_t smallest = __riscv_vmin_vv_i16m1(pa, __riscv_vmin_vv_i16m1(pb, pc, vl), vl);
vuint16m1_t nearest = c;
nearest = __riscv_vmerge_vvm_u16m1(nearest, a, __riscv_vmseq_vv_i16m1_b16(smallest, pa, vl), vl);
nearest = __riscv_vmerge_vvm_u16m1(nearest, b, __riscv_vmseq_vv_i16m1_b16(smallest, pb, vl), vl);
d = __riscv_vadd_vv_u16m1(d, nearest, vl);
__riscv_vse8_v_u8mf2(row, __riscv_vnsrl_wx_u8mf2(d, 0, vl), vl);
}
}
void
png_read_filter_row_paeth3_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_paeth_vector(len, 3, row, prev_row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_paeth4_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_paeth_vector(len, 4, row, prev_row);
PNG_UNUSED(prev_row)
}
#endif /* PNG_RISCV_VECTOR_IMPLEMENTATION */
#endif /* READ */

View File

@ -0,0 +1,104 @@
/* palette_neon_intrinsics.c - RISC-V Vector optimized palette expansion
* functions
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
#if PNG_ARM_NEON_IMPLEMENTATION == 1
#include <riscv_vector.h>
void
png_riffle_palette_vector(png_structrp png_ptr)
{
png_const_bytep palette = (png_const_bytep)png_ptr->palette;
png_bytep riffled_palette = png_ptr->riffled_palette;
png_const_bytep trans_alpha = png_ptr->trans_alpha;
size_t len = 256;
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
for (size_t vl; len > 0; len -= vl, palette += vl * 3, trans_alpha += vl, riffled_palette += vl * 4) {
vl = __riscv_vsetvl_e8m1(len);
__riscv_vlseg3e8_v_u8m1(&r, &g, &b, palette, vl);
vuint8m1_t a = __riscv_vle8_v_u8m1(trans_alpha, vl);
__riscv_vsseg4e8_v_u8m1(riffled_palette, r, g, b, a, vl);
}
}
int
png_do_expand_palette_rgba8_vector(png_structrp png_ptr, png_row_infop row_info,
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
{
size_t row_width = (size_t)row_info->width;
const png_uint_32* palette = (const png_uint_32*)png_ptr->riffled_palette;
size_t vl = __riscv_vsetvl_e8m1(row_width);
png_bytep sp = *ssp - vl;
png_bytep dp = *ddp - vl * 4;
for (; row_width > 0; row_width -= vl, dp -= vl * 4, sp -= vl) {
vl = __riscv_vsetvl_e8m1(row_width);
vuint8m1_t indices = __riscv_vle8_v_u8m1(sp, vl);
vuint32m4_t pixels = __riscv_vluxei8_v_u32m4(palette, indices, vl);
__riscv_vse32_v_u32m4((unsigned int *)dp, pixels, vl);
}
row_width = (size_t)row_info->width;
*ssp = *ssp - row_width;
*ddp = *ddp - row_width * 4;
return row_width;
}
int
png_do_expand_palette_rgb8_vector(png_structrp png_ptr, png_row_infop row_info,
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
{
size_t row_width = (size_t)row_info->width;
const png_const_bytep palette = (png_const_bytep)png_ptr->palette;
size_t vl = __riscv_vsetvl_e8m1(row_width);
png_bytep sp = *ssp - vl;
png_bytep dp = *ddp - vl * 3;
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
for (; row_width > 0; row_width -= vl, dp -= vl * 3, sp -= vl) {
vl = __riscv_vsetvl_e8m1(row_width);
vuint16m2_t indices = __riscv_vwmulu_vx_u16m2(__riscv_vle8_v_u8m1(sp, vl), 3, vl);
__riscv_vluxseg3ei16_v_u8m1(&r, &g, &b, palette, indices, vl);
__riscv_vsseg3e8_v_u8m1(dp, r, g, b, vl);
}
row_width = (size_t)row_info->width;
*ssp = *ssp - row_width;
*ddp = *ddp - row_width * 3;
return row_width;
}
#endif /* PNG_ARM_NEON_IMPLEMENTATION */

130
riscv/riscv_init.c Normal file
View File

@ -0,0 +1,130 @@
/* arm_init.c - RISC-V Vector optimized filter functions
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_RISCV_VECTOR_OPT > 0
#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED /* Do run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the
* RISC-V Vector instructions there is no processor-specific way of detecting
* the presence of the required support, therefore run-time detection is
* extremely OS specific.
*
* You may set the macro PNG_RISCV_VECTOR_FILE to the file name of file containing
* a fragment of C source code which defines the png_have_neon function. There
* are a number of implementations in contrib/riscv-vector, but the only one that
* has partial support is contrib/riscv-vector/linux.c - a generic Linux
* implementation which reads /proc/cpuinfo.
*/
#ifndef PNG_RISCV_VECTOR_FILE
# if defined(__linux__)
# define PNG_RISCV_VECTOR_FILE "contrib/riscv-vector/linux.c"
# else
# error "No support for run-time RISC-V Vector checking; use compile-time options"
# endif
#endif
static int png_have_vector(png_structp png_ptr);
#ifdef PNG_RISCV_VECTOR_FILE
# include PNG_RISCV_VECTOR_FILE
#endif
#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */
void
png_init_filter_functions_vector(png_structp pp, unsigned int bpp)
{
/* The switch statement is compiled in for RISCV_VECTOR_API, the call to
* png_have_vector is compiled in for RISCV_VECTOR_CHECK. If both are
* defined the check is only performed if the API has not set the VECTOR
* option on or off explicitly. In this case the check controls what
* happens.
*
* If the CHECK is not compiled in and the option is UNSET the behavior prior
* to 1.6.7 was to use the NEON code - this was a bug caused by having the
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
* as documented in png.h
*/
png_debug(1, "in png_init_filter_functions_vector");
#ifdef PNG_RISCV_VECTOR_API_SUPPORTED
switch ((pp->options >> PNG_RISCV_VECTOR) & 3)
{
case PNG_OPTION_UNSET:
/* Allow the run-time check to execute if it has been enabled -
* thus both API and CHECK can be turned on. If it isn't supported
* this case will fall through to the 'default' below, which just
* returns.
*/
#endif /* PNG_RISCV_VECTOR_API_SUPPORTED */
#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED
{
static volatile sig_atomic_t no_vector = -1; /* not checked */
if (no_vector < 0)
no_vector = !png_have_vector(pp);
if (no_vector)
return;
}
#ifdef PNG_RISCV_VECTOR_API_SUPPORTED
break;
#endif
#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */
#ifdef PNG_RISCV_VECTOR_API_SUPPORTED
default: /* OFF or INVALID */
return;
case PNG_OPTION_ON:
/* Option turned on */
break;
}
#endif
/* IMPORTANT: any new external functions used here must be declared using
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
* 'prefix' option to configure works:
*
* ./configure --with-libpng-prefix=foobar_
*
* Verify you have got this right by running the above command, doing a build
* and examining pngprefix.h; it must contain a #define for every external
* function you add. (Notice that this happens automatically for the
* initialization function.)
*/
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vector;
if (bpp == 3)
{
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vector;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vector;
if (__riscv_vsetvlmax_e8m1() > 16) {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_128;
} else {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_256;
}
}
else if (bpp == 4)
{
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vector;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vector;
if (__riscv_vsetvlmax_e8m1() > 16) {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_128;
} else {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_256;
}
}
}
#endif /* PNG_RISCV_VECTOR_OPT > 0 */
#endif /* PNG_READ_SUPPORTED */