diff --git a/AUTHORS b/AUTHORS index f30a4ee19..625c70b62 100644 --- a/AUTHORS +++ b/AUTHORS @@ -40,8 +40,9 @@ Authors, for copyright and licensing purposes. - Zixu Wang (王子旭) * Arm Holdings - Richard Townsend - * Google Inc. + * Google LLC - Dan Field + - Dragoș Tiselice - Leon Scroggins III - Matt Sarett - Mike Klein diff --git a/CMakeLists.txt b/CMakeLists.txt index 4258e8b2f..07e0c6a60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,6 +312,31 @@ if(PNG_HARDWARE_OPTIMIZATIONS) endif() endif() + # Set definitions and sources for RISC-V. + if(TARGET_ARCH MATCHES "riscv*") + set(PNG_RISCV_VECTOR_POSSIBLE_VALUES check on off) + set(PNG_RISCV_VECTOR "off" + CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; off is default") + set_property(CACHE PNG_RISCV_VECTOR + PROPERTY STRINGS ${PNG_RISCV_VECTOR_POSSIBLE_VALUES}) + list(FIND PNG_RISCV_VECTOR_POSSIBLE_VALUES ${PNG_RISCV_VECTOR} index) + if(index EQUAL -1) + message(FATAL_ERROR "PNG_RISCV_VECTOR must be one of [${PNG_RISCV_VECTOR_POSSIBLE_VALUES}]") + elseif(NOT ${PNG_RISCV_VECTOR} STREQUAL "off") + set(libpng_riscv_sources + riscv/filter_vector_intrinsics.c + riscv/palette_vector_intrinsics.c + riscv/riscv_init.c) + if(${PNG_RISCV_VECTOR} STREQUAL "on") + add_definitions(-DPNG_RISCV_VECTOR_OPT=2) + elseif(${PNG_RISCV_VECTOR} STREQUAL "check") + add_definitions(-DPNG_RISCV_VECTOR_CHECK_SUPPORTED) + endif() + else() + add_definitions(-DPNG_RISCV_VECTOR_OPT=0) + endif() + endif() + else(PNG_HARDWARE_OPTIMIZATIONS) # Set definitions and sources for ARM. @@ -339,6 +364,11 @@ else(PNG_HARDWARE_OPTIMIZATIONS) add_definitions(-DPNG_LOONGARCH_LSX_OPT=0) endif() + # Set definitions and sources for RISC-V. + if(TARGET_ARCH MATCHES "^riscv") + add_definitions(-DPNG_RISCV_VECTOR_OPT=0) + endif() + endif(PNG_HARDWARE_OPTIMIZATIONS) option(ld-version-script "Enable linker version script" ON) @@ -613,7 +643,9 @@ set(libpng_sources ${libpng_mips_sources} ${libpng_powerpc_sources} ${libpng_loongarch_sources} + ${libpng_riscv_sources} ) + set(pngtest_sources pngtest.c ) diff --git a/INSTALL b/INSTALL index df1a49446..21278c608 100644 --- a/INSTALL +++ b/INSTALL @@ -136,7 +136,7 @@ Your directory structure should look like this: depcomp, install-sh, mkinstalldirs, test-pngtest.sh, etc. contrib arm-neon, conftest, examples, gregbook, libtests, pngminim, - pngminus, pngsuite, tools, visupng + pngminus, pngsuite, tools, visupng, riscv-vector projects owatcom, visualc71, vstudio scripts @@ -289,6 +289,7 @@ such as one of --enable-mips-msa=yes --enable-intel-sse=yes --enable-powerpc-vsx=yes + --enable-riscv-vector=yes or enable them all at once with @@ -301,6 +302,7 @@ or more of CPPFLAGS += "-DPNG_MIPS_MSA" CPPFLAGS += "-DPNG_INTEL_SSE" CPPFLAGS += "-DPNG_POWERPC_VSX" + CPPFLAGS += "-DPNG_RISCV_VECTOR" See for example scripts/makefile.linux-opt @@ -317,13 +319,15 @@ to disable a particular one, or via compiler-command options such as CPPFLAGS += "-DPNG_ARM_NEON_OPT=0, -DPNG_MIPS_MSA_OPT=0, - -DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0" + -DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0, + -DPNG_RISCV_VECTOR_OPT=0" If you are using cmake, hardware optimizations are "on" by default. To disable them, use cmake . -DPNG_ARM_NEON=no -DPNG_INTEL_SSE=no \ - -DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no + -DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no \ + -DPNG_RISCV_VECTOR=no or disable them all at once with diff --git a/Makefile.am b/Makefile.am index 217f1af84..1c95bec78 100644 --- a/Makefile.am +++ b/Makefile.am @@ -134,6 +134,11 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\ powerpc/filter_vsx_intrinsics.c endif +if PNG_RISCV_VECTOR +libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += riscv/riscv_init.c\ + riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c +endif + if PNG_LOONGARCH_LSX noinst_LTLIBRARIES= libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_SOURCES = loongarch/loongarch_lsx_init.c\ diff --git a/Makefile.in b/Makefile.in index a8dc362bd..107932cb5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,6 +132,10 @@ host_triplet = @host@ @HAVE_LD_VERSION_SCRIPT_FALSE@am__append_10 = -export-symbols libpng.sym @PNG_LOONGARCH_LSX_TRUE@am__append_11 = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la @DO_PNG_PREFIX_TRUE@am__append_12 = -DPNG_PREFIX='@PNG_PREFIX@' + +@PNG_RISCV_VECTOR_TRUE@am__append_13 = riscv/riscv_init.c\ +@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c + subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/scripts/autoconf/libtool.m4 \ @@ -193,7 +197,9 @@ am__libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES_DIST = png.c \ mips/mips_init.c mips/filter_msa_intrinsics.c \ mips/filter_mmi_inline_assembly.c intel/intel_init.c \ intel/filter_sse2_intrinsics.c powerpc/powerpc_init.c \ - powerpc/filter_vsx_intrinsics.c + powerpc/filter_vsx_intrinsics.c riscv/riscv_init.c \ + riscv/filter_vector_intrinsics.c \ + riscv/palette_vector_intrinsics.c am__dirstamp = $(am__leading_dot)dirstamp @PNG_ARM_NEON_TRUE@am__objects_1 = arm/arm_init.lo \ @PNG_ARM_NEON_TRUE@ arm/filter_neon_intrinsics.lo \ @@ -207,12 +213,15 @@ am__dirstamp = $(am__leading_dot)dirstamp @PNG_INTEL_SSE_TRUE@ intel/filter_sse2_intrinsics.lo @PNG_POWERPC_VSX_TRUE@am__objects_6 = powerpc/powerpc_init.lo \ @PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.lo +@PNG_RISCV_VECTOR_TRUE@am__objects_7 = riscv/riscv_init.lo \ +@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.lo \ +@PNG_RISCV_VECTOR_TRUE@ riscv/palette_vector_intrinsics.lo am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = png.lo pngerror.lo \ pngget.lo pngmem.lo pngpread.lo pngread.lo pngrio.lo \ pngrtran.lo pngrutil.lo pngset.lo pngtrans.lo pngwio.lo \ pngwrite.lo pngwtran.lo pngwutil.lo $(am__objects_1) \ $(am__objects_2) $(am__objects_3) $(am__objects_4) \ - $(am__objects_5) $(am__objects_6) + $(am__objects_5) $(am__objects_6) $(am__objects_7) nodist_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = \ $(am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) \ @@ -332,7 +341,10 @@ am__depfiles_remade = ./$(DEPDIR)/png.Plo ./$(DEPDIR)/pngerror.Plo \ mips/$(DEPDIR)/filter_msa_intrinsics.Plo \ mips/$(DEPDIR)/mips_init.Plo \ powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo \ - powerpc/$(DEPDIR)/powerpc_init.Plo + powerpc/$(DEPDIR)/powerpc_init.Plo \ + riscv/$(DEPDIR)/filter_vector_intrinsics.Plo \ + riscv/$(DEPDIR)/palette_vector_intrinsics.Plo \ + riscv/$(DEPDIR)/riscv_init.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) @@ -825,7 +837,7 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES = png.c pngerror.c \ pngwutil.c png.h pngconf.h pngdebug.h pnginfo.h pngpriv.h \ pngstruct.h pngusr.dfa $(am__append_2) $(am__append_3) \ $(am__append_4) $(am__append_5) $(am__append_6) \ - $(am__append_7) + $(am__append_7) $(am__append_13) @PNG_LOONGARCH_LSX_TRUE@noinst_LTLIBRARIES = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la @PNG_LOONGARCH_LSX_TRUE@libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx_la_SOURCES = loongarch/loongarch_lsx_init.c\ @PNG_LOONGARCH_LSX_TRUE@ loongarch/filter_lsx_intrinsics.c @@ -1071,6 +1083,18 @@ powerpc/powerpc_init.lo: powerpc/$(am__dirstamp) \ powerpc/$(DEPDIR)/$(am__dirstamp) powerpc/filter_vsx_intrinsics.lo: powerpc/$(am__dirstamp) \ powerpc/$(DEPDIR)/$(am__dirstamp) +riscv/$(am__dirstamp): + @$(MKDIR_P) riscv + @: > riscv/$(am__dirstamp) +riscv/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) riscv/$(DEPDIR) + @: > riscv/$(DEPDIR)/$(am__dirstamp) +riscv/riscv_init.lo: riscv/$(am__dirstamp) \ + riscv/$(DEPDIR)/$(am__dirstamp) +riscv/filter_vector_intrinsics.lo: riscv/$(am__dirstamp) \ + riscv/$(DEPDIR)/$(am__dirstamp) +riscv/palette_vector_intrinsics.lo: riscv/$(am__dirstamp) \ + riscv/$(DEPDIR)/$(am__dirstamp) libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la: $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(EXTRA_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(AM_V_CCLD)$(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LINK) -rpath $(libdir) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_LIBADD) $(LIBS) @@ -1202,6 +1226,8 @@ mostlyclean-compile: -rm -f mips/*.lo -rm -f powerpc/*.$(OBJEXT) -rm -f powerpc/*.lo + -rm -f riscv/*.$(OBJEXT) + -rm -f riscv/*.lo distclean-compile: -rm -f *.tab.c @@ -1297,6 +1323,7 @@ clean-libtool: -rm -rf loongarch/.libs loongarch/_libs -rm -rf mips/.libs mips/_libs -rm -rf powerpc/.libs powerpc/_libs + -rm -rf riscv/.libs riscv/_libs distclean-libtool: -rm -f libtool config.lt @@ -2201,6 +2228,9 @@ distclean: distclean-am -rm -f mips/$(DEPDIR)/mips_init.Plo -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo + -rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo + -rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo + -rm -f riscv/$(DEPDIR)/riscv_init.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-libtool distclean-tags @@ -2288,6 +2318,9 @@ maintainer-clean: maintainer-clean-am -rm -f mips/$(DEPDIR)/mips_init.Plo -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo + -rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo + -rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo + -rm -f riscv/$(DEPDIR)/riscv_init.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic diff --git a/README b/README index d25ab0067..64e216820 100644 --- a/README +++ b/README @@ -147,6 +147,7 @@ Files included in this distribution loongarch/ => Optimized code for LoongArch LSX mips/ => Optimized code for MIPS MSA and MIPS MMI powerpc/ => Optimized code for PowerPC VSX + riscv/ => Optimized code for the RISC-V platform ci/ => Scripts for continuous integration contrib/ => External contributions arm-neon/ => Optimized code for the ARM-NEON platform @@ -162,6 +163,7 @@ Files included in this distribution programs demonstrating the use of pngusr.dfa pngminus/ => Simple pnm2png and png2pnm programs pngsuite/ => Test images + riscv-vector/ => Optimized code for the RISC-V Vector platform testpngs/ => Test images tools/ => Various tools visupng/ => VisualPng, a Windows viewer for PNG images diff --git a/config.h.in b/config.h.in index 3ec0c8ae8..c21ea2553 100644 --- a/config.h.in +++ b/config.h.in @@ -108,7 +108,16 @@ /* Enable POWERPC VSX optimizations */ #undef PNG_POWERPC_VSX_OPT -/* Define to 1 if all of the C89 standard headers exist (not just the ones +/* Turn on RISC-V Vector optimizations at run-time */ +#undef PNG_RISCV_VECTOR_API_SUPPORTED + +/* Check for RISC-V Vector support at run-time */ +#undef PNG_RISCV_VECTOR_CHECK_SUPPORTED + +/* Enable RISC-V Vector optimizations */ +#undef PNG_RISCV_VECTOR_OPT + +/* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for backward compatibility; new code need not use it. */ #undef STDC_HEADERS diff --git a/configure.ac b/configure.ac index 296912d31..66c6183fa 100644 --- a/configure.ac +++ b/configure.ac @@ -343,6 +343,9 @@ AC_ARG_ENABLE([hardware-optimizations], enable_loongarch_lsx=no AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0], [Disable LOONGARCH_LSX optimizations]) + enable_riscv_vector=no + AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], + [Disable RISC-V Vector optimizations]) ;; *) # allow enabling hardware optimization on any system: @@ -374,6 +377,10 @@ AC_ARG_ENABLE([hardware-optimizations], enable_loongarch_lsx=yes AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], [Enable LOONGARCH_LSX optimizations]) + riscv*) + enable_riscv_vector=yes + AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], + [Enable RISC-V Vector optimizations]) ;; esac ;; @@ -661,6 +668,53 @@ AM_CONDITIONAL([PNG_LOONGARCH_LSX], *) test "$enable_loongarch_lsx" != '' ;; esac]) +# RISC-V +# ====== +# +# RISC-V Vector support. + +AC_ARG_ENABLE([riscv-vector], +AS_HELP_STRING([[[--enable-riscv-vector]]], + [Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:] + [no/off: disable the optimizations; check: use internal checking code] + [api: disable by default, enable by a call to png_set_option] + [yes/on: turn on unconditionally.] + [If not specified: determined by the compiler.]), + [case "$enableval" in + no|off) + # disable the default enabling on __ppc64__ systems: + AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], + [Disable RISC-V Vector optimizations]) + # Prevent inclusion of the platform-specific files below: + enable_riscv_vector=no ;; + check) + AC_DEFINE([PNG_RISCV_VECTOR_CHECK_SUPPORTED], [], + [Check for RISC-V Vector support at run-time]) + AC_MSG_WARN([--enable-riscv-vector Please check contrib/riscv-vector/README file] + [for the list of supported OSes.]);; + api) + AC_DEFINE([PNG_RISCV_VECTOR_API_SUPPORTED], [], + [Turn on RISC-V Vector optimizations at run-time]);; + yes|on) + AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], + [Enable RISC-V Vector optimizations]) + AC_MSG_WARN([--enable-riscv-vector: please specify 'check' or 'api', if] + [you want the optimizations unconditionally pass e.g. '-march=rv64gcv'] + [to the compiler.]);; + *) + AC_MSG_ERROR([--enable-riscv-vector=${enable_riscv_vector}: invalid value]) + esac]) + +# Add RISC-V-specific files to all builds where $host_cpu is riscv ('riscv*') +# or where RISC-V optimizations were explicitly requested (this allows a fallback +# if a future host CPU does not match 'riscv*') + +AM_CONDITIONAL([PNG_RISCV_VECTOR], + [test "$enable_riscv_vector" != 'no' && + case "$host_cpu" in + riscv*) : ;; + esac]) + AC_MSG_NOTICE([[Extra options for compiler: $PNG_COPTS]]) # Config files, substituting as above diff --git a/contrib/riscv-vector/README b/contrib/riscv-vector/README new file mode 100644 index 000000000..08311367f --- /dev/null +++ b/contrib/riscv-vector/README @@ -0,0 +1,85 @@ +OPERATING SYSTEM SPECIFIC ARM NEON DETECTION +-------------------------------------------- + +Detection of the ability to execute RISC-V Vector on a RISC-V processor +requires operating system support. (The information is not available in user +mode.) + +HOW TO USE THIS +--------------- + +This directory contains C code fragments that can be included in +riscv/riscv_init.c by setting the macro PNG_RISCV_VECTOR_FILE to the file name +in "" or <> at build time. This setting is not recorded in pnglibconf.h and +can be changed simply by rebuilding riscv/riscv_init.o with the required macro +definition. + +For any of this code to be used the RISC-V Vector code must be enabled and run +time checks must be supported. I.e.: + +#if PNG_RISCV_VECTOR_OPT > 0 +#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED + +This is done in a 'configure' build by passing configure the argument: + + --enable-riscv-vector=check + +Apart from the basic Linux implementation in contrib/riscv-vector/linux.c this +code is unsupported. That means that it is not even compiled on a regular +basis and may be broken in any given minor release. + +FILE FORMAT +----------- + +Each file documents its testing status as of the last time it was tested (which +may have been a long time ago): + +STATUS: one of: + SUPPORTED: This indicates that the file is included in the regularly + performed test builds and bugs are fixed when discovered. + COMPILED: This indicates that the code did compile at least once. See the + more detailed description for the extent to which the result was + successful. + TESTED: This means the code was fully compiled into the libpng test programs + and these were run at least once. + +BUG REPORTS: an email address to which to send reports of problems + +The file is a fragment of C code. It should not define any 'extern' symbols; +everything should be static. It must define the function: + +static int png_have_vector(png_structp png_ptr); + +That function must return 1 if RISC-V Vector instructions are supported, 0 if +not. It must not execute png_error unless it detects a bug. A png_error will +prevent the reading of the PNG and in the future, writing too. + +BUG REPORTS +----------- + +If you mail a bug report for any file that is not SUPPORTED there may only be +limited response. Consider fixing it and sending a patch to fix the problem - +this is more likely to result in action. + +CONTRIBUTIONS +------------- + +You may send contributions of new implementations to +png-mng-implement@sourceforge.net. Please write code in strict C90 C where +possible. Obviously OS dependencies are to be expected. If you submit code you +must have the authors permission and it must have a license that is acceptable +to the current maintainer; in particular that license must permit modification +and redistribution. + +Please try to make the contribution a single file and give the file a clear and +unambiguous name that identifies the target OS. If multiple files really are +required put them all in a sub-directory. + +You must also be prepared to handle bug reports from users of the code, either +by joining the png-mng-implement mailing list or by providing an email for the +"BUG REPORTS" entry or both. Please make sure that the header of the file +contains the STATUS and BUG REPORTS fields as above. + +Please list the OS requirements as precisely as possible. Ideally you should +also list the environment in which the code has been tested and certainly list +any environments where you suspect it might not work. diff --git a/contrib/riscv-vector/linux.c b/contrib/riscv-vector/linux.c new file mode 100644 index 000000000..c5fd1c6c5 --- /dev/null +++ b/contrib/riscv-vector/linux.c @@ -0,0 +1,57 @@ +/* contrib/riscv-vector/linux.c + * + * Copyright (c) 2023 Google LLC + * Written by Dragoș Tiselice , May 2023. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * SEE contrib/riscv-vector/README before reporting bugs + * + * STATUS: SUPPORTED + * BUG REPORTS: png-mng-implement@sourceforge.net + * + * png_have_vector implemented for Linux by reading the widely available + * pseudo-file /proc/cpuinfo. + * + * This code is strict ANSI-C and is probably moderately portable; it does + * however use and it assumes that /proc/cpuinfo is never localized. + */ + +#include +#include + +static int +png_have_vector(png_structp png_ptr) { + FILE* f = fopen("/proc/cpuinfo", "rb"); + + if (f == NULL) { +#ifdef PNG_WARNINGS_SUPPORTED + png_warning(png_ptr, "/proc/cpuinfo open failed"); +#endif + return 0; + } + + char line[256]; + + while (fgets(line, sizeof line, f)) { + if (strncmp(line, "isa", 3) != 0) { + continue; + } + + char* isa = strstr(line, "rv"); + + if (isa == NULL) { + continue; + } + + if (strchr(isa + 2, 'v') != NULL) { + return 1; + } + } + + fclose(f); + + return 0; +} diff --git a/pngpriv.h b/pngpriv.h index a6f8d67d3..a5b7e02c1 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -288,6 +288,10 @@ # define PNG_LOONGARCH_LSX_IMPLEMENTATION 0 #endif +#if PNG_RISCV_VECTOR_OPT > 0 +# define PNG_RISCV_VECTOR_IMPLEMENTATION 1 +#endif + /* Is this a build of a DLL where compilation of the object modules requires * different preprocessor settings to those required for a simple library? If * so PNG_BUILD_DLL must be set. @@ -1522,6 +1526,27 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif +#if PNG_RISCV_VECTOR_OPT > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vector,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_128,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_256,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_128,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_256,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vector,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vector,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vector,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vector,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + /* Choose the best filter to use and filter the row data */ PNG_INTERNAL_FUNCTION(void,png_write_find_filter,(png_structrp png_ptr, png_row_infop row_info),PNG_EMPTY); @@ -2134,6 +2159,11 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); #endif +# if PNG_RISCV_VECTOR_OPT > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_vector, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +#endif + PNG_INTERNAL_FUNCTION(png_uint_32, png_check_keyword, (png_structrp png_ptr, png_const_charp key, png_bytep new_key), PNG_EMPTY); @@ -2160,6 +2190,29 @@ PNG_INTERNAL_FUNCTION(int, PNG_EMPTY); #endif +#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 +PNG_INTERNAL_FUNCTION(void, + png_riffle_palette_vector, + (png_structrp), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(int, + png_do_expand_palette_rgba8_vector, + (png_structrp, + png_row_infop, + png_const_bytep, + const png_bytepp, + const png_bytepp), + PNG_EMPTY); +PNG_INTERNAL_FUNCTION(int, + png_do_expand_palette_rgb8_vector, + (png_structrp, + png_row_infop, + png_const_bytep, + const png_bytepp, + const png_bytepp), + PNG_EMPTY); +#endif + /* Maintainer: Put new private prototypes here ^ */ #include "pngdebug.h" diff --git a/pngread.c b/pngread.c index 4f55f6408..e7f562362 100644 --- a/pngread.c +++ b/pngread.c @@ -809,7 +809,7 @@ png_read_destroy(png_structrp png_ptr) #endif #if defined(PNG_READ_EXPAND_SUPPORTED) && \ - defined(PNG_ARM_NEON_IMPLEMENTATION) + (defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION)) png_free(png_ptr, png_ptr->riffled_palette); png_ptr->riffled_palette = NULL; #endif diff --git a/pngrtran.c b/pngrtran.c index 0fea13a41..622543f04 100644 --- a/pngrtran.c +++ b/pngrtran.c @@ -28,6 +28,12 @@ # endif #endif +#ifdef PNG_RISCV_VECTOR_IMPLEMENTATION +# if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 +# define PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE +# endif +#endif + #ifdef PNG_READ_SUPPORTED /* Set the action on getting a CRC error for an ancillary or critical chunk. */ @@ -4397,6 +4403,12 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row, &sp, &dp); } +#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE + if (png_ptr->riffled_palette != NULL) + { + i = png_do_expand_palette_rgba8_vector(png_ptr, row_info, row, + &sp, &dp); + } #else PNG_UNUSED(png_ptr) #endif @@ -4427,6 +4439,9 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, #ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row, &sp, &dp); +#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE + i = png_do_expand_palette_rgb8_vector(png_ptr, row_info, row, + &sp, &dp); #else PNG_UNUSED(png_ptr) #endif @@ -4856,6 +4871,17 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) png_riffle_palette_neon(png_ptr); } } +#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE + if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) + { + if (png_ptr->riffled_palette == NULL) + { + /* Initialize the accelerated palette expansion. */ + png_ptr->riffled_palette = + (png_bytep)png_malloc(png_ptr, 256 * 4); + png_riffle_palette_vector(png_ptr); + } + } #endif png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); diff --git a/pngstruct.h b/pngstruct.h index b17a54fe8..0f0ffd992 100644 --- a/pngstruct.h +++ b/pngstruct.h @@ -375,7 +375,7 @@ struct png_struct_def /* New member added in libpng-1.6.36 */ #if defined(PNG_READ_EXPAND_SUPPORTED) && \ - defined(PNG_ARM_NEON_IMPLEMENTATION) + (defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION)) png_bytep riffled_palette; /* buffer for accelerated palette expansion */ #endif diff --git a/riscv/filter_vector_intrinsics.c b/riscv/filter_vector_intrinsics.c new file mode 100644 index 000000000..09a714b6b --- /dev/null +++ b/riscv/filter_vector_intrinsics.c @@ -0,0 +1,280 @@ +/* filter_neon_intrinsics.c - RISC-V Vector optimized filter functions + * + * Copyright (c) 2023 Google LLC + * Written by Dragoș Tiselice , May 2023. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ + +#include + +void +png_read_filter_row_up_vector(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + for (size_t vl; len > 0; len -= vl, row += vl, prev_row += vl) { + vl = __riscv_vsetvl_e8m8(len); + + vuint8m8_t prev_vals = __riscv_vle8_v_u8m8(prev_row, vl); + vuint8m8_t row_vals = __riscv_vle8_v_u8m8(row, vl); + + row_vals = __riscv_vadd_vv_u8m8(row_vals, prev_vals, vl); + + __riscv_vse8_v_u8m8(row, row_vals, vl); + } +} + +static inline void +png_read_filter_row_sub_vector_128(size_t len, size_t vl, unsigned char* row) +{ + vuint8m1_t sum; + vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl); + + for (; len > 0; len -= vl, row += vl) { + __riscv_vsetvl_e8m1(vl); + + sum = chunk; + chunk = __riscv_vle8_v_u8m1(row, vl); + + chunk = __riscv_vadd_vv_u8m1(chunk, sum, vl); + + __riscv_vse8_v_u8m1(row, chunk, vl); + } +} + +void +png_read_filter_row_sub3_vector_128(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_sub_vector_128(len, 3, row); + + PNG_UNUSED(prev_row) +} + +void +png_read_filter_row_sub4_vector_128(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_sub_vector_128(len, 4, row); + + PNG_UNUSED(prev_row) +} + +static inline void +png_read_filter_row_avg_vector(size_t len, size_t vl, unsigned char* row, + const unsigned char* prev_row) +{ + vuint8m1_t avg; + vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl); + + for (; len > 0; len -= vl, row += vl) { + __riscv_vsetvl_e8m1(vl); + + vuint8m1_t prev_chunk = __riscv_vle8_v_u8m1(prev_row, vl); + avg = chunk; + chunk = __riscv_vle8_v_u8m1(row, vl); + + vuint8m1_t sum = __riscv_vadd_vv_u8m1(chunk, prev_chunk, vl); + vuint8m1_t avg = __riscv_vsrl_vx_u8m1(sum, 1, vl); + + chunk = __riscv_vadd_vv_u8m1(chunk, avg, vl); + + __riscv_vse8_v_u8m1(row, chunk, vl); + } +} + +void +png_read_filter_row_avg3_vector(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_avg_vector(len, 3, row, prev_row); + + PNG_UNUSED(prev_row) +} + +void +png_read_filter_row_avg4_vector(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_avg_vector(len, 4, row, prev_row); + + PNG_UNUSED(prev_row) +} + +#define MIN_CHUNK_LEN 256 +#define MAX_CHUNK_LEN 2048 + +static inline vuint8m1_t +prefix_sum(vuint8m1_t chunk, unsigned char* carry, size_t vl, + size_t max_chunk_len) +{ + size_t r; + + for (r = 1; r < MIN_CHUNK_LEN; r <<= 1) { + vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl); + chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl); + } + + for (r = MIN_CHUNK_LEN; r < MAX_CHUNK_LEN && r < max_chunk_len; r <<= 1) { + vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl); + chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl); + } + + chunk = __riscv_vadd_vx_u8m1(chunk, *carry, vl); + *carry = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(chunk, vl - 1, vl)); + + return chunk; +} + +void +png_read_filter_row_sub3_vector_256(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + const size_t max_chunk_len = __riscv_vsetvlmax_e8m1(); + + vuint8m1_t r; + vuint8m1_t g; + vuint8m1_t b; + + unsigned char r_carry = 0; + unsigned char g_carry = 0; + unsigned char b_carry = 0; + + for (size_t vl; len > 0; len -= vl * 3, row += vl * 3) { + vl = __riscv_vsetvl_e8m1(len / 3); + + __riscv_vlseg3e8_v_u8m1(&r, &g, &b, row, vl); + + r = prefix_sum(r, &r_carry, vl, max_chunk_len); + g = prefix_sum(g, &g_carry, vl, max_chunk_len); + b = prefix_sum(b, &b_carry, vl, max_chunk_len); + + __riscv_vsseg3e8_v_u8m1(row, r, g, b, vl); + } +} + +void +png_read_filter_row_sub4_vector_256(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + const size_t max_chunk_len = __riscv_vsetvlmax_e8m1(); + + vuint8m1_t r; + vuint8m1_t g; + vuint8m1_t b; + vuint8m1_t a; + + unsigned char r_carry = 0; + unsigned char g_carry = 0; + unsigned char b_carry = 0; + unsigned char a_carry = 0; + + for (size_t vl; len > 0; len -= vl * 4, row += vl * 4) { + vl = __riscv_vsetvl_e8m1(len / 4); + + __riscv_vlseg4e8_v_u8m1(&r, &g, &b, &a, row, vl); + + r = prefix_sum(r, &r_carry, vl, max_chunk_len); + g = prefix_sum(g, &g_carry, vl, max_chunk_len); + b = prefix_sum(b, &b_carry, vl, max_chunk_len); + a = prefix_sum(a, &a_carry, vl, max_chunk_len); + + __riscv_vsseg4e8_v_u8m1(row, r, g, b, a, vl); + } +} + +static inline vint16m1_t +abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl) +{ + vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl)); + vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl); + + return __riscv_vmax_vv_i16m1(diff, neg, vl); +} + +static inline vint16m1_t +abs_sum(vint16m1_t a, vint16m1_t b, size_t vl) +{ + vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl); + vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl); + + return __riscv_vmax_vv_i16m1(sum, neg, vl); +} + +static inline void +png_read_filter_row_paeth_vector(size_t len, size_t vl, unsigned char* row, + const unsigned char* prev) +{ + vuint16m1_t a; + vuint16m1_t b = __riscv_vmv_v_x_u16m1(0, vl); + vuint16m1_t c; + vuint16m1_t d = __riscv_vmv_v_x_u16m1(0, vl); + + for (; len > 0; len -= vl, row += vl, prev += vl) { + __riscv_vsetvl_e16m1(vl); + + c = b; + b = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(prev, vl), vl); + a = d; + d = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(row, vl), vl); + + vint16m1_t pa = abs_diff(b, c, vl); + vint16m1_t pb = abs_diff(a, c, vl); + vint16m1_t pc = abs_sum(pa, pb, vl); + + vint16m1_t smallest = __riscv_vmin_vv_i16m1(pa, __riscv_vmin_vv_i16m1(pb, pc, vl), vl); + + vuint16m1_t nearest = c; + nearest = __riscv_vmerge_vvm_u16m1(nearest, a, __riscv_vmseq_vv_i16m1_b16(smallest, pa, vl), vl); + nearest = __riscv_vmerge_vvm_u16m1(nearest, b, __riscv_vmseq_vv_i16m1_b16(smallest, pb, vl), vl); + + d = __riscv_vadd_vv_u16m1(d, nearest, vl); + + __riscv_vse8_v_u8mf2(row, __riscv_vnsrl_wx_u8mf2(d, 0, vl), vl); + } +} + +void +png_read_filter_row_paeth3_vector(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_paeth_vector(len, 3, row, prev_row); + + PNG_UNUSED(prev_row) +} + +void +png_read_filter_row_paeth4_vector(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_paeth_vector(len, 4, row, prev_row); + + PNG_UNUSED(prev_row) +} + +#endif /* PNG_RISCV_VECTOR_IMPLEMENTATION */ +#endif /* READ */ diff --git a/riscv/palette_vector_intrinsics.c b/riscv/palette_vector_intrinsics.c new file mode 100644 index 000000000..982d956cd --- /dev/null +++ b/riscv/palette_vector_intrinsics.c @@ -0,0 +1,104 @@ +/* palette_neon_intrinsics.c - RISC-V Vector optimized palette expansion + * functions + * + * Copyright (c) 2023 Google LLC + * Written by Dragoș Tiselice , May 2023. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#if PNG_ARM_NEON_IMPLEMENTATION == 1 + +#include + +void +png_riffle_palette_vector(png_structrp png_ptr) +{ + png_const_bytep palette = (png_const_bytep)png_ptr->palette; + png_bytep riffled_palette = png_ptr->riffled_palette; + png_const_bytep trans_alpha = png_ptr->trans_alpha; + + size_t len = 256; + + vuint8m1_t r; + vuint8m1_t g; + vuint8m1_t b; + + for (size_t vl; len > 0; len -= vl, palette += vl * 3, trans_alpha += vl, riffled_palette += vl * 4) { + vl = __riscv_vsetvl_e8m1(len); + + __riscv_vlseg3e8_v_u8m1(&r, &g, &b, palette, vl); + vuint8m1_t a = __riscv_vle8_v_u8m1(trans_alpha, vl); + + __riscv_vsseg4e8_v_u8m1(riffled_palette, r, g, b, a, vl); + } +} + +int +png_do_expand_palette_rgba8_vector(png_structrp png_ptr, png_row_infop row_info, + png_const_bytep row, png_bytepp ssp, png_bytepp ddp) +{ + size_t row_width = (size_t)row_info->width; + const png_uint_32* palette = (const png_uint_32*)png_ptr->riffled_palette; + + size_t vl = __riscv_vsetvl_e8m1(row_width); + png_bytep sp = *ssp - vl; + png_bytep dp = *ddp - vl * 4; + + for (; row_width > 0; row_width -= vl, dp -= vl * 4, sp -= vl) { + vl = __riscv_vsetvl_e8m1(row_width); + + vuint8m1_t indices = __riscv_vle8_v_u8m1(sp, vl); + + vuint32m4_t pixels = __riscv_vluxei8_v_u32m4(palette, indices, vl); + + __riscv_vse32_v_u32m4((unsigned int *)dp, pixels, vl); + } + + row_width = (size_t)row_info->width; + + *ssp = *ssp - row_width; + *ddp = *ddp - row_width * 4; + + return row_width; +} + +int +png_do_expand_palette_rgb8_vector(png_structrp png_ptr, png_row_infop row_info, + png_const_bytep row, png_bytepp ssp, png_bytepp ddp) +{ + size_t row_width = (size_t)row_info->width; + const png_const_bytep palette = (png_const_bytep)png_ptr->palette; + + size_t vl = __riscv_vsetvl_e8m1(row_width); + png_bytep sp = *ssp - vl; + png_bytep dp = *ddp - vl * 3; + + vuint8m1_t r; + vuint8m1_t g; + vuint8m1_t b; + + + for (; row_width > 0; row_width -= vl, dp -= vl * 3, sp -= vl) { + vl = __riscv_vsetvl_e8m1(row_width); + + vuint16m2_t indices = __riscv_vwmulu_vx_u16m2(__riscv_vle8_v_u8m1(sp, vl), 3, vl); + + __riscv_vluxseg3ei16_v_u8m1(&r, &g, &b, palette, indices, vl); + + __riscv_vsseg3e8_v_u8m1(dp, r, g, b, vl); + } + + row_width = (size_t)row_info->width; + + *ssp = *ssp - row_width; + *ddp = *ddp - row_width * 3; + + return row_width; +} + +#endif /* PNG_ARM_NEON_IMPLEMENTATION */ diff --git a/riscv/riscv_init.c b/riscv/riscv_init.c new file mode 100644 index 000000000..ed4f81e22 --- /dev/null +++ b/riscv/riscv_init.c @@ -0,0 +1,130 @@ +/* arm_init.c - RISC-V Vector optimized filter functions + * + * Copyright (c) 2023 Google LLC + * Written by Dragoș Tiselice , May 2023. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_RISCV_VECTOR_OPT > 0 +#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED /* Do run-time checks */ +/* WARNING: it is strongly recommended that you do not build libpng with + * run-time checks for CPU features if at all possible. In the case of the + * RISC-V Vector instructions there is no processor-specific way of detecting + * the presence of the required support, therefore run-time detection is + * extremely OS specific. + * + * You may set the macro PNG_RISCV_VECTOR_FILE to the file name of file containing + * a fragment of C source code which defines the png_have_neon function. There + * are a number of implementations in contrib/riscv-vector, but the only one that + * has partial support is contrib/riscv-vector/linux.c - a generic Linux + * implementation which reads /proc/cpuinfo. + */ + +#ifndef PNG_RISCV_VECTOR_FILE +# if defined(__linux__) +# define PNG_RISCV_VECTOR_FILE "contrib/riscv-vector/linux.c" +# else +# error "No support for run-time RISC-V Vector checking; use compile-time options" +# endif +#endif + +static int png_have_vector(png_structp png_ptr); +#ifdef PNG_RISCV_VECTOR_FILE +# include PNG_RISCV_VECTOR_FILE +#endif +#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ + +void +png_init_filter_functions_vector(png_structp pp, unsigned int bpp) +{ + /* The switch statement is compiled in for RISCV_VECTOR_API, the call to + * png_have_vector is compiled in for RISCV_VECTOR_CHECK. If both are + * defined the check is only performed if the API has not set the VECTOR + * option on or off explicitly. In this case the check controls what + * happens. + * + * If the CHECK is not compiled in and the option is UNSET the behavior prior + * to 1.6.7 was to use the NEON code - this was a bug caused by having the + * wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, + * as documented in png.h + */ + png_debug(1, "in png_init_filter_functions_vector"); +#ifdef PNG_RISCV_VECTOR_API_SUPPORTED + switch ((pp->options >> PNG_RISCV_VECTOR) & 3) + { + case PNG_OPTION_UNSET: + /* Allow the run-time check to execute if it has been enabled - + * thus both API and CHECK can be turned on. If it isn't supported + * this case will fall through to the 'default' below, which just + * returns. + */ +#endif /* PNG_RISCV_VECTOR_API_SUPPORTED */ +#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED + { + static volatile sig_atomic_t no_vector = -1; /* not checked */ + + if (no_vector < 0) + no_vector = !png_have_vector(pp); + + if (no_vector) + return; + } +#ifdef PNG_RISCV_VECTOR_API_SUPPORTED + break; +#endif +#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ + +#ifdef PNG_RISCV_VECTOR_API_SUPPORTED + default: /* OFF or INVALID */ + return; + + case PNG_OPTION_ON: + /* Option turned on */ + break; + } +#endif + + /* IMPORTANT: any new external functions used here must be declared using + * PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the + * 'prefix' option to configure works: + * + * ./configure --with-libpng-prefix=foobar_ + * + * Verify you have got this right by running the above command, doing a build + * and examining pngprefix.h; it must contain a #define for every external + * function you add. (Notice that this happens automatically for the + * initialization function.) + */ + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vector; + + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vector; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vector; + if (__riscv_vsetvlmax_e8m1() > 16) { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_128; + } else { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_256; + } + } + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vector; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vector; + if (__riscv_vsetvlmax_e8m1() > 16) { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_128; + } else { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_256; + } + } +} + +#endif /* PNG_RISCV_VECTOR_OPT > 0 */ +#endif /* PNG_READ_SUPPORTED */