From ffb8e8b26f2c18798863a173945cf4f292ab1f0a Mon Sep 17 00:00:00 2001 From: Filip Wasil Date: Tue, 11 Mar 2025 13:07:00 +0100 Subject: [PATCH] Fix and improve the RISC-V Vector (RVV) implementation Changes include manually merged code from Manfred SCHLAEGL. Co-authored-by: Manfred SCHLAEGL Signed-off-by: Cosmin Truta --- AUTHORS | 3 + CMakeLists.txt | 38 ++- INSTALL | 8 +- Makefile.am | 4 +- Makefile.in | 25 +- config.h.in | 6 +- configure | 12 + configure.ac | 30 +- contrib/{riscv-vector => riscv-rvv}/README | 10 +- contrib/riscv-rvv/linux.c | 36 +++ contrib/riscv-vector/linux.c | 57 ---- pngpriv.h | 83 ++--- pngread.c | 3 +- pngrtran.c | 26 +- pngstruct.h | 3 +- riscv/filter_rvv_intrinsics.c | 360 +++++++++++++++++++++ riscv/filter_vector_intrinsics.c | 280 ---------------- riscv/palette_vector_intrinsics.c | 104 ------ riscv/riscv_init.c | 81 +++-- scripts/makefile.c89 | 3 +- scripts/makefile.clang | 3 +- scripts/makefile.darwin | 3 +- scripts/makefile.gcc | 3 +- scripts/makefile.linux | 3 +- scripts/makefile.msys | 3 +- scripts/makefile.std | 3 +- 26 files changed, 575 insertions(+), 615 deletions(-) rename contrib/{riscv-vector => riscv-rvv}/README (93%) create mode 100644 contrib/riscv-rvv/linux.c delete mode 100644 contrib/riscv-vector/linux.c create mode 100644 riscv/filter_rvv_intrinsics.c delete mode 100644 riscv/filter_vector_intrinsics.c delete mode 100644 riscv/palette_vector_intrinsics.c diff --git a/AUTHORS b/AUTHORS index 625c70b62..60355f32c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -20,6 +20,7 @@ Authors, for copyright and licensing purposes. * Lucas Chollet * Magnus Holmgren * Mandar Sahastrabuddhe + * Manfred Schlaegl * Mans Rullgard * Matt Sarett * Mike Klein @@ -52,6 +53,8 @@ Authors, for copyright and licensing purposes. - GuXiWei (顾希伟) - JinBo (金波) - ZhangLixia (张利霞) + * Samsung Group + - Filip Wasil The build projects, the build scripts, the test scripts, and other files in the "projects", "scripts" and "tests" directories, have diff --git a/CMakeLists.txt b/CMakeLists.txt index 07e0c6a60..5d37967f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -313,27 +313,31 @@ if(PNG_HARDWARE_OPTIMIZATIONS) endif() # Set definitions and sources for RISC-V. - if(TARGET_ARCH MATCHES "riscv*") - set(PNG_RISCV_VECTOR_POSSIBLE_VALUES check on off) - set(PNG_RISCV_VECTOR "off" + if(PNG_TARGET_ARCHITECTURE MATCHES "^(riscv)") + include(CheckCCompilerFlag) + set(PNG_RISCV_RVV_POSSIBLE_VALUES check on off) + set(PNG_RISCV_RVV "off" CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; off is default") - set_property(CACHE PNG_RISCV_VECTOR - PROPERTY STRINGS ${PNG_RISCV_VECTOR_POSSIBLE_VALUES}) - list(FIND PNG_RISCV_VECTOR_POSSIBLE_VALUES ${PNG_RISCV_VECTOR} index) + set_property(CACHE PNG_RISCV_RVV + PROPERTY STRINGS ${PNG_RISCV_RVV_POSSIBLE_VALUES}) + list(FIND PNG_RISCV_RVV_POSSIBLE_VALUES ${PNG_RISCV_RVV} index) if(index EQUAL -1) - message(FATAL_ERROR "PNG_RISCV_VECTOR must be one of [${PNG_RISCV_VECTOR_POSSIBLE_VALUES}]") - elseif(NOT ${PNG_RISCV_VECTOR} STREQUAL "off") + message(FATAL_ERROR "PNG_RISCV_RVV must be one of [${PNG_RISCV_RVV_POSSIBLE_VALUES}]") + elseif(NOT ${PNG_RISCV_RVV} STREQUAL "off") + check_c_compiler_flag("-march=rv64gv1p0" COMPILER_SUPPORTS_RVV) + if(NOT COMPILER_SUPPORTS_RVV) + message(FATAL_ERROR "Compiler does not support -march=rv64gv1p0 option") + endif() set(libpng_riscv_sources - riscv/filter_vector_intrinsics.c - riscv/palette_vector_intrinsics.c + riscv/filter_rvv_intrinsics.c riscv/riscv_init.c) - if(${PNG_RISCV_VECTOR} STREQUAL "on") - add_definitions(-DPNG_RISCV_VECTOR_OPT=2) - elseif(${PNG_RISCV_VECTOR} STREQUAL "check") - add_definitions(-DPNG_RISCV_VECTOR_CHECK_SUPPORTED) + if(${PNG_RISCV_RVV} STREQUAL "on") + add_definitions(-DPNG_RISCV_RVV_OPT=2) + elseif(${PNG_RISCV_RVV} STREQUAL "check") + add_definitions(-DPNG_RISCV_RVV_CHECK_SUPPORTED) endif() else() - add_definitions(-DPNG_RISCV_VECTOR_OPT=0) + add_definitions(-DPNG_RISCV_RVV_OPT=0) endif() endif() @@ -365,8 +369,8 @@ else(PNG_HARDWARE_OPTIMIZATIONS) endif() # Set definitions and sources for RISC-V. - if(TARGET_ARCH MATCHES "^riscv") - add_definitions(-DPNG_RISCV_VECTOR_OPT=0) + if(PNG_TARGET_ARCHITECTURE MATCHES "^(riscv)") + add_definitions(-DPNG_RISCV_RVV_OPT=0) endif() endif(PNG_HARDWARE_OPTIMIZATIONS) diff --git a/INSTALL b/INSTALL index 21278c608..4fba3f6ef 100644 --- a/INSTALL +++ b/INSTALL @@ -289,7 +289,7 @@ such as one of --enable-mips-msa=yes --enable-intel-sse=yes --enable-powerpc-vsx=yes - --enable-riscv-vector=yes + --enable-riscv-rvv=yes or enable them all at once with @@ -302,7 +302,7 @@ or more of CPPFLAGS += "-DPNG_MIPS_MSA" CPPFLAGS += "-DPNG_INTEL_SSE" CPPFLAGS += "-DPNG_POWERPC_VSX" - CPPFLAGS += "-DPNG_RISCV_VECTOR" + CPPFLAGS += "-DPNG_RISCV_RVV" See for example scripts/makefile.linux-opt @@ -320,14 +320,14 @@ or via compiler-command options such as CPPFLAGS += "-DPNG_ARM_NEON_OPT=0, -DPNG_MIPS_MSA_OPT=0, -DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0, - -DPNG_RISCV_VECTOR_OPT=0" + -DPNG_RISCV_RVV_OPT=0" If you are using cmake, hardware optimizations are "on" by default. To disable them, use cmake . -DPNG_ARM_NEON=no -DPNG_INTEL_SSE=no \ -DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no \ - -DPNG_RISCV_VECTOR=no + -DPNG_RISCV_RVV=no or disable them all at once with diff --git a/Makefile.am b/Makefile.am index 1c95bec78..44ab0ea0c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -134,9 +134,9 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\ powerpc/filter_vsx_intrinsics.c endif -if PNG_RISCV_VECTOR +if PNG_RISCV_RVV libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += riscv/riscv_init.c\ - riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c + riscv/filter_rvv_intrinsics.c endif if PNG_LOONGARCH_LSX diff --git a/Makefile.in b/Makefile.in index 107932cb5..667f2a4bf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -133,8 +133,8 @@ host_triplet = @host@ @PNG_LOONGARCH_LSX_TRUE@am__append_11 = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la @DO_PNG_PREFIX_TRUE@am__append_12 = -DPNG_PREFIX='@PNG_PREFIX@' -@PNG_RISCV_VECTOR_TRUE@am__append_13 = riscv/riscv_init.c\ -@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c +@PNG_RISCV_RVV_TRUE@am__append_13 = riscv/riscv_init.c\ +@PNG_RISCV_RVV_TRUE@ riscv/filter_rvv_intrinsics.c subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -198,8 +198,7 @@ am__libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES_DIST = png.c \ mips/filter_mmi_inline_assembly.c intel/intel_init.c \ intel/filter_sse2_intrinsics.c powerpc/powerpc_init.c \ powerpc/filter_vsx_intrinsics.c riscv/riscv_init.c \ - riscv/filter_vector_intrinsics.c \ - riscv/palette_vector_intrinsics.c + riscv/filter_rvv_intrinsics.c am__dirstamp = $(am__leading_dot)dirstamp @PNG_ARM_NEON_TRUE@am__objects_1 = arm/arm_init.lo \ @PNG_ARM_NEON_TRUE@ arm/filter_neon_intrinsics.lo \ @@ -213,9 +212,8 @@ am__dirstamp = $(am__leading_dot)dirstamp @PNG_INTEL_SSE_TRUE@ intel/filter_sse2_intrinsics.lo @PNG_POWERPC_VSX_TRUE@am__objects_6 = powerpc/powerpc_init.lo \ @PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.lo -@PNG_RISCV_VECTOR_TRUE@am__objects_7 = riscv/riscv_init.lo \ -@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.lo \ -@PNG_RISCV_VECTOR_TRUE@ riscv/palette_vector_intrinsics.lo +@PNG_RISCV_RVV_TRUE@am__objects_7 = riscv/riscv_init.lo \ +@PNG_RISCV_RVV_TRUE@ riscv/filter_rvv_intrinsics.lo am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = png.lo pngerror.lo \ pngget.lo pngmem.lo pngpread.lo pngread.lo pngrio.lo \ pngrtran.lo pngrutil.lo pngset.lo pngtrans.lo pngwio.lo \ @@ -342,8 +340,7 @@ am__depfiles_remade = ./$(DEPDIR)/png.Plo ./$(DEPDIR)/pngerror.Plo \ mips/$(DEPDIR)/mips_init.Plo \ powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo \ powerpc/$(DEPDIR)/powerpc_init.Plo \ - riscv/$(DEPDIR)/filter_vector_intrinsics.Plo \ - riscv/$(DEPDIR)/palette_vector_intrinsics.Plo \ + riscv/$(DEPDIR)/filter_rvv_intrinsics.Plo \ riscv/$(DEPDIR)/riscv_init.Plo am__mv = mv -f COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ @@ -1091,9 +1088,7 @@ riscv/$(DEPDIR)/$(am__dirstamp): @: > riscv/$(DEPDIR)/$(am__dirstamp) riscv/riscv_init.lo: riscv/$(am__dirstamp) \ riscv/$(DEPDIR)/$(am__dirstamp) -riscv/filter_vector_intrinsics.lo: riscv/$(am__dirstamp) \ - riscv/$(DEPDIR)/$(am__dirstamp) -riscv/palette_vector_intrinsics.lo: riscv/$(am__dirstamp) \ +riscv/filter_rvv_intrinsics.lo: riscv/$(am__dirstamp) \ riscv/$(DEPDIR)/$(am__dirstamp) libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la: $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(EXTRA_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) @@ -2228,8 +2223,7 @@ distclean: distclean-am -rm -f mips/$(DEPDIR)/mips_init.Plo -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo - -rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo - -rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo + -rm -f riscv/$(DEPDIR)/filter_rvv_intrinsics.Plo -rm -f riscv/$(DEPDIR)/riscv_init.Plo -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ @@ -2318,8 +2312,7 @@ maintainer-clean: maintainer-clean-am -rm -f mips/$(DEPDIR)/mips_init.Plo -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo - -rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo - -rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo + -rm -f riscv/$(DEPDIR)/filter_rvv_intrinsics.Plo -rm -f riscv/$(DEPDIR)/riscv_init.Plo -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic diff --git a/config.h.in b/config.h.in index c21ea2553..2c6fa2895 100644 --- a/config.h.in +++ b/config.h.in @@ -109,13 +109,13 @@ #undef PNG_POWERPC_VSX_OPT /* Turn on RISC-V Vector optimizations at run-time */ -#undef PNG_RISCV_VECTOR_API_SUPPORTED +#undef PNG_RISCV_RVV_API_SUPPORTED /* Check for RISC-V Vector support at run-time */ -#undef PNG_RISCV_VECTOR_CHECK_SUPPORTED +#undef PNG_RISCV_RVV_CHECK_SUPPORTED /* Enable RISC-V Vector optimizations */ -#undef PNG_RISCV_VECTOR_OPT +#undef PNG_RISCV_RVV_OPT /* Define to 1 if all of the C90 standard headers exist (not just the ones required in a freestanding environment). This macro is provided for diff --git a/configure b/configure index adec893ae..63cd0cdb5 100755 --- a/configure +++ b/configure @@ -656,6 +656,8 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +PNG_RISCV_RVV_FALSE +PNG_RISCV_RVV_TRUE PNG_LOONGARCH_LSX_FALSE PNG_LOONGARCH_LSX_TRUE PNG_POWERPC_VSX_FALSE @@ -858,6 +860,7 @@ enable_mips_mmi enable_intel_sse enable_powerpc_vsx enable_loongarch_lsx +enable_riscv_rvv ' ac_precious_vars='build_alias host_alias @@ -15156,6 +15159,10 @@ printf "%s\n" "#define PNG_INTEL_SSE_OPT 0" >>confdefs.h printf "%s\n" "#define PNG_LOONGARCH_LSX_OPT 0" >>confdefs.h + enable_riscv_rvv=no + +printf "%s\n" "#define PNG_RISCV_RVV_OPT 0" >>confdefs.h + ;; *) # allow enabling hardware optimization on any system: @@ -15193,6 +15200,11 @@ printf "%s\n" "#define PNG_POWERPC_VSX_OPT 2" >>confdefs.h printf "%s\n" "#define PNG_LOONGARCH_LSX_OPT 1" >>confdefs.h + ;; + riscv*) + enable_riscv_rvv=yes + +printf "%s\n" "#define PNG_RISCV_RVV_OPT 1" >>confdefs.h ;; esac ;; diff --git a/configure.ac b/configure.ac index 66c6183fa..18abfcd9c 100644 --- a/configure.ac +++ b/configure.ac @@ -343,8 +343,8 @@ AC_ARG_ENABLE([hardware-optimizations], enable_loongarch_lsx=no AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0], [Disable LOONGARCH_LSX optimizations]) - enable_riscv_vector=no - AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], + enable_riscv_rvv=no + AC_DEFINE([PNG_RISCV_RVV_OPT], [0], [Disable RISC-V Vector optimizations]) ;; *) @@ -378,8 +378,8 @@ AC_ARG_ENABLE([hardware-optimizations], AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], [Enable LOONGARCH_LSX optimizations]) riscv*) - enable_riscv_vector=yes - AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], + enable_riscv_rvv=yes + AC_DEFINE([PNG_RISCV_RVV_OPT], [2], [Enable RISC-V Vector optimizations]) ;; esac @@ -674,7 +674,7 @@ AM_CONDITIONAL([PNG_LOONGARCH_LSX], # RISC-V Vector support. AC_ARG_ENABLE([riscv-vector], -AS_HELP_STRING([[[--enable-riscv-vector]]], +AS_HELP_STRING([[[--enable-riscv-rvv]]], [Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:] [no/off: disable the optimizations; check: use internal checking code] [api: disable by default, enable by a call to png_set_option] @@ -683,34 +683,34 @@ AS_HELP_STRING([[[--enable-riscv-vector]]], [case "$enableval" in no|off) # disable the default enabling on __ppc64__ systems: - AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], + AC_DEFINE([PNG_RISCV_RVV_OPT], [0], [Disable RISC-V Vector optimizations]) # Prevent inclusion of the platform-specific files below: - enable_riscv_vector=no ;; + enable_riscv_rvv=no ;; check) - AC_DEFINE([PNG_RISCV_VECTOR_CHECK_SUPPORTED], [], + AC_DEFINE([PNG_RISCV_RVV_CHECK_SUPPORTED], [], [Check for RISC-V Vector support at run-time]) - AC_MSG_WARN([--enable-riscv-vector Please check contrib/riscv-vector/README file] + AC_MSG_WARN([--enable-riscv-rvv Please check contrib/riscv-vector/README file] [for the list of supported OSes.]);; api) - AC_DEFINE([PNG_RISCV_VECTOR_API_SUPPORTED], [], + AC_DEFINE([PNG_RISCV_RVV_API_SUPPORTED], [], [Turn on RISC-V Vector optimizations at run-time]);; yes|on) - AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], + AC_DEFINE([PNG_RISCV_RVV_OPT], [2], [Enable RISC-V Vector optimizations]) - AC_MSG_WARN([--enable-riscv-vector: please specify 'check' or 'api', if] + AC_MSG_WARN([--enable-riscv-rvv: please specify 'check' or 'api', if] [you want the optimizations unconditionally pass e.g. '-march=rv64gcv'] [to the compiler.]);; *) - AC_MSG_ERROR([--enable-riscv-vector=${enable_riscv_vector}: invalid value]) + AC_MSG_ERROR([--enable-riscv-rvv=${enable_riscv_rvv}: invalid value]) esac]) # Add RISC-V-specific files to all builds where $host_cpu is riscv ('riscv*') # or where RISC-V optimizations were explicitly requested (this allows a fallback # if a future host CPU does not match 'riscv*') -AM_CONDITIONAL([PNG_RISCV_VECTOR], - [test "$enable_riscv_vector" != 'no' && +AM_CONDITIONAL([PNG_RISCV_RVV], + [test "$enable_riscv_rvv" != 'no' && case "$host_cpu" in riscv*) : ;; esac]) diff --git a/contrib/riscv-vector/README b/contrib/riscv-rvv/README similarity index 93% rename from contrib/riscv-vector/README rename to contrib/riscv-rvv/README index 08311367f..338e25f1a 100644 --- a/contrib/riscv-vector/README +++ b/contrib/riscv-rvv/README @@ -9,7 +9,7 @@ HOW TO USE THIS --------------- This directory contains C code fragments that can be included in -riscv/riscv_init.c by setting the macro PNG_RISCV_VECTOR_FILE to the file name +riscv/riscv_init.c by setting the macro PNG_RISCV_RVV_FILE to the file name in "" or <> at build time. This setting is not recorded in pnglibconf.h and can be changed simply by rebuilding riscv/riscv_init.o with the required macro definition. @@ -17,12 +17,12 @@ definition. For any of this code to be used the RISC-V Vector code must be enabled and run time checks must be supported. I.e.: -#if PNG_RISCV_VECTOR_OPT > 0 -#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED +#if PNG_RISCV_RVV_OPT > 0 +#ifdef PNG_RISCV_RVV_CHECK_SUPPORTED This is done in a 'configure' build by passing configure the argument: - --enable-riscv-vector=check + --enable-riscv-rvv=check Apart from the basic Linux implementation in contrib/riscv-vector/linux.c this code is unsupported. That means that it is not even compiled on a regular @@ -48,7 +48,7 @@ BUG REPORTS: an email address to which to send reports of problems The file is a fragment of C code. It should not define any 'extern' symbols; everything should be static. It must define the function: -static int png_have_vector(png_structp png_ptr); +static int png_have_rvv(png_structp png_ptr); That function must return 1 if RISC-V Vector instructions are supported, 0 if not. It must not execute png_error unless it detects a bug. A png_error will diff --git a/contrib/riscv-rvv/linux.c b/contrib/riscv-rvv/linux.c new file mode 100644 index 000000000..dc022704d --- /dev/null +++ b/contrib/riscv-rvv/linux.c @@ -0,0 +1,36 @@ +/* contrib/riscv-rvv/linux.c + * + * Copyright (c) 2023 Google LLC + * Written by Dragoș Tiselice , May 2023. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + * SEE contrib/riscv-rvv/README before reporting bugs + * + * STATUS: SUPPORTED + * BUG REPORTS: png-mng-implement@sourceforge.net + * + * png_have_rvv implemented for Linux by reading the widely available + * pseudo-file /proc/cpuinfo. + * + * This code is strict ANSI-C and is probably moderately portable; it does + * however use and it assumes that /proc/cpuinfo is never localized. + */ + +#if defined(__linux__) +#include +#include +#endif + +static int +png_have_rvv(png_structp png_ptr) { +#if defined(__linux__) + return getauxval (AT_HWCAP) & COMPAT_HWCAP_ISA_V ? 1 : 0; +#else +#pragma message( \ + "warning: RISC-V Vector not supported for this platform") + return 0; +#endif +} diff --git a/contrib/riscv-vector/linux.c b/contrib/riscv-vector/linux.c deleted file mode 100644 index c5fd1c6c5..000000000 --- a/contrib/riscv-vector/linux.c +++ /dev/null @@ -1,57 +0,0 @@ -/* contrib/riscv-vector/linux.c - * - * Copyright (c) 2023 Google LLC - * Written by Dragoș Tiselice , May 2023. - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - * - * SEE contrib/riscv-vector/README before reporting bugs - * - * STATUS: SUPPORTED - * BUG REPORTS: png-mng-implement@sourceforge.net - * - * png_have_vector implemented for Linux by reading the widely available - * pseudo-file /proc/cpuinfo. - * - * This code is strict ANSI-C and is probably moderately portable; it does - * however use and it assumes that /proc/cpuinfo is never localized. - */ - -#include -#include - -static int -png_have_vector(png_structp png_ptr) { - FILE* f = fopen("/proc/cpuinfo", "rb"); - - if (f == NULL) { -#ifdef PNG_WARNINGS_SUPPORTED - png_warning(png_ptr, "/proc/cpuinfo open failed"); -#endif - return 0; - } - - char line[256]; - - while (fgets(line, sizeof line, f)) { - if (strncmp(line, "isa", 3) != 0) { - continue; - } - - char* isa = strstr(line, "rv"); - - if (isa == NULL) { - continue; - } - - if (strchr(isa + 2, 'v') != NULL) { - return 1; - } - } - - fclose(f); - - return 0; -} diff --git a/pngpriv.h b/pngpriv.h index a5b7e02c1..bfbc94d92 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -143,6 +143,32 @@ # endif #endif +#ifndef PNG_RISCV_RVV_OPT + /* RISCV_RVV optimizations are being controlled by the compiler settings, + * typically the target FPU then the compiler will define __RVV__ and we can rely + * unconditionally on NEON instructions not crashing, otherwise we must + * disable use of NEON instructions. + * + * NOTE: at present these optimizations depend on 'ALIGNED_MEMORY', so they + * can only be turned on automatically if that is supported too. If + * PNG_RISCV_RVV_OPT is set in CPPFLAGS (to >0) then riscv/riscv_init.c will fail + * to compile with an appropriate #error if ALIGNED_MEMORY has been turned + * off. + * + * Note that gcc and clang use the same __RVV__ flag. No known variations + * of this name is know as writing this code. + * + * To disable RISCV_RVV optimizations entirely, and skip compiling the + * associated assembler code, pass --enable-riscv-rvv=no to configure + * or put -DPNG_RISCV_RVV_OPT=0 in CPPFLAGS. + */ +# if defined(__RVV__) && defined(PNG_ALIGNED_MEMORY_SUPPORTED) +# define PNG_RISCV_RVV_OPT 2 +# else +# define PNG_RISCV_RVV_OPT 0 +# endif +#endif + #if PNG_ARM_NEON_OPT > 0 /* NEON optimizations are to be at least considered by libpng, so enable the * callbacks to do this. @@ -288,8 +314,14 @@ # define PNG_LOONGARCH_LSX_IMPLEMENTATION 0 #endif -#if PNG_RISCV_VECTOR_OPT > 0 -# define PNG_RISCV_VECTOR_IMPLEMENTATION 1 +#if PNG_RISCV_RVV_OPT > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_rvv +# ifndef PNG_RISCV_RVV_IMPLEMENTATION + /* Use the intrinsics code by default. */ +# define PNG_RISCV_RVV_IMPLEMENTATION 1 +# endif +#else +# define PNG_RISCV_RVV_IMPLEMENTATION 0 #endif /* Is this a build of a DLL where compilation of the object modules requires @@ -1526,24 +1558,20 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif -#if PNG_RISCV_VECTOR_OPT > 0 -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vector,(png_row_infop +#if PNG_RISCV_RVV_OPT > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_128,(png_row_infop +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_256,(png_row_infop +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_128,(png_row_infop +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_256,(png_row_infop +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vector,(png_row_infop +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vector,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vector,(png_row_infop - row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); -PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vector,(png_row_infop +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_rvv,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif @@ -2159,8 +2187,8 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); #endif -# if PNG_RISCV_VECTOR_OPT > 0 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_vector, +# if PNG_RISCV_RVV_OPT > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_rvv, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); #endif @@ -2190,29 +2218,6 @@ PNG_INTERNAL_FUNCTION(int, PNG_EMPTY); #endif -#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 -PNG_INTERNAL_FUNCTION(void, - png_riffle_palette_vector, - (png_structrp), - PNG_EMPTY); -PNG_INTERNAL_FUNCTION(int, - png_do_expand_palette_rgba8_vector, - (png_structrp, - png_row_infop, - png_const_bytep, - const png_bytepp, - const png_bytepp), - PNG_EMPTY); -PNG_INTERNAL_FUNCTION(int, - png_do_expand_palette_rgb8_vector, - (png_structrp, - png_row_infop, - png_const_bytep, - const png_bytepp, - const png_bytepp), - PNG_EMPTY); -#endif - /* Maintainer: Put new private prototypes here ^ */ #include "pngdebug.h" diff --git a/pngread.c b/pngread.c index e7f562362..a0dbf2ae2 100644 --- a/pngread.c +++ b/pngread.c @@ -809,7 +809,8 @@ png_read_destroy(png_structrp png_ptr) #endif #if defined(PNG_READ_EXPAND_SUPPORTED) && \ - (defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION)) + (defined(PNG_ARM_NEON_IMPLEMENTATION) || \ + defined(PNG_RISCV_RVV_IMPLEMENTATION)) png_free(png_ptr, png_ptr->riffled_palette); png_ptr->riffled_palette = NULL; #endif diff --git a/pngrtran.c b/pngrtran.c index 622543f04..1809db704 100644 --- a/pngrtran.c +++ b/pngrtran.c @@ -28,9 +28,9 @@ # endif #endif -#ifdef PNG_RISCV_VECTOR_IMPLEMENTATION -# if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 -# define PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE +#ifdef PNG_RISCV_RVV_IMPLEMENTATION +# if PNG_RISCV_RVV_IMPLEMENTATION == 1 +# define PNG_RISCV_RVV_INTRINSICS_AVAILABLE # endif #endif @@ -4403,12 +4403,6 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row, &sp, &dp); } -#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE - if (png_ptr->riffled_palette != NULL) - { - i = png_do_expand_palette_rgba8_vector(png_ptr, row_info, row, - &sp, &dp); - } #else PNG_UNUSED(png_ptr) #endif @@ -4439,9 +4433,6 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info, #ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row, &sp, &dp); -#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE - i = png_do_expand_palette_rgb8_vector(png_ptr, row_info, row, - &sp, &dp); #else PNG_UNUSED(png_ptr) #endif @@ -4871,17 +4862,6 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info) png_riffle_palette_neon(png_ptr); } } -#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE - if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) - { - if (png_ptr->riffled_palette == NULL) - { - /* Initialize the accelerated palette expansion. */ - png_ptr->riffled_palette = - (png_bytep)png_malloc(png_ptr, 256 * 4); - png_riffle_palette_vector(png_ptr); - } - } #endif png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); diff --git a/pngstruct.h b/pngstruct.h index 0f0ffd992..084422bc1 100644 --- a/pngstruct.h +++ b/pngstruct.h @@ -375,7 +375,8 @@ struct png_struct_def /* New member added in libpng-1.6.36 */ #if defined(PNG_READ_EXPAND_SUPPORTED) && \ - (defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION)) + (defined(PNG_ARM_NEON_IMPLEMENTATION) || \ + defined(PNG_RISCV_RVV_IMPLEMENTATION)) png_bytep riffled_palette; /* buffer for accelerated palette expansion */ #endif diff --git a/riscv/filter_rvv_intrinsics.c b/riscv/filter_rvv_intrinsics.c new file mode 100644 index 000000000..4ac35c872 --- /dev/null +++ b/riscv/filter_rvv_intrinsics.c @@ -0,0 +1,360 @@ +/* filter_rvv_intrinsics.c - RISC-V Vector optimized filter functions + * + * Copyright (c) 2023 Google LLC + * Written by Manfred SCHLAEGL, 2022 + * Dragoș Tiselice , May 2023. + * Filip Wasil , March 2025. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_RISCV_RVV_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ + +#include + +void +png_read_filter_row_up_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + for (size_t vl; len > 0; len -= vl, row += vl, prev_row += vl) { + vl = __riscv_vsetvl_e8m8(len); + + vuint8m8_t prev_vals = __riscv_vle8_v_u8m8(prev_row, vl); + vuint8m8_t row_vals = __riscv_vle8_v_u8m8(row, vl); + + row_vals = __riscv_vadd_vv_u8m8(row_vals, prev_vals, vl); + + __riscv_vse8_v_u8m8(row, row_vals, vl); + } +} + +static inline void +png_read_filter_row_sub_rvv(size_t len, size_t bpp, unsigned char* row) +{ + png_bytep rp_end = row + len; + + /* + * row: | a | x | + * + * a = a + x + * + * a .. [v0](e8) + * x .. [v8](e8) + */ + + asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp)); + + /* a = *row */ + asm volatile ("vle8.v v0, (%0)" : : "r" (row)); + row += bpp; + + while (row < rp_end) { + + /* x = *row */ + asm volatile ("vle8.v v8, (%0)" : : "r" (row)); + /* a = a + x */ + asm volatile ("vadd.vv v0, v0, v8"); + + /* *row = a */ + asm volatile ("vse8.v v0, (%0)" : : "r" (row)); + row += bpp; + } +} + +void +png_read_filter_row_sub3_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_sub_rvv(len, 3, row); + + PNG_UNUSED(prev_row) +} + +void +png_read_filter_row_sub4_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_sub_rvv(len, 4, row); + + PNG_UNUSED(prev_row) +} + +static inline void +png_read_filter_row_avg_rvv(size_t len, size_t bpp, unsigned char* row, + const unsigned char* prev_row) +{ + png_bytep rp_end = row + len; + + /* + * row: | a | x | + * prev_row: | | b | + * + * a .. [v2](e8) + * b .. [v4](e8) + * x .. [v8](e8) + * tmp .. [v12-v13](e16) + */ + + /* first pixel */ + + asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp)); + + /* b = *prev_row */ + asm volatile ("vle8.v v4, (%0)" : : "r" (prev_row)); + prev_row += bpp; + + /* x = *row */ + asm volatile ("vle8.v v8, (%0)" : : "r" (row)); + + /* b = b / 2 */ + asm volatile ("vsrl.vi v4, v4, 1"); + /* a = x + b */ + asm volatile ("vadd.vv v2, v4, v8"); + + /* *row = a */ + asm volatile ("vse8.v v2, (%0)" : : "r" (row)); + row += bpp; + + /* remaining pixels */ + + while (row < rp_end) { + + /* b = *prev_row */ + asm volatile ("vle8.v v4, (%0)" : : "r" (prev_row)); + prev_row += bpp; + + /* x = *row */ + asm volatile ("vle8.v v8, (%0)" : : "r" (row)); + + /* tmp = a + b */ + asm volatile ("vwaddu.vv v12, v2, v4"); /* add with widening */ + /* a = tmp/2 */ + asm volatile ("vnsrl.wi v2, v12, 1"); /* divide/shift with narrowing */ + /* a += x */ + asm volatile ("vadd.vv v2, v2, v8"); + + /* *row = a */ + asm volatile ("vse8.v v2, (%0)" : : "r" (row)); + row += bpp; + } +} + +void +png_read_filter_row_avg3_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_avg_rvv(len, 3, row, prev_row); + + PNG_UNUSED(prev_row) +} + +void +png_read_filter_row_avg4_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_avg_rvv(len, 4, row, prev_row); + + PNG_UNUSED(prev_row) +} + +#define MIN_CHUNK_LEN 256 +#define MAX_CHUNK_LEN 2048 + +static inline vuint8m1_t +prefix_sum(vuint8m1_t chunk, unsigned char* carry, size_t vl, + size_t max_chunk_len) +{ + size_t r; + + for (r = 1; r < MIN_CHUNK_LEN; r <<= 1) { + vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl); + chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl); + } + + for (r = MIN_CHUNK_LEN; r < MAX_CHUNK_LEN && r < max_chunk_len; r <<= 1) { + vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl); + chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl); + } + + chunk = __riscv_vadd_vx_u8m1(chunk, *carry, vl); + *carry = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(chunk, vl - 1, vl)); + + return chunk; +} + +static inline vint16m1_t +abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl) +{ + vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl)); + vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl); + + return __riscv_vmax_vv_i16m1(diff, neg, vl); +} + +static inline vint16m1_t +abs_sum(vint16m1_t a, vint16m1_t b, size_t vl) +{ + vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl); + vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl); + + return __riscv_vmax_vv_i16m1(sum, neg, vl); +} + +static inline void +png_read_filter_row_paeth_rvv(size_t len, size_t bpp, unsigned char* row, + const unsigned char* prev) +{ + png_bytep rp_end = row + len; + + /* + * row: | a | x | + * prev: | c | b | + * + * mask .. [v0] + * a .. [v2](e8) + * b .. [v4](e8) + * c .. [v6](e8) + * x .. [v8](e8) + * p .. [v12-v13](e16) + * pa .. [v16-v17](e16) + * pb .. [v20-v21](e16) + * pc .. [v24-v25](e16) + * tmpmask ..[v31] + */ + + /* first pixel */ + + asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp)); + + /* a = *row + *prev_row */ + asm volatile ("vle8.v v2, (%0)" : : "r" (row)); + asm volatile ("vle8.v v6, (%0)" : : "r" (prev)); + prev += bpp; + asm volatile ("vadd.vv v2, v2, v6"); + + /* *row = a */ + asm volatile ("vse8.v v2, (%0)" : : "r" (row)); + row += bpp; + + /* remaining pixels */ + while (row < rp_end) { + + /* b = *prev_row */ + asm volatile ("vle8.v v4, (%0)" : : "r" (prev)); + prev += bpp; + + /* x = *row */ + asm volatile ("vle8.v v8, (%0)" : : "r" (row)); + + /* sub (widening to 16bit) */ + /* p = b - c */ + asm volatile ("vwsubu.vv v12, v4, v6"); + /* pc = a - c */ + asm volatile ("vwsubu.vv v24, v2, v6"); + + /* switch to widened */ + asm volatile ("vsetvli zero, %0, e16, m2" : : "r" (bpp)); + + /* pa = abs(p) -> pa = p < 0 ? -p : p */ + asm volatile ("vmv.v.v v16, v12"); /* pa = p */ + asm volatile ("vmslt.vx v0, v16, zero"); /* set mask[i] if pa[i] < 0 */ + asm volatile ("vrsub.vx v16, v16, zero, v0.t"); /* invert negative values in pa; vd[i] = 0 - vs2[i] (if mask[i]) + * could be replaced by vneg in rvv >= 1.0 + */ + + /* pb = abs(p) -> pb = pc < 0 ? -pc : pc */ + asm volatile ("vmv.v.v v20, v24"); /* pb = pc */ + asm volatile ("vmslt.vx v0, v20, zero"); /* set mask[i] if pc[i] < 0 */ + asm volatile ("vrsub.vx v20, v20, zero, v0.t"); /* invert negative values in pb; vd[i] = 0 - vs2[i] (if mask[i]) + * could be replaced by vneg in rvv >= 1.0 + */ + + /* pc = abs(p + pc) -> pc = (p + pc) < 0 ? -(p + pc) : p + pc */ + asm volatile ("vadd.vv v24, v24, v12"); /* pc = p + pc */ + asm volatile ("vmslt.vx v0, v24, zero"); /* set mask[i] if pc[i] < 0 */ + asm volatile ("vrsub.vx v24, v24, zero, v0.t"); /* invert negative values in pc; vd[i] = 0 - vs2[i] (if mask[i]) + * could be replaced by vneg in rvv >= 1.0 + */ + + /* + * if (pb < pa) { + * pa = pb; + * a = b; (see (*1)) + * } + */ + asm volatile ("vmslt.vv v0, v20, v16"); /* set mask[i] if pb[i] < pa[i] */ + asm volatile ("vmerge.vvm v16, v16, v20, v0"); /* pa[i] = pb[i] (if mask[i]) */ + + /* + * if (pc < pa) + * a = c; (see (*2)) + */ + asm volatile ("vmslt.vv v31, v24, v16"); /* set tmpmask[i] if pc[i] < pa[i] */ + + /* switch to narrow */ + asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp)); + + /* (*1) */ + asm volatile ("vmerge.vvm v2, v2, v4, v0"); /* a = b (if mask[i]) */ + + /* (*2) */ + asm volatile ("vmand.mm v0, v31, v31"); /* mask = tmpmask + * vmand works for rvv 0.7 up to 1.0 + * could be replaced by vmcpy in 0.7.1/0.8.1 + * or vmmv.m in 1.0 + */ + asm volatile ("vmerge.vvm v2, v2, v6, v0"); /* a = c (if mask[i]) */ + + /* a += x */ + asm volatile ("vadd.vv v2, v2, v8"); + + /* *row = a */ + asm volatile ("vse8.v v2, (%0)" : : "r" (row)); + row += bpp; + + /* prepare next iteration (prev is already in a) */ + /* c = b */ + asm volatile ("vmv.v.v v6, v4"); + } +} + +void +png_read_filter_row_paeth3_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_paeth_rvv(len, 3, row, prev_row); + + PNG_UNUSED(prev_row) +} + +void +png_read_filter_row_paeth4_rvv(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + size_t len = row_info->rowbytes; + + png_read_filter_row_paeth_rvv(len, 4, row, prev_row); + + PNG_UNUSED(prev_row) +} + +#endif /* PNG_RISCV_RVV_IMPLEMENTATION */ +#endif /* READ */ diff --git a/riscv/filter_vector_intrinsics.c b/riscv/filter_vector_intrinsics.c deleted file mode 100644 index 09a714b6b..000000000 --- a/riscv/filter_vector_intrinsics.c +++ /dev/null @@ -1,280 +0,0 @@ -/* filter_neon_intrinsics.c - RISC-V Vector optimized filter functions - * - * Copyright (c) 2023 Google LLC - * Written by Dragoș Tiselice , May 2023. - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -#include "../pngpriv.h" - -#ifdef PNG_READ_SUPPORTED - -#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ - -#include - -void -png_read_filter_row_up_vector(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - for (size_t vl; len > 0; len -= vl, row += vl, prev_row += vl) { - vl = __riscv_vsetvl_e8m8(len); - - vuint8m8_t prev_vals = __riscv_vle8_v_u8m8(prev_row, vl); - vuint8m8_t row_vals = __riscv_vle8_v_u8m8(row, vl); - - row_vals = __riscv_vadd_vv_u8m8(row_vals, prev_vals, vl); - - __riscv_vse8_v_u8m8(row, row_vals, vl); - } -} - -static inline void -png_read_filter_row_sub_vector_128(size_t len, size_t vl, unsigned char* row) -{ - vuint8m1_t sum; - vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl); - - for (; len > 0; len -= vl, row += vl) { - __riscv_vsetvl_e8m1(vl); - - sum = chunk; - chunk = __riscv_vle8_v_u8m1(row, vl); - - chunk = __riscv_vadd_vv_u8m1(chunk, sum, vl); - - __riscv_vse8_v_u8m1(row, chunk, vl); - } -} - -void -png_read_filter_row_sub3_vector_128(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - png_read_filter_row_sub_vector_128(len, 3, row); - - PNG_UNUSED(prev_row) -} - -void -png_read_filter_row_sub4_vector_128(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - png_read_filter_row_sub_vector_128(len, 4, row); - - PNG_UNUSED(prev_row) -} - -static inline void -png_read_filter_row_avg_vector(size_t len, size_t vl, unsigned char* row, - const unsigned char* prev_row) -{ - vuint8m1_t avg; - vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl); - - for (; len > 0; len -= vl, row += vl) { - __riscv_vsetvl_e8m1(vl); - - vuint8m1_t prev_chunk = __riscv_vle8_v_u8m1(prev_row, vl); - avg = chunk; - chunk = __riscv_vle8_v_u8m1(row, vl); - - vuint8m1_t sum = __riscv_vadd_vv_u8m1(chunk, prev_chunk, vl); - vuint8m1_t avg = __riscv_vsrl_vx_u8m1(sum, 1, vl); - - chunk = __riscv_vadd_vv_u8m1(chunk, avg, vl); - - __riscv_vse8_v_u8m1(row, chunk, vl); - } -} - -void -png_read_filter_row_avg3_vector(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - png_read_filter_row_avg_vector(len, 3, row, prev_row); - - PNG_UNUSED(prev_row) -} - -void -png_read_filter_row_avg4_vector(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - png_read_filter_row_avg_vector(len, 4, row, prev_row); - - PNG_UNUSED(prev_row) -} - -#define MIN_CHUNK_LEN 256 -#define MAX_CHUNK_LEN 2048 - -static inline vuint8m1_t -prefix_sum(vuint8m1_t chunk, unsigned char* carry, size_t vl, - size_t max_chunk_len) -{ - size_t r; - - for (r = 1; r < MIN_CHUNK_LEN; r <<= 1) { - vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl); - chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl); - } - - for (r = MIN_CHUNK_LEN; r < MAX_CHUNK_LEN && r < max_chunk_len; r <<= 1) { - vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl); - chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl); - } - - chunk = __riscv_vadd_vx_u8m1(chunk, *carry, vl); - *carry = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(chunk, vl - 1, vl)); - - return chunk; -} - -void -png_read_filter_row_sub3_vector_256(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - const size_t max_chunk_len = __riscv_vsetvlmax_e8m1(); - - vuint8m1_t r; - vuint8m1_t g; - vuint8m1_t b; - - unsigned char r_carry = 0; - unsigned char g_carry = 0; - unsigned char b_carry = 0; - - for (size_t vl; len > 0; len -= vl * 3, row += vl * 3) { - vl = __riscv_vsetvl_e8m1(len / 3); - - __riscv_vlseg3e8_v_u8m1(&r, &g, &b, row, vl); - - r = prefix_sum(r, &r_carry, vl, max_chunk_len); - g = prefix_sum(g, &g_carry, vl, max_chunk_len); - b = prefix_sum(b, &b_carry, vl, max_chunk_len); - - __riscv_vsseg3e8_v_u8m1(row, r, g, b, vl); - } -} - -void -png_read_filter_row_sub4_vector_256(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - const size_t max_chunk_len = __riscv_vsetvlmax_e8m1(); - - vuint8m1_t r; - vuint8m1_t g; - vuint8m1_t b; - vuint8m1_t a; - - unsigned char r_carry = 0; - unsigned char g_carry = 0; - unsigned char b_carry = 0; - unsigned char a_carry = 0; - - for (size_t vl; len > 0; len -= vl * 4, row += vl * 4) { - vl = __riscv_vsetvl_e8m1(len / 4); - - __riscv_vlseg4e8_v_u8m1(&r, &g, &b, &a, row, vl); - - r = prefix_sum(r, &r_carry, vl, max_chunk_len); - g = prefix_sum(g, &g_carry, vl, max_chunk_len); - b = prefix_sum(b, &b_carry, vl, max_chunk_len); - a = prefix_sum(a, &a_carry, vl, max_chunk_len); - - __riscv_vsseg4e8_v_u8m1(row, r, g, b, a, vl); - } -} - -static inline vint16m1_t -abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl) -{ - vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl)); - vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl); - - return __riscv_vmax_vv_i16m1(diff, neg, vl); -} - -static inline vint16m1_t -abs_sum(vint16m1_t a, vint16m1_t b, size_t vl) -{ - vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl); - vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl); - - return __riscv_vmax_vv_i16m1(sum, neg, vl); -} - -static inline void -png_read_filter_row_paeth_vector(size_t len, size_t vl, unsigned char* row, - const unsigned char* prev) -{ - vuint16m1_t a; - vuint16m1_t b = __riscv_vmv_v_x_u16m1(0, vl); - vuint16m1_t c; - vuint16m1_t d = __riscv_vmv_v_x_u16m1(0, vl); - - for (; len > 0; len -= vl, row += vl, prev += vl) { - __riscv_vsetvl_e16m1(vl); - - c = b; - b = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(prev, vl), vl); - a = d; - d = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(row, vl), vl); - - vint16m1_t pa = abs_diff(b, c, vl); - vint16m1_t pb = abs_diff(a, c, vl); - vint16m1_t pc = abs_sum(pa, pb, vl); - - vint16m1_t smallest = __riscv_vmin_vv_i16m1(pa, __riscv_vmin_vv_i16m1(pb, pc, vl), vl); - - vuint16m1_t nearest = c; - nearest = __riscv_vmerge_vvm_u16m1(nearest, a, __riscv_vmseq_vv_i16m1_b16(smallest, pa, vl), vl); - nearest = __riscv_vmerge_vvm_u16m1(nearest, b, __riscv_vmseq_vv_i16m1_b16(smallest, pb, vl), vl); - - d = __riscv_vadd_vv_u16m1(d, nearest, vl); - - __riscv_vse8_v_u8mf2(row, __riscv_vnsrl_wx_u8mf2(d, 0, vl), vl); - } -} - -void -png_read_filter_row_paeth3_vector(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - png_read_filter_row_paeth_vector(len, 3, row, prev_row); - - PNG_UNUSED(prev_row) -} - -void -png_read_filter_row_paeth4_vector(png_row_infop row_info, png_bytep row, - png_const_bytep prev_row) -{ - size_t len = row_info->rowbytes; - - png_read_filter_row_paeth_vector(len, 4, row, prev_row); - - PNG_UNUSED(prev_row) -} - -#endif /* PNG_RISCV_VECTOR_IMPLEMENTATION */ -#endif /* READ */ diff --git a/riscv/palette_vector_intrinsics.c b/riscv/palette_vector_intrinsics.c deleted file mode 100644 index 982d956cd..000000000 --- a/riscv/palette_vector_intrinsics.c +++ /dev/null @@ -1,104 +0,0 @@ -/* palette_neon_intrinsics.c - RISC-V Vector optimized palette expansion - * functions - * - * Copyright (c) 2023 Google LLC - * Written by Dragoș Tiselice , May 2023. - * - * This code is released under the libpng license. - * For conditions of distribution and use, see the disclaimer - * and license in png.h - */ - -#include "../pngpriv.h" - -#if PNG_ARM_NEON_IMPLEMENTATION == 1 - -#include - -void -png_riffle_palette_vector(png_structrp png_ptr) -{ - png_const_bytep palette = (png_const_bytep)png_ptr->palette; - png_bytep riffled_palette = png_ptr->riffled_palette; - png_const_bytep trans_alpha = png_ptr->trans_alpha; - - size_t len = 256; - - vuint8m1_t r; - vuint8m1_t g; - vuint8m1_t b; - - for (size_t vl; len > 0; len -= vl, palette += vl * 3, trans_alpha += vl, riffled_palette += vl * 4) { - vl = __riscv_vsetvl_e8m1(len); - - __riscv_vlseg3e8_v_u8m1(&r, &g, &b, palette, vl); - vuint8m1_t a = __riscv_vle8_v_u8m1(trans_alpha, vl); - - __riscv_vsseg4e8_v_u8m1(riffled_palette, r, g, b, a, vl); - } -} - -int -png_do_expand_palette_rgba8_vector(png_structrp png_ptr, png_row_infop row_info, - png_const_bytep row, png_bytepp ssp, png_bytepp ddp) -{ - size_t row_width = (size_t)row_info->width; - const png_uint_32* palette = (const png_uint_32*)png_ptr->riffled_palette; - - size_t vl = __riscv_vsetvl_e8m1(row_width); - png_bytep sp = *ssp - vl; - png_bytep dp = *ddp - vl * 4; - - for (; row_width > 0; row_width -= vl, dp -= vl * 4, sp -= vl) { - vl = __riscv_vsetvl_e8m1(row_width); - - vuint8m1_t indices = __riscv_vle8_v_u8m1(sp, vl); - - vuint32m4_t pixels = __riscv_vluxei8_v_u32m4(palette, indices, vl); - - __riscv_vse32_v_u32m4((unsigned int *)dp, pixels, vl); - } - - row_width = (size_t)row_info->width; - - *ssp = *ssp - row_width; - *ddp = *ddp - row_width * 4; - - return row_width; -} - -int -png_do_expand_palette_rgb8_vector(png_structrp png_ptr, png_row_infop row_info, - png_const_bytep row, png_bytepp ssp, png_bytepp ddp) -{ - size_t row_width = (size_t)row_info->width; - const png_const_bytep palette = (png_const_bytep)png_ptr->palette; - - size_t vl = __riscv_vsetvl_e8m1(row_width); - png_bytep sp = *ssp - vl; - png_bytep dp = *ddp - vl * 3; - - vuint8m1_t r; - vuint8m1_t g; - vuint8m1_t b; - - - for (; row_width > 0; row_width -= vl, dp -= vl * 3, sp -= vl) { - vl = __riscv_vsetvl_e8m1(row_width); - - vuint16m2_t indices = __riscv_vwmulu_vx_u16m2(__riscv_vle8_v_u8m1(sp, vl), 3, vl); - - __riscv_vluxseg3ei16_v_u8m1(&r, &g, &b, palette, indices, vl); - - __riscv_vsseg3e8_v_u8m1(dp, r, g, b, vl); - } - - row_width = (size_t)row_info->width; - - *ssp = *ssp - row_width; - *ddp = *ddp - row_width * 3; - - return row_width; -} - -#endif /* PNG_ARM_NEON_IMPLEMENTATION */ diff --git a/riscv/riscv_init.c b/riscv/riscv_init.c index ed4f81e22..869211a8d 100644 --- a/riscv/riscv_init.c +++ b/riscv/riscv_init.c @@ -2,7 +2,7 @@ * * Copyright (c) 2023 Google LLC * Written by Dragoș Tiselice , May 2023. - * + * Filip Wasil , March 2025. * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer * and license in png.h @@ -12,40 +12,47 @@ #ifdef PNG_READ_SUPPORTED -#if PNG_RISCV_VECTOR_OPT > 0 -#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED /* Do run-time checks */ +#if PNG_RISCV_RVV_OPT > 0 + +#include + +#ifdef PNG_RISCV_RVV_CHECK_SUPPORTED /* Do run-time checks */ /* WARNING: it is strongly recommended that you do not build libpng with * run-time checks for CPU features if at all possible. In the case of the * RISC-V Vector instructions there is no processor-specific way of detecting * the presence of the required support, therefore run-time detection is * extremely OS specific. * - * You may set the macro PNG_RISCV_VECTOR_FILE to the file name of file containing + * You may set the macro PNG_RISCV_RVV_FILE to the file name of file containing * a fragment of C source code which defines the png_have_neon function. There * are a number of implementations in contrib/riscv-vector, but the only one that * has partial support is contrib/riscv-vector/linux.c - a generic Linux * implementation which reads /proc/cpuinfo. */ -#ifndef PNG_RISCV_VECTOR_FILE +#ifndef PNG_RISCV_RVV_FILE # if defined(__linux__) -# define PNG_RISCV_VECTOR_FILE "contrib/riscv-vector/linux.c" +# define PNG_RISCV_RVV_FILE "contrib/riscv-vector/linux.c" # else # error "No support for run-time RISC-V Vector checking; use compile-time options" # endif #endif -static int png_have_vector(png_structp png_ptr); -#ifdef PNG_RISCV_VECTOR_FILE -# include PNG_RISCV_VECTOR_FILE +static int png_have_rvv(png_structp png_ptr); +#ifdef PNG_RISCV_RVV_FILE +# include PNG_RISCV_RVV_FILE +#endif +#endif /* PNG_RISCV_RVV_CHECK_SUPPORTED */ + +#ifndef PNG_ALIGNED_MEMORY_SUPPORTED +# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" #endif -#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ void -png_init_filter_functions_vector(png_structp pp, unsigned int bpp) +png_init_filter_functions_rvv(png_structp pp, unsigned int bpp) { - /* The switch statement is compiled in for RISCV_VECTOR_API, the call to - * png_have_vector is compiled in for RISCV_VECTOR_CHECK. If both are + /* The switch statement is compiled in for RISCV_rvv_API, the call to + * png_have_rvv is compiled in for RISCV_rvv_CHECK. If both are * defined the check is only performed if the API has not set the VECTOR * option on or off explicitly. In this case the check controls what * happens. @@ -55,9 +62,9 @@ png_init_filter_functions_vector(png_structp pp, unsigned int bpp) * wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, * as documented in png.h */ - png_debug(1, "in png_init_filter_functions_vector"); -#ifdef PNG_RISCV_VECTOR_API_SUPPORTED - switch ((pp->options >> PNG_RISCV_VECTOR) & 3) + png_debug(1, "in png_init_filter_functions_rvv"); +#ifdef PNG_RISCV_RVV_API_SUPPORTED + switch ((pp->options >> PNG_RISCV_RVV) & 3) { case PNG_OPTION_UNSET: /* Allow the run-time check to execute if it has been enabled - @@ -65,23 +72,23 @@ png_init_filter_functions_vector(png_structp pp, unsigned int bpp) * this case will fall through to the 'default' below, which just * returns. */ -#endif /* PNG_RISCV_VECTOR_API_SUPPORTED */ -#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED +#endif /* PNG_RISCV_RVV_API_SUPPORTED */ +#ifdef PNG_RISCV_RVV_CHECK_SUPPORTED { - static volatile sig_atomic_t no_vector = -1; /* not checked */ + static volatile sig_atomic_t no_rvv = -1; /* not checked */ - if (no_vector < 0) - no_vector = !png_have_vector(pp); + if (no_rvv < 0) + no_rvv = !png_have_rvv(pp); - if (no_vector) + if (no_rvv) return; } -#ifdef PNG_RISCV_VECTOR_API_SUPPORTED +#ifdef PNG_RISCV_RVV_API_SUPPORTED break; #endif -#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ +#endif /* PNG_RISCV_RVV_CHECK_SUPPORTED */ -#ifdef PNG_RISCV_VECTOR_API_SUPPORTED +#ifdef PNG_RISCV_RVV_API_SUPPORTED default: /* OFF or INVALID */ return; @@ -102,29 +109,21 @@ png_init_filter_functions_vector(png_structp pp, unsigned int bpp) * function you add. (Notice that this happens automatically for the * initialization function.) */ - pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vector; + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_rvv; if (bpp == 3) { - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vector; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vector; - if (__riscv_vsetvlmax_e8m1() > 16) { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_128; - } else { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_256; - } + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_rvv; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_rvv; + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_rvv; } else if (bpp == 4) { - pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vector; - pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vector; - if (__riscv_vsetvlmax_e8m1() > 16) { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_128; - } else { - pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_256; - } + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_rvv; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_rvv; + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_rvv; } } -#endif /* PNG_RISCV_VECTOR_OPT > 0 */ +#endif /* PNG_RISCV_RVV_OPT > 0 */ #endif /* PNG_READ_SUPPORTED */ diff --git a/scripts/makefile.c89 b/scripts/makefile.c89 index 1f2fc1d61..2000d5726 100644 --- a/scripts/makefile.c89 +++ b/scripts/makefile.c89 @@ -22,7 +22,8 @@ RM_F = rm -f # Compiler and linker flags NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 STDC = -pedantic-errors -std=c89 WARN = -Wall -Wextra -Wundef WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ diff --git a/scripts/makefile.clang b/scripts/makefile.clang index 52eaa1bad..6f48e99a3 100644 --- a/scripts/makefile.clang +++ b/scripts/makefile.clang @@ -21,7 +21,8 @@ RM_F = rm -f # Compiler and linker flags NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 STDC = -pedantic-errors # -std=c99 WARN = -Wall -Wextra -Wundef WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ diff --git a/scripts/makefile.darwin b/scripts/makefile.darwin index 3e42c5c8e..ad4f25731 100644 --- a/scripts/makefile.darwin +++ b/scripts/makefile.darwin @@ -28,7 +28,8 @@ RM_F=rm -f # Compiler and linker flags NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 STDC = -pedantic-errors WARN = -Wall -Wextra -Wundef WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ diff --git a/scripts/makefile.gcc b/scripts/makefile.gcc index 7a11744dc..aae145849 100644 --- a/scripts/makefile.gcc +++ b/scripts/makefile.gcc @@ -21,7 +21,8 @@ RM_F = rm -f # Compiler and linker flags NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 STDC = -pedantic-errors # -std=c99 WARN = -Wall -Wextra -Wundef WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ diff --git a/scripts/makefile.linux b/scripts/makefile.linux index 09bbe2481..a26d92938 100644 --- a/scripts/makefile.linux +++ b/scripts/makefile.linux @@ -26,7 +26,8 @@ RM_F=rm -f # Compiler and linker flags NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 STDC = -pedantic-errors WARN = -Wall -Wextra -Wundef WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ diff --git a/scripts/makefile.msys b/scripts/makefile.msys index 3951c7467..1386af578 100644 --- a/scripts/makefile.msys +++ b/scripts/makefile.msys @@ -32,7 +32,8 @@ LN_SF = ln -sf # Compiler and linker flags NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 STDC = -pedantic-errors WARN = -Wall -Wextra -Wundef WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ diff --git a/scripts/makefile.std b/scripts/makefile.std index 5c793eaf2..5a01af4e0 100644 --- a/scripts/makefile.std +++ b/scripts/makefile.std @@ -23,7 +23,8 @@ RM_F = rm -f AWK = awk NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ - -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 + -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \ + -DPNG_RISCV_RVV_OPT=0 DFNFLAGS = # DFNFLAGS contains -D options to use in the libpng build DFA_EXTRA = # extra files that can be used to control configuration CPPFLAGS = -I$(ZLIBINC) $(NOHWOPT) # -DPNG_DEBUG=5