Fix and improve the RISC-V Vector (RVV) implementation

Changes include manually merged code from Manfred SCHLAEGL.

Co-authored-by: Manfred SCHLAEGL <manfred.schlaegl@gmx.at>
Signed-off-by: Cosmin Truta <ctruta@gmail.com>
This commit is contained in:
Filip Wasil 2025-03-11 13:07:00 +01:00 committed by Cosmin Truta
parent cc5ee6b213
commit ffb8e8b26f
26 changed files with 575 additions and 615 deletions

View File

@ -20,6 +20,7 @@ Authors, for copyright and licensing purposes.
* Lucas Chollet * Lucas Chollet
* Magnus Holmgren * Magnus Holmgren
* Mandar Sahastrabuddhe * Mandar Sahastrabuddhe
* Manfred Schlaegl
* Mans Rullgard * Mans Rullgard
* Matt Sarett * Matt Sarett
* Mike Klein * Mike Klein
@ -52,6 +53,8 @@ Authors, for copyright and licensing purposes.
- GuXiWei (顾希伟) - GuXiWei (顾希伟)
- JinBo (金波) - JinBo (金波)
- ZhangLixia (张利霞) - ZhangLixia (张利霞)
* Samsung Group
- Filip Wasil
The build projects, the build scripts, the test scripts, and other The build projects, the build scripts, the test scripts, and other
files in the "projects", "scripts" and "tests" directories, have files in the "projects", "scripts" and "tests" directories, have

View File

@ -313,27 +313,31 @@ if(PNG_HARDWARE_OPTIMIZATIONS)
endif() endif()
# Set definitions and sources for RISC-V. # Set definitions and sources for RISC-V.
if(TARGET_ARCH MATCHES "riscv*") if(PNG_TARGET_ARCHITECTURE MATCHES "^(riscv)")
set(PNG_RISCV_VECTOR_POSSIBLE_VALUES check on off) include(CheckCCompilerFlag)
set(PNG_RISCV_VECTOR "off" set(PNG_RISCV_RVV_POSSIBLE_VALUES check on off)
set(PNG_RISCV_RVV "off"
CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; off is default") CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; off is default")
set_property(CACHE PNG_RISCV_VECTOR set_property(CACHE PNG_RISCV_RVV
PROPERTY STRINGS ${PNG_RISCV_VECTOR_POSSIBLE_VALUES}) PROPERTY STRINGS ${PNG_RISCV_RVV_POSSIBLE_VALUES})
list(FIND PNG_RISCV_VECTOR_POSSIBLE_VALUES ${PNG_RISCV_VECTOR} index) list(FIND PNG_RISCV_RVV_POSSIBLE_VALUES ${PNG_RISCV_RVV} index)
if(index EQUAL -1) if(index EQUAL -1)
message(FATAL_ERROR "PNG_RISCV_VECTOR must be one of [${PNG_RISCV_VECTOR_POSSIBLE_VALUES}]") message(FATAL_ERROR "PNG_RISCV_RVV must be one of [${PNG_RISCV_RVV_POSSIBLE_VALUES}]")
elseif(NOT ${PNG_RISCV_VECTOR} STREQUAL "off") elseif(NOT ${PNG_RISCV_RVV} STREQUAL "off")
check_c_compiler_flag("-march=rv64gv1p0" COMPILER_SUPPORTS_RVV)
if(NOT COMPILER_SUPPORTS_RVV)
message(FATAL_ERROR "Compiler does not support -march=rv64gv1p0 option")
endif()
set(libpng_riscv_sources set(libpng_riscv_sources
riscv/filter_vector_intrinsics.c riscv/filter_rvv_intrinsics.c
riscv/palette_vector_intrinsics.c
riscv/riscv_init.c) riscv/riscv_init.c)
if(${PNG_RISCV_VECTOR} STREQUAL "on") if(${PNG_RISCV_RVV} STREQUAL "on")
add_definitions(-DPNG_RISCV_VECTOR_OPT=2) add_definitions(-DPNG_RISCV_RVV_OPT=2)
elseif(${PNG_RISCV_VECTOR} STREQUAL "check") elseif(${PNG_RISCV_RVV} STREQUAL "check")
add_definitions(-DPNG_RISCV_VECTOR_CHECK_SUPPORTED) add_definitions(-DPNG_RISCV_RVV_CHECK_SUPPORTED)
endif() endif()
else() else()
add_definitions(-DPNG_RISCV_VECTOR_OPT=0) add_definitions(-DPNG_RISCV_RVV_OPT=0)
endif() endif()
endif() endif()
@ -365,8 +369,8 @@ else(PNG_HARDWARE_OPTIMIZATIONS)
endif() endif()
# Set definitions and sources for RISC-V. # Set definitions and sources for RISC-V.
if(TARGET_ARCH MATCHES "^riscv") if(PNG_TARGET_ARCHITECTURE MATCHES "^(riscv)")
add_definitions(-DPNG_RISCV_VECTOR_OPT=0) add_definitions(-DPNG_RISCV_RVV_OPT=0)
endif() endif()
endif(PNG_HARDWARE_OPTIMIZATIONS) endif(PNG_HARDWARE_OPTIMIZATIONS)

View File

@ -289,7 +289,7 @@ such as one of
--enable-mips-msa=yes --enable-mips-msa=yes
--enable-intel-sse=yes --enable-intel-sse=yes
--enable-powerpc-vsx=yes --enable-powerpc-vsx=yes
--enable-riscv-vector=yes --enable-riscv-rvv=yes
or enable them all at once with or enable them all at once with
@ -302,7 +302,7 @@ or more of
CPPFLAGS += "-DPNG_MIPS_MSA" CPPFLAGS += "-DPNG_MIPS_MSA"
CPPFLAGS += "-DPNG_INTEL_SSE" CPPFLAGS += "-DPNG_INTEL_SSE"
CPPFLAGS += "-DPNG_POWERPC_VSX" CPPFLAGS += "-DPNG_POWERPC_VSX"
CPPFLAGS += "-DPNG_RISCV_VECTOR" CPPFLAGS += "-DPNG_RISCV_RVV"
See for example scripts/makefile.linux-opt See for example scripts/makefile.linux-opt
@ -320,14 +320,14 @@ or via compiler-command options such as
CPPFLAGS += "-DPNG_ARM_NEON_OPT=0, -DPNG_MIPS_MSA_OPT=0, CPPFLAGS += "-DPNG_ARM_NEON_OPT=0, -DPNG_MIPS_MSA_OPT=0,
-DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0, -DPNG_INTEL_SSE_OPT=0, -DPNG_POWERPC_VSX_OPT=0,
-DPNG_RISCV_VECTOR_OPT=0" -DPNG_RISCV_RVV_OPT=0"
If you are using cmake, hardware optimizations are "on" If you are using cmake, hardware optimizations are "on"
by default. To disable them, use by default. To disable them, use
cmake . -DPNG_ARM_NEON=no -DPNG_INTEL_SSE=no \ cmake . -DPNG_ARM_NEON=no -DPNG_INTEL_SSE=no \
-DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no \ -DPNG_MIPS_MSA=no -DPNG_POWERPC_VSX=no \
-DPNG_RISCV_VECTOR=no -DPNG_RISCV_RVV=no
or disable them all at once with or disable them all at once with

View File

@ -134,9 +134,9 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += powerpc/powerpc_init.c\
powerpc/filter_vsx_intrinsics.c powerpc/filter_vsx_intrinsics.c
endif endif
if PNG_RISCV_VECTOR if PNG_RISCV_RVV
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += riscv/riscv_init.c\ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += riscv/riscv_init.c\
riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c riscv/filter_rvv_intrinsics.c
endif endif
if PNG_LOONGARCH_LSX if PNG_LOONGARCH_LSX

View File

@ -133,8 +133,8 @@ host_triplet = @host@
@PNG_LOONGARCH_LSX_TRUE@am__append_11 = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la @PNG_LOONGARCH_LSX_TRUE@am__append_11 = libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@lsx.la
@DO_PNG_PREFIX_TRUE@am__append_12 = -DPNG_PREFIX='@PNG_PREFIX@' @DO_PNG_PREFIX_TRUE@am__append_12 = -DPNG_PREFIX='@PNG_PREFIX@'
@PNG_RISCV_VECTOR_TRUE@am__append_13 = riscv/riscv_init.c\ @PNG_RISCV_RVV_TRUE@am__append_13 = riscv/riscv_init.c\
@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.c riscv/palette_vector_intrinsics.c @PNG_RISCV_RVV_TRUE@ riscv/filter_rvv_intrinsics.c
subdir = . subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@ -198,8 +198,7 @@ am__libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES_DIST = png.c \
mips/filter_mmi_inline_assembly.c intel/intel_init.c \ mips/filter_mmi_inline_assembly.c intel/intel_init.c \
intel/filter_sse2_intrinsics.c powerpc/powerpc_init.c \ intel/filter_sse2_intrinsics.c powerpc/powerpc_init.c \
powerpc/filter_vsx_intrinsics.c riscv/riscv_init.c \ powerpc/filter_vsx_intrinsics.c riscv/riscv_init.c \
riscv/filter_vector_intrinsics.c \ riscv/filter_rvv_intrinsics.c
riscv/palette_vector_intrinsics.c
am__dirstamp = $(am__leading_dot)dirstamp am__dirstamp = $(am__leading_dot)dirstamp
@PNG_ARM_NEON_TRUE@am__objects_1 = arm/arm_init.lo \ @PNG_ARM_NEON_TRUE@am__objects_1 = arm/arm_init.lo \
@PNG_ARM_NEON_TRUE@ arm/filter_neon_intrinsics.lo \ @PNG_ARM_NEON_TRUE@ arm/filter_neon_intrinsics.lo \
@ -213,9 +212,8 @@ am__dirstamp = $(am__leading_dot)dirstamp
@PNG_INTEL_SSE_TRUE@ intel/filter_sse2_intrinsics.lo @PNG_INTEL_SSE_TRUE@ intel/filter_sse2_intrinsics.lo
@PNG_POWERPC_VSX_TRUE@am__objects_6 = powerpc/powerpc_init.lo \ @PNG_POWERPC_VSX_TRUE@am__objects_6 = powerpc/powerpc_init.lo \
@PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.lo @PNG_POWERPC_VSX_TRUE@ powerpc/filter_vsx_intrinsics.lo
@PNG_RISCV_VECTOR_TRUE@am__objects_7 = riscv/riscv_init.lo \ @PNG_RISCV_RVV_TRUE@am__objects_7 = riscv/riscv_init.lo \
@PNG_RISCV_VECTOR_TRUE@ riscv/filter_vector_intrinsics.lo \ @PNG_RISCV_RVV_TRUE@ riscv/filter_rvv_intrinsics.lo
@PNG_RISCV_VECTOR_TRUE@ riscv/palette_vector_intrinsics.lo
am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = png.lo pngerror.lo \ am_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS = png.lo pngerror.lo \
pngget.lo pngmem.lo pngpread.lo pngread.lo pngrio.lo \ pngget.lo pngmem.lo pngpread.lo pngread.lo pngrio.lo \
pngrtran.lo pngrutil.lo pngset.lo pngtrans.lo pngwio.lo \ pngrtran.lo pngrutil.lo pngset.lo pngtrans.lo pngwio.lo \
@ -342,8 +340,7 @@ am__depfiles_remade = ./$(DEPDIR)/png.Plo ./$(DEPDIR)/pngerror.Plo \
mips/$(DEPDIR)/mips_init.Plo \ mips/$(DEPDIR)/mips_init.Plo \
powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo \ powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo \
powerpc/$(DEPDIR)/powerpc_init.Plo \ powerpc/$(DEPDIR)/powerpc_init.Plo \
riscv/$(DEPDIR)/filter_vector_intrinsics.Plo \ riscv/$(DEPDIR)/filter_rvv_intrinsics.Plo \
riscv/$(DEPDIR)/palette_vector_intrinsics.Plo \
riscv/$(DEPDIR)/riscv_init.Plo riscv/$(DEPDIR)/riscv_init.Plo
am__mv = mv -f am__mv = mv -f
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
@ -1091,9 +1088,7 @@ riscv/$(DEPDIR)/$(am__dirstamp):
@: > riscv/$(DEPDIR)/$(am__dirstamp) @: > riscv/$(DEPDIR)/$(am__dirstamp)
riscv/riscv_init.lo: riscv/$(am__dirstamp) \ riscv/riscv_init.lo: riscv/$(am__dirstamp) \
riscv/$(DEPDIR)/$(am__dirstamp) riscv/$(DEPDIR)/$(am__dirstamp)
riscv/filter_vector_intrinsics.lo: riscv/$(am__dirstamp) \ riscv/filter_rvv_intrinsics.lo: riscv/$(am__dirstamp) \
riscv/$(DEPDIR)/$(am__dirstamp)
riscv/palette_vector_intrinsics.lo: riscv/$(am__dirstamp) \
riscv/$(DEPDIR)/$(am__dirstamp) riscv/$(DEPDIR)/$(am__dirstamp)
libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la: $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(EXTRA_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@.la: $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_OBJECTS) $(libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES) $(EXTRA_libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_DEPENDENCIES)
@ -2228,8 +2223,7 @@ distclean: distclean-am
-rm -f mips/$(DEPDIR)/mips_init.Plo -rm -f mips/$(DEPDIR)/mips_init.Plo
-rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo
-rm -f powerpc/$(DEPDIR)/powerpc_init.Plo -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo
-rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo -rm -f riscv/$(DEPDIR)/filter_rvv_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/riscv_init.Plo -rm -f riscv/$(DEPDIR)/riscv_init.Plo
-rm -f Makefile -rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \ distclean-am: clean-am distclean-compile distclean-generic \
@ -2318,8 +2312,7 @@ maintainer-clean: maintainer-clean-am
-rm -f mips/$(DEPDIR)/mips_init.Plo -rm -f mips/$(DEPDIR)/mips_init.Plo
-rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo -rm -f powerpc/$(DEPDIR)/filter_vsx_intrinsics.Plo
-rm -f powerpc/$(DEPDIR)/powerpc_init.Plo -rm -f powerpc/$(DEPDIR)/powerpc_init.Plo
-rm -f riscv/$(DEPDIR)/filter_vector_intrinsics.Plo -rm -f riscv/$(DEPDIR)/filter_rvv_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/palette_vector_intrinsics.Plo
-rm -f riscv/$(DEPDIR)/riscv_init.Plo -rm -f riscv/$(DEPDIR)/riscv_init.Plo
-rm -f Makefile -rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic maintainer-clean-am: distclean-am maintainer-clean-generic

View File

@ -109,13 +109,13 @@
#undef PNG_POWERPC_VSX_OPT #undef PNG_POWERPC_VSX_OPT
/* Turn on RISC-V Vector optimizations at run-time */ /* Turn on RISC-V Vector optimizations at run-time */
#undef PNG_RISCV_VECTOR_API_SUPPORTED #undef PNG_RISCV_RVV_API_SUPPORTED
/* Check for RISC-V Vector support at run-time */ /* Check for RISC-V Vector support at run-time */
#undef PNG_RISCV_VECTOR_CHECK_SUPPORTED #undef PNG_RISCV_RVV_CHECK_SUPPORTED
/* Enable RISC-V Vector optimizations */ /* Enable RISC-V Vector optimizations */
#undef PNG_RISCV_VECTOR_OPT #undef PNG_RISCV_RVV_OPT
/* Define to 1 if all of the C90 standard headers exist (not just the ones /* Define to 1 if all of the C90 standard headers exist (not just the ones
required in a freestanding environment). This macro is provided for required in a freestanding environment). This macro is provided for

12
configure vendored
View File

@ -656,6 +656,8 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE am__EXEEXT_TRUE
LTLIBOBJS LTLIBOBJS
LIBOBJS LIBOBJS
PNG_RISCV_RVV_FALSE
PNG_RISCV_RVV_TRUE
PNG_LOONGARCH_LSX_FALSE PNG_LOONGARCH_LSX_FALSE
PNG_LOONGARCH_LSX_TRUE PNG_LOONGARCH_LSX_TRUE
PNG_POWERPC_VSX_FALSE PNG_POWERPC_VSX_FALSE
@ -858,6 +860,7 @@ enable_mips_mmi
enable_intel_sse enable_intel_sse
enable_powerpc_vsx enable_powerpc_vsx
enable_loongarch_lsx enable_loongarch_lsx
enable_riscv_rvv
' '
ac_precious_vars='build_alias ac_precious_vars='build_alias
host_alias host_alias
@ -15156,6 +15159,10 @@ printf "%s\n" "#define PNG_INTEL_SSE_OPT 0" >>confdefs.h
printf "%s\n" "#define PNG_LOONGARCH_LSX_OPT 0" >>confdefs.h printf "%s\n" "#define PNG_LOONGARCH_LSX_OPT 0" >>confdefs.h
enable_riscv_rvv=no
printf "%s\n" "#define PNG_RISCV_RVV_OPT 0" >>confdefs.h
;; ;;
*) *)
# allow enabling hardware optimization on any system: # allow enabling hardware optimization on any system:
@ -15193,6 +15200,11 @@ printf "%s\n" "#define PNG_POWERPC_VSX_OPT 2" >>confdefs.h
printf "%s\n" "#define PNG_LOONGARCH_LSX_OPT 1" >>confdefs.h printf "%s\n" "#define PNG_LOONGARCH_LSX_OPT 1" >>confdefs.h
;;
riscv*)
enable_riscv_rvv=yes
printf "%s\n" "#define PNG_RISCV_RVV_OPT 1" >>confdefs.h
;; ;;
esac esac
;; ;;

View File

@ -343,8 +343,8 @@ AC_ARG_ENABLE([hardware-optimizations],
enable_loongarch_lsx=no enable_loongarch_lsx=no
AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0], AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [0],
[Disable LOONGARCH_LSX optimizations]) [Disable LOONGARCH_LSX optimizations])
enable_riscv_vector=no enable_riscv_rvv=no
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], AC_DEFINE([PNG_RISCV_RVV_OPT], [0],
[Disable RISC-V Vector optimizations]) [Disable RISC-V Vector optimizations])
;; ;;
*) *)
@ -378,8 +378,8 @@ AC_ARG_ENABLE([hardware-optimizations],
AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1], AC_DEFINE([PNG_LOONGARCH_LSX_OPT], [1],
[Enable LOONGARCH_LSX optimizations]) [Enable LOONGARCH_LSX optimizations])
riscv*) riscv*)
enable_riscv_vector=yes enable_riscv_rvv=yes
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], AC_DEFINE([PNG_RISCV_RVV_OPT], [2],
[Enable RISC-V Vector optimizations]) [Enable RISC-V Vector optimizations])
;; ;;
esac esac
@ -674,7 +674,7 @@ AM_CONDITIONAL([PNG_LOONGARCH_LSX],
# RISC-V Vector support. # RISC-V Vector support.
AC_ARG_ENABLE([riscv-vector], AC_ARG_ENABLE([riscv-vector],
AS_HELP_STRING([[[--enable-riscv-vector]]], AS_HELP_STRING([[[--enable-riscv-rvv]]],
[Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:] [Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:]
[no/off: disable the optimizations; check: use internal checking code] [no/off: disable the optimizations; check: use internal checking code]
[api: disable by default, enable by a call to png_set_option] [api: disable by default, enable by a call to png_set_option]
@ -683,34 +683,34 @@ AS_HELP_STRING([[[--enable-riscv-vector]]],
[case "$enableval" in [case "$enableval" in
no|off) no|off)
# disable the default enabling on __ppc64__ systems: # disable the default enabling on __ppc64__ systems:
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [0], AC_DEFINE([PNG_RISCV_RVV_OPT], [0],
[Disable RISC-V Vector optimizations]) [Disable RISC-V Vector optimizations])
# Prevent inclusion of the platform-specific files below: # Prevent inclusion of the platform-specific files below:
enable_riscv_vector=no ;; enable_riscv_rvv=no ;;
check) check)
AC_DEFINE([PNG_RISCV_VECTOR_CHECK_SUPPORTED], [], AC_DEFINE([PNG_RISCV_RVV_CHECK_SUPPORTED], [],
[Check for RISC-V Vector support at run-time]) [Check for RISC-V Vector support at run-time])
AC_MSG_WARN([--enable-riscv-vector Please check contrib/riscv-vector/README file] AC_MSG_WARN([--enable-riscv-rvv Please check contrib/riscv-vector/README file]
[for the list of supported OSes.]);; [for the list of supported OSes.]);;
api) api)
AC_DEFINE([PNG_RISCV_VECTOR_API_SUPPORTED], [], AC_DEFINE([PNG_RISCV_RVV_API_SUPPORTED], [],
[Turn on RISC-V Vector optimizations at run-time]);; [Turn on RISC-V Vector optimizations at run-time]);;
yes|on) yes|on)
AC_DEFINE([PNG_RISCV_VECTOR_OPT], [2], AC_DEFINE([PNG_RISCV_RVV_OPT], [2],
[Enable RISC-V Vector optimizations]) [Enable RISC-V Vector optimizations])
AC_MSG_WARN([--enable-riscv-vector: please specify 'check' or 'api', if] AC_MSG_WARN([--enable-riscv-rvv: please specify 'check' or 'api', if]
[you want the optimizations unconditionally pass e.g. '-march=rv64gcv'] [you want the optimizations unconditionally pass e.g. '-march=rv64gcv']
[to the compiler.]);; [to the compiler.]);;
*) *)
AC_MSG_ERROR([--enable-riscv-vector=${enable_riscv_vector}: invalid value]) AC_MSG_ERROR([--enable-riscv-rvv=${enable_riscv_rvv}: invalid value])
esac]) esac])
# Add RISC-V-specific files to all builds where $host_cpu is riscv ('riscv*') # Add RISC-V-specific files to all builds where $host_cpu is riscv ('riscv*')
# or where RISC-V optimizations were explicitly requested (this allows a fallback # or where RISC-V optimizations were explicitly requested (this allows a fallback
# if a future host CPU does not match 'riscv*') # if a future host CPU does not match 'riscv*')
AM_CONDITIONAL([PNG_RISCV_VECTOR], AM_CONDITIONAL([PNG_RISCV_RVV],
[test "$enable_riscv_vector" != 'no' && [test "$enable_riscv_rvv" != 'no' &&
case "$host_cpu" in case "$host_cpu" in
riscv*) : ;; riscv*) : ;;
esac]) esac])

View File

@ -9,7 +9,7 @@ HOW TO USE THIS
--------------- ---------------
This directory contains C code fragments that can be included in This directory contains C code fragments that can be included in
riscv/riscv_init.c by setting the macro PNG_RISCV_VECTOR_FILE to the file name riscv/riscv_init.c by setting the macro PNG_RISCV_RVV_FILE to the file name
in "" or <> at build time. This setting is not recorded in pnglibconf.h and in "" or <> at build time. This setting is not recorded in pnglibconf.h and
can be changed simply by rebuilding riscv/riscv_init.o with the required macro can be changed simply by rebuilding riscv/riscv_init.o with the required macro
definition. definition.
@ -17,12 +17,12 @@ definition.
For any of this code to be used the RISC-V Vector code must be enabled and run For any of this code to be used the RISC-V Vector code must be enabled and run
time checks must be supported. I.e.: time checks must be supported. I.e.:
#if PNG_RISCV_VECTOR_OPT > 0 #if PNG_RISCV_RVV_OPT > 0
#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED #ifdef PNG_RISCV_RVV_CHECK_SUPPORTED
This is done in a 'configure' build by passing configure the argument: This is done in a 'configure' build by passing configure the argument:
--enable-riscv-vector=check --enable-riscv-rvv=check
Apart from the basic Linux implementation in contrib/riscv-vector/linux.c this Apart from the basic Linux implementation in contrib/riscv-vector/linux.c this
code is unsupported. That means that it is not even compiled on a regular code is unsupported. That means that it is not even compiled on a regular
@ -48,7 +48,7 @@ BUG REPORTS: an email address to which to send reports of problems
The file is a fragment of C code. It should not define any 'extern' symbols; The file is a fragment of C code. It should not define any 'extern' symbols;
everything should be static. It must define the function: everything should be static. It must define the function:
static int png_have_vector(png_structp png_ptr); static int png_have_rvv(png_structp png_ptr);
That function must return 1 if RISC-V Vector instructions are supported, 0 if That function must return 1 if RISC-V Vector instructions are supported, 0 if
not. It must not execute png_error unless it detects a bug. A png_error will not. It must not execute png_error unless it detects a bug. A png_error will

36
contrib/riscv-rvv/linux.c Normal file
View File

@ -0,0 +1,36 @@
/* contrib/riscv-rvv/linux.c
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* SEE contrib/riscv-rvv/README before reporting bugs
*
* STATUS: SUPPORTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_rvv implemented for Linux by reading the widely available
* pseudo-file /proc/cpuinfo.
*
* This code is strict ANSI-C and is probably moderately portable; it does
* however use <stdio.h> and it assumes that /proc/cpuinfo is never localized.
*/
#if defined(__linux__)
#include <asm/hwcap.h>
#include <sys/auxv.h>
#endif
static int
png_have_rvv(png_structp png_ptr) {
#if defined(__linux__)
return getauxval (AT_HWCAP) & COMPAT_HWCAP_ISA_V ? 1 : 0;
#else
#pragma message( \
"warning: RISC-V Vector not supported for this platform")
return 0;
#endif
}

View File

@ -1,57 +0,0 @@
/* contrib/riscv-vector/linux.c
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*
* SEE contrib/riscv-vector/README before reporting bugs
*
* STATUS: SUPPORTED
* BUG REPORTS: png-mng-implement@sourceforge.net
*
* png_have_vector implemented for Linux by reading the widely available
* pseudo-file /proc/cpuinfo.
*
* This code is strict ANSI-C and is probably moderately portable; it does
* however use <stdio.h> and it assumes that /proc/cpuinfo is never localized.
*/
#include <stdio.h>
#include <string.h>
static int
png_have_vector(png_structp png_ptr) {
FILE* f = fopen("/proc/cpuinfo", "rb");
if (f == NULL) {
#ifdef PNG_WARNINGS_SUPPORTED
png_warning(png_ptr, "/proc/cpuinfo open failed");
#endif
return 0;
}
char line[256];
while (fgets(line, sizeof line, f)) {
if (strncmp(line, "isa", 3) != 0) {
continue;
}
char* isa = strstr(line, "rv");
if (isa == NULL) {
continue;
}
if (strchr(isa + 2, 'v') != NULL) {
return 1;
}
}
fclose(f);
return 0;
}

View File

@ -143,6 +143,32 @@
# endif # endif
#endif #endif
#ifndef PNG_RISCV_RVV_OPT
/* RISCV_RVV optimizations are being controlled by the compiler settings,
* typically the target FPU then the compiler will define __RVV__ and we can rely
* unconditionally on NEON instructions not crashing, otherwise we must
* disable use of NEON instructions.
*
* NOTE: at present these optimizations depend on 'ALIGNED_MEMORY', so they
* can only be turned on automatically if that is supported too. If
* PNG_RISCV_RVV_OPT is set in CPPFLAGS (to >0) then riscv/riscv_init.c will fail
* to compile with an appropriate #error if ALIGNED_MEMORY has been turned
* off.
*
* Note that gcc and clang use the same __RVV__ flag. No known variations
* of this name is know as writing this code.
*
* To disable RISCV_RVV optimizations entirely, and skip compiling the
* associated assembler code, pass --enable-riscv-rvv=no to configure
* or put -DPNG_RISCV_RVV_OPT=0 in CPPFLAGS.
*/
# if defined(__RVV__) && defined(PNG_ALIGNED_MEMORY_SUPPORTED)
# define PNG_RISCV_RVV_OPT 2
# else
# define PNG_RISCV_RVV_OPT 0
# endif
#endif
#if PNG_ARM_NEON_OPT > 0 #if PNG_ARM_NEON_OPT > 0
/* NEON optimizations are to be at least considered by libpng, so enable the /* NEON optimizations are to be at least considered by libpng, so enable the
* callbacks to do this. * callbacks to do this.
@ -288,8 +314,14 @@
# define PNG_LOONGARCH_LSX_IMPLEMENTATION 0 # define PNG_LOONGARCH_LSX_IMPLEMENTATION 0
#endif #endif
#if PNG_RISCV_VECTOR_OPT > 0 #if PNG_RISCV_RVV_OPT > 0
# define PNG_RISCV_VECTOR_IMPLEMENTATION 1 # define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_rvv
# ifndef PNG_RISCV_RVV_IMPLEMENTATION
/* Use the intrinsics code by default. */
# define PNG_RISCV_RVV_IMPLEMENTATION 1
# endif
#else
# define PNG_RISCV_RVV_IMPLEMENTATION 0
#endif #endif
/* Is this a build of a DLL where compilation of the object modules requires /* Is this a build of a DLL where compilation of the object modules requires
@ -1526,24 +1558,20 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_lsx,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif #endif
#if PNG_RISCV_VECTOR_OPT > 0 #if PNG_RISCV_RVV_OPT > 0
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vector,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_128,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_vector_256,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_128,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_vector_256,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_vector,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_vector,(png_row_infop PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_rvv,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_vector,(png_row_infop
row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
#endif #endif
@ -2159,8 +2187,8 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_lsx,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY); (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif #endif
# if PNG_RISCV_VECTOR_OPT > 0 # if PNG_RISCV_RVV_OPT > 0
PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_vector, PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_rvv,
(png_structp png_ptr, unsigned int bpp), PNG_EMPTY); (png_structp png_ptr, unsigned int bpp), PNG_EMPTY);
#endif #endif
@ -2190,29 +2218,6 @@ PNG_INTERNAL_FUNCTION(int,
PNG_EMPTY); PNG_EMPTY);
#endif #endif
#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1
PNG_INTERNAL_FUNCTION(void,
png_riffle_palette_vector,
(png_structrp),
PNG_EMPTY);
PNG_INTERNAL_FUNCTION(int,
png_do_expand_palette_rgba8_vector,
(png_structrp,
png_row_infop,
png_const_bytep,
const png_bytepp,
const png_bytepp),
PNG_EMPTY);
PNG_INTERNAL_FUNCTION(int,
png_do_expand_palette_rgb8_vector,
(png_structrp,
png_row_infop,
png_const_bytep,
const png_bytepp,
const png_bytepp),
PNG_EMPTY);
#endif
/* Maintainer: Put new private prototypes here ^ */ /* Maintainer: Put new private prototypes here ^ */
#include "pngdebug.h" #include "pngdebug.h"

View File

@ -809,7 +809,8 @@ png_read_destroy(png_structrp png_ptr)
#endif #endif
#if defined(PNG_READ_EXPAND_SUPPORTED) && \ #if defined(PNG_READ_EXPAND_SUPPORTED) && \
(defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION)) (defined(PNG_ARM_NEON_IMPLEMENTATION) || \
defined(PNG_RISCV_RVV_IMPLEMENTATION))
png_free(png_ptr, png_ptr->riffled_palette); png_free(png_ptr, png_ptr->riffled_palette);
png_ptr->riffled_palette = NULL; png_ptr->riffled_palette = NULL;
#endif #endif

View File

@ -28,9 +28,9 @@
# endif # endif
#endif #endif
#ifdef PNG_RISCV_VECTOR_IMPLEMENTATION #ifdef PNG_RISCV_RVV_IMPLEMENTATION
# if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 # if PNG_RISCV_RVV_IMPLEMENTATION == 1
# define PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE # define PNG_RISCV_RVV_INTRINSICS_AVAILABLE
# endif # endif
#endif #endif
@ -4403,12 +4403,6 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row, i = png_do_expand_palette_rgba8_neon(png_ptr, row_info, row,
&sp, &dp); &sp, &dp);
} }
#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
if (png_ptr->riffled_palette != NULL)
{
i = png_do_expand_palette_rgba8_vector(png_ptr, row_info, row,
&sp, &dp);
}
#else #else
PNG_UNUSED(png_ptr) PNG_UNUSED(png_ptr)
#endif #endif
@ -4439,9 +4433,6 @@ png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE #ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row, i = png_do_expand_palette_rgb8_neon(png_ptr, row_info, row,
&sp, &dp); &sp, &dp);
#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
i = png_do_expand_palette_rgb8_vector(png_ptr, row_info, row,
&sp, &dp);
#else #else
PNG_UNUSED(png_ptr) PNG_UNUSED(png_ptr)
#endif #endif
@ -4871,17 +4862,6 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
png_riffle_palette_neon(png_ptr); png_riffle_palette_neon(png_ptr);
} }
} }
#elif defined PNG_RISCV_VECTOR_INTRINSICS_AVAILABLE
if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8))
{
if (png_ptr->riffled_palette == NULL)
{
/* Initialize the accelerated palette expansion. */
png_ptr->riffled_palette =
(png_bytep)png_malloc(png_ptr, 256 * 4);
png_riffle_palette_vector(png_ptr);
}
}
#endif #endif
png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1, png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans); png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);

View File

@ -375,7 +375,8 @@ struct png_struct_def
/* New member added in libpng-1.6.36 */ /* New member added in libpng-1.6.36 */
#if defined(PNG_READ_EXPAND_SUPPORTED) && \ #if defined(PNG_READ_EXPAND_SUPPORTED) && \
(defined(PNG_ARM_NEON_IMPLEMENTATION) || defined(PNG_RISCV_VECTOR_IMPLEMENTATION)) (defined(PNG_ARM_NEON_IMPLEMENTATION) || \
defined(PNG_RISCV_RVV_IMPLEMENTATION))
png_bytep riffled_palette; /* buffer for accelerated palette expansion */ png_bytep riffled_palette; /* buffer for accelerated palette expansion */
#endif #endif

View File

@ -0,0 +1,360 @@
/* filter_rvv_intrinsics.c - RISC-V Vector optimized filter functions
*
* Copyright (c) 2023 Google LLC
* Written by Manfred SCHLAEGL, 2022
* Dragoș Tiselice <dtiselice@google.com>, May 2023.
* Filip Wasil <f.wasil@samsung.com>, March 2025.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_RISCV_RVV_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
#include <riscv_vector.h>
void
png_read_filter_row_up_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
for (size_t vl; len > 0; len -= vl, row += vl, prev_row += vl) {
vl = __riscv_vsetvl_e8m8(len);
vuint8m8_t prev_vals = __riscv_vle8_v_u8m8(prev_row, vl);
vuint8m8_t row_vals = __riscv_vle8_v_u8m8(row, vl);
row_vals = __riscv_vadd_vv_u8m8(row_vals, prev_vals, vl);
__riscv_vse8_v_u8m8(row, row_vals, vl);
}
}
static inline void
png_read_filter_row_sub_rvv(size_t len, size_t bpp, unsigned char* row)
{
png_bytep rp_end = row + len;
/*
* row: | a | x |
*
* a = a + x
*
* a .. [v0](e8)
* x .. [v8](e8)
*/
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
/* a = *row */
asm volatile ("vle8.v v0, (%0)" : : "r" (row));
row += bpp;
while (row < rp_end) {
/* x = *row */
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
/* a = a + x */
asm volatile ("vadd.vv v0, v0, v8");
/* *row = a */
asm volatile ("vse8.v v0, (%0)" : : "r" (row));
row += bpp;
}
}
void
png_read_filter_row_sub3_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_sub_rvv(len, 3, row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_sub4_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_sub_rvv(len, 4, row);
PNG_UNUSED(prev_row)
}
static inline void
png_read_filter_row_avg_rvv(size_t len, size_t bpp, unsigned char* row,
const unsigned char* prev_row)
{
png_bytep rp_end = row + len;
/*
* row: | a | x |
* prev_row: | | b |
*
* a .. [v2](e8)
* b .. [v4](e8)
* x .. [v8](e8)
* tmp .. [v12-v13](e16)
*/
/* first pixel */
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
/* b = *prev_row */
asm volatile ("vle8.v v4, (%0)" : : "r" (prev_row));
prev_row += bpp;
/* x = *row */
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
/* b = b / 2 */
asm volatile ("vsrl.vi v4, v4, 1");
/* a = x + b */
asm volatile ("vadd.vv v2, v4, v8");
/* *row = a */
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
row += bpp;
/* remaining pixels */
while (row < rp_end) {
/* b = *prev_row */
asm volatile ("vle8.v v4, (%0)" : : "r" (prev_row));
prev_row += bpp;
/* x = *row */
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
/* tmp = a + b */
asm volatile ("vwaddu.vv v12, v2, v4"); /* add with widening */
/* a = tmp/2 */
asm volatile ("vnsrl.wi v2, v12, 1"); /* divide/shift with narrowing */
/* a += x */
asm volatile ("vadd.vv v2, v2, v8");
/* *row = a */
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
row += bpp;
}
}
void
png_read_filter_row_avg3_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_avg_rvv(len, 3, row, prev_row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_avg4_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_avg_rvv(len, 4, row, prev_row);
PNG_UNUSED(prev_row)
}
#define MIN_CHUNK_LEN 256
#define MAX_CHUNK_LEN 2048
static inline vuint8m1_t
prefix_sum(vuint8m1_t chunk, unsigned char* carry, size_t vl,
size_t max_chunk_len)
{
size_t r;
for (r = 1; r < MIN_CHUNK_LEN; r <<= 1) {
vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl);
chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl);
}
for (r = MIN_CHUNK_LEN; r < MAX_CHUNK_LEN && r < max_chunk_len; r <<= 1) {
vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl);
chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl);
}
chunk = __riscv_vadd_vx_u8m1(chunk, *carry, vl);
*carry = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(chunk, vl - 1, vl));
return chunk;
}
static inline vint16m1_t
abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl)
{
vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl));
vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl);
return __riscv_vmax_vv_i16m1(diff, neg, vl);
}
static inline vint16m1_t
abs_sum(vint16m1_t a, vint16m1_t b, size_t vl)
{
vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl);
vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl);
return __riscv_vmax_vv_i16m1(sum, neg, vl);
}
static inline void
png_read_filter_row_paeth_rvv(size_t len, size_t bpp, unsigned char* row,
const unsigned char* prev)
{
png_bytep rp_end = row + len;
/*
* row: | a | x |
* prev: | c | b |
*
* mask .. [v0]
* a .. [v2](e8)
* b .. [v4](e8)
* c .. [v6](e8)
* x .. [v8](e8)
* p .. [v12-v13](e16)
* pa .. [v16-v17](e16)
* pb .. [v20-v21](e16)
* pc .. [v24-v25](e16)
* tmpmask ..[v31]
*/
/* first pixel */
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
/* a = *row + *prev_row */
asm volatile ("vle8.v v2, (%0)" : : "r" (row));
asm volatile ("vle8.v v6, (%0)" : : "r" (prev));
prev += bpp;
asm volatile ("vadd.vv v2, v2, v6");
/* *row = a */
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
row += bpp;
/* remaining pixels */
while (row < rp_end) {
/* b = *prev_row */
asm volatile ("vle8.v v4, (%0)" : : "r" (prev));
prev += bpp;
/* x = *row */
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
/* sub (widening to 16bit) */
/* p = b - c */
asm volatile ("vwsubu.vv v12, v4, v6");
/* pc = a - c */
asm volatile ("vwsubu.vv v24, v2, v6");
/* switch to widened */
asm volatile ("vsetvli zero, %0, e16, m2" : : "r" (bpp));
/* pa = abs(p) -> pa = p < 0 ? -p : p */
asm volatile ("vmv.v.v v16, v12"); /* pa = p */
asm volatile ("vmslt.vx v0, v16, zero"); /* set mask[i] if pa[i] < 0 */
asm volatile ("vrsub.vx v16, v16, zero, v0.t"); /* invert negative values in pa; vd[i] = 0 - vs2[i] (if mask[i])
* could be replaced by vneg in rvv >= 1.0
*/
/* pb = abs(p) -> pb = pc < 0 ? -pc : pc */
asm volatile ("vmv.v.v v20, v24"); /* pb = pc */
asm volatile ("vmslt.vx v0, v20, zero"); /* set mask[i] if pc[i] < 0 */
asm volatile ("vrsub.vx v20, v20, zero, v0.t"); /* invert negative values in pb; vd[i] = 0 - vs2[i] (if mask[i])
* could be replaced by vneg in rvv >= 1.0
*/
/* pc = abs(p + pc) -> pc = (p + pc) < 0 ? -(p + pc) : p + pc */
asm volatile ("vadd.vv v24, v24, v12"); /* pc = p + pc */
asm volatile ("vmslt.vx v0, v24, zero"); /* set mask[i] if pc[i] < 0 */
asm volatile ("vrsub.vx v24, v24, zero, v0.t"); /* invert negative values in pc; vd[i] = 0 - vs2[i] (if mask[i])
* could be replaced by vneg in rvv >= 1.0
*/
/*
* if (pb < pa) {
* pa = pb;
* a = b; (see (*1))
* }
*/
asm volatile ("vmslt.vv v0, v20, v16"); /* set mask[i] if pb[i] < pa[i] */
asm volatile ("vmerge.vvm v16, v16, v20, v0"); /* pa[i] = pb[i] (if mask[i]) */
/*
* if (pc < pa)
* a = c; (see (*2))
*/
asm volatile ("vmslt.vv v31, v24, v16"); /* set tmpmask[i] if pc[i] < pa[i] */
/* switch to narrow */
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
/* (*1) */
asm volatile ("vmerge.vvm v2, v2, v4, v0"); /* a = b (if mask[i]) */
/* (*2) */
asm volatile ("vmand.mm v0, v31, v31"); /* mask = tmpmask
* vmand works for rvv 0.7 up to 1.0
* could be replaced by vmcpy in 0.7.1/0.8.1
* or vmmv.m in 1.0
*/
asm volatile ("vmerge.vvm v2, v2, v6, v0"); /* a = c (if mask[i]) */
/* a += x */
asm volatile ("vadd.vv v2, v2, v8");
/* *row = a */
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
row += bpp;
/* prepare next iteration (prev is already in a) */
/* c = b */
asm volatile ("vmv.v.v v6, v4");
}
}
void
png_read_filter_row_paeth3_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_paeth_rvv(len, 3, row, prev_row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_paeth4_rvv(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_paeth_rvv(len, 4, row, prev_row);
PNG_UNUSED(prev_row)
}
#endif /* PNG_RISCV_RVV_IMPLEMENTATION */
#endif /* READ */

View File

@ -1,280 +0,0 @@
/* filter_neon_intrinsics.c - RISC-V Vector optimized filter functions
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
#ifdef PNG_READ_SUPPORTED
#if PNG_RISCV_VECTOR_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
#include <riscv_vector.h>
void
png_read_filter_row_up_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
for (size_t vl; len > 0; len -= vl, row += vl, prev_row += vl) {
vl = __riscv_vsetvl_e8m8(len);
vuint8m8_t prev_vals = __riscv_vle8_v_u8m8(prev_row, vl);
vuint8m8_t row_vals = __riscv_vle8_v_u8m8(row, vl);
row_vals = __riscv_vadd_vv_u8m8(row_vals, prev_vals, vl);
__riscv_vse8_v_u8m8(row, row_vals, vl);
}
}
static inline void
png_read_filter_row_sub_vector_128(size_t len, size_t vl, unsigned char* row)
{
vuint8m1_t sum;
vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl);
for (; len > 0; len -= vl, row += vl) {
__riscv_vsetvl_e8m1(vl);
sum = chunk;
chunk = __riscv_vle8_v_u8m1(row, vl);
chunk = __riscv_vadd_vv_u8m1(chunk, sum, vl);
__riscv_vse8_v_u8m1(row, chunk, vl);
}
}
void
png_read_filter_row_sub3_vector_128(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_sub_vector_128(len, 3, row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_sub4_vector_128(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_sub_vector_128(len, 4, row);
PNG_UNUSED(prev_row)
}
static inline void
png_read_filter_row_avg_vector(size_t len, size_t vl, unsigned char* row,
const unsigned char* prev_row)
{
vuint8m1_t avg;
vuint8m1_t chunk = __riscv_vmv_v_x_u8m1(0, vl);
for (; len > 0; len -= vl, row += vl) {
__riscv_vsetvl_e8m1(vl);
vuint8m1_t prev_chunk = __riscv_vle8_v_u8m1(prev_row, vl);
avg = chunk;
chunk = __riscv_vle8_v_u8m1(row, vl);
vuint8m1_t sum = __riscv_vadd_vv_u8m1(chunk, prev_chunk, vl);
vuint8m1_t avg = __riscv_vsrl_vx_u8m1(sum, 1, vl);
chunk = __riscv_vadd_vv_u8m1(chunk, avg, vl);
__riscv_vse8_v_u8m1(row, chunk, vl);
}
}
void
png_read_filter_row_avg3_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_avg_vector(len, 3, row, prev_row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_avg4_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_avg_vector(len, 4, row, prev_row);
PNG_UNUSED(prev_row)
}
#define MIN_CHUNK_LEN 256
#define MAX_CHUNK_LEN 2048
static inline vuint8m1_t
prefix_sum(vuint8m1_t chunk, unsigned char* carry, size_t vl,
size_t max_chunk_len)
{
size_t r;
for (r = 1; r < MIN_CHUNK_LEN; r <<= 1) {
vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl);
chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl);
}
for (r = MIN_CHUNK_LEN; r < MAX_CHUNK_LEN && r < max_chunk_len; r <<= 1) {
vbool8_t shift_mask = __riscv_vmsgeu_vx_u8m1_b8(__riscv_vid_v_u8m1(vl), r, vl);
chunk = __riscv_vadd_vv_u8m1_mu(shift_mask, chunk, chunk, __riscv_vslideup_vx_u8m1(__riscv_vundefined_u8m1(), chunk, r, vl), vl);
}
chunk = __riscv_vadd_vx_u8m1(chunk, *carry, vl);
*carry = __riscv_vmv_x_s_u8m1_u8(__riscv_vslidedown_vx_u8m1(chunk, vl - 1, vl));
return chunk;
}
void
png_read_filter_row_sub3_vector_256(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
const size_t max_chunk_len = __riscv_vsetvlmax_e8m1();
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
unsigned char r_carry = 0;
unsigned char g_carry = 0;
unsigned char b_carry = 0;
for (size_t vl; len > 0; len -= vl * 3, row += vl * 3) {
vl = __riscv_vsetvl_e8m1(len / 3);
__riscv_vlseg3e8_v_u8m1(&r, &g, &b, row, vl);
r = prefix_sum(r, &r_carry, vl, max_chunk_len);
g = prefix_sum(g, &g_carry, vl, max_chunk_len);
b = prefix_sum(b, &b_carry, vl, max_chunk_len);
__riscv_vsseg3e8_v_u8m1(row, r, g, b, vl);
}
}
void
png_read_filter_row_sub4_vector_256(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
const size_t max_chunk_len = __riscv_vsetvlmax_e8m1();
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
vuint8m1_t a;
unsigned char r_carry = 0;
unsigned char g_carry = 0;
unsigned char b_carry = 0;
unsigned char a_carry = 0;
for (size_t vl; len > 0; len -= vl * 4, row += vl * 4) {
vl = __riscv_vsetvl_e8m1(len / 4);
__riscv_vlseg4e8_v_u8m1(&r, &g, &b, &a, row, vl);
r = prefix_sum(r, &r_carry, vl, max_chunk_len);
g = prefix_sum(g, &g_carry, vl, max_chunk_len);
b = prefix_sum(b, &b_carry, vl, max_chunk_len);
a = prefix_sum(a, &a_carry, vl, max_chunk_len);
__riscv_vsseg4e8_v_u8m1(row, r, g, b, a, vl);
}
}
static inline vint16m1_t
abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl)
{
vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl));
vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl);
return __riscv_vmax_vv_i16m1(diff, neg, vl);
}
static inline vint16m1_t
abs_sum(vint16m1_t a, vint16m1_t b, size_t vl)
{
vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl);
vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl);
return __riscv_vmax_vv_i16m1(sum, neg, vl);
}
static inline void
png_read_filter_row_paeth_vector(size_t len, size_t vl, unsigned char* row,
const unsigned char* prev)
{
vuint16m1_t a;
vuint16m1_t b = __riscv_vmv_v_x_u16m1(0, vl);
vuint16m1_t c;
vuint16m1_t d = __riscv_vmv_v_x_u16m1(0, vl);
for (; len > 0; len -= vl, row += vl, prev += vl) {
__riscv_vsetvl_e16m1(vl);
c = b;
b = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(prev, vl), vl);
a = d;
d = __riscv_vzext_vf2_u16m1(__riscv_vle8_v_u8mf2(row, vl), vl);
vint16m1_t pa = abs_diff(b, c, vl);
vint16m1_t pb = abs_diff(a, c, vl);
vint16m1_t pc = abs_sum(pa, pb, vl);
vint16m1_t smallest = __riscv_vmin_vv_i16m1(pa, __riscv_vmin_vv_i16m1(pb, pc, vl), vl);
vuint16m1_t nearest = c;
nearest = __riscv_vmerge_vvm_u16m1(nearest, a, __riscv_vmseq_vv_i16m1_b16(smallest, pa, vl), vl);
nearest = __riscv_vmerge_vvm_u16m1(nearest, b, __riscv_vmseq_vv_i16m1_b16(smallest, pb, vl), vl);
d = __riscv_vadd_vv_u16m1(d, nearest, vl);
__riscv_vse8_v_u8mf2(row, __riscv_vnsrl_wx_u8mf2(d, 0, vl), vl);
}
}
void
png_read_filter_row_paeth3_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_paeth_vector(len, 3, row, prev_row);
PNG_UNUSED(prev_row)
}
void
png_read_filter_row_paeth4_vector(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
size_t len = row_info->rowbytes;
png_read_filter_row_paeth_vector(len, 4, row, prev_row);
PNG_UNUSED(prev_row)
}
#endif /* PNG_RISCV_VECTOR_IMPLEMENTATION */
#endif /* READ */

View File

@ -1,104 +0,0 @@
/* palette_neon_intrinsics.c - RISC-V Vector optimized palette expansion
* functions
*
* Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
*
* This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer
* and license in png.h
*/
#include "../pngpriv.h"
#if PNG_ARM_NEON_IMPLEMENTATION == 1
#include <riscv_vector.h>
void
png_riffle_palette_vector(png_structrp png_ptr)
{
png_const_bytep palette = (png_const_bytep)png_ptr->palette;
png_bytep riffled_palette = png_ptr->riffled_palette;
png_const_bytep trans_alpha = png_ptr->trans_alpha;
size_t len = 256;
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
for (size_t vl; len > 0; len -= vl, palette += vl * 3, trans_alpha += vl, riffled_palette += vl * 4) {
vl = __riscv_vsetvl_e8m1(len);
__riscv_vlseg3e8_v_u8m1(&r, &g, &b, palette, vl);
vuint8m1_t a = __riscv_vle8_v_u8m1(trans_alpha, vl);
__riscv_vsseg4e8_v_u8m1(riffled_palette, r, g, b, a, vl);
}
}
int
png_do_expand_palette_rgba8_vector(png_structrp png_ptr, png_row_infop row_info,
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
{
size_t row_width = (size_t)row_info->width;
const png_uint_32* palette = (const png_uint_32*)png_ptr->riffled_palette;
size_t vl = __riscv_vsetvl_e8m1(row_width);
png_bytep sp = *ssp - vl;
png_bytep dp = *ddp - vl * 4;
for (; row_width > 0; row_width -= vl, dp -= vl * 4, sp -= vl) {
vl = __riscv_vsetvl_e8m1(row_width);
vuint8m1_t indices = __riscv_vle8_v_u8m1(sp, vl);
vuint32m4_t pixels = __riscv_vluxei8_v_u32m4(palette, indices, vl);
__riscv_vse32_v_u32m4((unsigned int *)dp, pixels, vl);
}
row_width = (size_t)row_info->width;
*ssp = *ssp - row_width;
*ddp = *ddp - row_width * 4;
return row_width;
}
int
png_do_expand_palette_rgb8_vector(png_structrp png_ptr, png_row_infop row_info,
png_const_bytep row, png_bytepp ssp, png_bytepp ddp)
{
size_t row_width = (size_t)row_info->width;
const png_const_bytep palette = (png_const_bytep)png_ptr->palette;
size_t vl = __riscv_vsetvl_e8m1(row_width);
png_bytep sp = *ssp - vl;
png_bytep dp = *ddp - vl * 3;
vuint8m1_t r;
vuint8m1_t g;
vuint8m1_t b;
for (; row_width > 0; row_width -= vl, dp -= vl * 3, sp -= vl) {
vl = __riscv_vsetvl_e8m1(row_width);
vuint16m2_t indices = __riscv_vwmulu_vx_u16m2(__riscv_vle8_v_u8m1(sp, vl), 3, vl);
__riscv_vluxseg3ei16_v_u8m1(&r, &g, &b, palette, indices, vl);
__riscv_vsseg3e8_v_u8m1(dp, r, g, b, vl);
}
row_width = (size_t)row_info->width;
*ssp = *ssp - row_width;
*ddp = *ddp - row_width * 3;
return row_width;
}
#endif /* PNG_ARM_NEON_IMPLEMENTATION */

View File

@ -2,7 +2,7 @@
* *
* Copyright (c) 2023 Google LLC * Copyright (c) 2023 Google LLC
* Written by Dragoș Tiselice <dtiselice@google.com>, May 2023. * Written by Dragoș Tiselice <dtiselice@google.com>, May 2023.
* * Filip Wasil <f.wasil@samsung.com>, March 2025.
* This code is released under the libpng license. * This code is released under the libpng license.
* For conditions of distribution and use, see the disclaimer * For conditions of distribution and use, see the disclaimer
* and license in png.h * and license in png.h
@ -12,40 +12,47 @@
#ifdef PNG_READ_SUPPORTED #ifdef PNG_READ_SUPPORTED
#if PNG_RISCV_VECTOR_OPT > 0 #if PNG_RISCV_RVV_OPT > 0
#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED /* Do run-time checks */
#include <riscv_vector.h>
#ifdef PNG_RISCV_RVV_CHECK_SUPPORTED /* Do run-time checks */
/* WARNING: it is strongly recommended that you do not build libpng with /* WARNING: it is strongly recommended that you do not build libpng with
* run-time checks for CPU features if at all possible. In the case of the * run-time checks for CPU features if at all possible. In the case of the
* RISC-V Vector instructions there is no processor-specific way of detecting * RISC-V Vector instructions there is no processor-specific way of detecting
* the presence of the required support, therefore run-time detection is * the presence of the required support, therefore run-time detection is
* extremely OS specific. * extremely OS specific.
* *
* You may set the macro PNG_RISCV_VECTOR_FILE to the file name of file containing * You may set the macro PNG_RISCV_RVV_FILE to the file name of file containing
* a fragment of C source code which defines the png_have_neon function. There * a fragment of C source code which defines the png_have_neon function. There
* are a number of implementations in contrib/riscv-vector, but the only one that * are a number of implementations in contrib/riscv-vector, but the only one that
* has partial support is contrib/riscv-vector/linux.c - a generic Linux * has partial support is contrib/riscv-vector/linux.c - a generic Linux
* implementation which reads /proc/cpuinfo. * implementation which reads /proc/cpuinfo.
*/ */
#ifndef PNG_RISCV_VECTOR_FILE #ifndef PNG_RISCV_RVV_FILE
# if defined(__linux__) # if defined(__linux__)
# define PNG_RISCV_VECTOR_FILE "contrib/riscv-vector/linux.c" # define PNG_RISCV_RVV_FILE "contrib/riscv-vector/linux.c"
# else # else
# error "No support for run-time RISC-V Vector checking; use compile-time options" # error "No support for run-time RISC-V Vector checking; use compile-time options"
# endif # endif
#endif #endif
static int png_have_vector(png_structp png_ptr); static int png_have_rvv(png_structp png_ptr);
#ifdef PNG_RISCV_VECTOR_FILE #ifdef PNG_RISCV_RVV_FILE
# include PNG_RISCV_VECTOR_FILE # include PNG_RISCV_RVV_FILE
#endif
#endif /* PNG_RISCV_RVV_CHECK_SUPPORTED */
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
#endif #endif
#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */
void void
png_init_filter_functions_vector(png_structp pp, unsigned int bpp) png_init_filter_functions_rvv(png_structp pp, unsigned int bpp)
{ {
/* The switch statement is compiled in for RISCV_VECTOR_API, the call to /* The switch statement is compiled in for RISCV_rvv_API, the call to
* png_have_vector is compiled in for RISCV_VECTOR_CHECK. If both are * png_have_rvv is compiled in for RISCV_rvv_CHECK. If both are
* defined the check is only performed if the API has not set the VECTOR * defined the check is only performed if the API has not set the VECTOR
* option on or off explicitly. In this case the check controls what * option on or off explicitly. In this case the check controls what
* happens. * happens.
@ -55,9 +62,9 @@ png_init_filter_functions_vector(png_structp pp, unsigned int bpp)
* wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF, * wrong order of the 'ON' and 'default' cases. UNSET now defaults to OFF,
* as documented in png.h * as documented in png.h
*/ */
png_debug(1, "in png_init_filter_functions_vector"); png_debug(1, "in png_init_filter_functions_rvv");
#ifdef PNG_RISCV_VECTOR_API_SUPPORTED #ifdef PNG_RISCV_RVV_API_SUPPORTED
switch ((pp->options >> PNG_RISCV_VECTOR) & 3) switch ((pp->options >> PNG_RISCV_RVV) & 3)
{ {
case PNG_OPTION_UNSET: case PNG_OPTION_UNSET:
/* Allow the run-time check to execute if it has been enabled - /* Allow the run-time check to execute if it has been enabled -
@ -65,23 +72,23 @@ png_init_filter_functions_vector(png_structp pp, unsigned int bpp)
* this case will fall through to the 'default' below, which just * this case will fall through to the 'default' below, which just
* returns. * returns.
*/ */
#endif /* PNG_RISCV_VECTOR_API_SUPPORTED */ #endif /* PNG_RISCV_RVV_API_SUPPORTED */
#ifdef PNG_RISCV_VECTOR_CHECK_SUPPORTED #ifdef PNG_RISCV_RVV_CHECK_SUPPORTED
{ {
static volatile sig_atomic_t no_vector = -1; /* not checked */ static volatile sig_atomic_t no_rvv = -1; /* not checked */
if (no_vector < 0) if (no_rvv < 0)
no_vector = !png_have_vector(pp); no_rvv = !png_have_rvv(pp);
if (no_vector) if (no_rvv)
return; return;
} }
#ifdef PNG_RISCV_VECTOR_API_SUPPORTED #ifdef PNG_RISCV_RVV_API_SUPPORTED
break; break;
#endif #endif
#endif /* PNG_RISCV_VECTOR_CHECK_SUPPORTED */ #endif /* PNG_RISCV_RVV_CHECK_SUPPORTED */
#ifdef PNG_RISCV_VECTOR_API_SUPPORTED #ifdef PNG_RISCV_RVV_API_SUPPORTED
default: /* OFF or INVALID */ default: /* OFF or INVALID */
return; return;
@ -102,29 +109,21 @@ png_init_filter_functions_vector(png_structp pp, unsigned int bpp)
* function you add. (Notice that this happens automatically for the * function you add. (Notice that this happens automatically for the
* initialization function.) * initialization function.)
*/ */
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vector; pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_rvv;
if (bpp == 3) if (bpp == 3)
{ {
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vector; pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_rvv;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vector; pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_rvv;
if (__riscv_vsetvlmax_e8m1() > 16) { pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_rvv;
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_128;
} else {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vector_256;
}
} }
else if (bpp == 4) else if (bpp == 4)
{ {
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vector; pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_rvv;
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vector; pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_rvv;
if (__riscv_vsetvlmax_e8m1() > 16) { pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_rvv;
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_128;
} else {
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vector_256;
}
} }
} }
#endif /* PNG_RISCV_VECTOR_OPT > 0 */ #endif /* PNG_RISCV_RVV_OPT > 0 */
#endif /* PNG_READ_SUPPORTED */ #endif /* PNG_READ_SUPPORTED */

View File

@ -22,7 +22,8 @@ RM_F = rm -f
# Compiler and linker flags # Compiler and linker flags
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
STDC = -pedantic-errors -std=c89 STDC = -pedantic-errors -std=c89
WARN = -Wall -Wextra -Wundef WARN = -Wall -Wextra -Wundef
WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \

View File

@ -21,7 +21,8 @@ RM_F = rm -f
# Compiler and linker flags # Compiler and linker flags
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
STDC = -pedantic-errors # -std=c99 STDC = -pedantic-errors # -std=c99
WARN = -Wall -Wextra -Wundef WARN = -Wall -Wextra -Wundef
WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \

View File

@ -28,7 +28,8 @@ RM_F=rm -f
# Compiler and linker flags # Compiler and linker flags
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
STDC = -pedantic-errors STDC = -pedantic-errors
WARN = -Wall -Wextra -Wundef WARN = -Wall -Wextra -Wundef
WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \

View File

@ -21,7 +21,8 @@ RM_F = rm -f
# Compiler and linker flags # Compiler and linker flags
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
STDC = -pedantic-errors # -std=c99 STDC = -pedantic-errors # -std=c99
WARN = -Wall -Wextra -Wundef WARN = -Wall -Wextra -Wundef
WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \

View File

@ -26,7 +26,8 @@ RM_F=rm -f
# Compiler and linker flags # Compiler and linker flags
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
STDC = -pedantic-errors STDC = -pedantic-errors
WARN = -Wall -Wextra -Wundef WARN = -Wall -Wextra -Wundef
WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \

View File

@ -32,7 +32,8 @@ LN_SF = ln -sf
# Compiler and linker flags # Compiler and linker flags
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
STDC = -pedantic-errors STDC = -pedantic-errors
WARN = -Wall -Wextra -Wundef WARN = -Wall -Wextra -Wundef
WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \ WARNMORE = -Wcast-align -Wconversion -Wshadow -Wpointer-arith -Wwrite-strings \

View File

@ -23,7 +23,8 @@ RM_F = rm -f
AWK = awk AWK = awk
NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \ NOHWOPT = -DPNG_ARM_NEON_OPT=0 -DPNG_MIPS_MSA_OPT=0 \
-DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 -DPNG_POWERPC_VSX_OPT=0 -DPNG_INTEL_SSE_OPT=0 \
-DPNG_RISCV_RVV_OPT=0
DFNFLAGS = # DFNFLAGS contains -D options to use in the libpng build DFNFLAGS = # DFNFLAGS contains -D options to use in the libpng build
DFA_EXTRA = # extra files that can be used to control configuration DFA_EXTRA = # extra files that can be used to control configuration
CPPFLAGS = -I$(ZLIBINC) $(NOHWOPT) # -DPNG_DEBUG=5 CPPFLAGS = -I$(ZLIBINC) $(NOHWOPT) # -DPNG_DEBUG=5