mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
Compare commits
4 Commits
21895b05ab
...
4266c75f40
Author | SHA1 | Date | |
---|---|---|---|
![]() |
4266c75f40 | ||
![]() |
f451a4de09 | ||
![]() |
6aa47debba | ||
![]() |
3391bb98e3 |
@ -322,9 +322,9 @@ if(PNG_HARDWARE_OPTIMIZATIONS)
|
||||
# Set definitions and sources for RISC-V.
|
||||
if(PNG_TARGET_ARCHITECTURE MATCHES "^(riscv)")
|
||||
include(CheckCCompilerFlag)
|
||||
set(PNG_RISCV_RVV_POSSIBLE_VALUES check on off)
|
||||
set(PNG_RISCV_RVV_POSSIBLE_VALUES on off)
|
||||
set(PNG_RISCV_RVV "off"
|
||||
CACHE STRING "Enable RISC-V Vector optimizations: check|on|off; off is default")
|
||||
CACHE STRING "Enable RISC-V Vector optimizations: on|off; off is default")
|
||||
set_property(CACHE PNG_RISCV_RVV
|
||||
PROPERTY STRINGS ${PNG_RISCV_RVV_POSSIBLE_VALUES})
|
||||
list(FIND PNG_RISCV_RVV_POSSIBLE_VALUES ${PNG_RISCV_RVV} index)
|
||||
@ -332,9 +332,6 @@ if(PNG_HARDWARE_OPTIMIZATIONS)
|
||||
message(FATAL_ERROR "PNG_RISCV_RVV must be one of [${PNG_RISCV_RVV_POSSIBLE_VALUES}]")
|
||||
elseif(NOT PNG_RISCV_RVV STREQUAL "off")
|
||||
|
||||
set(_SAVED_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "-march=rv64gv1p0")
|
||||
|
||||
check_c_source_compiles("
|
||||
#include <riscv_vector.h>
|
||||
#include <asm/hwcap.h>
|
||||
@ -349,8 +346,6 @@ if(PNG_HARDWARE_OPTIMIZATIONS)
|
||||
return (int)__riscv_vfmv_f_s_f32m1_f32(val);
|
||||
}" COMPILER_SUPPORTS_RVV)
|
||||
|
||||
set(CMAKE_REQUIRED_FLAGS ${_SAVED_CMAKE_REQUIRED_FLAGS})
|
||||
|
||||
if(NOT COMPILER_SUPPORTS_RVV)
|
||||
message(FATAL_ERROR "Compiler does not support RISC-V Vector extension or its unable to detect it")
|
||||
endif()
|
||||
@ -359,9 +354,8 @@ if(PNG_HARDWARE_OPTIMIZATIONS)
|
||||
riscv/riscv_init.c)
|
||||
if(PNG_RISCV_RVV STREQUAL "on")
|
||||
add_definitions(-DPNG_RISCV_RVV_OPT=2)
|
||||
elseif(PNG_RISCV_RVV STREQUAL "check")
|
||||
add_definitions(-DPNG_RISCV_RVV_CHECK_SUPPORTED)
|
||||
add_definitions(-DPNG_RISCV_RVV_OPT=1)
|
||||
else()
|
||||
add_definitions(-DPNG_RISCV_RVV_OPT=0)
|
||||
endif()
|
||||
else()
|
||||
add_definitions(-DPNG_RISCV_RVV_OPT=0)
|
||||
|
19
configure.ac
19
configure.ac
@ -679,8 +679,7 @@ AS_HELP_STRING([[[--enable-riscv-rvv]]],
|
||||
[Enable RISC-V Vector optimizations: =no/off, check, api, yes/on:]
|
||||
[no/off: disable the optimizations; check: use internal checking code]
|
||||
[api: disable by default, enable by a call to png_set_option]
|
||||
[yes/on: turn on unconditionally.]
|
||||
[If not specified: determined by the compiler.]),
|
||||
[yes/on: turn on. If not specified: determined by the compiler.]),
|
||||
[case "$enableval" in
|
||||
no|off)
|
||||
# disable the default enabling on __riscv systems:
|
||||
@ -688,20 +687,11 @@ AS_HELP_STRING([[[--enable-riscv-rvv]]],
|
||||
[Disable RISC-V Vector optimizations])
|
||||
# Prevent inclusion of the platform-specific files below:
|
||||
enable_riscv_rvv=no ;;
|
||||
check)
|
||||
AC_DEFINE([PNG_RISCV_RVV_CHECK_SUPPORTED], [],
|
||||
[Check for RISC-V Vector support at run-time])
|
||||
AC_MSG_WARN([--enable-riscv-rvv Please check contrib/riscv-rvv/README file]
|
||||
[for the list of supported OSes.]);;
|
||||
api)
|
||||
AC_DEFINE([PNG_RISCV_RVV_API_SUPPORTED], [],
|
||||
[Turn on RISC-V Vector optimizations at run-time]);;
|
||||
yes|on)
|
||||
AC_DEFINE([PNG_RISCV_RVV_OPT], [2],
|
||||
[Enable RISC-V Vector optimizations])
|
||||
AC_MSG_WARN([--enable-riscv-rvv: please specify 'check' or 'api', if]
|
||||
[you want the optimizations unconditionally pass e.g. '-march=rv64gv1p0']
|
||||
[to the compiler.]);;
|
||||
AC_MSG_WARN([--enable-riscv-rvv:]
|
||||
[if you want the optimizations pass e.g. '-march=rv64gv1p0' to the compiler.]);;
|
||||
*)
|
||||
AC_MSG_ERROR([--enable-riscv-rvv=${enable_riscv_rvv}: invalid value])
|
||||
esac])
|
||||
@ -715,8 +705,6 @@ then
|
||||
compiler_support_riscv_rvv=no
|
||||
AC_MSG_CHECKING(whether to use RISC-V RVV intrinsics)
|
||||
|
||||
save_CFLAGS=$CFLAGS
|
||||
CFLAGS="$CFLAGS -march=rv64gv1p0"
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#include <riscv_vector.h>
|
||||
#include <asm/hwcap.h>
|
||||
@ -737,7 +725,6 @@ int main() {
|
||||
else
|
||||
AC_MSG_WARN([Compiler does not support riscv rvv.])
|
||||
fi
|
||||
CFLAGS=$save_CFLAGS
|
||||
fi
|
||||
|
||||
# Add RISC-V-specific files to all builds where $host_cpu is riscv ('riscv64')
|
||||
|
@ -25,7 +25,7 @@
|
||||
#endif
|
||||
|
||||
static int
|
||||
png_have_rvv(png_structp png_ptr) {
|
||||
png_have_rvv() {
|
||||
#if defined(__linux__)
|
||||
return getauxval (AT_HWCAP) & COMPAT_HWCAP_ISA_V ? 1 : 0;
|
||||
#else
|
||||
|
@ -51,21 +51,22 @@ png_read_filter_row_sub_rvv(size_t len, size_t bpp, unsigned char* row)
|
||||
* x .. [v8](e8)
|
||||
*/
|
||||
|
||||
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
|
||||
size_t vl = __riscv_vsetvl_e8m1(bpp);
|
||||
|
||||
/* a = *row */
|
||||
asm volatile ("vle8.v v0, (%0)" : : "r" (row));
|
||||
vuint8m1_t a = __riscv_vle8_v_u8m1(row, vl);
|
||||
row += bpp;
|
||||
|
||||
while (row < rp_end)
|
||||
{
|
||||
/* x = *row */
|
||||
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
|
||||
vuint8m1_t x = __riscv_vle8_v_u8m1(row, vl);
|
||||
|
||||
/* a = a + x */
|
||||
asm volatile ("vadd.vv v0, v0, v8");
|
||||
a = __riscv_vadd_vv_u8m1(a, x, vl);
|
||||
|
||||
/* *row = a */
|
||||
asm volatile ("vse8.v v0, (%0)" : : "r" (row));
|
||||
__riscv_vse8_v_u8m1(row, a, vl);
|
||||
row += bpp;
|
||||
}
|
||||
}
|
||||
@ -110,44 +111,46 @@ png_read_filter_row_avg_rvv(size_t len, size_t bpp, unsigned char* row,
|
||||
|
||||
/* first pixel */
|
||||
|
||||
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
|
||||
size_t vl = __riscv_vsetvl_e8m1(bpp);
|
||||
|
||||
/* b = *prev_row */
|
||||
asm volatile ("vle8.v v4, (%0)" : : "r" (prev_row));
|
||||
vuint8m1_t b = __riscv_vle8_v_u8m1(prev_row, vl);
|
||||
prev_row += bpp;
|
||||
|
||||
/* x = *row */
|
||||
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
|
||||
vuint8m1_t x = __riscv_vle8_v_u8m1(row, vl);
|
||||
|
||||
/* b = b / 2 */
|
||||
asm volatile ("vsrl.vi v4, v4, 1");
|
||||
b = __riscv_vsrl_vx_u8m1(b, 1, vl);
|
||||
|
||||
/* a = x + b */
|
||||
asm volatile ("vadd.vv v2, v4, v8");
|
||||
vuint8m1_t a = __riscv_vadd_vv_u8m1(b, x, vl);
|
||||
|
||||
/* *row = a */
|
||||
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
|
||||
__riscv_vse8_v_u8m1(row, a, vl);
|
||||
row += bpp;
|
||||
|
||||
/* remaining pixels */
|
||||
|
||||
while (row < rp_end)
|
||||
{
|
||||
/* b = *prev_row */
|
||||
asm volatile ("vle8.v v4, (%0)" : : "r" (prev_row));
|
||||
b = __riscv_vle8_v_u8m1(prev_row, vl);
|
||||
prev_row += bpp;
|
||||
|
||||
/* x = *row */
|
||||
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
|
||||
x = __riscv_vle8_v_u8m1(row, vl);
|
||||
|
||||
/* tmp = a + b */
|
||||
asm volatile ("vwaddu.vv v12, v2, v4"); /* add with widening */
|
||||
vuint16m2_t tmp = __riscv_vwaddu_vv_u16m2(a, b, vl);
|
||||
|
||||
/* a = tmp/2 */
|
||||
asm volatile ("vnsrl.wi v2, v12, 1"); /* divide/shift with narrowing */
|
||||
a = __riscv_vnsrl_wx_u8m1(tmp, 1, vl);
|
||||
|
||||
/* a += x */
|
||||
asm volatile ("vadd.vv v2, v2, v8");
|
||||
a = __riscv_vadd_vv_u8m1(a, x, vl);
|
||||
|
||||
/* *row = a */
|
||||
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
|
||||
__riscv_vse8_v_u8m1(row, a, vl);
|
||||
row += bpp;
|
||||
}
|
||||
}
|
||||
@ -205,18 +208,14 @@ static inline vint16m1_t
|
||||
abs_diff(vuint16m1_t a, vuint16m1_t b, size_t vl)
|
||||
{
|
||||
vint16m1_t diff = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vsub_vv_u16m1(a, b, vl));
|
||||
vint16m1_t neg = __riscv_vneg_v_i16m1(diff, vl);
|
||||
|
||||
return __riscv_vmax_vv_i16m1(diff, neg, vl);
|
||||
vbool16_t mask = __riscv_vmslt_vx_i16m1_b16(diff, 0, vl);
|
||||
return __riscv_vrsub_vx_i16m1_m(mask, diff, 0, vl);
|
||||
}
|
||||
|
||||
static inline vint16m1_t
|
||||
abs_sum(vint16m1_t a, vint16m1_t b, size_t vl)
|
||||
{
|
||||
vint16m1_t sum = __riscv_vadd_vv_i16m1(a, b, vl);
|
||||
vint16m1_t neg = __riscv_vneg_v_i16m1(sum, vl);
|
||||
|
||||
return __riscv_vmax_vv_i16m1(sum, neg, vl);
|
||||
return __riscv_vadd_vv_i16m1(a, b, vl);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -229,119 +228,106 @@ png_read_filter_row_paeth_rvv(size_t len, size_t bpp, unsigned char* row,
|
||||
* row: | a | x |
|
||||
* prev: | c | b |
|
||||
*
|
||||
* mask .. [v0]
|
||||
* a .. [v2](e8)
|
||||
* b .. [v4](e8)
|
||||
* c .. [v6](e8)
|
||||
* x .. [v8](e8)
|
||||
* p .. [v12-v13](e16)
|
||||
* pa .. [v16-v17](e16)
|
||||
* pb .. [v20-v21](e16)
|
||||
* pc .. [v24-v25](e16)
|
||||
* tmpmask ..[v31]
|
||||
* pa, pb, pc .. [v16-v17, v20-v21, v24-v25](e16)
|
||||
*/
|
||||
|
||||
/* first pixel */
|
||||
|
||||
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
|
||||
size_t vl = __riscv_vsetvl_e8m1(bpp);
|
||||
|
||||
/* a = *row + *prev_row */
|
||||
asm volatile ("vle8.v v2, (%0)" : : "r" (row));
|
||||
asm volatile ("vle8.v v6, (%0)" : : "r" (prev));
|
||||
prev += bpp;
|
||||
asm volatile ("vadd.vv v2, v2, v6");
|
||||
/* a = *row */
|
||||
vuint8m1_t a = __riscv_vle8_v_u8m1(row, vl);
|
||||
|
||||
/* c = *prev */
|
||||
vuint8m1_t c = __riscv_vle8_v_u8m1(prev, vl);
|
||||
|
||||
/* a += c */
|
||||
a = __riscv_vadd_vv_u8m1(a, c, vl);
|
||||
|
||||
/* *row = a */
|
||||
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
|
||||
__riscv_vse8_v_u8m1(row, a, vl);
|
||||
row += bpp;
|
||||
prev += bpp;
|
||||
|
||||
/* remaining pixels */
|
||||
|
||||
while (row < rp_end)
|
||||
{
|
||||
/* b = *prev_row */
|
||||
asm volatile ("vle8.v v4, (%0)" : : "r" (prev));
|
||||
/* b = *prev */
|
||||
vuint8m1_t b = __riscv_vle8_v_u8m1(prev, vl);
|
||||
prev += bpp;
|
||||
|
||||
/* x = *row */
|
||||
asm volatile ("vle8.v v8, (%0)" : : "r" (row));
|
||||
vuint8m1_t x = __riscv_vle8_v_u8m1(row, vl);
|
||||
|
||||
/* sub (widening to 16bit) */
|
||||
/* p = b - c */
|
||||
asm volatile ("vwsubu.vv v12, v4, v6");
|
||||
/* pc = a - c */
|
||||
asm volatile ("vwsubu.vv v24, v2, v6");
|
||||
/* Calculate p = b - c and pc = a - c using widening subtraction */
|
||||
vuint16m2_t p_wide = __riscv_vwsubu_vv_u16m2(b, c, vl);
|
||||
vuint16m2_t pc_wide = __riscv_vwsubu_vv_u16m2(a, c, vl);
|
||||
|
||||
/* switch to widened */
|
||||
asm volatile ("vsetvli zero, %0, e16, m2" : : "r" (bpp));
|
||||
/* Convert to signed for easier manipulation */
|
||||
size_t vl16 = __riscv_vsetvl_e16m2(bpp);
|
||||
vint16m2_t p = __riscv_vreinterpret_v_u16m2_i16m2(p_wide);
|
||||
vint16m2_t pc = __riscv_vreinterpret_v_u16m2_i16m2(pc_wide);
|
||||
|
||||
/* pa = abs(p) -> pa = p < 0 ? -p : p */
|
||||
asm volatile ("vmv.v.v v16, v12"); /* pa = p */
|
||||
asm volatile ("vmslt.vx v0, v16, zero"); /* set mask[i] if pa[i] < 0 */
|
||||
asm volatile ("vrsub.vx v16, v16, zero, v0.t"); /* invert negative values in pa; vd[i] = 0 - vs2[i] (if mask[i])
|
||||
* could be replaced by vneg in rvv >= 1.0
|
||||
*/
|
||||
/* pa = |p| */
|
||||
vbool8_t p_neg_mask = __riscv_vmslt_vx_i16m2_b8(p, 0, vl16);
|
||||
vint16m2_t pa = __riscv_vrsub_vx_i16m2_m(p_neg_mask, p, 0, vl16);
|
||||
|
||||
/* pb = abs(p) -> pb = pc < 0 ? -pc : pc */
|
||||
asm volatile ("vmv.v.v v20, v24"); /* pb = pc */
|
||||
asm volatile ("vmslt.vx v0, v20, zero"); /* set mask[i] if pc[i] < 0 */
|
||||
asm volatile ("vrsub.vx v20, v20, zero, v0.t"); /* invert negative values in pb; vd[i] = 0 - vs2[i] (if mask[i])
|
||||
* could be replaced by vneg in rvv >= 1.0
|
||||
*/
|
||||
/* pb = |pc| */
|
||||
vbool8_t pc_neg_mask = __riscv_vmslt_vx_i16m2_b8(pc, 0, vl16);
|
||||
vint16m2_t pb = __riscv_vrsub_vx_i16m2_m(pc_neg_mask, pc, 0, vl16);
|
||||
|
||||
/* pc = abs(p + pc) -> pc = (p + pc) < 0 ? -(p + pc) : p + pc */
|
||||
asm volatile ("vadd.vv v24, v24, v12"); /* pc = p + pc */
|
||||
asm volatile ("vmslt.vx v0, v24, zero"); /* set mask[i] if pc[i] < 0 */
|
||||
asm volatile ("vrsub.vx v24, v24, zero, v0.t"); /* invert negative values in pc; vd[i] = 0 - vs2[i] (if mask[i])
|
||||
* could be replaced by vneg in rvv >= 1.0
|
||||
*/
|
||||
/* pc = |p + pc| */
|
||||
vint16m2_t p_plus_pc = __riscv_vadd_vv_i16m2(p, pc, vl16);
|
||||
vbool8_t p_plus_pc_neg_mask = __riscv_vmslt_vx_i16m2_b8(p_plus_pc, 0, vl16);
|
||||
pc = __riscv_vrsub_vx_i16m2_m(p_plus_pc_neg_mask, p_plus_pc, 0, vl16);
|
||||
|
||||
/*
|
||||
* if (pb < pa)
|
||||
* {
|
||||
* pa = pb;
|
||||
* a = b;
|
||||
* // see (*1)
|
||||
* }
|
||||
* The key insight is that we want the minimum of pa, pb, pc.
|
||||
* - If pa <= pb and pa <= pc, use a
|
||||
* - Else if pb <= pc, use b
|
||||
* - Else use c
|
||||
*/
|
||||
asm volatile ("vmslt.vv v0, v20, v16"); /* set mask[i] if pb[i] < pa[i] */
|
||||
asm volatile ("vmerge.vvm v16, v16, v20, v0"); /* pa[i] = pb[i] (if mask[i]) */
|
||||
|
||||
/*
|
||||
* if (pc < pa)
|
||||
* {
|
||||
* a = c;
|
||||
* // see (*2)
|
||||
* }
|
||||
*/
|
||||
asm volatile ("vmslt.vv v31, v24, v16"); /* set tmpmask[i] if pc[i] < pa[i] */
|
||||
/* Find which predictor to use based on minimum absolute difference */
|
||||
vbool8_t pa_le_pb = __riscv_vmsle_vv_i16m2_b8(pa, pb, vl16);
|
||||
vbool8_t pa_le_pc = __riscv_vmsle_vv_i16m2_b8(pa, pc, vl16);
|
||||
vbool8_t pb_le_pc = __riscv_vmsle_vv_i16m2_b8(pb, pc, vl16);
|
||||
|
||||
/* switch to narrow */
|
||||
asm volatile ("vsetvli zero, %0, e8, m1" : : "r" (bpp));
|
||||
/* use_a = pa <= pb && pa <= pc */
|
||||
vbool8_t use_a = __riscv_vmand_mm_b8(pa_le_pb, pa_le_pc, vl16);
|
||||
|
||||
/* (*1) */
|
||||
asm volatile ("vmerge.vvm v2, v2, v4, v0"); /* a = b (if mask[i]) */
|
||||
/* use_b = !use_a && pb <= pc */
|
||||
vbool8_t not_use_a = __riscv_vmnot_m_b8(use_a, vl16);
|
||||
vbool8_t use_b = __riscv_vmand_mm_b8(not_use_a, pb_le_pc, vl16);
|
||||
|
||||
/* (*2) */
|
||||
asm volatile ("vmand.mm v0, v31, v31"); /* mask = tmpmask
|
||||
* vmand works for rvv 0.7 up to 1.0
|
||||
* could be replaced by vmcpy in 0.7.1/0.8.1
|
||||
* or vmmv.m in 1.0
|
||||
*/
|
||||
asm volatile ("vmerge.vvm v2, v2, v6, v0"); /* a = c (if mask[i]) */
|
||||
/* Switch back to e8m1 for final operations */
|
||||
vl = __riscv_vsetvl_e8m1(bpp);
|
||||
|
||||
/* a += x */
|
||||
asm volatile ("vadd.vv v2, v2, v8");
|
||||
/* Start with a, then conditionally replace with b or c */
|
||||
vuint8m1_t result = a;
|
||||
result = __riscv_vmerge_vvm_u8m1(result, b, use_b, vl);
|
||||
|
||||
/* use_c = !use_a && !use_b */
|
||||
vbool8_t use_c = __riscv_vmnand_mm_b8(__riscv_vmor_mm_b8(use_a, use_b, vl), __riscv_vmor_mm_b8(use_a, use_b, vl), vl);
|
||||
result = __riscv_vmerge_vvm_u8m1(result, c, use_c, vl);
|
||||
|
||||
/* a = result + x */
|
||||
a = __riscv_vadd_vv_u8m1(result, x, vl);
|
||||
|
||||
/* *row = a */
|
||||
asm volatile ("vse8.v v2, (%0)" : : "r" (row));
|
||||
__riscv_vse8_v_u8m1(row, a, vl);
|
||||
row += bpp;
|
||||
|
||||
/* prepare next iteration (prev is already in a) */
|
||||
/* c = b */
|
||||
asm volatile ("vmv.v.v v6, v4");
|
||||
/* c = b for next iteration */
|
||||
c = b;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
png_read_filter_row_paeth3_rvv(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev_row)
|
||||
@ -349,8 +335,6 @@ png_read_filter_row_paeth3_rvv(png_row_infop row_info, png_bytep row,
|
||||
size_t len = row_info->rowbytes;
|
||||
|
||||
png_read_filter_row_paeth_rvv(len, 3, row, prev_row);
|
||||
|
||||
PNG_UNUSED(prev_row)
|
||||
}
|
||||
|
||||
void
|
||||
@ -360,9 +344,7 @@ png_read_filter_row_paeth4_rvv(png_row_infop row_info, png_bytep row,
|
||||
size_t len = row_info->rowbytes;
|
||||
|
||||
png_read_filter_row_paeth_rvv(len, 4, row, prev_row);
|
||||
|
||||
PNG_UNUSED(prev_row)
|
||||
}
|
||||
|
||||
#endif /* PNG_RISCV_RVV_IMPLEMENTATION */
|
||||
#endif /* READ */
|
||||
#endif /* PNG_RISCV_RVV_IMPLEMENTATION == 1 */
|
||||
#endif /* PNG_READ_SUPPORTED */
|
||||
|
@ -16,20 +16,6 @@
|
||||
|
||||
#include <riscv_vector.h>
|
||||
|
||||
#ifdef PNG_RISCV_RVV_CHECK_SUPPORTED /* Do run-time checks */
|
||||
/* WARNING: it is strongly recommended that you do not build libpng with
|
||||
* run-time checks for CPU features if at all possible. In the case of the
|
||||
* RISC-V Vector instructions there is no processor-specific way of detecting
|
||||
* the presence of the required support, therefore run-time detection is
|
||||
* extremely OS specific.
|
||||
*
|
||||
* You may set the macro PNG_RISCV_RVV_FILE to the file name of file containing
|
||||
* a fragment of C source code which defines the png_have_rvv function. There
|
||||
* are a number of implementations in contrib/riscv-rvv, but the only one that
|
||||
* has partial support is contrib/riscv-rvv/linux.c - a generic Linux
|
||||
* implementation which reads /proc/cpuinfo.
|
||||
*/
|
||||
|
||||
#include <signal.h>
|
||||
|
||||
#ifndef PNG_RISCV_RVV_FILE
|
||||
@ -40,11 +26,10 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
static int png_have_rvv(png_structp png_ptr);
|
||||
static int png_have_rvv();
|
||||
#ifdef PNG_RISCV_RVV_FILE
|
||||
# include PNG_RISCV_RVV_FILE
|
||||
#endif
|
||||
#endif /* PNG_RISCV_RVV_CHECK_SUPPORTED */
|
||||
|
||||
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
|
||||
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
|
||||
@ -53,59 +38,16 @@ static int png_have_rvv(png_structp png_ptr);
|
||||
void
|
||||
png_init_filter_functions_rvv(png_structp pp, unsigned int bpp)
|
||||
{
|
||||
/* The switch statement is compiled in for RISCV_RVV_API, the call to
|
||||
* png_have_rvv is compiled in for RISCV_RVV_CHECK. If both are
|
||||
* defined the check is only performed if the API has not set the VECTOR
|
||||
* option on or off explicitly. In this case the check controls what
|
||||
* happens.
|
||||
*/
|
||||
png_debug(1, "in png_init_filter_functions_rvv");
|
||||
#ifdef PNG_RISCV_RVV_API_SUPPORTED
|
||||
switch ((pp->options >> PNG_RISCV_RVV) & 3)
|
||||
{
|
||||
case PNG_OPTION_UNSET:
|
||||
/* Allow the run-time check to execute if it has been enabled -
|
||||
* thus both API and CHECK can be turned on. If it isn't supported
|
||||
* this case will fall through to the 'default' below, which just
|
||||
* returns.
|
||||
*/
|
||||
#endif /* PNG_RISCV_RVV_API_SUPPORTED */
|
||||
#ifdef PNG_RISCV_RVV_CHECK_SUPPORTED
|
||||
{
|
||||
|
||||
static volatile sig_atomic_t no_rvv = -1; /* not checked */
|
||||
|
||||
if (no_rvv < 0)
|
||||
no_rvv = !png_have_rvv(pp);
|
||||
no_rvv = !png_have_rvv();
|
||||
|
||||
if (no_rvv)
|
||||
return;
|
||||
}
|
||||
#ifdef PNG_RISCV_RVV_API_SUPPORTED
|
||||
break;
|
||||
#endif
|
||||
#endif /* PNG_RISCV_RVV_CHECK_SUPPORTED */
|
||||
|
||||
#ifdef PNG_RISCV_RVV_API_SUPPORTED
|
||||
default: /* OFF or INVALID */
|
||||
return;
|
||||
|
||||
case PNG_OPTION_ON:
|
||||
/* Option turned on */
|
||||
break;
|
||||
}
|
||||
#endif /* PNG_RISCV_RVV_API_SUPPORTED */
|
||||
|
||||
/* IMPORTANT: any new external functions used here must be declared using
|
||||
* PNG_INTERNAL_FUNCTION in ../pngpriv.h. This is required so that the
|
||||
* 'prefix' option to configure works:
|
||||
*
|
||||
* ./configure --with-libpng-prefix=foobar_
|
||||
*
|
||||
* Verify you have got this right by running the above command, doing a build
|
||||
* and examining pngprefix.h; it must contain a #define for every external
|
||||
* function you add. (Notice that this happens automatically for the
|
||||
* initialization function.)
|
||||
*/
|
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_rvv;
|
||||
|
||||
if (bpp == 3)
|
||||
|
Loading…
x
Reference in New Issue
Block a user