From ce4b632dd756d426c8cd7cd973904e8ea2e1e763 Mon Sep 17 00:00:00 2001 From: gxw Date: Mon, 8 Jan 2024 17:46:41 +0800 Subject: [PATCH] Optimize png16 with loongson mmi for 64-bit os --- CMakeLists.txt | 41 ++- Makefile.am | 7 + configure.ac | 51 +++ contrib/mips-mmi/linux.c | 140 ++++++++ mips/filter_mmi_inline_assembly.c | 524 ++++++++++++++++++++++++++++++ mips/mips_init.c | 82 ++++- png.h | 6 +- pngpriv.h | 52 ++- scripts/pnglibconf.dfa | 49 +++ 9 files changed, 937 insertions(+), 15 deletions(-) create mode 100644 contrib/mips-mmi/linux.c create mode 100644 mips/filter_mmi_inline_assembly.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c6975647..9a57e8364 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -227,19 +227,44 @@ if(TARGET_ARCH MATCHES "^(mipsel|mips64el)") CACHE STRING "Enable MIPS_MSA optimizations: on|off; on is default") set_property(CACHE PNG_MIPS_MSA PROPERTY STRINGS ${PNG_MIPS_MSA_POSSIBLE_VALUES}) - list(FIND PNG_MIPS_MSA_POSSIBLE_VALUES ${PNG_MIPS_MSA} index) - if(index EQUAL -1) + list(FIND PNG_MIPS_MSA_POSSIBLE_VALUES ${PNG_MIPS_MSA} index_msa) + if(index_msa EQUAL -1) message(FATAL_ERROR "PNG_MIPS_MSA must be one of [${PNG_MIPS_MSA_POSSIBLE_VALUES}]") - elseif(NOT PNG_MIPS_MSA STREQUAL "off") + endif() + + set(PNG_MIPS_MMI_POSSIBLE_VALUES on off) + set(PNG_MIPS_MMI "on" + CACHE STRING "Enable MIPS_MMI optimizations: on|off; on is default") + set_property(CACHE PNG_MIPS_MMI + PROPERTY STRINGS ${PNG_MIPS_MMI_POSSIBLE_VALUES}) + list(FIND PNG_MIPS_MMI_POSSIBLE_VALUES ${PNG_MIPS_MMI} index_mmi) + if(index_mmi EQUAL -1) + message(FATAL_ERROR "PNG_MIPS_MMI must be one of [${PNG_MIPS_MMI_POSSIBLE_VALUES}]") + endif() + + if(PNG_MIPS_MSA STREQUAL "on" AND PNG_MIPS_MMI STREQUAL "on") + set(libpng_mips_sources + mips/mips_init.c + mips/filter_msa_intrinsics.c + mips/filter_mmi_inline_assembly.c) + add_definitions(-DPNG_MIPS_MSA_OPT=2) + add_definitions(-DPNG_MIPS_MMI_OPT=1) + elseif(PNG_MIPS_MSA STREQUAL "on") set(libpng_mips_sources 
mips/mips_init.c mips/filter_msa_intrinsics.c) - if(PNG_MIPS_MSA STREQUAL "on") - add_definitions(-DPNG_MIPS_MSA_OPT=2) - endif() - else() + add_definitions(-DPNG_MIPS_MSA_OPT=2) + add_definitions(-DPNG_MIPS_MMI_OPT=0) + elseif(PNG_MIPS_MMI STREQUAL "on") + set(libpng_mips_sources + mips/mips_init.c + mips/filter_mmi_inline_assembly.c) add_definitions(-DPNG_MIPS_MSA_OPT=0) - endif() + add_definitions(-DPNG_MIPS_MMI_OPT=1) + else() + add_definitions(-DPNG_MIPS_MSA_OPT=0) + add_definitions(-DPNG_MIPS_MMI_OPT=0) + endif() endif() else(PNG_HARDWARE_OPTIMIZATIONS) diff --git a/Makefile.am b/Makefile.am index 370bdbf78..43ad6e2a1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -117,6 +117,13 @@ libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c\ mips/filter_msa_intrinsics.c endif +if PNG_MIPS_MMI +if !PNG_MIPS_MSA +libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/mips_init.c +endif +libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += mips/filter_mmi_inline_assembly.c +endif + if PNG_INTEL_SSE libpng@PNGLIB_MAJOR@@PNGLIB_MINOR@_la_SOURCES += intel/intel_init.c\ intel/filter_sse2_intrinsics.c diff --git a/configure.ac b/configure.ac index 938c106a7..c485a6f34 100644 --- a/configure.ac +++ b/configure.ac @@ -328,6 +328,9 @@ AC_ARG_ENABLE([hardware-optimizations], enable_mips_msa=no AC_DEFINE([PNG_MIPS_MSA_OPT], [0], [Disable MIPS_MSA optimizations]) + enable_mips_mmi=no + AC_DEFINE([PNG_MIPS_MMI_OPT], [0], + [Disable MIPS_MMI optimizations]) enable_powerpc_vsx=no AC_DEFINE([PNG_POWERPC_VSX_OPT], [0], [Disable POWERPC VSX optimizations]) @@ -347,7 +350,10 @@ AC_ARG_ENABLE([hardware-optimizations], [Enable ARM_NEON optimizations]) ;; mipsel*|mips64el*) + enable_mips_mmi=yes enable_mips_msa=yes + AC_DEFINE([PNG_MIPS_MMI_OPT], [1], + [Enable MIPS_MMI optimizations]) AC_DEFINE([PNG_MIPS_MSA_OPT], [2], [Enable MIPS_MSA optimizations]) ;; @@ -461,6 +467,51 @@ AM_CONDITIONAL([PNG_MIPS_MSA], mipsel*|mips64el*) : ;; esac]) +# MIPS +# === +# +# MIPS MMI (SIMD) support. 
+ +AC_ARG_ENABLE([mips-mmi], + AS_HELP_STRING([[[--enable-mips-mmi]]], + [Enable MIPS MMI optimizations: =no/off, check, api, yes/on:] + [no/off: disable the optimizations; check: use internal checking code] + [(deprecated and poorly supported); api: disable by default, enable by] + [a call to png_set_option; yes/on: turn on unconditionally.] + [If not specified: determined by the compiler.]), + [case "$enableval" in + no|off) + # disable the default enabling on __mips_mmi systems: + AC_DEFINE([PNG_MIPS_MMI_OPT], [0], + [Disable MIPS MMI optimizations]) + # Prevent inclusion of the assembler files below: + enable_mips_mmi=no;; + check) + AC_DEFINE([PNG_MIPS_MMI_CHECK_SUPPORTED], [], + [Check for MIPS MMI support at run-time]);; + api) + AC_DEFINE([PNG_MIPS_MMI_API_SUPPORTED], [], + [Turn on MIPS MMI optimizations at run-time]);; + yes|on) + AC_DEFINE([PNG_MIPS_MMI_OPT], [1], + [Enable MIPS MMI optimizations]) + AC_MSG_WARN([--enable-mips-mmi: please specify 'check' or 'api', if] + [you want the optimizations unconditionally pass '-mloongson-mmi -march=loongson3a'] + [to the compiler.]);; + *) + AC_MSG_ERROR([--enable-mips-mmi=${enable_mips_mmi}: invalid value]) + esac]) + +# Add MIPS specific files to all builds where the host_cpu is mips ('mips*') or +# where MIPS optimizations were explicitly requested (this allows a fallback if a +# future host CPU does not match 'mips*') + +AM_CONDITIONAL([PNG_MIPS_MMI], + [test "$enable_mips_mmi" != 'no' && + case "$host_cpu" in + mipsel*|mips64el*) :;; + esac]) + # INTEL # ===== # diff --git a/contrib/mips-mmi/linux.c b/contrib/mips-mmi/linux.c new file mode 100644 index 000000000..5bb79a60b --- /dev/null +++ b/contrib/mips-mmi/linux.c @@ -0,0 +1,140 @@ +/* contrib/mips-mmi/linux.c + * + * Written by guxiwei 2023 + * + * This code is released under the libpng license. 
+ * For conditions of distribution and use, see the disclaimer + * and license in png.h + * + */ +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <sys/auxv.h> + +/* + * parse_r var, r - Helper assembler macro for parsing register names. + * + * This converts the register name in $n form provided in \r to the + * corresponding register number, which is assigned to the variable \var. It is + * needed to allow explicit encoding of instructions in inline assembly where + * registers are chosen by the compiler in $n form, allowing us to avoid using + * fixed register numbers. + * + * It also allows newer instructions (not implemented by the assembler) to be + * transparently implemented using assembler macros, instead of needing separate + * cases depending on toolchain support. + * + * Simple usage example: + * __asm__ __volatile__("parse_r __rt, %0\n\t" + * ".insn\n\t" + * "# di %0\n\t" + * ".word (0x41606000 | (__rt << 16))" + * : "=r" (status)); + */ + +/* Match an individual register number and assign to \var */ +#define _IFC_REG(n) \ + ".ifc \\r, $" #n "\n\t" \ + "\\var = " #n "\n\t" \ + ".endif\n\t" + +__asm__(".macro parse_r var r\n\t" + "\\var = -1\n\t" + _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) + _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) + _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) + _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) + _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) + _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) + _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) + _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) + ".iflt \\var\n\t" + ".error \"Unable to parse register name \\r\"\n\t" + ".endif\n\t" + ".endm"); + +#define HWCAP_LOONGSON_CPUCFG (1 << 14) + +static int cpucfg_available(void) +{ + return getauxval(AT_HWCAP) & HWCAP_LOONGSON_CPUCFG; +} + +static int strstart(const char *str, const char *pfx, const char **ptr) +{ + while (*pfx && *pfx == *str) { + pfx++; + str++; + } + if (!*pfx && ptr) + *ptr = str; + return !*pfx;
+} + +/* Most toolchains have no CPUCFG support yet */ +static uint32_t read_cpucfg(uint32_t reg) +{ + uint32_t __res; + + __asm__ __volatile__( + "parse_r __res,%0\n\t" + "parse_r reg,%1\n\t" + ".insn \n\t" + ".word (0xc8080118 | (reg << 21) | (__res << 11))\n\t" + :"=r"(__res) + :"r"(reg) + : + ); + return __res; +} + +#define LOONGSON_CFG1 0x1 + +#define LOONGSON_CFG1_MMI (1 << 4) + +static int cpu_flags_cpucfg(void) +{ + int flags = 0; + uint32_t cfg1 = read_cpucfg(LOONGSON_CFG1); + + if (cfg1 & LOONGSON_CFG1_MMI) + flags = 1; + + return flags; +} + +static int cpu_flags_cpuinfo(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + char buf[200]; + int flags = 0; + + if (!f) + return flags; + + while (fgets(buf, sizeof(buf), f)) { + /* Legacy kernels may omit MMI from ASEs: accept Loongson-3 by model */ + if (strstart(buf, "cpu model", NULL) && + strstr(buf, "Loongson-3 ")) { + flags = 1; + break; + } + if (strstart(buf, "ASEs implemented", NULL)) { + if (strstr(buf, " loongson-mmi")) + flags = 1; + break; + } + } + fclose(f); + return flags; +} + +static int png_have_mmi(void) +{ + /* Prefer CPUCFG when the kernel advertises it, else parse cpuinfo */ + if (cpucfg_available()) + return cpu_flags_cpucfg(); + + return cpu_flags_cpuinfo(); +} diff --git a/mips/filter_mmi_inline_assembly.c b/mips/filter_mmi_inline_assembly.c new file mode 100644 index 000000000..06cb1cc78 --- /dev/null +++ b/mips/filter_mmi_inline_assembly.c @@ -0,0 +1,524 @@ +/* filter_mmi_inline_assembly.c - MMI optimized filter functions + * + * Written by zhanglixia and guxiwei + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_MIPS_MMI_IMPLEMENTATION == 2 /* Inline Assembly */ + +/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
+ * They're positioned like this: + * prev: c b + * row: a d + * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be + * whichever of a, b, or c is closest to p=a+b-c. + */ + +void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + int istop = row_info->rowbytes; + double rp,pp; + __asm__ volatile ( + "1: \n\t" + "ldc1 %[rp], 0x00(%[row]) \n\t" + "ldc1 %[pp], 0x00(%[prev_row]) \n\t" + "paddb %[rp], %[rp], %[pp] \n\t" + "sdc1 %[rp], 0x00(%[row]) \n\t" + + "daddiu %[row], %[row], 0x08 \n\t" + "daddiu %[prev_row], %[prev_row], 0x08 \n\t" + "daddiu %[istop], %[istop], -0x08 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp) + : [row]"r"(row), [prev_row]"r"(prev_row), + [istop]"r"(istop) + : "memory" + ); +} + +void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + int istop = row_info->rowbytes; + double rp, pp, dest; + double eight, sixteen, twenty_four, forty_eight; + double tmp0; + double ftmp[2]; + + __asm__ volatile ( + "li %[tmp0], 0x08 \n\t" + "dmtc1 %[tmp0], %[eight] \n\t" + "li %[tmp0], 0x10 \n\t" + "dmtc1 %[tmp0], %[sixteen] \n\t" + "li %[tmp0], 0x18 \n\t" + "dmtc1 %[tmp0], %[twenty_four] \n\t" + "li %[tmp0], 0x30 \n\t" + "dmtc1 %[tmp0], %[forty_eight] \n\t" + "xor %[dest], %[dest], %[dest] \n\t" + + "1: \n\t" + "gsldrc1 %[rp], 0x00(%[row]) \n\t" + "gsldlc1 %[rp], 0x07(%[row]) \n\t" + "gsldrc1 %[pp], 0x08(%[row]) \n\t" + "gsldlc1 %[pp], 0x0f(%[row]) \n\t" + + "paddb %[ftmp0], %[dest], %[rp] \n\t" + "swc1 %[ftmp0], 0x00(%[row]) \n\t" + + "dsrl %[ftmp1], %[rp], %[twenty_four] \n\t" + "paddb %[dest], %[ftmp1], %[ftmp0] \n\t" + "gsswrc1 %[dest], 0x03(%[row]) \n\t" + "gsswlc1 %[dest], 0x06(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[pp], %[sixteen] \n\t" + "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "paddb %[dest], %[dest], %[ftmp0] \n\t" + "gsswrc1 %[dest], 0x06(%[row]) \n\t" + "gsswlc1 %[dest], 
0x09(%[row]) \n\t" + + "dsrl %[ftmp0], %[pp], %[eight] \n\t" + "paddb %[dest], %[dest], %[ftmp0] \n\t" + "gsswrc1 %[dest], 0x09(%[row]) \n\t" + "daddiu %[row], %[row], 0x0c \n\t" + "daddiu %[istop], %[istop], -0x0c \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [dest]"=&f"(dest), + [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), + [ftmp1]"=&f"(ftmp[1]), [eight]"=&f"(eight), + [sixteen]"=&f"(sixteen), [twenty_four]"=&f"(twenty_four), + [forty_eight]"=&f"(forty_eight) + : [row]"r"(row), [istop]"r"(istop) + : "memory" + ); + + PNG_UNUSED(prev) +} + +void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + /* The Sub filter predicts each pixel as the previous pixel, a. + * There is no pixel to the left of the first pixel. It's encoded directly. + * That works with our main loop if we just say that left pixel was zero. + */ + int istop = row_info->rowbytes; + double rp,pp; + + __asm__ volatile ( + "1: \n\t" + "lwc1 %[pp], 0x00(%[row]) \n\t" + "lwc1 %[rp], 0x04(%[row]) \n\t" + "paddb %[rp], %[rp], %[pp] \n\t" + "swc1 %[rp], 0x04(%[row]) \n\t" + + "daddiu %[row], %[row], 0x04 \n\t" + "daddiu %[istop], %[istop], -0x04 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp) + : [row]"r"(row), [istop]"r"(istop) + : "memory" + ); + + PNG_UNUSED(prev) +} + +void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + int istop = row_info->rowbytes; + double rp, pp, rp1, pp1; + double tmp0; + double ftmp[3]; + double one, dest; + double eight, sixteen, twenty_four, forty_eight; + + __asm__ volatile ( + "li %[tmp0], 0x08 \n\t" + "dmtc1 %[tmp0], %[eight] \n\t" + "li %[tmp0], 0x10 \n\t" + "dmtc1 %[tmp0], %[sixteen] \n\t" + "li %[tmp0], 0x18 \n\t" + "dmtc1 %[tmp0], %[twenty_four] \n\t" + "li %[tmp0], 0x30 \n\t" + "dmtc1 %[tmp0], %[forty_eight] \n\t" + "xor %[dest], %[dest], %[dest] \n\t" + + "li %[tmp0], 0x01 \n\t" + "ins %[tmp0], %[tmp0], 8, 8 \n\t" + "dmtc1 %[tmp0], 
%[one] \n\t" + "pshufh %[one], %[one], %[dest] \n\t" + + "1: \n\t" + "gsldrc1 %[rp], 0x00(%[row]) \n\t" + "gsldlc1 %[rp], 0x07(%[row]) \n\t" + "gsldrc1 %[pp], 0x00(%[prev]) \n\t" + "gsldlc1 %[pp], 0x07(%[prev]) \n\t" + "gsldrc1 %[rp1], 0x08(%[row]) \n\t" + "gsldlc1 %[rp1], 0x0f(%[row]) \n\t" + "gsldrc1 %[pp1], 0x08(%[prev]) \n\t" + "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t" + + "xor %[ftmp0], %[pp], %[dest] \n\t" + "pavgb %[ftmp1], %[pp], %[dest] \n\t" + "and %[ftmp0], %[ftmp0], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "paddb %[dest], %[rp], %[ftmp1] \n\t" + "swc1 %[dest], 0x00(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[twenty_four] \n\t" + "dsrl %[ftmp1], %[pp], %[twenty_four] \n\t" + + "xor %[ftmp2], %[ftmp1], %[dest] \n\t" + "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t" + "and %[ftmp2], %[ftmp2], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddb %[dest], %[ftmp0], %[ftmp1] \n\t" + "gsswrc1 %[dest], 0x03(%[row]) \n\t" + "gsswlc1 %[dest], 0x06(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[rp1], %[sixteen] \n\t" + "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[pp1], %[sixteen] \n\t" + "or %[ftmp1], %[ftmp2], %[ftmp1] \n\t" + + "xor %[ftmp2], %[ftmp1], %[dest] \n\t" + "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t" + "and %[ftmp2], %[ftmp2], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddb %[dest], %[ftmp0], %[ftmp1] \n\t" + "gsswrc1 %[dest], 0x06(%[row]) \n\t" + "gsswlc1 %[dest], 0x09(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp1], %[eight] \n\t" + "dsrl %[ftmp1], %[pp1], %[eight] \n\t" + + "xor %[ftmp2], %[ftmp1], %[dest] \n\t" + "pavgb %[ftmp1], %[ftmp1], %[dest] \n\t" + "and %[ftmp2], %[ftmp2], %[one] \n\t" + "psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddb %[dest], %[ftmp0], %[ftmp1] \n\t" + "gsswrc1 %[dest], 0x09(%[row]) \n\t" + "daddiu %[row], %[row], 0x0c \n\t" + "daddiu %[prev], %[prev], 0x0c \n\t" + "daddiu %[istop], %[istop], -0x0c 
\n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), + [pp1]"=&f"(pp1), [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), + [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [one]"=&f"(one), + [dest]"=&f"(dest), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen), + [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight) + : [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop) + : "memory" + ); +} + +void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + int istop = row_info->rowbytes; + double rp,pp; + double dest; + double ftmp[2]; + double tmp; + + __asm__ volatile ( + "xor %[dest], %[dest], %[dest] \n\t" + "li %[tmp], 0x01 \n\t" + "ins %[tmp], %[tmp], 8, 8 \n\t" + "dmtc1 %[tmp], %[ftmp1] \n\t" + "pshufh %[ftmp1], %[ftmp1], %[dest] \n\t" + + "1: \n\t" + "lwc1 %[rp], 0x00(%[row]) \n\t" + "lwc1 %[pp], 0x00(%[prev]) \n\t" + "xor %[ftmp0], %[pp], %[dest] \n\t" + "pavgb %[pp], %[pp], %[dest] \n\t" + "and %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "psubb %[pp], %[pp], %[ftmp0] \n\t" + "paddb %[dest], %[rp], %[pp] \n\t" + "swc1 %[dest], 0x00(%[row]) \n\t" + "daddiu %[row], %[row], 0x04 \n\t" + "daddiu %[prev], %[prev], 0x04 \n\t" + "daddiu %[istop], %[istop], -0x04 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [ftmp0]"=&f"(ftmp[0]), + [ftmp1]"=&f"(ftmp[1]), [dest]"=&f"(dest), [tmp]"=&r"(tmp) + : [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop) + : "memory" + ); +} + +void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + /* Paeth tries to predict pixel d using the pixel to the left of it, a, + * and two pixels from the previous row, b and c: + * prev: c b + * row: a d + * The Paeth function predicts d to be whichever of a, b, or c is nearest to + * p=a+b-c. + * + * The first pixel has no left context, and so uses an Up filter, p = b. + * This works naturally with our main loop's p = a+b-c if we force a and c + * to zero. 
+ * Here we zero b and d, which become c and a respectively at the start of + * the loop. + */ + int istop = row_info->rowbytes; + double rp, pp, rp1, pp1, zero; + double a, b, c, d, pa, pb, pc; + double tmp0; + double ftmp[3]; + double eight, sixteen, twenty_four, forty_eight; + + __asm__ volatile ( + "xor %[a], %[a], %[a] \n\t" + "xor %[c], %[c], %[c] \n\t" + "xor %[zero], %[zero], %[zero] \n\t" + "li %[tmp0], 0x08 \n\t" + "dmtc1 %[tmp0], %[eight] \n\t" + "li %[tmp0], 0x10 \n\t" + "dmtc1 %[tmp0], %[sixteen] \n\t" + "li %[tmp0], 0x18 \n\t" + "dmtc1 %[tmp0], %[twenty_four] \n\t" + "li %[tmp0], 0x30 \n\t" + "dmtc1 %[tmp0], %[forty_eight] \n\t" + + "1: \n\t" + "gsldrc1 %[rp], 0x00(%[row]) \n\t" + "gsldlc1 %[rp], 0x07(%[row]) \n\t" + "gsldrc1 %[pp], 0x00(%[prev]) \n\t" + "gsldlc1 %[pp], 0x07(%[prev]) \n\t" + "gsldrc1 %[rp1], 0x08(%[row]) \n\t" + "gsldlc1 %[rp1], 0x0f(%[row]) \n\t" + "gsldrc1 %[pp1], 0x08(%[prev]) \n\t" + "gsldlc1 %[pp1], 0x0f(%[prev]) \n\t" + + "punpcklbh %[b], %[pp], %[zero] \n\t" + "punpcklbh %[d], %[rp], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[pp], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[pp], %[zero] \n\t" + "swc1 %[d], 0x00(%[row]) \n\t" + + 
"dsrl %[ftmp0], %[rp], %[twenty_four] \n\t" + "dsrl %[ftmp2], %[pp], %[twenty_four] \n\t" + + "punpcklbh %[b], %[ftmp2], %[zero] \n\t" + "punpcklbh %[d], %[ftmp0], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[ftmp2], %[zero] \n\t" + "gsswrc1 %[d], 0x03(%[row]) \n\t" + "gsswlc1 %[d], 0x06(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[rp1], %[sixteen] \n\t" + "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" + "dsrl %[ftmp2], %[pp], %[forty_eight] \n\t" + "dsll %[ftmp1], %[pp1], %[sixteen] \n\t" + "or %[ftmp2], %[ftmp2], %[ftmp1] \n\t" + + "punpcklbh %[b], %[ftmp2], %[zero] \n\t" + "punpcklbh %[d], %[ftmp0], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + 
"pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[ftmp2], %[zero] \n\t" + "gsswrc1 %[d], 0x06(%[row]) \n\t" + "gsswlc1 %[d], 0x09(%[row]) \n\t" + + "dsrl %[ftmp0], %[rp1], %[eight] \n\t" + "dsrl %[ftmp2], %[pp1], %[eight] \n\t" + + "punpcklbh %[b], %[ftmp2], %[zero] \n\t" + "punpcklbh %[d], %[ftmp0], %[zero] \n\t" + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[ftmp2], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "punpcklbh %[c], %[ftmp2], %[zero] \n\t" + "gsswrc1 %[d], 0x09(%[row]) \n\t" + + "daddiu %[row], %[row], 0x0c \n\t" + "daddiu %[prev], %[prev], 0x0c \n\t" + "daddiu %[istop], %[istop], -0x0c \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), [pp1]"=&f"(pp1), + [zero]"=&f"(zero), [a]"=&f"(a),[b]"=&f"(b), [c]"=&f"(c), + [d]"=&f"(d), [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc), + [tmp0]"=&r"(tmp0), 
[ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [ftmp2]"=&f"(ftmp[2]), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen), + [twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight) + : [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop) + : "memory" + ); +} + +void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row, + png_const_bytep prev) +{ + /* Paeth tries to predict pixel d using the pixel to the left of it, a, + * and two pixels from the previous row, b and c: + * prev: c b + * row: a d + * The Paeth function predicts d to be whichever of a, b, or c is nearest to + * p=a+b-c. + * + * The first pixel has no left context, and so uses an Up filter, p = b. + * This works naturally with our main loop's p = a+b-c if we force a and c + * to zero. + * Here we zero b and d, which become c and a respectively at the start of + * the loop. + */ + int istop = row_info->rowbytes; + double rp, pp, zero; + double a, b, c, d, pa, pb, pc; + double ftmp[2]; + + __asm__ volatile ( + "xor %[a], %[a], %[a] \n\t" + "xor %[c], %[c], %[c] \n\t" + "xor %[zero], %[zero], %[zero] \n\t" + + "1: \n\t" + "lwc1 %[rp], 0x00(%[row]) \n\t" + "lwc1 %[pp], 0x00(%[prev]) \n\t" + "punpcklbh %[b], %[pp], %[zero] \n\t" + "punpcklbh %[d], %[rp], %[zero] \n\t" + + "packushb %[ftmp0], %[c], %[c] \n\t" + "packushb %[ftmp1], %[a], %[a] \n\t" + "pasubub %[pa], %[pp], %[ftmp0] \n\t" + "pasubub %[pb], %[ftmp1], %[ftmp0] \n\t" + "psubh %[ftmp0], %[b], %[c] \n\t" + "psubh %[ftmp1], %[a], %[c] \n\t" + "paddh %[pc], %[ftmp0], %[ftmp1] \n\t" + "pcmpgth %[ftmp0], %[zero], %[pc] \n\t" + "xor %[pc], %[pc], %[ftmp0] \n\t" + "psubh %[pc], %[pc], %[ftmp0] \n\t" + + "punpcklbh %[pa], %[pa], %[zero] \n\t" + "punpcklbh %[pb], %[pb], %[zero] \n\t" + + "pcmpgth %[ftmp0], %[pa], %[pb] \n\t" + "and %[ftmp1], %[b], %[ftmp0] \n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "pminsh %[pa], %[pa], %[pb] \n\t" + + "pcmpgth %[ftmp0], %[pa], %[pc] \n\t" + "and %[ftmp1], %[c], %[ftmp0] 
\n\t" + "pandn %[a], %[ftmp0], %[a] \n\t" + "or %[a], %[a], %[ftmp1] \n\t" + "paddb %[a], %[a], %[d] \n\t" + "packushb %[d], %[a], %[a] \n\t" + "swc1 %[d], 0x00(%[row]) \n\t" + "punpcklbh %[c], %[pp], %[zero] \n\t" + "daddiu %[row], %[row], 0x04 \n\t" + "daddiu %[prev], %[prev], 0x04 \n\t" + "daddiu %[istop], %[istop], -0x04 \n\t" + "bgtz %[istop], 1b \n\t" + : [rp]"=&f"(rp), [pp]"=&f"(pp), [zero]"=&f"(zero), + [a]"=&f"(a), [b]"=&f"(b), [c]"=&f"(c), [d]"=&f"(d), + [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc), + [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]) + : [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop) + : "memory" + ); +} + +#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */ +#endif /* READ */ diff --git a/mips/mips_init.c b/mips/mips_init.c index 8dd283dee..20a9fa8f5 100644 --- a/mips/mips_init.c +++ b/mips/mips_init.c @@ -4,6 +4,7 @@ * Copyright (c) 2018 Cosmin Truta * Copyright (c) 2016 Glenn Randers-Pehrson * Written by Mandar Sahastrabuddhe, 2016. + * Update by guxiwei, 2023. * * This code is released under the libpng license. * For conditions of distribution and use, see the disclaimer @@ -20,8 +21,9 @@ #ifdef PNG_READ_SUPPORTED -#if PNG_MIPS_MSA_OPT > 0 -#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */ +#if PNG_MIPS_MSA_OPT > 0 || PNG_MIPS_MMI_IMPLEMENTATION > 0 + +#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do MIPS MSA run-time checks */ /* WARNING: it is strongly recommended that you do not build libpng with * run-time checks for CPU features if at all possible. 
In the case of the MIPS * MSA instructions there is no processor-specific way of detecting the @@ -51,13 +53,83 @@ static int png_have_msa(png_structp png_ptr); #endif /* PNG_MIPS_MSA_FILE */ #endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */ +#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED /* Do MIPS MMI run-time checks */ +#ifndef PNG_MIPS_MMI_FILE +# ifdef __linux__ +# define PNG_MIPS_MMI_FILE "contrib/mips-mmi/linux.c" +# endif +#endif + +#ifdef PNG_MIPS_MMI_FILE + +#include <signal.h> /* for sig_atomic_t */ +static int png_have_mmi(void); +#include PNG_MIPS_MMI_FILE + +#else /* PNG_MIPS_MMI_FILE */ +# error "PNG_MIPS_MMI_FILE undefined: no support for run-time MIPS MMI checks" +#endif /* PNG_MIPS_MMI_FILE */ +#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED*/ + #ifndef PNG_ALIGNED_MEMORY_SUPPORTED # error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED" #endif +/* MIPS supports two optimizations: MMI and MSA. The appropriate + * optimization is chosen at runtime + */ void -png_init_filter_functions_msa(png_structp pp, unsigned int bpp) +png_init_filter_functions_mips(png_structp pp, unsigned int bpp) { +#if PNG_MIPS_MMI_IMPLEMENTATION > 0 +#ifdef PNG_MIPS_MMI_API_SUPPORTED + switch ((pp->options >> PNG_MIPS_MMI) & 3) + { + case PNG_OPTION_UNSET: +#endif /* PNG_MIPS_MMI_API_SUPPORTED */ +#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED + { + static volatile sig_atomic_t no_mmi = -1; /* not checked */ + + if (no_mmi < 0) + no_mmi = !png_have_mmi(); + + if (no_mmi) + goto MIPS_MSA_INIT; + } +#ifdef PNG_MIPS_MMI_API_SUPPORTED + break; +#endif +#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */ + +#ifdef PNG_MIPS_MMI_API_SUPPORTED + default: /* OFF or INVALID */ + goto MIPS_MSA_INIT; + + case PNG_OPTION_ON: + /* Option turned on */ + break; + } +#endif + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi; + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_mmi; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_mmi; +
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_mmi; + } + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_mmi; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_mmi; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_mmi; + } +#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */ + +MIPS_MSA_INIT: +#if PNG_MIPS_MSA_OPT > 0 /* The switch statement is compiled in for MIPS_MSA_API, the call to * png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined * the check is only performed if the API has not set the MSA option on @@ -125,6 +197,8 @@ png_init_filter_functions_msa(png_structp pp, unsigned int bpp) pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa; pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa; } -} #endif /* PNG_MIPS_MSA_OPT > 0 */ + return; +} +#endif /* PNG_MIPS_MSA_OPT > 0 || PNG_MIPS_MMI_IMPLEMENTATION > 0 */ #endif /* READ */ diff --git a/png.h b/png.h index eaee5a372..457c932e5 100644 --- a/png.h +++ b/png.h @@ -3208,7 +3208,11 @@ PNG_EXPORT(245, int, png_image_write_to_memory, (png_imagep image, void *memory, #ifdef PNG_POWERPC_VSX_API_SUPPORTED # define PNG_POWERPC_VSX 10 /* HARDWARE: PowerPC VSX SIMD instructions supported */ #endif -#define PNG_OPTION_NEXT 12 /* Next option - numbers must be even */ +#ifdef PNG_MIPS_MMI_API_SUPPORTED +# define PNG_MIPS_MMI 12 /* HARDWARE: MIPS MMI SIMD instructions supported */ +#endif + +#define PNG_OPTION_NEXT 14 /* Next option - numbers must be even */ /* Return values: NOTE: there are four values and 'off' is *not* zero */ #define PNG_OPTION_UNSET 0 /* Unset - defaults to off */ diff --git a/pngpriv.h b/pngpriv.h index cdbc6c342..fdf0f35c3 100644 --- a/pngpriv.h +++ b/pngpriv.h @@ -197,6 +197,16 @@ # endif #endif +#ifndef PNG_MIPS_MMI_OPT +# ifdef PNG_MIPS_MMI +# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) && 
defined(PNG_ALIGNED_MEMORY_SUPPORTED) +# define PNG_MIPS_MMI_OPT 1 +# else +# define PNG_MIPS_MMI_OPT 0 +# endif +# endif +#endif + #ifndef PNG_POWERPC_VSX_OPT # if defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) # define PNG_POWERPC_VSX_OPT 2 @@ -248,7 +258,7 @@ #endif #if PNG_MIPS_MSA_OPT > 0 -# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_msa +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips # ifndef PNG_MIPS_MSA_IMPLEMENTATION # if defined(__mips_msa) # if defined(__clang__) @@ -269,6 +279,22 @@ # define PNG_MIPS_MSA_IMPLEMENTATION 0 #endif /* PNG_MIPS_MSA_OPT > 0 */ +#if PNG_MIPS_MMI_OPT > 0 +# ifndef PNG_MIPS_MMI_IMPLEMENTATION +# if defined(__mips_loongson_mmi) && (_MIPS_SIM == _ABI64) +# define PNG_MIPS_MMI_IMPLEMENTATION 2 +# else /* !defined __mips_loongson_mmi || _MIPS_SIM != _ABI64 */ +# define PNG_MIPS_MMI_IMPLEMENTATION 0 +# endif /* __mips_loongson_mmi && _MIPS_SIM == _ABI64 */ +# endif /* !PNG_MIPS_MMI_IMPLEMENTATION */ + +# if PNG_MIPS_MMI_IMPLEMENTATION > 0 +# define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_mips +# endif +#else /* MMI disabled: no MMI implementation is compiled in */ +# define PNG_MIPS_MMI_IMPLEMENTATION 0 +#endif /* PNG_MIPS_MMI_OPT > 0 */ + #if PNG_POWERPC_VSX_OPT > 0 # define PNG_FILTER_OPTIMIZATIONS png_init_filter_functions_vsx # define PNG_POWERPC_VSX_IMPLEMENTATION 1 @@ -1329,6 +1355,23 @@ PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_msa,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); #endif +#if PNG_MIPS_MMI_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_mmi,(png_row_infop row_info, + png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub3_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_sub4_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY);
+PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg3_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_avg4_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth3_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +PNG_INTERNAL_FUNCTION(void,png_read_filter_row_paeth4_mmi,(png_row_infop + row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); +#endif + #if PNG_POWERPC_VSX_OPT > 0 PNG_INTERNAL_FUNCTION(void,png_read_filter_row_up_vsx,(png_row_infop row_info, png_bytep row, png_const_bytep prev_row),PNG_EMPTY); @@ -2118,10 +2161,15 @@ PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_neon, #endif #if PNG_MIPS_MSA_OPT > 0 -PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_msa, +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); #endif +# if PNG_MIPS_MMI_IMPLEMENTATION > 0 +PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_mips, + (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); +# endif + # if PNG_INTEL_SSE_IMPLEMENTATION > 0 PNG_INTERNAL_FUNCTION(void, png_init_filter_functions_sse2, (png_structp png_ptr, unsigned int bpp), PNG_EMPTY); diff --git a/scripts/pnglibconf.dfa b/scripts/pnglibconf.dfa index e15a23cec..f4d14a252 100644 --- a/scripts/pnglibconf.dfa +++ b/scripts/pnglibconf.dfa @@ -256,6 +256,55 @@ option POWERPC_VSX_API disabled enables SET_OPTION, option POWERPC_VSX_CHECK disabled, sets POWERPC_VSX_OPT 1 +# These options are specific to the MIPS MSA hardware optimizations. +# +# MIPS_MSA_OPT: unset: check at compile time (__mips_msa must be defined by +# the compiler, typically as a result of specifying +# "-mmsa -mfp64" compiler flags) +# 0: disable (even if the CPU supports MSA.) 
+# 1: check at run time (via MIPS_MSA_{API,CHECK}) +# 2: switch on unconditionally (inadvisable - instead pass +# -mmsa -mfp64 to compiler options) +# When building libpng avoid using any setting other than '0'; '1' is +# set automatically when either 'API' or 'CHECK' are configured in, +# '2' should not be necessary as "-mmsa -mfp64" will achieve the same +# effect as well as applying MSA optimizations to the rest of the +# libpng code. +# NOTE: any setting other than '0' requires ALIGNED_MEMORY +# MIPS_MSA_API: (PNG_MIPS_MSA == 1) allow the optimization to be switched on +# with png_set_option. +# MIPS_MSA_CHECK: (PNG_MIPS_MSA == 1) compile a run-time check to see if MSA +# extensions are supported. +setting MIPS_MSA_OPT +option MIPS_MSA_API disabled requires ALIGNED_MEMORY enables SET_OPTION, + sets MIPS_MSA_OPT 1 +option MIPS_MSA_CHECK disabled requires ALIGNED_MEMORY, + sets MIPS_MSA_OPT 1 + +# These options are specific to the MIPS MMI hardware optimizations. +# +# MIPS_MMI_OPT: unset: check at compile time (__mips_loongson_mmi must be defined by +# the compiler, typically as a result of specifying +# "-mloongson-mmi -march=loongson3a" compiler flags) +# 0: disable (even if the CPU supports MMI.) +# 1: check at run time (via MIPS_MMI_{API,CHECK}) +# 2: switch on unconditionally (inadvisable - instead pass +# -mloongson-mmi -march=loongson3a to compiler options) +# When building libpng avoid using any setting other than '0'; '1' is +# set automatically when either 'API' or 'CHECK' are configured in, +# '2' should not be necessary as "-mloongson-mmi -march=loongson3a" will achieve the same +# effect as well as applying MMI optimizations to the rest of the +# libpng code. +# MIPS_MMI_API: (PNG_MIPS_MMI == 1) allow the optimization to be switched on +# with png_set_option +# MIPS_MMI_CHECK: (PNG_MIPS_MMI == 1) compile a run-time check to see if MMI +# extensions are supported. 
+setting MIPS_MMI_OPT +option MIPS_MMI_API disabled requires ALIGNED_MEMORY enables SET_OPTION, + sets MIPS_MMI_OPT 1 +option MIPS_MMI_CHECK disabled requires ALIGNED_MEMORY, + sets MIPS_MMI_OPT 1 + # These settings configure the default compression level (0-9) and 'strategy'; # strategy is as defined by the implementors of zlib. It describes the input