mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
Optimize png16 with loongson mmi for 64-bit os
This commit is contained in:
524
mips/filter_mmi_inline_assembly.c
Normal file
524
mips/filter_mmi_inline_assembly.c
Normal file
@@ -0,0 +1,524 @@
|
||||
/* filter_mmi_intrinsics.c - MMI optimized filter functions
|
||||
*
|
||||
* Written by zhanglixia and guxiwei
|
||||
*
|
||||
* This code is released under the libpng license.
|
||||
* For conditions of distribution and use, see the disclaimer
|
||||
* and license in png.h
|
||||
*/
|
||||
|
||||
#include "../pngpriv.h"
|
||||
|
||||
#ifdef PNG_READ_SUPPORTED
|
||||
|
||||
#if PNG_MIPS_MMI_IMPLEMENTATION == 2 /* Inline Assembly */
|
||||
|
||||
/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d).
|
||||
* They're positioned like this:
|
||||
* prev: c b
|
||||
* row: a d
|
||||
* The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be
|
||||
* whichever of a, b, or c is closest to p=a+b-c.
|
||||
*/
|
||||
|
||||
void png_read_filter_row_up_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev_row)
|
||||
{
|
||||
int istop = row_info->rowbytes;
|
||||
double rp,pp;
|
||||
__asm__ volatile (
|
||||
"1: \n\t"
|
||||
"ldc1 %[rp], 0x00(%[row]) \n\t"
|
||||
"ldc1 %[pp], 0x00(%[prev_row]) \n\t"
|
||||
"paddb %[rp], %[rp], %[pp] \n\t"
|
||||
"sdc1 %[rp], 0x00(%[row]) \n\t"
|
||||
|
||||
"daddiu %[row], %[row], 0x08 \n\t"
|
||||
"daddiu %[prev_row], %[prev_row], 0x08 \n\t"
|
||||
"daddiu %[istop], %[istop], -0x08 \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp)
|
||||
: [row]"r"(row), [prev_row]"r"(prev_row),
|
||||
[istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void png_read_filter_row_sub3_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev)
|
||||
{
|
||||
int istop = row_info->rowbytes;
|
||||
double rp, pp, dest;
|
||||
double eight, sixteen, twenty_four, forty_eight;
|
||||
double tmp0;
|
||||
double ftmp[2];
|
||||
|
||||
__asm__ volatile (
|
||||
"li %[tmp0], 0x08 \n\t"
|
||||
"dmtc1 %[tmp0], %[eight] \n\t"
|
||||
"li %[tmp0], 0x10 \n\t"
|
||||
"dmtc1 %[tmp0], %[sixteen] \n\t"
|
||||
"li %[tmp0], 0x18 \n\t"
|
||||
"dmtc1 %[tmp0], %[twenty_four] \n\t"
|
||||
"li %[tmp0], 0x30 \n\t"
|
||||
"dmtc1 %[tmp0], %[forty_eight] \n\t"
|
||||
"xor %[dest], %[dest], %[dest] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"gsldrc1 %[rp], 0x00(%[row]) \n\t"
|
||||
"gsldlc1 %[rp], 0x07(%[row]) \n\t"
|
||||
"gsldrc1 %[pp], 0x08(%[row]) \n\t"
|
||||
"gsldlc1 %[pp], 0x0f(%[row]) \n\t"
|
||||
|
||||
"paddb %[ftmp0], %[dest], %[rp] \n\t"
|
||||
"swc1 %[ftmp0], 0x00(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp1], %[rp], %[twenty_four] \n\t"
|
||||
"paddb %[dest], %[ftmp1], %[ftmp0] \n\t"
|
||||
"gsswrc1 %[dest], 0x03(%[row]) \n\t"
|
||||
"gsswlc1 %[dest], 0x06(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
|
||||
"dsll %[ftmp1], %[pp], %[sixteen] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
"paddb %[dest], %[dest], %[ftmp0] \n\t"
|
||||
"gsswrc1 %[dest], 0x06(%[row]) \n\t"
|
||||
"gsswlc1 %[dest], 0x09(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[pp], %[eight] \n\t"
|
||||
"paddb %[dest], %[dest], %[ftmp0] \n\t"
|
||||
"gsswrc1 %[dest], 0x09(%[row]) \n\t"
|
||||
"daddiu %[row], %[row], 0x0c \n\t"
|
||||
"daddiu %[istop], %[istop], -0x0c \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp), [dest]"=&f"(dest),
|
||||
[tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]),
|
||||
[ftmp1]"=&f"(ftmp[1]), [eight]"=&f"(eight),
|
||||
[sixteen]"=&f"(sixteen), [twenty_four]"=&f"(twenty_four),
|
||||
[forty_eight]"=&f"(forty_eight)
|
||||
: [row]"r"(row), [istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
PNG_UNUSED(prev)
|
||||
}
|
||||
|
||||
void png_read_filter_row_sub4_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev)
|
||||
{
|
||||
/* The Sub filter predicts each pixel as the previous pixel, a.
|
||||
* There is no pixel to the left of the first pixel. It's encoded directly.
|
||||
* That works with our main loop if we just say that left pixel was zero.
|
||||
*/
|
||||
int istop = row_info->rowbytes;
|
||||
double rp,pp;
|
||||
|
||||
__asm__ volatile (
|
||||
"1: \n\t"
|
||||
"lwc1 %[pp], 0x00(%[row]) \n\t"
|
||||
"lwc1 %[rp], 0x04(%[row]) \n\t"
|
||||
"paddb %[rp], %[rp], %[pp] \n\t"
|
||||
"swc1 %[rp], 0x04(%[row]) \n\t"
|
||||
|
||||
"daddiu %[row], %[row], 0x04 \n\t"
|
||||
"daddiu %[istop], %[istop], -0x04 \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp)
|
||||
: [row]"r"(row), [istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
PNG_UNUSED(prev)
|
||||
}
|
||||
|
||||
void png_read_filter_row_avg3_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev)
|
||||
{
|
||||
int istop = row_info->rowbytes;
|
||||
double rp, pp, rp1, pp1;
|
||||
double tmp0;
|
||||
double ftmp[3];
|
||||
double one, dest;
|
||||
double eight, sixteen, twenty_four, forty_eight;
|
||||
|
||||
__asm__ volatile (
|
||||
"li %[tmp0], 0x08 \n\t"
|
||||
"dmtc1 %[tmp0], %[eight] \n\t"
|
||||
"li %[tmp0], 0x10 \n\t"
|
||||
"dmtc1 %[tmp0], %[sixteen] \n\t"
|
||||
"li %[tmp0], 0x18 \n\t"
|
||||
"dmtc1 %[tmp0], %[twenty_four] \n\t"
|
||||
"li %[tmp0], 0x30 \n\t"
|
||||
"dmtc1 %[tmp0], %[forty_eight] \n\t"
|
||||
"xor %[dest], %[dest], %[dest] \n\t"
|
||||
|
||||
"li %[tmp0], 0x01 \n\t"
|
||||
"ins %[tmp0], %[tmp0], 8, 8 \n\t"
|
||||
"dmtc1 %[tmp0], %[one] \n\t"
|
||||
"pshufh %[one], %[one], %[dest] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"gsldrc1 %[rp], 0x00(%[row]) \n\t"
|
||||
"gsldlc1 %[rp], 0x07(%[row]) \n\t"
|
||||
"gsldrc1 %[pp], 0x00(%[prev]) \n\t"
|
||||
"gsldlc1 %[pp], 0x07(%[prev]) \n\t"
|
||||
"gsldrc1 %[rp1], 0x08(%[row]) \n\t"
|
||||
"gsldlc1 %[rp1], 0x0f(%[row]) \n\t"
|
||||
"gsldrc1 %[pp1], 0x08(%[prev]) \n\t"
|
||||
"gsldlc1 %[pp1], 0x0f(%[prev]) \n\t"
|
||||
|
||||
"xor %[ftmp0], %[pp], %[dest] \n\t"
|
||||
"pavgb %[ftmp1], %[pp], %[dest] \n\t"
|
||||
"and %[ftmp0], %[ftmp0], %[one] \n\t"
|
||||
"psubb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
|
||||
"paddb %[dest], %[rp], %[ftmp1] \n\t"
|
||||
"swc1 %[dest], 0x00(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp], %[twenty_four] \n\t"
|
||||
"dsrl %[ftmp1], %[pp], %[twenty_four] \n\t"
|
||||
|
||||
"xor %[ftmp2], %[ftmp1], %[dest] \n\t"
|
||||
"pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
|
||||
"and %[ftmp2], %[ftmp2], %[one] \n\t"
|
||||
"psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
"paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
|
||||
"gsswrc1 %[dest], 0x03(%[row]) \n\t"
|
||||
"gsswlc1 %[dest], 0x06(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
|
||||
"dsll %[ftmp1], %[rp1], %[sixteen] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
"dsrl %[ftmp2], %[pp], %[forty_eight] \n\t"
|
||||
"dsll %[ftmp1], %[pp1], %[sixteen] \n\t"
|
||||
"or %[ftmp1], %[ftmp2], %[ftmp1] \n\t"
|
||||
|
||||
"xor %[ftmp2], %[ftmp1], %[dest] \n\t"
|
||||
"pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
|
||||
"and %[ftmp2], %[ftmp2], %[one] \n\t"
|
||||
"psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
"paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
|
||||
"gsswrc1 %[dest], 0x06(%[row]) \n\t"
|
||||
"gsswlc1 %[dest], 0x09(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp1], %[eight] \n\t"
|
||||
"dsrl %[ftmp1], %[pp1], %[eight] \n\t"
|
||||
|
||||
"xor %[ftmp2], %[ftmp1], %[dest] \n\t"
|
||||
"pavgb %[ftmp1], %[ftmp1], %[dest] \n\t"
|
||||
"and %[ftmp2], %[ftmp2], %[one] \n\t"
|
||||
"psubb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
|
||||
"paddb %[dest], %[ftmp0], %[ftmp1] \n\t"
|
||||
"gsswrc1 %[dest], 0x09(%[row]) \n\t"
|
||||
"daddiu %[row], %[row], 0x0c \n\t"
|
||||
"daddiu %[prev], %[prev], 0x0c \n\t"
|
||||
"daddiu %[istop], %[istop], -0x0c \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1),
|
||||
[pp1]"=&f"(pp1), [tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]),
|
||||
[ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [one]"=&f"(one),
|
||||
[dest]"=&f"(dest), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen),
|
||||
[twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight)
|
||||
: [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void png_read_filter_row_avg4_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev)
|
||||
{
|
||||
int istop = row_info->rowbytes;
|
||||
double rp,pp;
|
||||
double dest;
|
||||
double ftmp[2];
|
||||
double tmp;
|
||||
|
||||
__asm__ volatile (
|
||||
"xor %[dest], %[dest], %[dest] \n\t"
|
||||
"li %[tmp], 0x01 \n\t"
|
||||
"ins %[tmp], %[tmp], 8, 8 \n\t"
|
||||
"dmtc1 %[tmp], %[ftmp1] \n\t"
|
||||
"pshufh %[ftmp1], %[ftmp1], %[dest] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"lwc1 %[rp], 0x00(%[row]) \n\t"
|
||||
"lwc1 %[pp], 0x00(%[prev]) \n\t"
|
||||
"xor %[ftmp0], %[pp], %[dest] \n\t"
|
||||
"pavgb %[pp], %[pp], %[dest] \n\t"
|
||||
"and %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
"psubb %[pp], %[pp], %[ftmp0] \n\t"
|
||||
"paddb %[dest], %[rp], %[pp] \n\t"
|
||||
"swc1 %[dest], 0x00(%[row]) \n\t"
|
||||
"daddiu %[row], %[row], 0x04 \n\t"
|
||||
"daddiu %[prev], %[prev], 0x04 \n\t"
|
||||
"daddiu %[istop], %[istop], -0x04 \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp), [ftmp0]"=&f"(ftmp[0]),
|
||||
[ftmp1]"=&f"(ftmp[1]), [dest]"=&f"(dest), [tmp]"=&r"(tmp)
|
||||
: [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void png_read_filter_row_paeth3_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev)
|
||||
{
|
||||
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
|
||||
* and two pixels from the previous row, b and c:
|
||||
* prev: c b
|
||||
* row: a d
|
||||
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
|
||||
* p=a+b-c.
|
||||
*
|
||||
* The first pixel has no left context, and so uses an Up filter, p = b.
|
||||
* This works naturally with our main loop's p = a+b-c if we force a and c
|
||||
* to zero.
|
||||
* Here we zero b and d, which become c and a respectively at the start of
|
||||
* the loop.
|
||||
*/
|
||||
int istop = row_info->rowbytes;
|
||||
double rp, pp, rp1, pp1, zero;
|
||||
double a, b, c, d, pa, pb, pc;
|
||||
double tmp0;
|
||||
double ftmp[3];
|
||||
double eight, sixteen, twenty_four, forty_eight;
|
||||
|
||||
__asm__ volatile (
|
||||
"xor %[a], %[a], %[a] \n\t"
|
||||
"xor %[c], %[c], %[c] \n\t"
|
||||
"xor %[zero], %[zero], %[zero] \n\t"
|
||||
"li %[tmp0], 0x08 \n\t"
|
||||
"dmtc1 %[tmp0], %[eight] \n\t"
|
||||
"li %[tmp0], 0x10 \n\t"
|
||||
"dmtc1 %[tmp0], %[sixteen] \n\t"
|
||||
"li %[tmp0], 0x18 \n\t"
|
||||
"dmtc1 %[tmp0], %[twenty_four] \n\t"
|
||||
"li %[tmp0], 0x30 \n\t"
|
||||
"dmtc1 %[tmp0], %[forty_eight] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"gsldrc1 %[rp], 0x00(%[row]) \n\t"
|
||||
"gsldlc1 %[rp], 0x07(%[row]) \n\t"
|
||||
"gsldrc1 %[pp], 0x00(%[prev]) \n\t"
|
||||
"gsldlc1 %[pp], 0x07(%[prev]) \n\t"
|
||||
"gsldrc1 %[rp1], 0x08(%[row]) \n\t"
|
||||
"gsldlc1 %[rp1], 0x0f(%[row]) \n\t"
|
||||
"gsldrc1 %[pp1], 0x08(%[prev]) \n\t"
|
||||
"gsldlc1 %[pp1], 0x0f(%[prev]) \n\t"
|
||||
|
||||
"punpcklbh %[b], %[pp], %[zero] \n\t"
|
||||
"punpcklbh %[d], %[rp], %[zero] \n\t"
|
||||
"packushb %[ftmp0], %[c], %[c] \n\t"
|
||||
"packushb %[ftmp1], %[a], %[a] \n\t"
|
||||
"pasubub %[pa], %[pp], %[ftmp0] \n\t"
|
||||
"pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
|
||||
"psubh %[ftmp0], %[b], %[c] \n\t"
|
||||
"psubh %[ftmp1], %[a], %[c] \n\t"
|
||||
"paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
|
||||
"pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
|
||||
"xor %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"psubh %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"punpcklbh %[pa], %[pa], %[zero] \n\t"
|
||||
"punpcklbh %[pb], %[pb], %[zero] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
|
||||
"and %[ftmp1], %[b], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"pminsh %[pa], %[pa], %[pb] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
|
||||
"and %[ftmp1], %[c], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"paddb %[a], %[a], %[d] \n\t"
|
||||
"packushb %[d], %[a], %[a] \n\t"
|
||||
"punpcklbh %[c], %[pp], %[zero] \n\t"
|
||||
"swc1 %[d], 0x00(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp], %[twenty_four] \n\t"
|
||||
"dsrl %[ftmp2], %[pp], %[twenty_four] \n\t"
|
||||
|
||||
"punpcklbh %[b], %[ftmp2], %[zero] \n\t"
|
||||
"punpcklbh %[d], %[ftmp0], %[zero] \n\t"
|
||||
"packushb %[ftmp0], %[c], %[c] \n\t"
|
||||
"packushb %[ftmp1], %[a], %[a] \n\t"
|
||||
"pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
|
||||
"pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
|
||||
"psubh %[ftmp0], %[b], %[c] \n\t"
|
||||
"psubh %[ftmp1], %[a], %[c] \n\t"
|
||||
"paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
|
||||
"pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
|
||||
"xor %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"psubh %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"punpcklbh %[pa], %[pa], %[zero] \n\t"
|
||||
"punpcklbh %[pb], %[pb], %[zero] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
|
||||
"and %[ftmp1], %[b], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"pminsh %[pa], %[pa], %[pb] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
|
||||
"and %[ftmp1], %[c], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"paddb %[a], %[a], %[d] \n\t"
|
||||
"packushb %[d], %[a], %[a] \n\t"
|
||||
"punpcklbh %[c], %[ftmp2], %[zero] \n\t"
|
||||
"gsswrc1 %[d], 0x03(%[row]) \n\t"
|
||||
"gsswlc1 %[d], 0x06(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp], %[forty_eight] \n\t"
|
||||
"dsll %[ftmp1], %[rp1], %[sixteen] \n\t"
|
||||
"or %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
|
||||
"dsrl %[ftmp2], %[pp], %[forty_eight] \n\t"
|
||||
"dsll %[ftmp1], %[pp1], %[sixteen] \n\t"
|
||||
"or %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
|
||||
|
||||
"punpcklbh %[b], %[ftmp2], %[zero] \n\t"
|
||||
"punpcklbh %[d], %[ftmp0], %[zero] \n\t"
|
||||
"packushb %[ftmp0], %[c], %[c] \n\t"
|
||||
"packushb %[ftmp1], %[a], %[a] \n\t"
|
||||
"pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
|
||||
"pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
|
||||
"psubh %[ftmp0], %[b], %[c] \n\t"
|
||||
"psubh %[ftmp1], %[a], %[c] \n\t"
|
||||
"paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
|
||||
"pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
|
||||
"xor %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"psubh %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"punpcklbh %[pa], %[pa], %[zero] \n\t"
|
||||
"punpcklbh %[pb], %[pb], %[zero] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
|
||||
"and %[ftmp1], %[b], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"pminsh %[pa], %[pa], %[pb] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
|
||||
"and %[ftmp1], %[c], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"paddb %[a], %[a], %[d] \n\t"
|
||||
"packushb %[d], %[a], %[a] \n\t"
|
||||
"punpcklbh %[c], %[ftmp2], %[zero] \n\t"
|
||||
"gsswrc1 %[d], 0x06(%[row]) \n\t"
|
||||
"gsswlc1 %[d], 0x09(%[row]) \n\t"
|
||||
|
||||
"dsrl %[ftmp0], %[rp1], %[eight] \n\t"
|
||||
"dsrl %[ftmp2], %[pp1], %[eight] \n\t"
|
||||
|
||||
"punpcklbh %[b], %[ftmp2], %[zero] \n\t"
|
||||
"punpcklbh %[d], %[ftmp0], %[zero] \n\t"
|
||||
"packushb %[ftmp0], %[c], %[c] \n\t"
|
||||
"packushb %[ftmp1], %[a], %[a] \n\t"
|
||||
"pasubub %[pa], %[ftmp2], %[ftmp0] \n\t"
|
||||
"pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
|
||||
"psubh %[ftmp0], %[b], %[c] \n\t"
|
||||
"psubh %[ftmp1], %[a], %[c] \n\t"
|
||||
"paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
|
||||
"pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
|
||||
"xor %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"psubh %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"punpcklbh %[pa], %[pa], %[zero] \n\t"
|
||||
"punpcklbh %[pb], %[pb], %[zero] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
|
||||
"and %[ftmp1], %[b], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"pminsh %[pa], %[pa], %[pb] \n\t"
|
||||
"pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
|
||||
"and %[ftmp1], %[c], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"paddb %[a], %[a], %[d] \n\t"
|
||||
"packushb %[d], %[a], %[a] \n\t"
|
||||
"punpcklbh %[c], %[ftmp2], %[zero] \n\t"
|
||||
"gsswrc1 %[d], 0x09(%[row]) \n\t"
|
||||
|
||||
"daddiu %[row], %[row], 0x0c \n\t"
|
||||
"daddiu %[prev], %[prev], 0x0c \n\t"
|
||||
"daddiu %[istop], %[istop], -0x0c \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp), [rp1]"=&f"(rp1), [pp1]"=&f"(pp1),
|
||||
[zero]"=&f"(zero), [a]"=&f"(a),[b]"=&f"(b), [c]"=&f"(c),
|
||||
[d]"=&f"(d), [pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc),
|
||||
[tmp0]"=&r"(tmp0), [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
|
||||
[ftmp2]"=&f"(ftmp[2]), [eight]"=&f"(eight), [sixteen]"=&f"(sixteen),
|
||||
[twenty_four]"=&f"(twenty_four), [forty_eight]"=&f"(forty_eight)
|
||||
: [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void png_read_filter_row_paeth4_mmi(png_row_infop row_info, png_bytep row,
|
||||
png_const_bytep prev)
|
||||
{
|
||||
/* Paeth tries to predict pixel d using the pixel to the left of it, a,
|
||||
* and two pixels from the previous row, b and c:
|
||||
* prev: c b
|
||||
* row: a d
|
||||
* The Paeth function predicts d to be whichever of a, b, or c is nearest to
|
||||
* p=a+b-c.
|
||||
*
|
||||
* The first pixel has no left context, and so uses an Up filter, p = b.
|
||||
* This works naturally with our main loop's p = a+b-c if we force a and c
|
||||
* to zero.
|
||||
* Here we zero b and d, which become c and a respectively at the start of
|
||||
* the loop.
|
||||
*/
|
||||
int istop = row_info->rowbytes;
|
||||
double rp, pp, zero;
|
||||
double a, b, c, d, pa, pb, pc;
|
||||
double ftmp[2];
|
||||
|
||||
__asm__ volatile (
|
||||
"xor %[a], %[a], %[a] \n\t"
|
||||
"xor %[c], %[c], %[c] \n\t"
|
||||
"xor %[zero], %[zero], %[zero] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"lwc1 %[rp], 0x00(%[row]) \n\t"
|
||||
"lwc1 %[pp], 0x00(%[prev]) \n\t"
|
||||
"punpcklbh %[b], %[pp], %[zero] \n\t"
|
||||
"punpcklbh %[d], %[rp], %[zero] \n\t"
|
||||
|
||||
"packushb %[ftmp0], %[c], %[c] \n\t"
|
||||
"packushb %[ftmp1], %[a], %[a] \n\t"
|
||||
"pasubub %[pa], %[pp], %[ftmp0] \n\t"
|
||||
"pasubub %[pb], %[ftmp1], %[ftmp0] \n\t"
|
||||
"psubh %[ftmp0], %[b], %[c] \n\t"
|
||||
"psubh %[ftmp1], %[a], %[c] \n\t"
|
||||
"paddh %[pc], %[ftmp0], %[ftmp1] \n\t"
|
||||
"pcmpgth %[ftmp0], %[zero], %[pc] \n\t"
|
||||
"xor %[pc], %[pc], %[ftmp0] \n\t"
|
||||
"psubh %[pc], %[pc], %[ftmp0] \n\t"
|
||||
|
||||
"punpcklbh %[pa], %[pa], %[zero] \n\t"
|
||||
"punpcklbh %[pb], %[pb], %[zero] \n\t"
|
||||
|
||||
"pcmpgth %[ftmp0], %[pa], %[pb] \n\t"
|
||||
"and %[ftmp1], %[b], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"pminsh %[pa], %[pa], %[pb] \n\t"
|
||||
|
||||
"pcmpgth %[ftmp0], %[pa], %[pc] \n\t"
|
||||
"and %[ftmp1], %[c], %[ftmp0] \n\t"
|
||||
"pandn %[a], %[ftmp0], %[a] \n\t"
|
||||
"or %[a], %[a], %[ftmp1] \n\t"
|
||||
"paddb %[a], %[a], %[d] \n\t"
|
||||
"packushb %[d], %[a], %[a] \n\t"
|
||||
"swc1 %[d], 0x00(%[row]) \n\t"
|
||||
"punpcklbh %[c], %[pp], %[zero] \n\t"
|
||||
"daddiu %[row], %[row], 0x04 \n\t"
|
||||
"daddiu %[prev], %[prev], 0x04 \n\t"
|
||||
"daddiu %[istop], %[istop], -0x04 \n\t"
|
||||
"bgtz %[istop], 1b \n\t"
|
||||
: [rp]"=&f"(rp), [pp]"=&f"(pp), [zero]"=&f"(zero),
|
||||
[a]"=&f"(a), [b]"=&f"(b), [c]"=&f"(c), [d]"=&f"(d),
|
||||
[pa]"=&f"(pa), [pb]"=&f"(pb), [pc]"=&f"(pc),
|
||||
[ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1])
|
||||
: [row]"r"(row), [prev]"r"(prev), [istop]"r"(istop)
|
||||
: "memory"
|
||||
);
|
||||
}
|
||||
|
||||
#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */
|
||||
#endif /* READ */
|
||||
@@ -4,6 +4,7 @@
|
||||
* Copyright (c) 2018 Cosmin Truta
|
||||
* Copyright (c) 2016 Glenn Randers-Pehrson
|
||||
* Written by Mandar Sahastrabuddhe, 2016.
|
||||
* Update by guxiwei, 2023.
|
||||
*
|
||||
* This code is released under the libpng license.
|
||||
* For conditions of distribution and use, see the disclaimer
|
||||
@@ -20,8 +21,9 @@
|
||||
|
||||
#ifdef PNG_READ_SUPPORTED
|
||||
|
||||
#if PNG_MIPS_MSA_OPT > 0
|
||||
#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do run-time checks */
|
||||
#if PNG_MIPS_MSA_OPT > 0 || PNG_MIPS_MMI_IMPLEMENTATION > 0
|
||||
|
||||
#ifdef PNG_MIPS_MSA_CHECK_SUPPORTED /* Do MIPS MSA run-time checks */
|
||||
/* WARNING: it is strongly recommended that you do not build libpng with
|
||||
* run-time checks for CPU features if at all possible. In the case of the MIPS
|
||||
* MSA instructions there is no processor-specific way of detecting the
|
||||
@@ -51,13 +53,83 @@ static int png_have_msa(png_structp png_ptr);
|
||||
#endif /* PNG_MIPS_MSA_FILE */
|
||||
#endif /* PNG_MIPS_MSA_CHECK_SUPPORTED */
|
||||
|
||||
#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED /* Do MIPS MMI run-times checks */
|
||||
#ifndef PNG_MIPS_MMI_FILE
|
||||
# ifdef __linux__
|
||||
# define PNG_MIPS_MMI_FILE "contrib/mips-mmi/linux.c"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef PNG_MIPS_MMI_FILE
|
||||
|
||||
#include <signal.h> /* for sig_atomic_t */
|
||||
static int png_have_mmi();
|
||||
#include PNG_MIPS_MMI_FILE
|
||||
|
||||
#else /* PNG_MIPS_MMI_FILE */
|
||||
# error "PNG_MIPS_MMI_FILE undefined: no support for run-time MIPS MMI checks"
|
||||
#endif /* PNG_MIPS_MMI_FILE */
|
||||
#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED*/
|
||||
|
||||
#ifndef PNG_ALIGNED_MEMORY_SUPPORTED
|
||||
# error "ALIGNED_MEMORY is required; set: -DPNG_ALIGNED_MEMORY_SUPPORTED"
|
||||
#endif
|
||||
|
||||
/* MIPS supports two optimizations: MMI and MSA. The appropriate
|
||||
* optimization is chosen at runtime
|
||||
*/
|
||||
void
|
||||
png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
|
||||
png_init_filter_functions_mips(png_structp pp, unsigned int bpp)
|
||||
{
|
||||
#if PNG_MIPS_MMI_IMPLEMENTATION > 0
|
||||
#ifdef PNG_MIPS_MMI_API_SUPPORTED
|
||||
switch ((pp->options >> PNG_MIPS_MMI) & 3)
|
||||
{
|
||||
case PNG_OPTION_UNSET:
|
||||
#endif /* PNG_MIPS_MMI_API_SUPPORTED */
|
||||
#ifdef PNG_MIPS_MMI_CHECK_SUPPORTED
|
||||
{
|
||||
static volatile sig_atomic_t no_mmi = -1; /* not checked */
|
||||
|
||||
if (no_mmi < 0)
|
||||
no_mmi = !png_have_mmi();
|
||||
|
||||
if (no_mmi)
|
||||
goto MIPS_MSA_INIT;
|
||||
}
|
||||
#ifdef PNG_MIPS_MMI_API_SUPPORTED
|
||||
break;
|
||||
#endif
|
||||
#endif /* PNG_MIPS_MMI_CHECK_SUPPORTED */
|
||||
|
||||
#ifdef PNG_MIPS_MMI_API_SUPPORTED
|
||||
default: /* OFF or INVALID */
|
||||
goto MIPS_MSA_INIT;
|
||||
|
||||
case PNG_OPTION_ON:
|
||||
/* Option turned on */
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_mmi;
|
||||
if (bpp == 3)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_mmi;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_mmi;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth3_mmi;
|
||||
}
|
||||
else if (bpp == 4)
|
||||
{
|
||||
pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_mmi;
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_mmi;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
|
||||
png_read_filter_row_paeth4_mmi;
|
||||
}
|
||||
#endif /* PNG_MIPS_MMI_IMPLEMENTATION > 0 */
|
||||
|
||||
MIPS_MSA_INIT:
|
||||
#if PNG_MIPS_MSA_OPT > 0
|
||||
/* The switch statement is compiled in for MIPS_MSA_API, the call to
|
||||
* png_have_msa is compiled in for MIPS_MSA_CHECK. If both are defined
|
||||
* the check is only performed if the API has not set the MSA option on
|
||||
@@ -125,6 +197,8 @@ png_init_filter_functions_msa(png_structp pp, unsigned int bpp)
|
||||
pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_msa;
|
||||
pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_msa;
|
||||
}
|
||||
}
|
||||
#endif /* PNG_MIPS_MSA_OPT > 0 */
|
||||
return;
|
||||
}
|
||||
#endif /* PNG_MIPS_MSA_OPT > 0 || PNG_MIPS_MMI_IMPLEMENTATION > 0 */
|
||||
#endif /* READ */
|
||||
|
||||
Reference in New Issue
Block a user