Imported from libpng-1.2.9beta10.tar

This commit is contained in:
Glenn Randers-Pehrson
2006-03-20 07:24:58 -06:00
parent 170b70ce30
commit 26e47d9a71
59 changed files with 520 additions and 186 deletions

View File

@@ -7,7 +7,7 @@
* and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
* for Intel's performance analysis of the MMX vs. non-MMX code.
*
* Last changed in libpng 1.2.9 March 10, 2006
* Last changed in libpng 1.2.9 March 20, 2006
* For conditions of distribution and use, see copyright notice in png.h
* Copyright (c) 1998-2006 Glenn Randers-Pehrson
* Copyright (c) 1998, Intel Corporation
@@ -1739,6 +1739,7 @@ png_do_read_interlace(png_structp png_ptr)
int dummy_value_c; // fix 'forbidden register spilled'
int dummy_value_S;
int dummy_value_D;
int dummy_value_a;
__asm__ __volatile__ (
"subl $21, %%edi \n\t"
@@ -1746,7 +1747,7 @@ png_do_read_interlace(png_structp png_ptr)
".loop3_pass0: \n\t"
"movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0
"pand _const4, %%mm0 \n\t" // z z z z z 2 1 0
"pand (%3), %%mm0 \n\t" // z z z z z 2 1 0
"movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0
"psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z
"movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z
@@ -1771,12 +1772,14 @@ png_do_read_interlace(png_structp png_ptr)
: "=c" (dummy_value_c), // output regs (dummy)
"=S" (dummy_value_S),
"=D" (dummy_value_D)
"=D" (dummy_value_D),
"=a" (dummy_value_a)
: "1" (sptr), // esi // input regs
"2" (dp), // edi
"0" (width), // ecx
"rim" (_const4) // %1(?) (0x0000000000FFFFFFLL)
"3" (&_const4) // %1(?) (0x0000000000FFFFFFLL)
#if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1", "%mm2" // clobber list
@@ -1789,6 +1792,7 @@ png_do_read_interlace(png_structp png_ptr)
int dummy_value_c; // fix 'forbidden register spilled'
int dummy_value_S;
int dummy_value_D;
int dummy_value_a;
__asm__ __volatile__ (
"subl $9, %%edi \n\t"
@@ -1796,7 +1800,7 @@ png_do_read_interlace(png_structp png_ptr)
".loop3_pass2: \n\t"
"movd (%%esi), %%mm0 \n\t" // x x x x x 2 1 0
"pand _const4, %%mm0 \n\t" // z z z z z 2 1 0
"pand (%3), %%mm0 \n\t" // z z z z z 2 1 0
"movq %%mm0, %%mm1 \n\t" // z z z z z 2 1 0
"psllq $16, %%mm0 \n\t" // z z z 2 1 0 z z
"movq %%mm0, %%mm2 \n\t" // z z z 2 1 0 z z
@@ -1815,12 +1819,13 @@ png_do_read_interlace(png_structp png_ptr)
: "=c" (dummy_value_c), // output regs (dummy)
"=S" (dummy_value_S),
"=D" (dummy_value_D)
"=D" (dummy_value_D),
"=a" (dummy_value_a)
: "1" (sptr), // esi // input regs
"2" (dp), // edi
"0" (width), // ecx
"rim" (_const4) // (0x0000000000FFFFFFLL)
"3" (&_const4) // (0x0000000000FFFFFFLL)
#if 0 /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1", "%mm2" // clobber list
@@ -1841,6 +1846,8 @@ png_do_read_interlace(png_structp png_ptr)
int dummy_value_c; // fix 'forbidden register spilled'
int dummy_value_S;
int dummy_value_D;
int dummy_value_a;
int dummy_value_d;
__asm__ __volatile__ (
"subl $3, %%esi \n\t"
@@ -1852,14 +1859,14 @@ png_do_read_interlace(png_structp png_ptr)
"movq %%mm0, %%mm1 \n\t" // x x 5 4 3 2 1 0
"movq %%mm0, %%mm2 \n\t" // x x 5 4 3 2 1 0
"psllq $24, %%mm0 \n\t" // 4 3 2 1 0 z z z
"pand _const4, %%mm1 \n\t" // z z z z z 2 1 0
"pand (%3), %%mm1 \n\t" // z z z z z 2 1 0
"psrlq $24, %%mm2 \n\t" // z z z x x 5 4 3
"por %%mm1, %%mm0 \n\t" // 4 3 2 1 0 2 1 0
"movq %%mm2, %%mm3 \n\t" // z z z x x 5 4 3
"psllq $8, %%mm2 \n\t" // z z x x 5 4 3 z
"movq %%mm0, (%%edi) \n\t"
"psrlq $16, %%mm3 \n\t" // z z z z z x x 5
"pand _const6, %%mm3 \n\t" // z z z z z z z 5
"pand (%4), %%mm3 \n\t" // z z z z z z z 5
"por %%mm3, %%mm2 \n\t" // z z x x 5 4 3 5
"subl $6, %%esi \n\t"
"movd %%mm2, 8(%%edi) \n\t"
@@ -1870,13 +1877,15 @@ png_do_read_interlace(png_structp png_ptr)
: "=c" (dummy_value_c), // output regs (dummy)
"=S" (dummy_value_S),
"=D" (dummy_value_D)
"=D" (dummy_value_D),
"=a" (dummy_value_a),
"=d" (dummy_value_d)
: "1" (sptr), // esi // input regs
"2" (dp), // edi
"0" (width_mmx), // ecx
"rim" (_const4), // 0x0000000000FFFFFFLL
"rim" (_const6) // 0x00000000000000FFLL
"3" (&_const4), // 0x0000000000FFFFFFLL
"4" (&_const6) // 0x00000000000000FFLL
#if 0 /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1" // clobber list
@@ -5341,6 +5350,7 @@ int PNGAPI
png_mmx_support(void)
{
#if defined(PNG_MMX_CODE_SUPPORTED)
int result;
__asm__ __volatile__ (
"pushl %%ebx \n\t" // ebx gets clobbered by CPUID instruction
"pushl %%ecx \n\t" // so does ecx...
@@ -5382,7 +5392,6 @@ png_mmx_support(void)
"0: \n\t" // .NOT_SUPPORTED: target label for jump instructions
"movl $0, %%eax \n\t" // set return value to 0
"1: \n\t" // .RETURN: target label for jump instructions
"movl %%eax, _mmx_supported \n\t" // save in global static variable, too
"popl %%edx \n\t" // restore edx
"popl %%ecx \n\t" // restore ecx
"popl %%ebx \n\t" // restore ebx
@@ -5390,15 +5399,16 @@ png_mmx_support(void)
// "ret \n\t" // DONE: no MMX support
// (fall through to standard C "ret")
: // output list (none)
: "=a" (result) // output list
: // any variables used on input (none)
: "%eax" // clobber list
// no clobber list
// , "%ebx", "%ecx", "%edx" // GRR: we handle these manually
// , "memory" // if write to a variable gcc thought was in a reg
// , "cc" // "condition codes" (flag bits)
);
_mmx_supported = result;
#else
_mmx_supported = 0;
#endif /* PNG_MMX_CODE_SUPPORTED */