mirror of
https://git.code.sf.net/p/libpng/code.git
synced 2025-07-10 18:04:09 +02:00
Imported from libpng-1.0.10beta1.tar
This commit is contained in:
413
pnggccrd.c
413
pnggccrd.c
@@ -6,7 +6,7 @@
|
||||
* and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
|
||||
* for Intel's performance analysis of the MMX vs. non-MMX code.
|
||||
*
|
||||
* libpng 1.0.9 - January 31, 2001
|
||||
* libpng version 1.0.10beta1 - March 14, 2001
|
||||
* For conditions of distribution and use, see copyright notice in png.h
|
||||
* Copyright (c) 1998-2001 Glenn Randers-Pehrson
|
||||
* Copyright (c) 1998, Intel Corporation
|
||||
@@ -95,11 +95,11 @@
|
||||
* variables, not the other way around. Hence _const4, _mask8_0, etc.
|
||||
*
|
||||
* 19991024:
|
||||
* - fixed mmxsupport()/png_do_interlace() first-row bug
|
||||
* - fixed mmxsupport()/png_do_read_interlace() first-row bug
|
||||
* This one was severely weird: even though mmxsupport() doesn't touch
|
||||
* ebx (where "row" pointer was stored), it nevertheless managed to zero
|
||||
* the register (even in static/non-fPIC code--see below), which in turn
|
||||
* caused png_do_interlace() to return prematurely on the first row of
|
||||
* caused png_do_read_interlace() to return prematurely on the first row of
|
||||
* interlaced images (i.e., without expanding the interlaced pixels).
|
||||
* Inspection of the generated assembly code didn't turn up any clues,
|
||||
* although it did point at a minor optimization (i.e., get rid of
|
||||
@@ -212,6 +212,9 @@
|
||||
* within MMX version of png_read_filter_row()) so no longer necessary to
|
||||
* compile it into pngrutil.o
|
||||
*
|
||||
* 20010310:
|
||||
* - fixed buffer-overrun bug in png_combine_row() C code (non-MMX)
|
||||
*
|
||||
* STILL TO DO:
|
||||
* - test png_do_read_interlace() 64-bit case (pixel_bytes == 8)
|
||||
* - write MMX code for 48-bit case (pixel_bytes == 6)
|
||||
@@ -226,12 +229,18 @@
|
||||
* - add support for runtime enable/disable/query of various MMX routines
|
||||
*/
|
||||
|
||||
//#define PNG_DEBUG 2 // GRR
|
||||
/*
|
||||
#ifndef PNG_DEBUG
|
||||
# define PNG_DEBUG 0
|
||||
#endif
|
||||
*/
|
||||
|
||||
#define PNG_INTERNAL
|
||||
#include "png.h"
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGGCCRD)
|
||||
#if defined(PNG_USE_PNGGCCRD)
|
||||
|
||||
int PNGAPI png_mmx_support(void);
|
||||
|
||||
#ifdef PNG_USE_LOCAL_ARRAYS
|
||||
static const int FARDATA png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
|
||||
@@ -239,8 +248,9 @@ static const int FARDATA png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
|
||||
static const int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
|
||||
#endif
|
||||
|
||||
// djgpp, Win32, and Cygwin add their own underscores to global variables,
|
||||
// so define them without:
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
/* djgpp, Win32, and Cygwin add their own underscores to global variables,
|
||||
* so define them without: */
|
||||
#if defined(__DJGPP__) || defined(WIN32) || defined(__CYGWIN__)
|
||||
# define _mmx_supported mmx_supported
|
||||
# define _unmask unmask
|
||||
@@ -277,7 +287,6 @@ static const int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
|
||||
# define _pctemp pctemp
|
||||
#endif
|
||||
|
||||
static int _mmx_supported = 2;
|
||||
|
||||
/* These constants are used in the inlined MMX assembly code.
|
||||
Ignore gcc's "At top level: defined but not used" warnings. */
|
||||
@@ -324,18 +333,24 @@ static int _dif;
|
||||
static int _patemp; // temp variables for Paeth routine
|
||||
static int _pbtemp;
|
||||
static int _pctemp;
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
|
||||
static int _mmx_supported = 2;
|
||||
|
||||
|
||||
|
||||
//===========================================================================//
|
||||
// //
|
||||
// P N G _ C O M B I N E _ R O W //
|
||||
// //
|
||||
//===========================================================================//
|
||||
/*===========================================================================*/
|
||||
/* */
|
||||
/* P N G _ C O M B I N E _ R O W */
|
||||
/* */
|
||||
/*===========================================================================*/
|
||||
|
||||
#if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)
|
||||
|
||||
#define BPP2 2
|
||||
#define BPP3 3 /* bytes per pixel (a.k.a. pixel_bytes) */
|
||||
#define BPP4 4
|
||||
#define BPP6 6 /* (defined only to help avoid cut-and-paste errors) */
|
||||
#define BPP8 8
|
||||
|
||||
/* Combines the row recently read in with the previous row.
|
||||
This routine takes care of alpha and transparency if requested.
|
||||
This routine also handles the two methods of progressive display
|
||||
@@ -353,7 +368,7 @@ static int _pctemp;
|
||||
void /* PRIVATE */
|
||||
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
{
|
||||
png_debug(1,"in png_combine_row_asm\n");
|
||||
png_debug(1, "in png_combine_row (pnggccrd.c)\n");
|
||||
|
||||
if (_mmx_supported == 2) {
|
||||
png_mmx_support();
|
||||
@@ -361,15 +376,15 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
|
||||
if (mask == 0xff)
|
||||
{
|
||||
png_debug(2,"mask == 0xff: doing single png_memcpy()\n");
|
||||
png_memcpy(row, png_ptr->row_buf + 1,
|
||||
(png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
|
||||
}
|
||||
/* GRR: png_combine_row() never called with mask == 0 */
|
||||
else
|
||||
else /* (png_combine_row() is never called with mask == 0) */
|
||||
{
|
||||
switch (png_ptr->row_info.pixel_depth)
|
||||
{
|
||||
case 1: // png_ptr->row_info.pixel_depth
|
||||
case 1: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep sp;
|
||||
png_bytep dp;
|
||||
@@ -426,7 +441,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
break;
|
||||
}
|
||||
|
||||
case 2: // png_ptr->row_info.pixel_depth
|
||||
case 2: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep sp;
|
||||
png_bytep dp;
|
||||
@@ -481,7 +496,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
break;
|
||||
}
|
||||
|
||||
case 4: // png_ptr->row_info.pixel_depth
|
||||
case 4: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep sp;
|
||||
png_bytep dp;
|
||||
@@ -535,11 +550,12 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
break;
|
||||
}
|
||||
|
||||
case 8: // png_ptr->row_info.pixel_depth
|
||||
case 8: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep srcptr;
|
||||
png_bytep dstptr;
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported )
|
||||
{
|
||||
png_uint_32 len;
|
||||
@@ -553,7 +569,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr = png_ptr->row_buf + 1;
|
||||
dstptr = row;
|
||||
len = png_ptr->width &~7; // reduce to multiple of 8
|
||||
diff = png_ptr->width & 7; // amount lost
|
||||
diff = (int) (png_ptr->width & 7); // amount lost
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"movd _unmask, %%mm7 \n\t" // load bit pattern
|
||||
@@ -627,15 +643,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
);
|
||||
}
|
||||
else /* mmx _not supported - Use modified C routine */
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
register png_uint_32 i;
|
||||
png_uint_32 initial_val = png_pass_start[png_ptr->pass];
|
||||
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
|
||||
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
|
||||
register int stride = png_pass_inc[png_ptr->pass];
|
||||
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
|
||||
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
|
||||
register int rep_bytes = png_pass_width[png_ptr->pass];
|
||||
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
|
||||
register png_uint_32 final_val = png_ptr->width;
|
||||
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
|
||||
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
|
||||
int diff = (int) (png_ptr->width & 7); /* amount lost */
|
||||
register png_uint_32 final_val = len; /* GRR bugfix */
|
||||
|
||||
srcptr = png_ptr->row_buf + 1 + initial_val;
|
||||
dstptr = row + initial_val;
|
||||
@@ -646,16 +665,30 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
} /* end of else */
|
||||
if (diff) /* number of leftover pixels: 3 for pngtest */
|
||||
{
|
||||
final_val+=diff /* *BPP1 */ ;
|
||||
for (; i < final_val; i += stride)
|
||||
{
|
||||
if (rep_bytes > (int)(final_val-i))
|
||||
rep_bytes = (int)(final_val-i);
|
||||
png_memcpy(dstptr, srcptr, rep_bytes);
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
}
|
||||
|
||||
} /* end of else (_mmx_supported) */
|
||||
|
||||
break;
|
||||
} // end 8 bpp
|
||||
} /* end 8 bpp */
|
||||
|
||||
case 16: // png_ptr->row_info.pixel_depth
|
||||
case 16: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep srcptr;
|
||||
png_bytep dstptr;
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported )
|
||||
{
|
||||
png_uint_32 len;
|
||||
@@ -669,7 +702,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr = png_ptr->row_buf + 1;
|
||||
dstptr = row;
|
||||
len = png_ptr->width &~7; // reduce to multiple of 8
|
||||
diff = png_ptr->width & 7; // amount lost
|
||||
diff = (int) (png_ptr->width & 7); // amount lost //
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"movd _unmask, %%mm7 \n\t" // load bit pattern
|
||||
@@ -759,15 +792,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
);
|
||||
}
|
||||
else /* mmx _not supported - Use modified C routine */
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
register png_uint_32 i;
|
||||
png_uint_32 initial_val = 2 * png_pass_start[png_ptr->pass];
|
||||
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
|
||||
register int stride = 2 * png_pass_inc[png_ptr->pass];
|
||||
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
|
||||
register int rep_bytes = 2 * png_pass_width[png_ptr->pass];
|
||||
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
|
||||
register png_uint_32 final_val = 2 * png_ptr->width;
|
||||
png_uint_32 initial_val = BPP2 * png_pass_start[png_ptr->pass];
|
||||
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
|
||||
register int stride = BPP2 * png_pass_inc[png_ptr->pass];
|
||||
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
|
||||
register int rep_bytes = BPP2 * png_pass_width[png_ptr->pass];
|
||||
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
|
||||
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
|
||||
int diff = (int) (png_ptr->width & 7); /* amount lost */
|
||||
register png_uint_32 final_val = BPP2 * len; /* GRR bugfix */
|
||||
|
||||
srcptr = png_ptr->row_buf + 1 + initial_val;
|
||||
dstptr = row + initial_val;
|
||||
@@ -778,16 +814,29 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
} /* end of else */
|
||||
if (diff) /* number of leftover pixels: 3 for pngtest */
|
||||
{
|
||||
final_val+=diff*BPP2;
|
||||
for (; i < final_val; i += stride)
|
||||
{
|
||||
if (rep_bytes > (int)(final_val-i))
|
||||
rep_bytes = (int)(final_val-i);
|
||||
png_memcpy(dstptr, srcptr, rep_bytes);
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
}
|
||||
} /* end of else (_mmx_supported) */
|
||||
|
||||
break;
|
||||
} // end 16 bpp
|
||||
} /* end 16 bpp */
|
||||
|
||||
case 24: // png_ptr->row_info.pixel_depth
|
||||
case 24: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep srcptr;
|
||||
png_bytep dstptr;
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported )
|
||||
{
|
||||
png_uint_32 len;
|
||||
@@ -801,7 +850,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr = png_ptr->row_buf + 1;
|
||||
dstptr = row;
|
||||
len = png_ptr->width &~7; // reduce to multiple of 8
|
||||
diff = png_ptr->width & 7; // amount lost
|
||||
diff = (int) (png_ptr->width & 7); // amount lost //
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"movd _unmask, %%mm7 \n\t" // load bit pattern
|
||||
@@ -906,15 +955,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
);
|
||||
}
|
||||
else /* mmx _not supported - Use modified C routine */
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
register png_uint_32 i;
|
||||
png_uint_32 initial_val = 3 * png_pass_start[png_ptr->pass];
|
||||
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
|
||||
register int stride = 3 * png_pass_inc[png_ptr->pass];
|
||||
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
|
||||
register int rep_bytes = 3 * png_pass_width[png_ptr->pass];
|
||||
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
|
||||
register png_uint_32 final_val = 3 * png_ptr->width;
|
||||
png_uint_32 initial_val = BPP3 * png_pass_start[png_ptr->pass];
|
||||
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
|
||||
register int stride = BPP3 * png_pass_inc[png_ptr->pass];
|
||||
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
|
||||
register int rep_bytes = BPP3 * png_pass_width[png_ptr->pass];
|
||||
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
|
||||
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
|
||||
int diff = (int) (png_ptr->width & 7); /* amount lost */
|
||||
register png_uint_32 final_val = BPP3 * len; /* GRR bugfix */
|
||||
|
||||
srcptr = png_ptr->row_buf + 1 + initial_val;
|
||||
dstptr = row + initial_val;
|
||||
@@ -925,16 +977,29 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
} /* end of else */
|
||||
if (diff) /* number of leftover pixels: 3 for pngtest */
|
||||
{
|
||||
final_val+=diff*BPP3;
|
||||
for (; i < final_val; i += stride)
|
||||
{
|
||||
if (rep_bytes > (int)(final_val-i))
|
||||
rep_bytes = (int)(final_val-i);
|
||||
png_memcpy(dstptr, srcptr, rep_bytes);
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
}
|
||||
} /* end of else (_mmx_supported) */
|
||||
|
||||
break;
|
||||
} // end 24 bpp
|
||||
} /* end 24 bpp */
|
||||
|
||||
case 32: // png_ptr->row_info.pixel_depth
|
||||
case 32: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep srcptr;
|
||||
png_bytep dstptr;
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported )
|
||||
{
|
||||
png_uint_32 len;
|
||||
@@ -948,7 +1013,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr = png_ptr->row_buf + 1;
|
||||
dstptr = row;
|
||||
len = png_ptr->width &~7; // reduce to multiple of 8
|
||||
diff = png_ptr->width & 7; // amount lost
|
||||
diff = (int) (png_ptr->width & 7); // amount lost //
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"movd _unmask, %%mm7 \n\t" // load bit pattern
|
||||
@@ -1060,15 +1125,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
);
|
||||
}
|
||||
else /* mmx _not supported - Use modified C routine */
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
register png_uint_32 i;
|
||||
png_uint_32 initial_val = 4 * png_pass_start[png_ptr->pass];
|
||||
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
|
||||
register int stride = 4 * png_pass_inc[png_ptr->pass];
|
||||
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
|
||||
register int rep_bytes = 4 * png_pass_width[png_ptr->pass];
|
||||
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
|
||||
register png_uint_32 final_val = 4 * png_ptr->width;
|
||||
png_uint_32 initial_val = BPP4 * png_pass_start[png_ptr->pass];
|
||||
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
|
||||
register int stride = BPP4 * png_pass_inc[png_ptr->pass];
|
||||
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
|
||||
register int rep_bytes = BPP4 * png_pass_width[png_ptr->pass];
|
||||
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
|
||||
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
|
||||
int diff = (int) (png_ptr->width & 7); /* amount lost */
|
||||
register png_uint_32 final_val = BPP4 * len; /* GRR bugfix */
|
||||
|
||||
srcptr = png_ptr->row_buf + 1 + initial_val;
|
||||
dstptr = row + initial_val;
|
||||
@@ -1079,16 +1147,29 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
} /* end of else */
|
||||
if (diff) /* number of leftover pixels: 3 for pngtest */
|
||||
{
|
||||
final_val+=diff*BPP4;
|
||||
for (; i < final_val; i += stride)
|
||||
{
|
||||
if (rep_bytes > (int)(final_val-i))
|
||||
rep_bytes = (int)(final_val-i);
|
||||
png_memcpy(dstptr, srcptr, rep_bytes);
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
}
|
||||
} /* end of else (_mmx_supported) */
|
||||
|
||||
break;
|
||||
} // end 32 bpp
|
||||
} /* end 32 bpp */
|
||||
|
||||
case 48: // png_ptr->row_info.pixel_depth
|
||||
case 48: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep srcptr;
|
||||
png_bytep dstptr;
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported )
|
||||
{
|
||||
png_uint_32 len;
|
||||
@@ -1102,7 +1183,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr = png_ptr->row_buf + 1;
|
||||
dstptr = row;
|
||||
len = png_ptr->width &~7; // reduce to multiple of 8
|
||||
diff = png_ptr->width & 7; // amount lost
|
||||
diff = (int) (png_ptr->width & 7); // amount lost //
|
||||
|
||||
__asm__ __volatile__ (
|
||||
"movd _unmask, %%mm7 \n\t" // load bit pattern
|
||||
@@ -1231,15 +1312,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
);
|
||||
}
|
||||
else /* mmx _not supported - Use modified C routine */
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
register png_uint_32 i;
|
||||
png_uint_32 initial_val = 6 * png_pass_start[png_ptr->pass];
|
||||
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
|
||||
register int stride = 6 * png_pass_inc[png_ptr->pass];
|
||||
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
|
||||
register int rep_bytes = 6 * png_pass_width[png_ptr->pass];
|
||||
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
|
||||
register png_uint_32 final_val = 6 * png_ptr->width;
|
||||
png_uint_32 initial_val = BPP6 * png_pass_start[png_ptr->pass];
|
||||
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
|
||||
register int stride = BPP6 * png_pass_inc[png_ptr->pass];
|
||||
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
|
||||
register int rep_bytes = BPP6 * png_pass_width[png_ptr->pass];
|
||||
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
|
||||
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
|
||||
int diff = (int) (png_ptr->width & 7); /* amount lost */
|
||||
register png_uint_32 final_val = BPP6 * len; /* GRR bugfix */
|
||||
|
||||
srcptr = png_ptr->row_buf + 1 + initial_val;
|
||||
dstptr = row + initial_val;
|
||||
@@ -1250,23 +1334,37 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
} /* end of else */
|
||||
if (diff) /* number of leftover pixels: 3 for pngtest */
|
||||
{
|
||||
final_val+=diff*BPP6;
|
||||
for (; i < final_val; i += stride)
|
||||
{
|
||||
if (rep_bytes > (int)(final_val-i))
|
||||
rep_bytes = (int)(final_val-i);
|
||||
png_memcpy(dstptr, srcptr, rep_bytes);
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
}
|
||||
} /* end of else (_mmx_supported) */
|
||||
|
||||
break;
|
||||
} // end 48 bpp
|
||||
} /* end 48 bpp */
|
||||
|
||||
case 64: // png_ptr->row_info.pixel_depth
|
||||
case 64: /* png_ptr->row_info.pixel_depth */
|
||||
{
|
||||
png_bytep srcptr;
|
||||
png_bytep dstptr;
|
||||
register png_uint_32 i;
|
||||
png_uint_32 initial_val = 8 * png_pass_start[png_ptr->pass];
|
||||
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
|
||||
register int stride = 8 * png_pass_inc[png_ptr->pass];
|
||||
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
|
||||
register int rep_bytes = 8 * png_pass_width[png_ptr->pass];
|
||||
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
|
||||
register png_uint_32 final_val = 8 * png_ptr->width;
|
||||
png_uint_32 initial_val = BPP8 * png_pass_start[png_ptr->pass];
|
||||
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
|
||||
register int stride = BPP8 * png_pass_inc[png_ptr->pass];
|
||||
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
|
||||
register int rep_bytes = BPP8 * png_pass_width[png_ptr->pass];
|
||||
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
|
||||
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
|
||||
int diff = (int) (png_ptr->width & 7); /* amount lost */
|
||||
register png_uint_32 final_val = BPP8 * len; /* GRR bugfix */
|
||||
|
||||
srcptr = png_ptr->row_buf + 1 + initial_val;
|
||||
dstptr = row + initial_val;
|
||||
@@ -1277,12 +1375,25 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
break;
|
||||
} // end 64 bpp
|
||||
if (diff) /* number of leftover pixels: 3 for pngtest */
|
||||
{
|
||||
final_val+=diff*BPP8;
|
||||
for (; i < final_val; i += stride)
|
||||
{
|
||||
if (rep_bytes > (int)(final_val-i))
|
||||
rep_bytes = (int)(final_val-i);
|
||||
png_memcpy(dstptr, srcptr, rep_bytes);
|
||||
srcptr += stride;
|
||||
dstptr += stride;
|
||||
}
|
||||
}
|
||||
|
||||
default: // png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64
|
||||
break;
|
||||
} /* end 64 bpp */
|
||||
|
||||
default: /* png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64 */
|
||||
{
|
||||
// this should never happen
|
||||
/* this should never happen */
|
||||
fprintf(stderr,
|
||||
"libpng internal error: png_ptr->row_info.pixel_depth = %d\n",
|
||||
png_ptr->row_info.pixel_depth);
|
||||
@@ -1300,11 +1411,11 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
|
||||
|
||||
|
||||
|
||||
//===========================================================================//
|
||||
// //
|
||||
// P N G _ D O _ R E A D _ I N T E R L A C E //
|
||||
// //
|
||||
//===========================================================================//
|
||||
/*===========================================================================*/
|
||||
/* */
|
||||
/* P N G _ D O _ R E A D _ I N T E R L A C E */
|
||||
/* */
|
||||
/*===========================================================================*/
|
||||
|
||||
#if defined(PNG_READ_INTERLACING_SUPPORTED)
|
||||
#if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)
|
||||
@@ -1319,9 +1430,11 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
png_row_infop row_info = &(png_ptr->row_info);
|
||||
png_bytep row = png_ptr->row_buf + 1;
|
||||
int pass = png_ptr->pass;
|
||||
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
|
||||
png_uint_32 transformations = png_ptr->transformations;
|
||||
#endif
|
||||
|
||||
png_debug(1,"in png_do_read_interlace\n");
|
||||
png_debug(1, "in png_do_read_interlace (pnggccrd.c)\n");
|
||||
|
||||
if (_mmx_supported == 2) {
|
||||
png_mmx_support();
|
||||
@@ -1505,29 +1618,32 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
break;
|
||||
}
|
||||
|
||||
//====================================================================
|
||||
/*====================================================================*/
|
||||
|
||||
default: // 8-bit or larger (this is where the routine is modified)
|
||||
default: /* 8-bit or larger (this is where the routine is modified) */
|
||||
{
|
||||
#if 0
|
||||
// static unsigned long long _const4 = 0x0000000000FFFFFFLL; no good
|
||||
// static unsigned long long const4 = 0x0000000000FFFFFFLL; no good
|
||||
// unsigned long long _const4 = 0x0000000000FFFFFFLL; no good
|
||||
// unsigned long long const4 = 0x0000000000FFFFFFLL; no good
|
||||
#endif
|
||||
png_bytep sptr, dp;
|
||||
png_uint_32 i;
|
||||
png_size_t pixel_bytes;
|
||||
int width = row_info->width;
|
||||
int width = (int)row_info->width;
|
||||
|
||||
pixel_bytes = (row_info->pixel_depth >> 3);
|
||||
|
||||
// point sptr at the last pixel in the pre-expanded row:
|
||||
/* point sptr at the last pixel in the pre-expanded row: */
|
||||
sptr = row + (width - 1) * pixel_bytes;
|
||||
|
||||
// point dp at the last pixel position in the expanded row:
|
||||
/* point dp at the last pixel position in the expanded row: */
|
||||
dp = row + (final_width - 1) * pixel_bytes;
|
||||
|
||||
// New code by Nirav Chhatrapati - Intel Corporation
|
||||
/* New code by Nirav Chhatrapati - Intel Corporation */
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported )
|
||||
{
|
||||
//--------------------------------------------------------------
|
||||
@@ -1779,7 +1895,9 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
*/
|
||||
|
||||
for (j = 0; j < png_pass_inc[pass]; j++)
|
||||
{
|
||||
*dp-- = *sptr;
|
||||
}
|
||||
--sptr;
|
||||
}
|
||||
}
|
||||
@@ -1832,7 +1950,9 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
int j;
|
||||
|
||||
for (j = 0; j < png_pass_inc[pass]; j++)
|
||||
{
|
||||
*dp-- = *sptr;
|
||||
}
|
||||
--sptr;
|
||||
}
|
||||
}
|
||||
@@ -1884,7 +2004,9 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
int j;
|
||||
|
||||
for (j = 0; j < png_pass_inc[pass]; j++)
|
||||
{
|
||||
*dp-- = *sptr;
|
||||
}
|
||||
--sptr;
|
||||
}
|
||||
}
|
||||
@@ -2413,6 +2535,7 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
/* GRR 19991007: does it? or should pixel_bytes in each
|
||||
* block be replaced with immediate value (e.g., 1)? */
|
||||
/* GRR 19991017: replaced with constants in each case */
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
if (pixel_bytes == 1)
|
||||
{
|
||||
@@ -2420,7 +2543,9 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
{
|
||||
int j;
|
||||
for (j = 0; j < png_pass_inc[pass]; j++)
|
||||
{
|
||||
*dp-- = *sptr;
|
||||
}
|
||||
--sptr;
|
||||
}
|
||||
}
|
||||
@@ -2463,6 +2588,14 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
png_memcpy(v, sptr, 4);
|
||||
for (j = 0; j < png_pass_inc[pass]; j++)
|
||||
{
|
||||
#ifdef PNG_DEBUG
|
||||
if (dp < row || dp+3 > row+png_ptr->row_buf_size)
|
||||
{
|
||||
printf("dp out of bounds: row=%d, dp=%d, rp=%d\n",row, dp,
|
||||
row+png_ptr->row_buf_size);
|
||||
printf("row_buf=%d\n",png_ptr->row_buf_size);
|
||||
}
|
||||
#endif
|
||||
png_memcpy(dp, v, 4);
|
||||
dp -= 4;
|
||||
}
|
||||
@@ -2499,7 +2632,7 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
sptr -= 8;
|
||||
}
|
||||
}
|
||||
else // GRR: should never be reached
|
||||
else /* GRR: should never be reached */
|
||||
{
|
||||
for (i = width; i; i--)
|
||||
{
|
||||
@@ -2533,6 +2666,8 @@ png_do_read_interlace(png_structp png_ptr)
|
||||
|
||||
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
|
||||
// These variables are utilized in the functions below. They are declared
|
||||
// globally here to ensure alignment on 8-byte boundaries.
|
||||
|
||||
@@ -2545,7 +2680,6 @@ union uAll {
|
||||
|
||||
|
||||
|
||||
|
||||
//===========================================================================//
|
||||
// //
|
||||
// P N G _ R E A D _ F I L T E R _ R O W _ M M X _ A V G //
|
||||
@@ -4739,14 +4873,16 @@ png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
|
||||
|
||||
} // end of png_read_filter_row_mmx_up()
|
||||
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
|
||||
|
||||
|
||||
//===========================================================================//
|
||||
// //
|
||||
// P N G _ R E A D _ F I L T E R _ R O W //
|
||||
// //
|
||||
//===========================================================================//
|
||||
|
||||
/*===========================================================================*/
|
||||
/* */
|
||||
/* P N G _ R E A D _ F I L T E R _ R O W */
|
||||
/* */
|
||||
/*===========================================================================*/
|
||||
|
||||
#if defined(PNG_HAVE_ASSEMBLER_READ_FILTER_ROW)
|
||||
|
||||
@@ -4760,6 +4896,7 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
char filnm[10];
|
||||
#endif
|
||||
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
/* GRR: these are superseded by png_ptr->asm_flags: */
|
||||
#define UseMMX_sub 1 // GRR: converted 20000730
|
||||
#define UseMMX_up 1 // GRR: converted 20000729
|
||||
@@ -4769,9 +4906,10 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
if (_mmx_supported == 2) {
|
||||
png_mmx_support();
|
||||
}
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
|
||||
#ifdef PNG_DEBUG
|
||||
png_debug(1, "in png_read_filter_row\n");
|
||||
png_debug(1, "in png_read_filter_row (pnggccrd.c)\n");
|
||||
switch (filter)
|
||||
{
|
||||
case 0: sprintf(filnm, "none");
|
||||
@@ -4800,13 +4938,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
break;
|
||||
|
||||
case PNG_FILTER_VALUE_SUB:
|
||||
if (
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported &&
|
||||
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
|
||||
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
|
||||
{
|
||||
png_read_filter_row_mmx_sub(row_info, row);
|
||||
}
|
||||
else
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
png_uint_32 i;
|
||||
png_uint_32 istop = row_info->rowbytes;
|
||||
@@ -4823,13 +4963,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
break;
|
||||
|
||||
case PNG_FILTER_VALUE_UP:
|
||||
if (
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported &&
|
||||
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
|
||||
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
|
||||
{
|
||||
png_read_filter_row_mmx_up(row_info, row, prev_row);
|
||||
}
|
||||
else
|
||||
else
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
png_uint_32 i;
|
||||
png_uint_32 istop = row_info->rowbytes;
|
||||
@@ -4845,13 +4987,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
break;
|
||||
|
||||
case PNG_FILTER_VALUE_AVG:
|
||||
if (
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported &&
|
||||
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
|
||||
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
|
||||
{
|
||||
png_read_filter_row_mmx_avg(row_info, row, prev_row);
|
||||
}
|
||||
else
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
png_uint_32 i;
|
||||
png_bytep rp = row;
|
||||
@@ -4877,13 +5021,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
break;
|
||||
|
||||
case PNG_FILTER_VALUE_PAETH:
|
||||
if (
|
||||
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
|
||||
if ( _mmx_supported &&
|
||||
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
|
||||
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
|
||||
{
|
||||
png_read_filter_row_mmx_paeth(row_info, row, prev_row);
|
||||
}
|
||||
else
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
|
||||
{
|
||||
png_uint_32 i;
|
||||
png_bytep rp = row;
|
||||
@@ -4947,30 +5093,27 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
|
||||
#endif /* PNG_HAVE_ASSEMBLER_READ_FILTER_ROW */
|
||||
|
||||
|
||||
/*===========================================================================*/
|
||||
/* */
|
||||
/* P N G _ M M X _ S U P P O R T */
|
||||
/* */
|
||||
/*===========================================================================*/
|
||||
|
||||
|
||||
//===========================================================================//
|
||||
// //
|
||||
// P N G _ M M X _ S U P P O R T //
|
||||
// //
|
||||
//===========================================================================//
|
||||
|
||||
// GRR NOTES: (1) the following code assumes 386 or better (pushfl/popfl)
|
||||
// (2) all instructions compile with gcc 2.7.2.3 and later
|
||||
// (3) the function is moved down here to prevent gcc from
|
||||
// inlining it in multiple places and then barfing be-
|
||||
// cause the ".NOT_SUPPORTED" label is multiply defined
|
||||
// [is there a way to signal that a *single* function should
|
||||
// not be inlined? is there a way to modify the label for
|
||||
// each inlined instance, e.g., by appending _1, _2, etc.?
|
||||
// maybe if don't use leading "." in label name? (nope...sigh)]
|
||||
|
||||
// GRR TO DO: make sure PNGAPI doesn't do/require anything screwy here
|
||||
// [looks OK for everybody except possibly Cygwin (__cdecl)]
|
||||
/* GRR NOTES: (1) the following code assumes 386 or better (pushfl/popfl)
|
||||
* (2) all instructions compile with gcc 2.7.2.3 and later
|
||||
* (3) the function is moved down here to prevent gcc from
|
||||
* inlining it in multiple places and then barfing be-
|
||||
* cause the ".NOT_SUPPORTED" label is multiply defined
|
||||
* [is there a way to signal that a *single* function should
|
||||
* not be inlined? is there a way to modify the label for
|
||||
* each inlined instance, e.g., by appending _1, _2, etc.?
|
||||
* maybe if don't use leading "." in label name? (nope...sigh)]
|
||||
*/
|
||||
|
||||
int PNGAPI
|
||||
png_mmx_support(void)
|
||||
{
|
||||
#if defined(PNG_MMX_CODE_SUPPORTED)
|
||||
__asm__ __volatile__ (
|
||||
"pushl %%ebx \n\t" // ebx gets clobbered by CPUID instruction
|
||||
"pushl %%ecx \n\t" // so does ecx...
|
||||
@@ -5008,15 +5151,15 @@ png_mmx_support(void)
|
||||
"movl %%eax, _mmx_supported \n\t" // save in global static variable, too
|
||||
"popl %%edx \n\t" // restore edx
|
||||
"popl %%ecx \n\t" // restore ecx
|
||||
"popl %%ebx \n\t" // restore ebx ("row" in png_do_interlace)
|
||||
"popl %%ebx \n\t" // restore ebx
|
||||
"ret \n\t" // DONE: have MMX support
|
||||
|
||||
".NOT_SUPPORTED: \n\t" // target label for jump instructions
|
||||
"movl $0, %%eax \n\t" // set return value to 0
|
||||
"movl %%eax, _mmx_supported \n\t" // save in global static variable, too
|
||||
// "movl %%eax, _mmx_supported \n\t" // save in global static variable, too
|
||||
"popl %%edx \n\t" // restore edx
|
||||
"popl %%ecx \n\t" // restore ecx
|
||||
"popl %%ebx \n\t" // restore ebx ("row" in png_do_interlace)
|
||||
"popl %%ebx \n\t" // restore ebx
|
||||
// "ret \n\t" // DONE: no MMX support
|
||||
// (fall through to standard C "ret")
|
||||
|
||||
@@ -5029,8 +5172,10 @@ png_mmx_support(void)
|
||||
// , "memory" // if write to a variable gcc thought was in a reg
|
||||
// , "cc" // "condition codes" (flag bits)
|
||||
);
|
||||
#endif /* PNG_MMX_CODE_SUPPORTED */
|
||||
|
||||
// return %%eax;
|
||||
_mmx_supported = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGGCCRD */
|
||||
#endif /* PNG_USE_PNGGCCRD */
|
||||
|
||||
Reference in New Issue
Block a user