Imported from libpng-1.0.10beta1.tar

This commit is contained in:
Glenn Randers-Pehrson
2001-03-14 07:08:39 -06:00
parent d4e8109a48
commit 1909560348
52 changed files with 1171 additions and 694 deletions

View File

@@ -6,7 +6,7 @@
* and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
* for Intel's performance analysis of the MMX vs. non-MMX code.
*
* libpng 1.0.9 - January 31, 2001
* libpng version 1.0.10beta1 - March 14, 2001
* For conditions of distribution and use, see copyright notice in png.h
* Copyright (c) 1998-2001 Glenn Randers-Pehrson
* Copyright (c) 1998, Intel Corporation
@@ -95,11 +95,11 @@
* variables, not the other way around. Hence _const4, _mask8_0, etc.
*
* 19991024:
* - fixed mmxsupport()/png_do_interlace() first-row bug
* - fixed mmxsupport()/png_do_read_interlace() first-row bug
* This one was severely weird: even though mmxsupport() doesn't touch
* ebx (where "row" pointer was stored), it nevertheless managed to zero
* the register (even in static/non-fPIC code--see below), which in turn
* caused png_do_interlace() to return prematurely on the first row of
* caused png_do_read_interlace() to return prematurely on the first row of
* interlaced images (i.e., without expanding the interlaced pixels).
* Inspection of the generated assembly code didn't turn up any clues,
* although it did point at a minor optimization (i.e., get rid of
@@ -212,6 +212,9 @@
* within MMX version of png_read_filter_row()) so no longer necessary to
* compile it into pngrutil.o
*
* 20010310:
* - fixed buffer-overrun bug in png_combine_row() C code (non-MMX)
*
* STILL TO DO:
* - test png_do_read_interlace() 64-bit case (pixel_bytes == 8)
* - write MMX code for 48-bit case (pixel_bytes == 6)
@@ -226,12 +229,18 @@
* - add support for runtime enable/disable/query of various MMX routines
*/
//#define PNG_DEBUG 2 // GRR
/*
#ifndef PNG_DEBUG
# define PNG_DEBUG 0
#endif
*/
#define PNG_INTERNAL
#include "png.h"
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGGCCRD)
#if defined(PNG_USE_PNGGCCRD)
int PNGAPI png_mmx_support(void);
#ifdef PNG_USE_LOCAL_ARRAYS
static const int FARDATA png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0};
@@ -239,8 +248,9 @@ static const int FARDATA png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
static const int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
#endif
// djgpp, Win32, and Cygwin add their own underscores to global variables,
// so define them without:
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
/* djgpp, Win32, and Cygwin add their own underscores to global variables,
* so define them without: */
#if defined(__DJGPP__) || defined(WIN32) || defined(__CYGWIN__)
# define _mmx_supported mmx_supported
# define _unmask unmask
@@ -277,7 +287,6 @@ static const int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1};
# define _pctemp pctemp
#endif
static int _mmx_supported = 2;
/* These constants are used in the inlined MMX assembly code.
Ignore gcc's "At top level: defined but not used" warnings. */
@@ -324,18 +333,24 @@ static int _dif;
static int _patemp; // temp variables for Paeth routine
static int _pbtemp;
static int _pctemp;
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
static int _mmx_supported = 2;
//===========================================================================//
// //
// P N G _ C O M B I N E _ R O W //
// //
//===========================================================================//
/*===========================================================================*/
/* */
/* P N G _ C O M B I N E _ R O W */
/* */
/*===========================================================================*/
#if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)
#define BPP2 2
#define BPP3 3 /* bytes per pixel (a.k.a. pixel_bytes) */
#define BPP4 4
#define BPP6 6 /* (defined only to help avoid cut-and-paste errors) */
#define BPP8 8
/* Combines the row recently read in with the previous row.
This routine takes care of alpha and transparency if requested.
This routine also handles the two methods of progressive display
@@ -353,7 +368,7 @@ static int _pctemp;
void /* PRIVATE */
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
{
png_debug(1,"in png_combine_row_asm\n");
png_debug(1, "in png_combine_row (pnggccrd.c)\n");
if (_mmx_supported == 2) {
png_mmx_support();
@@ -361,15 +376,15 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
if (mask == 0xff)
{
png_debug(2,"mask == 0xff: doing single png_memcpy()\n");
png_memcpy(row, png_ptr->row_buf + 1,
(png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
}
/* GRR: png_combine_row() never called with mask == 0 */
else
else /* (png_combine_row() is never called with mask == 0) */
{
switch (png_ptr->row_info.pixel_depth)
{
case 1: // png_ptr->row_info.pixel_depth
case 1: /* png_ptr->row_info.pixel_depth */
{
png_bytep sp;
png_bytep dp;
@@ -426,7 +441,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
break;
}
case 2: // png_ptr->row_info.pixel_depth
case 2: /* png_ptr->row_info.pixel_depth */
{
png_bytep sp;
png_bytep dp;
@@ -481,7 +496,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
break;
}
case 4: // png_ptr->row_info.pixel_depth
case 4: /* png_ptr->row_info.pixel_depth */
{
png_bytep sp;
png_bytep dp;
@@ -535,11 +550,12 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
break;
}
case 8: // png_ptr->row_info.pixel_depth
case 8: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported )
{
png_uint_32 len;
@@ -553,7 +569,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr = png_ptr->row_buf + 1;
dstptr = row;
len = png_ptr->width &~7; // reduce to multiple of 8
diff = png_ptr->width & 7; // amount lost
diff = (int) (png_ptr->width & 7); // amount lost
__asm__ __volatile__ (
"movd _unmask, %%mm7 \n\t" // load bit pattern
@@ -627,15 +643,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
);
}
else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
register png_uint_32 i;
png_uint_32 initial_val = png_pass_start[png_ptr->pass];
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = png_pass_inc[png_ptr->pass];
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = png_pass_width[png_ptr->pass];
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
register png_uint_32 final_val = png_ptr->width;
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
@@ -646,16 +665,30 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr += stride;
dstptr += stride;
}
} /* end of else */
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff /* *BPP1 */ ;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
} /* end of else (_mmx_supported) */
break;
} // end 8 bpp
} /* end 8 bpp */
case 16: // png_ptr->row_info.pixel_depth
case 16: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported )
{
png_uint_32 len;
@@ -669,7 +702,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr = png_ptr->row_buf + 1;
dstptr = row;
len = png_ptr->width &~7; // reduce to multiple of 8
diff = png_ptr->width & 7; // amount lost
diff = (int) (png_ptr->width & 7); // amount lost //
__asm__ __volatile__ (
"movd _unmask, %%mm7 \n\t" // load bit pattern
@@ -759,15 +792,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
);
}
else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
register png_uint_32 i;
png_uint_32 initial_val = 2 * png_pass_start[png_ptr->pass];
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
register int stride = 2 * png_pass_inc[png_ptr->pass];
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
register int rep_bytes = 2 * png_pass_width[png_ptr->pass];
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
register png_uint_32 final_val = 2 * png_ptr->width;
png_uint_32 initial_val = BPP2 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP2 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP2 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP2 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
@@ -778,16 +814,29 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr += stride;
dstptr += stride;
}
} /* end of else */
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP2;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
} /* end of else (_mmx_supported) */
break;
} // end 16 bpp
} /* end 16 bpp */
case 24: // png_ptr->row_info.pixel_depth
case 24: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported )
{
png_uint_32 len;
@@ -801,7 +850,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr = png_ptr->row_buf + 1;
dstptr = row;
len = png_ptr->width &~7; // reduce to multiple of 8
diff = png_ptr->width & 7; // amount lost
diff = (int) (png_ptr->width & 7); // amount lost //
__asm__ __volatile__ (
"movd _unmask, %%mm7 \n\t" // load bit pattern
@@ -906,15 +955,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
);
}
else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
register png_uint_32 i;
png_uint_32 initial_val = 3 * png_pass_start[png_ptr->pass];
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
register int stride = 3 * png_pass_inc[png_ptr->pass];
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
register int rep_bytes = 3 * png_pass_width[png_ptr->pass];
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
register png_uint_32 final_val = 3 * png_ptr->width;
png_uint_32 initial_val = BPP3 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP3 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP3 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP3 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
@@ -925,16 +977,29 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr += stride;
dstptr += stride;
}
} /* end of else */
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP3;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
} /* end of else (_mmx_supported) */
break;
} // end 24 bpp
} /* end 24 bpp */
case 32: // png_ptr->row_info.pixel_depth
case 32: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported )
{
png_uint_32 len;
@@ -948,7 +1013,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr = png_ptr->row_buf + 1;
dstptr = row;
len = png_ptr->width &~7; // reduce to multiple of 8
diff = png_ptr->width & 7; // amount lost
diff = (int) (png_ptr->width & 7); // amount lost //
__asm__ __volatile__ (
"movd _unmask, %%mm7 \n\t" // load bit pattern
@@ -1060,15 +1125,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
);
}
else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
register png_uint_32 i;
png_uint_32 initial_val = 4 * png_pass_start[png_ptr->pass];
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
register int stride = 4 * png_pass_inc[png_ptr->pass];
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
register int rep_bytes = 4 * png_pass_width[png_ptr->pass];
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
register png_uint_32 final_val = 4 * png_ptr->width;
png_uint_32 initial_val = BPP4 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP4 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP4 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP4 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
@@ -1079,16 +1147,29 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr += stride;
dstptr += stride;
}
} /* end of else */
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP4;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
} /* end of else (_mmx_supported) */
break;
} // end 32 bpp
} /* end 32 bpp */
case 48: // png_ptr->row_info.pixel_depth
case 48: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported )
{
png_uint_32 len;
@@ -1102,7 +1183,7 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr = png_ptr->row_buf + 1;
dstptr = row;
len = png_ptr->width &~7; // reduce to multiple of 8
diff = png_ptr->width & 7; // amount lost
diff = (int) (png_ptr->width & 7); // amount lost //
__asm__ __volatile__ (
"movd _unmask, %%mm7 \n\t" // load bit pattern
@@ -1231,15 +1312,18 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
);
}
else /* mmx _not supported - Use modified C routine */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
register png_uint_32 i;
png_uint_32 initial_val = 6 * png_pass_start[png_ptr->pass];
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
register int stride = 6 * png_pass_inc[png_ptr->pass];
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
register int rep_bytes = 6 * png_pass_width[png_ptr->pass];
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
register png_uint_32 final_val = 6 * png_ptr->width;
png_uint_32 initial_val = BPP6 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP6 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP6 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP6 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
@@ -1250,23 +1334,37 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr += stride;
dstptr += stride;
}
} /* end of else */
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP6;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
} /* end of else (_mmx_supported) */
break;
} // end 48 bpp
} /* end 48 bpp */
case 64: // png_ptr->row_info.pixel_depth
case 64: /* png_ptr->row_info.pixel_depth */
{
png_bytep srcptr;
png_bytep dstptr;
register png_uint_32 i;
png_uint_32 initial_val = 8 * png_pass_start[png_ptr->pass];
// png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0};
register int stride = 8 * png_pass_inc[png_ptr->pass];
// png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1};
register int rep_bytes = 8 * png_pass_width[png_ptr->pass];
// png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1};
register png_uint_32 final_val = 8 * png_ptr->width;
png_uint_32 initial_val = BPP8 * png_pass_start[png_ptr->pass];
/* png.c: png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; */
register int stride = BPP8 * png_pass_inc[png_ptr->pass];
/* png.c: png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; */
register int rep_bytes = BPP8 * png_pass_width[png_ptr->pass];
/* png.c: png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */
png_uint_32 len = png_ptr->width &~7; /* reduce to mult. of 8 */
int diff = (int) (png_ptr->width & 7); /* amount lost */
register png_uint_32 final_val = BPP8 * len; /* GRR bugfix */
srcptr = png_ptr->row_buf + 1 + initial_val;
dstptr = row + initial_val;
@@ -1277,12 +1375,25 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
srcptr += stride;
dstptr += stride;
}
break;
} // end 64 bpp
if (diff) /* number of leftover pixels: 3 for pngtest */
{
final_val+=diff*BPP8;
for (; i < final_val; i += stride)
{
if (rep_bytes > (int)(final_val-i))
rep_bytes = (int)(final_val-i);
png_memcpy(dstptr, srcptr, rep_bytes);
srcptr += stride;
dstptr += stride;
}
}
default: // png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64
break;
} /* end 64 bpp */
default: /* png_ptr->row_info.pixel_depth != 1,2,4,8,16,24,32,48,64 */
{
// this should never happen
/* this should never happen */
fprintf(stderr,
"libpng internal error: png_ptr->row_info.pixel_depth = %d\n",
png_ptr->row_info.pixel_depth);
@@ -1300,11 +1411,11 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask)
//===========================================================================//
// //
// P N G _ D O _ R E A D _ I N T E R L A C E //
// //
//===========================================================================//
/*===========================================================================*/
/* */
/* P N G _ D O _ R E A D _ I N T E R L A C E */
/* */
/*===========================================================================*/
#if defined(PNG_READ_INTERLACING_SUPPORTED)
#if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)
@@ -1319,9 +1430,11 @@ png_do_read_interlace(png_structp png_ptr)
png_row_infop row_info = &(png_ptr->row_info);
png_bytep row = png_ptr->row_buf + 1;
int pass = png_ptr->pass;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
png_uint_32 transformations = png_ptr->transformations;
#endif
png_debug(1,"in png_do_read_interlace\n");
png_debug(1, "in png_do_read_interlace (pnggccrd.c)\n");
if (_mmx_supported == 2) {
png_mmx_support();
@@ -1505,29 +1618,32 @@ png_do_read_interlace(png_structp png_ptr)
break;
}
//====================================================================
/*====================================================================*/
default: // 8-bit or larger (this is where the routine is modified)
default: /* 8-bit or larger (this is where the routine is modified) */
{
#if 0
// static unsigned long long _const4 = 0x0000000000FFFFFFLL; no good
// static unsigned long long const4 = 0x0000000000FFFFFFLL; no good
// unsigned long long _const4 = 0x0000000000FFFFFFLL; no good
// unsigned long long const4 = 0x0000000000FFFFFFLL; no good
#endif
png_bytep sptr, dp;
png_uint_32 i;
png_size_t pixel_bytes;
int width = row_info->width;
int width = (int)row_info->width;
pixel_bytes = (row_info->pixel_depth >> 3);
// point sptr at the last pixel in the pre-expanded row:
/* point sptr at the last pixel in the pre-expanded row: */
sptr = row + (width - 1) * pixel_bytes;
// point dp at the last pixel position in the expanded row:
/* point dp at the last pixel position in the expanded row: */
dp = row + (final_width - 1) * pixel_bytes;
// New code by Nirav Chhatrapati - Intel Corporation
/* New code by Nirav Chhatrapati - Intel Corporation */
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported )
{
//--------------------------------------------------------------
@@ -1779,7 +1895,9 @@ png_do_read_interlace(png_structp png_ptr)
*/
for (j = 0; j < png_pass_inc[pass]; j++)
{
*dp-- = *sptr;
}
--sptr;
}
}
@@ -1832,7 +1950,9 @@ png_do_read_interlace(png_structp png_ptr)
int j;
for (j = 0; j < png_pass_inc[pass]; j++)
{
*dp-- = *sptr;
}
--sptr;
}
}
@@ -1884,7 +2004,9 @@ png_do_read_interlace(png_structp png_ptr)
int j;
for (j = 0; j < png_pass_inc[pass]; j++)
{
*dp-- = *sptr;
}
--sptr;
}
}
@@ -2413,6 +2535,7 @@ png_do_read_interlace(png_structp png_ptr)
/* GRR 19991007: does it? or should pixel_bytes in each
* block be replaced with immediate value (e.g., 1)? */
/* GRR 19991017: replaced with constants in each case */
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
if (pixel_bytes == 1)
{
@@ -2420,7 +2543,9 @@ png_do_read_interlace(png_structp png_ptr)
{
int j;
for (j = 0; j < png_pass_inc[pass]; j++)
{
*dp-- = *sptr;
}
--sptr;
}
}
@@ -2463,6 +2588,14 @@ png_do_read_interlace(png_structp png_ptr)
png_memcpy(v, sptr, 4);
for (j = 0; j < png_pass_inc[pass]; j++)
{
#ifdef PNG_DEBUG
if (dp < row || dp+3 > row+png_ptr->row_buf_size)
{
printf("dp out of bounds: row=%d, dp=%d, rp=%d\n",row, dp,
row+png_ptr->row_buf_size);
printf("row_buf=%d\n",png_ptr->row_buf_size);
}
#endif
png_memcpy(dp, v, 4);
dp -= 4;
}
@@ -2499,7 +2632,7 @@ png_do_read_interlace(png_structp png_ptr)
sptr -= 8;
}
}
else // GRR: should never be reached
else /* GRR: should never be reached */
{
for (i = width; i; i--)
{
@@ -2533,6 +2666,8 @@ png_do_read_interlace(png_structp png_ptr)
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
// These variables are utilized in the functions below. They are declared
// globally here to ensure alignment on 8-byte boundaries.
@@ -2545,7 +2680,6 @@ union uAll {
//===========================================================================//
// //
// P N G _ R E A D _ F I L T E R _ R O W _ M M X _ A V G //
@@ -4739,14 +4873,16 @@ png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
} // end of png_read_filter_row_mmx_up()
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
//===========================================================================//
// //
// P N G _ R E A D _ F I L T E R _ R O W //
// //
//===========================================================================//
/*===========================================================================*/
/* */
/* P N G _ R E A D _ F I L T E R _ R O W */
/* */
/*===========================================================================*/
#if defined(PNG_HAVE_ASSEMBLER_READ_FILTER_ROW)
@@ -4760,6 +4896,7 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
char filnm[10];
#endif
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
/* GRR: these are superseded by png_ptr->asm_flags: */
#define UseMMX_sub 1 // GRR: converted 20000730
#define UseMMX_up 1 // GRR: converted 20000729
@@ -4769,9 +4906,10 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
if (_mmx_supported == 2) {
png_mmx_support();
}
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
#ifdef PNG_DEBUG
png_debug(1, "in png_read_filter_row\n");
png_debug(1, "in png_read_filter_row (pnggccrd.c)\n");
switch (filter)
{
case 0: sprintf(filnm, "none");
@@ -4800,13 +4938,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
break;
case PNG_FILTER_VALUE_SUB:
if (
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported &&
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
{
png_read_filter_row_mmx_sub(row_info, row);
}
else
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
png_uint_32 i;
png_uint_32 istop = row_info->rowbytes;
@@ -4823,13 +4963,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
break;
case PNG_FILTER_VALUE_UP:
if (
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported &&
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
{
png_read_filter_row_mmx_up(row_info, row, prev_row);
}
else
else
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
png_uint_32 i;
png_uint_32 istop = row_info->rowbytes;
@@ -4845,13 +4987,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
break;
case PNG_FILTER_VALUE_AVG:
if (
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported &&
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
{
png_read_filter_row_mmx_avg(row_info, row, prev_row);
}
else
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
png_uint_32 i;
png_bytep rp = row;
@@ -4877,13 +5021,15 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
break;
case PNG_FILTER_VALUE_PAETH:
if (
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if ( _mmx_supported &&
(row_info->pixel_depth >= PNG_MMX_BITDEPTH_THRESHOLD_DEFAULT) &&
(row_info->rowbytes >= PNG_MMX_ROWBYTES_THRESHOLD_DEFAULT))
{
png_read_filter_row_mmx_paeth(row_info, row, prev_row);
}
else
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED */
{
png_uint_32 i;
png_bytep rp = row;
@@ -4947,30 +5093,27 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
#endif /* PNG_HAVE_ASSEMBLER_READ_FILTER_ROW */
/*===========================================================================*/
/* */
/* P N G _ M M X _ S U P P O R T */
/* */
/*===========================================================================*/
//===========================================================================//
// //
// P N G _ M M X _ S U P P O R T //
// //
//===========================================================================//
// GRR NOTES: (1) the following code assumes 386 or better (pushfl/popfl)
// (2) all instructions compile with gcc 2.7.2.3 and later
// (3) the function is moved down here to prevent gcc from
// inlining it in multiple places and then barfing be-
// cause the ".NOT_SUPPORTED" label is multiply defined
// [is there a way to signal that a *single* function should
// not be inlined? is there a way to modify the label for
// each inlined instance, e.g., by appending _1, _2, etc.?
// maybe if don't use leading "." in label name? (nope...sigh)]
// GRR TO DO: make sure PNGAPI doesn't do/require anything screwy here
// [looks OK for everybody except possibly Cygwin (__cdecl)]
/* GRR NOTES: (1) the following code assumes 386 or better (pushfl/popfl)
* (2) all instructions compile with gcc 2.7.2.3 and later
* (3) the function is moved down here to prevent gcc from
* inlining it in multiple places and then barfing be-
* cause the ".NOT_SUPPORTED" label is multiply defined
* [is there a way to signal that a *single* function should
* not be inlined? is there a way to modify the label for
* each inlined instance, e.g., by appending _1, _2, etc.?
* maybe if don't use leading "." in label name? (nope...sigh)]
*/
int PNGAPI
png_mmx_support(void)
{
#if defined(PNG_MMX_CODE_SUPPORTED)
__asm__ __volatile__ (
"pushl %%ebx \n\t" // ebx gets clobbered by CPUID instruction
"pushl %%ecx \n\t" // so does ecx...
@@ -5008,15 +5151,15 @@ png_mmx_support(void)
"movl %%eax, _mmx_supported \n\t" // save in global static variable, too
"popl %%edx \n\t" // restore edx
"popl %%ecx \n\t" // restore ecx
"popl %%ebx \n\t" // restore ebx ("row" in png_do_interlace)
"popl %%ebx \n\t" // restore ebx
"ret \n\t" // DONE: have MMX support
".NOT_SUPPORTED: \n\t" // target label for jump instructions
"movl $0, %%eax \n\t" // set return value to 0
"movl %%eax, _mmx_supported \n\t" // save in global static variable, too
// "movl %%eax, _mmx_supported \n\t" // save in global static variable, too
"popl %%edx \n\t" // restore edx
"popl %%ecx \n\t" // restore ecx
"popl %%ebx \n\t" // restore ebx ("row" in png_do_interlace)
"popl %%ebx \n\t" // restore ebx
// "ret \n\t" // DONE: no MMX support
// (fall through to standard C "ret")
@@ -5029,8 +5172,10 @@ png_mmx_support(void)
// , "memory" // if write to a variable gcc thought was in a reg
// , "cc" // "condition codes" (flag bits)
);
#endif /* PNG_MMX_CODE_SUPPORTED */
// return %%eax;
_mmx_supported = 0;
return 0;
}
#endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGGCCRD */
#endif /* PNG_USE_PNGGCCRD */