diff --git a/README.txt b/README.txt index f327eeba7..1ea87fe8b 100644 --- a/README.txt +++ b/README.txt @@ -1,6 +1,8 @@ Pngcrush documentation -This is the copyright notice, disclaimer, and license: +This is a copy of the copyright notice, disclaimer, and license, for +your convenience (the actual notice appears in the file pngcrush.c; in +case of any discrepancy, the copy in pngcrush.c shall prevail): /* * COPYRIGHT NOTICE, DISCLAIMER, AND LICENSE: @@ -38,12 +40,13 @@ This is the copyright notice, disclaimer, and license: This is the output of "pngcrush" and "pngcrush -help": - | pngcrush 1.5.1, Copyright (C) 1998, 1999, 2000 Glenn Randers-Pehrson + + | pngcrush 1.5.2, Copyright (C) 1998, 1999, 2000 Glenn Randers-Pehrson | This is a free, open-source program. Permission is | irrevocably granted to everyone to use this version | of pngcrush without payment of any fee. - | This program was built with libpng version 1.0.8, - | and is running with libpng version 1.0.8 - July 24, 2000 (header) + | This program was built with libpng version 1.0.9beta2, + | and is running with libpng version 1.0.9beta2 - November 19, 2000 (header) | Copyright (C) 1998, 1999, 2000 Glenn Randers-Pehrson, | Copyright (C) 1996, 1997 Andreas Dilger, | Copyright (C) 1995, Guy Eric Schalnat, Group 42 Inc., @@ -66,14 +69,16 @@ options: -fix (fix otherwise fatal conditions such as bad CRCs) -force (Write a new output file even if larger than input) -g gamma (float or fixed*100000, e.g., 0.45455 or 45455) + -iccp length "Profile Name" iccp_file -itxt b[efore_IDAT]|a[fter_IDAT] "keyword" "text" -l zlib_compression_level [0-9] -m method [0 through 200] - -max maximum_IDAT_size [1 through 524288] + -max maximum_IDAT_size [default 524288] -no_cc (no color counting) -n (no save; does not do compression or write output PNG) -plte_len n (truncate PLTE) -q (quiet) + -reduce (do lossless color type or bit depth reduction) -rem chunkname (or "alla" or "allb") -replace_gamma gamma (float or fixed*100000) 
even if gAMA is present. -res dpi @@ -86,29 +91,16 @@ options: -h (help and legal notices) -p (pause) +options (Note: any option can be spelled out for clarity, e.g., + "pngcrush -dir New -method 7 -remove bkgd *.png" + is the same as "pngcrush -d New -m 7 -rem bkgd *.png"): - | pngcrush 1.5.1, Copyright (C) 1998, 1999, 2000 Glenn Randers-Pehrson - | This is a free, open-source program. Permission is - | irrevocably granted to everyone to use this version - | of pngcrush without payment of any fee. - | This program was built with libpng version 1.0.8, - | and is running with libpng version 1.0.8 - July 24, 2000 (header) - | Copyright (C) 1998, 1999, 2000 Glenn Randers-Pehrson, - | Copyright (C) 1996, 1997 Andreas Dilger, - | Copyright (C) 1995, Guy Eric Schalnat, Group 42 Inc., - | and zlib version 1.1.3pc, Copyright (C) 1998, - | Jean-loup Gailly and Mark Adler. - - -usage: pngcrush [options] infile.png outfile.png - pngcrush -e ext [other options] files.png ... - pngcrush -d dir [other options] files.png ... - -options: -already already_crushed_size [e.g., 8192] If file has an IDAT greater than this size, it - will be considered to be already crushed. + will be considered to be already crushed and will + not be processed, unless you are making other changes + or the "-force" option is present. -brute (Use brute-force, try 114 different methods [11-124]) @@ -168,6 +160,10 @@ options: file has no gAMA chunk. To replace an existing gAMA chunk, use the '-replace_gamma' option. + -iccp length "Profile Name" iccp_file + + file with ICC profile to insert in an iCCP chunk. + -itxt b[efore_IDAT]|a[fter_IDAT] "keyword" "text" Uncompressed iTXt chunk to insert (see -text). @@ -188,7 +184,7 @@ options: 1, 4, and 7 use no filtering; methods 11 and up use specified filter, compression level, and strategy. 
- -max maximum_IDAT_size [1 through 524288] + -max maximum_IDAT_size [default 524288] -no_cc (no color counting) @@ -199,11 +195,14 @@ options: -plte_len n (truncate PLTE) Truncates the PLTE. Be sure not to truncate it to - less than the greatest index present in IDAT. -q (quiet) + -reduce (do lossless color type or bit depth reduction) + + (if possible) + -rem chunkname (or "alla" or "allb") Name of an ancillary chunk or optional PLTE to be @@ -249,6 +248,9 @@ options: -version (display the pngcrush version) + Look for the most recent version of pngcrush at + http://pmt.sourceforge.net + -w compression_window_size [32, 16, 8, 4, 2, 1, 512] Size of the sliding compression window, in kbytes @@ -262,6 +264,8 @@ options: zlib compression strategy to use with the preceding '-m method' argument. + -zmem zlib_compression_mem_level [1-9, default 9] + -zitxt b[efore_IDAT]|a[fter_IDAT] "keyword" "text" Compressed iTXt chunk to insert (see -text). @@ -280,3 +284,4 @@ options: e.g., type 'pngcrush -pause -help', if the help screen scrolls out of sight. + diff --git a/compress.c b/compress.c new file mode 100644 index 000000000..1cee47091 --- /dev/null +++ b/compress.c @@ -0,0 +1,68 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. 
+ + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ +int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; + int level; +{ + z_stream stream; + int err; + + stream.next_in = (Bytef*)source; + stream.avail_in = (uInt)sourceLen; +#ifdef MAXSEG_64K + /* Check for source > 64K on 16-bit machine: */ + if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR; +#endif + stream.next_out = dest; + stream.avail_out = (uInt)*destLen; + if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + err = deflate(&stream, Z_FINISH); + if (err != Z_STREAM_END) { + deflateEnd(&stream); + return err == Z_OK ? Z_BUF_ERROR : err; + } + *destLen = stream.total_out; + + err = deflateEnd(&stream); + return err; +} + +/* =========================================================================== + */ +int ZEXPORT compress (dest, destLen, source, sourceLen) + Bytef *dest; + uLongf *destLen; + const Bytef *source; + uLong sourceLen; +{ + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} diff --git a/png.c b/png.c index 1b2531cd1..164ebf382 100644 --- a/png.c +++ b/png.c @@ -1,7 +1,7 @@ /* png.c - location for general purpose libpng functions * - * libpng version 1.0.8 - July 24, 2000 + * libpng version 1.0.9beta2 - November 19, 2000 * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) @@ -13,18 +13,18 @@ #include "png.h" /* Generate a compiler error if there is an old png.h in the search path. 
*/ -typedef version_1_0_8 Your_png_h_is_not_version_1_0_8; +typedef version_1_0_9beta2 Your_png_h_is_not_version_1_0_9beta2; /* Version information for C files. This had better match the version * string defined in png.h. */ #ifdef PNG_USE_GLOBAL_ARRAYS /* png_libpng_ver was changed to a function in version 1.0.5c */ -char png_libpng_ver[12] = "1.0.8"; +const char png_libpng_ver[18] = "1.0.9beta2"; /* png_sig was changed to a function in version 1.0.5c */ /* Place to hold the signature string for a PNG file. */ -png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10}; +const png_byte FARDATA png_sig[8] = {137, 80, 78, 71, 13, 10, 26, 10}; /* Invoke global declarations for constant strings for known chunk types */ PNG_IHDR; @@ -52,32 +52,33 @@ PNG_zTXt; /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */ /* start of interlace block */ -int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; +const int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; /* offset to next interlace block */ -int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; +const int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; /* start of interlace block in the y direction */ -int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1}; +const int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1}; /* offset to next interlace block in the y direction */ -int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2}; +const int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2}; /* width of interlace block (used in assembler routines only) */ #ifdef PNG_HAVE_ASSEMBLER_COMBINE_ROW -int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; +const int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; #endif /* Height of interlace block. 
This is not currently used - if you need * it, uncomment it here and in png.h -int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1}; +const int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1}; */ /* Mask to determine which pixels are valid in a pass */ -int FARDATA png_pass_mask[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff}; +const int FARDATA png_pass_mask[] = {0x80, 0x08, 0x88, 0x22, 0xaa, 0x55, 0xff}; /* Mask to determine which pixels to overwrite while displaying */ -int FARDATA png_pass_dsp_mask[] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff}; +const int FARDATA png_pass_dsp_mask[] + = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff}; #endif @@ -625,7 +626,7 @@ png_charp PNGAPI png_get_copyright(png_structp png_ptr) { if (png_ptr != NULL || png_ptr == NULL) /* silence compiler warning */ - return ((png_charp) "\n libpng version 1.0.8 - July 24, 2000\n\ + return ((png_charp) "\n libpng version 1.0.9beta2 - November 19, 2000\n\ Copyright (c) 1998-2000 Glenn Randers-Pehrson\n\ Copyright (c) 1996, 1997 Andreas Dilger\n\ Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.\n"); @@ -643,8 +644,8 @@ png_get_libpng_ver(png_structp png_ptr) { /* Version of *.c files used when building libpng */ if(png_ptr != NULL) /* silence compiler warning about unused png_ptr */ - return((png_charp) "1.0.8"); - return((png_charp) "1.0.8"); + return((png_charp) "1.0.9beta2"); + return((png_charp) "1.0.9beta2"); } png_charp PNGAPI @@ -689,9 +690,10 @@ png_reset_zstream(png_structp png_ptr) return (inflateReset(&png_ptr->zstream)); } +/* This function was added to libpng-1.0.7 */ png_uint_32 PNGAPI png_access_version_number(void) { /* Version of *.c files used when building libpng */ - return((png_uint_32) 10008L); + return((png_uint_32) 10009L); } diff --git a/png.h b/png.h index 0d8e0dc1b..ca4c8a945 100644 --- a/png.h +++ b/png.h @@ -1,7 +1,7 @@ /* png.h - header file for PNG reference library * - * libpng version 1.0.8 - July 24, 2000 + * libpng version 1.0.9beta2 - November 19, 
2000 * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) * (Version 0.88 Copyright (c) 1995, 1996 Guy Eric Schalnat, Group 42, Inc.) @@ -9,7 +9,7 @@ * Authors and maintainers: * libpng versions 0.71, May 1995, through 0.88, January 1996: Guy Schalnat * libpng versions 0.89c, June 1996, through 0.96, May 1997: Andreas Dilger - * libpng versions 0.97, January 1998, through 1.0.8 - July 24, 2000: Glenn + * libpng versions 0.97, January 1998, through 1.0.9beta2 - November 19, 2000: Glenn * See also "Contributing Authors", below. * * Note about libpng version numbers: @@ -58,6 +58,7 @@ * 1.0.8beta1-4 10008 2.1.0.8beta1-4 * 1.0.8rc1 10008 2.1.0.8rc1 * 1.0.8 10008 2.1.0.8 + * 1.0.9beta1-2 10009 2.1.0.9 * * Henceforth the source version will match the shared-library major * and minor numbers; the shared-library major version number will be @@ -84,7 +85,7 @@ * If you modify libpng you may insert additional notices immediately following * this sentence. * - * libpng versions 1.0.7, July 1, 2000, through 1.0.8, July 24, 2000, are + * libpng versions 1.0.7, July 1, 2000, through 1.0.9beta2, November 19, 2000, are * Copyright (c) 2000 Glenn Randers-Pehrson, and are * distributed according to the same disclaimer and license as libpng-1.0.6 * with the following individuals added to the list of Contributing Authors @@ -189,13 +190,13 @@ * Y2K compliance in libpng: * ========================= * - * July 24, 2000 + * November 19, 2000 * * Since the PNG Development group is an ad-hoc body, we can't make * an official declaration. * * This is your unofficial assurance that libpng from version 0.71 and - * upward through 1.0.8 are Y2K compliant. It is my belief that earlier + * upward through 1.0.9beta2 are Y2K compliant. It is my belief that earlier * versions were also Y2K compliant. * * Libpng only has three year fields. 
One is a 2-byte unsigned integer @@ -251,26 +252,25 @@ */ /* Version information for png.h - this should match the version in png.c */ -#define PNG_LIBPNG_VER_STRING "1.0.8" +#define PNG_LIBPNG_VER_STRING "1.0.9beta2" #define PNG_LIBPNG_VER_SONUM 2 /* These should match the first 3 components of PNG_LIBPNG_VER_STRING: */ #define PNG_LIBPNG_VER_MAJOR 1 #define PNG_LIBPNG_VER_MINOR 0 -#define PNG_LIBPNG_VER_RELEASE 8 +#define PNG_LIBPNG_VER_RELEASE 9 /* This should match the numeric part of the final component of * PNG_LIBPNG_VER_STRING, omitting any leading zero: */ -#define PNG_LIBPNG_VER_BUILD 0 +#define PNG_LIBPNG_VER_BUILD 2 +#define PNG_LIBPNG_BUILD_TYPE beta /* alpha, beta, rc, stable, patched */ /* Careful here. At one time, Guy wanted to use 082, but that would be octal. * We must not include leading zeros. * Versions 0.7 through 1.0.0 were in the range 0 to 100 here (only * version 1.0.0 was mis-numbered 100 instead of 10000). From * version 1.0.1 it's xxyyzz, where x=major, y=minor, z=release */ -#define PNG_LIBPNG_VER 10008 /* 1.0.8 */ - -/* Note to maintainer: update this number in scripts/pngdef.pas as well */ +#define PNG_LIBPNG_VER 10009 /* 1.0.9 */ #ifndef PNG_VERSION_INFO_ONLY @@ -302,8 +302,8 @@ extern "C" { * the version above. */ #ifdef PNG_USE_GLOBAL_ARRAYS -PNG_EXPORT_VAR (char) png_libpng_ver[12]; /* need room for 99.99.99-patch-aa0*/ - /* Note to maintainer: increase to 18 at the next opportunity */ +PNG_EXPORT_VAR (const char) png_libpng_ver[18]; + /* need room for 99.99.99beta99z*/ #else #define png_libpng_ver png_get_header_ver(NULL) #endif @@ -311,17 +311,17 @@ PNG_EXPORT_VAR (char) png_libpng_ver[12]; /* need room for 99.99.99-patch-aa0*/ #ifdef PNG_USE_GLOBAL_ARRAYS /* This was removed in version 1.0.5c */ /* Structures to facilitate easy interlacing. 
See png.c for more details */ -PNG_EXPORT_VAR (int FARDATA) png_pass_start[7]; -PNG_EXPORT_VAR (int FARDATA) png_pass_inc[7]; -PNG_EXPORT_VAR (int FARDATA) png_pass_ystart[7]; -PNG_EXPORT_VAR (int FARDATA) png_pass_yinc[7]; -PNG_EXPORT_VAR (int FARDATA) png_pass_mask[7]; -PNG_EXPORT_VAR (int FARDATA) png_pass_dsp_mask[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_start[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_inc[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_ystart[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_yinc[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_mask[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_dsp_mask[7]; #ifdef PNG_HAVE_ASSEMBLER_COMBINE_ROW -extern int FARDATA png_pass_width[7]; /* now used in pngvcrd.c, pnggccrd.c */ +PNG_EXPORT_VAR (const int FARDATA) png_pass_width[7]; #endif /* This isn't currently used. If you need it, see png.c for more details. -extern int FARDATA png_pass_height[7]; +PNG_EXPORT_VAR (const int FARDATA) png_pass_height[7]; */ #endif @@ -523,6 +523,7 @@ typedef struct png_info_struct png_uint_16 num_trans; /* number of transparent palette color (tRNS) */ png_byte bit_depth; /* 1, 2, 4, 8, or 16 bits/channel (from IHDR) */ png_byte color_type; /* see PNG_COLOR_TYPE_ below (from IHDR) */ + /* The following three should have been named *_method not *_type */ png_byte compression_type; /* must be PNG_COMPRESSION_TYPE_BASE (IHDR) */ png_byte filter_type; /* must be PNG_FILTER_TYPE_BASE (from IHDR) */ png_byte interlace_type; /* One of PNG_INTERLACE_NONE, PNG_INTERLACE_ADAM7 */ @@ -764,6 +765,9 @@ typedef png_info FAR * FAR * png_infopp; #define PNG_COLOR_TYPE_RGB (PNG_COLOR_MASK_COLOR) #define PNG_COLOR_TYPE_RGB_ALPHA (PNG_COLOR_MASK_COLOR | PNG_COLOR_MASK_ALPHA) #define PNG_COLOR_TYPE_GRAY_ALPHA (PNG_COLOR_MASK_ALPHA) +/* aliases */ +#define PNG_COLOR_TYPE_RGBA PNG_COLOR_TYPE_RGB_ALPHA +#define PNG_COLOR_TYPE_GA PNG_COLOR_TYPE_GRAY_ALPHA /* This is for compression type. 
PNG 1.0-1.2 only define the single type. */ #define PNG_COMPRESSION_TYPE_BASE 0 /* Deflate method 8, 32K window */ @@ -904,6 +908,10 @@ typedef void (PNGAPI *png_unknown_chunk_ptr) PNGARG((png_structp)); #define PNG_TRANSFORM_INVERT_ALPHA 0x0400 /* read and write */ #define PNG_TRANSFORM_STRIP_FILLER 0x0800 /* WRITE only */ +/* Flags for MNG supported features */ +#define PNG_FLAG_MNG_EMPTY_PLTE 0x01 +#define PNG_ALL_MNG_FEATURES 0x01 + typedef png_voidp (*png_malloc_ptr) PNGARG((png_structp, png_size_t)); typedef void (*png_free_ptr) PNGARG((png_structp, png_voidp)); @@ -1137,9 +1145,11 @@ struct png_struct_def png_uint_16 rgb_to_gray_blue_coeff; #endif -#if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \ +#if defined(PNG_MNG_FEATURES_SUPPORTED) || \ + defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \ defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED) - png_byte empty_plte_permitted; +/* Note to maintainer: change this to png_uint_32 at next opportunity */ + png_byte mng_features_permitted; #endif #if defined(PNG_READ_GAMMA_SUPPORTED) || defined(PNG_READ_BACKGROUND_SUPPORTED) @@ -1149,9 +1159,9 @@ struct png_struct_def }; /* This prevents a compiler error in png_get_copyright() in png.c if png.c -and png.h are both at * version 1.0.8 +and png.h are both at * version 1.0.9beta2 */ -typedef png_structp version_1_0_8; +typedef png_structp version_1_0_9beta2; typedef png_struct FAR * FAR * png_structpp; @@ -1387,6 +1397,7 @@ extern PNG_EXPORT(void,png_set_gamma) PNGARG((png_structp png_ptr, #if defined(PNG_READ_EMPTY_PLTE_SUPPORTED) || \ defined(PNG_WRITE_EMPTY_PLTE_SUPPORTED) /* Permit or disallow empty PLTE (0: not permitted, 1: permitted) */ +/* Deprecated and will be removed. Use png_permit_mng_features() instead. 
*/ extern PNG_EXPORT(void,png_permit_empty_plte) PNGARG((png_structp png_ptr, int empty_plte_permitted)); #endif @@ -1928,12 +1939,13 @@ extern PNG_EXPORT(void,png_set_hIST) PNGARG((png_structp png_ptr, extern PNG_EXPORT(png_uint_32,png_get_IHDR) PNGARG((png_structp png_ptr, png_infop info_ptr, png_uint_32 *width, png_uint_32 *height, - int *bit_depth, int *color_type, int *interlace_type, - int *compression_type, int *filter_type)); + int *bit_depth, int *color_type, int *interlace_method, + int *compression_method, int *filter_method)); extern PNG_EXPORT(void,png_set_IHDR) PNGARG((png_structp png_ptr, png_infop info_ptr, png_uint_32 width, png_uint_32 height, int bit_depth, - int color_type, int interlace_type, int compression_type, int filter_type)); + int color_type, int interlace_method, int compression_method, + int filter_method)); #if defined(PNG_READ_oFFs_SUPPORTED) extern PNG_EXPORT(png_uint_32,png_get_oFFs) PNGARG((png_structp png_ptr, @@ -2117,11 +2129,11 @@ extern PNG_EXPORT(void, png_set_invalid) PNGARG((png_structp png_ptr, extern PNG_EXPORT(void, png_read_png) PNGARG((png_structp png_ptr, png_infop info_ptr, int transforms, - voidp params)); + png_voidp params)); extern PNG_EXPORT(void, png_write_png) PNGARG((png_structp png_ptr, png_infop info_ptr, int transforms, - voidp params)); + png_voidp params)); #endif /* Define PNG_DEBUG at compile time for debugging information. 
Higher @@ -2182,8 +2194,13 @@ extern PNG_EXPORT(png_charp,png_get_header_ver) PNGARG((png_structp png_ptr)); extern PNG_EXPORT(png_charp,png_get_header_version) PNGARG((png_structp png_ptr)); extern PNG_EXPORT(png_charp,png_get_libpng_ver) PNGARG((png_structp png_ptr)); +#ifdef PNG_MNG_FEATURES_SUPPORTED +extern PNG_EXPORT(png_uint_32,png_permit_mng_features) PNGARG((png_structp + png_ptr, png_uint_32 mng_features_permitted)); +#endif + #define PNG_HEADER_VERSION_STRING \ - " libpng version 1.0.8 - July 24, 2000 (header)\n" + " libpng version 1.0.9beta2 - November 19, 2000 (header)\n" #ifdef PNG_READ_COMPOSITE_NODIV_SUPPORTED /* With these routines we avoid an integer divide, which will be slower on @@ -2330,7 +2347,7 @@ extern PNG_EXPORT(png_charp,png_get_libpng_ver) PNGARG((png_structp png_ptr)); #if !defined(PNG_NO_EXTERN) || defined(PNG_ALWAYS_EXTERN) /* place to hold the signature string for a PNG file. */ #ifdef PNG_USE_GLOBAL_ARRAYS - PNG_EXPORT_VAR (png_byte FARDATA) png_sig[8]; + PNG_EXPORT_VAR (const png_byte FARDATA) png_sig[8]; #else #define png_sig png_sig_bytes(NULL) #endif @@ -2516,8 +2533,8 @@ PNG_EXTERN void png_write_sig PNGARG((png_structp png_ptr)); */ PNG_EXTERN void png_write_IHDR PNGARG((png_structp png_ptr, png_uint_32 width, png_uint_32 height, - int bit_depth, int color_type, int compression_type, int filter_type, - int interlace_type)); + int bit_depth, int color_type, int compression_method, int filter_method, + int interlace_method)); PNG_EXTERN void png_write_PLTE PNGARG((png_structp png_ptr, png_colorp palette, png_uint_32 num_pal)); diff --git a/pngasmrd.h b/pngasmrd.h index 35fe3b242..0293920bb 100644 --- a/pngasmrd.h +++ b/pngasmrd.h @@ -1,6 +1,6 @@ /* pngasmrd.h - assembler version of utilities to read a PNG file * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1999, 2000 Glenn Randers-Pehrson * diff --git 
a/pngconf.h b/pngconf.h index 4769cdf5f..9b64657f2 100644 --- a/pngconf.h +++ b/pngconf.h @@ -1,6 +1,6 @@ /* pngconf.h - machine configurable file for libpng * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -400,6 +400,7 @@ #define PNG_NO_WRITE_USER_TRANSFORM #define PNG_NO_USER_MEM #define PNG_NO_READ_EMPTY_PLTE +#define PNG_NO_MNG_FEATURES #define PNG_NO_FIXED_POINT_SUPPORTED #endif @@ -492,8 +493,12 @@ #endif #endif +#ifndef PNG_NO_MNG_FEATURES +#define PNG_MNG_FEATURES_SUPPORTED /* Useful for MNG applications */ +#endif +/* Deprecated, will be removed */ #ifndef PNG_NO_READ_EMPTY_PLTE -#define PNG_READ_EMPTY_PLTE_SUPPORTED /* useful for MNG applications */ +#define PNG_READ_EMPTY_PLTE_SUPPORTED #endif #ifdef PNG_WRITE_TRANSFORMS_SUPPORTED @@ -549,8 +554,9 @@ defined(PNG_WRITE_USER_TRANSFORM_SUPPORTED) #define PNG_WRITE_FLUSH_SUPPORTED #endif +/* Deprecated, see PNG_MNG_FEATURES_SUPPORTED, above */ #ifndef PNG_NO_WRITE_EMPTY_PLTE -#define PNG_WRITE_EMPTY_PLTE_SUPPORTED /* useful for MNG applications */ +#define PNG_WRITE_EMPTY_PLTE_SUPPORTED #endif #ifndef PNG_NO_STDIO @@ -1134,6 +1140,13 @@ typedef z_stream FAR * png_zstreamp; #endif #endif +#ifndef PNGAPI +# define PNGAPI +#endif +#ifndef PNG_IMPEXP +# define PNG_IMPEXP +#endif + #ifndef PNG_EXPORT # define PNG_EXPORT(type,symbol) PNG_IMPEXP type PNGAPI symbol #endif diff --git a/pngcrush.c b/pngcrush.c index 12f8b1f00..5272c7e3a 100644 --- a/pngcrush.c +++ b/pngcrush.c @@ -1,9 +1,12 @@ /* pngcrush.c - recompresses png files * Copyright (C) 1998, 1999, 2000 Glenn Randers-Pehrson (randeg@alum.rpi.edu) * + * The most recent version of pngcrush can be found at + * http://pmt.sourceforge.net/pngcrush/ + * * This program reads in a PNG image, and writes it out again, with the - * optimum filter_type and 
zlib_level. It uses brute force (trying - * filter_type none, and libpng adaptive filtering, with compression + * optimum filter_method and zlib_level. It uses brute force (trying + * filter_method none, and libpng adaptive filtering, with compression * levels 3 and 9). It does the most time-consuming method last in case * it turns out to be the best. * @@ -14,9 +17,13 @@ * * Thanks to Greg Roelofs for various bug fixes, suggestions, and * occasionally creating Linux executables. + * + * Thanks to Stephan Levavej for some helpful suggestions about gcc compiler + * options and for a suggestion to increase the Z_MEM_LEVEL from default. + * */ -#define PNGCRUSH_VERSION "1.5.1" +#define PNGCRUSH_VERSION "1.5.2" /* */ @@ -57,6 +64,26 @@ */ /* Change log: + * + * Version 1.5.2 (built with libpng-1.0.9beta1) + * + * Added "-iccp" option. + * + * Increased the zlib memory level, which improves compression (typically + * about 1.3 percent for photos) at the expense of increased memory usage. + * + * Enabled the "-max max_idat_size" option, even when max_idat_size + * exceeds the default 1/2 megabyte size. + * + * Added missing "png_ptr" argument to png_error() call + * + * Revised the "-help" output slightly and improved the "-version" output. + * + * The "-already[_crushed]" option is now ignored if the "-force" option + * is present or if chunks are being added, deleted, or modified. + * + * Improved "things_have_changed" behavior (now, when set in a particular + * file, it is not set for all remaining files) * * Version 1.5.1 (built with libpng-1.0.8) * @@ -272,26 +299,31 @@ /* To do: * - * Version 1.4.*: check for unused alpha channel and ok-to-reduce-depth. - * Rearrange palette to put most-used color first and transparent color - * second (see ImageMagick 5.1.1 and later). - * Finish pplt (partial palette) feature. + * Check for unused alpha channel and ok-to-reduce-depth. 
* Take care that sBIT and bKGD data aren't lost when reducing images * from truecolor to grayscale. * - * Version 1.4.*: Use an alternate write function for the trial passes, that + * Rearrange palette to put most-used color first and transparent color + * second (see ImageMagick 5.1.1 and later). + * + * Finish pplt (partial palette) feature. + * + * Use an alternate write function for the trial passes, that * simply counts bytes rather than actually writing to a file, to save wear * and tear on disk drives. * - * Version 1.4.*: Allow in-place file replacement or as a filter, as in + * Allow in-place file replacement or as a filter, as in * "pngcrush -overwrite file.png" * "pngcreator | pngcrush > output.png" * - * Version 1.4.*: Remove text-handling and color-handling features and put + * Remove text-handling and color-handling features and put * those in a separate program or programs, to avoid unnecessary * recompressing. * + * Move the Photoshop-fixing stuff into a separate program. 
+ * * add "-time" directive + * */ #define PNG_INTERNAL @@ -301,6 +333,11 @@ * so they are ifdef'ed out in a special version of pngconf.h, which * includes pngcrush.h and is included by png.h */ +/* defined so I can write to a file on gui/windowing platforms */ +/* #define STDERR stderr */ +#define STDERR stdout /* for DOS */ + + #ifndef PNGCRUSH_LIBPNG_VER # define PNGCRUSH_LIBPNG_VER PNG_LIBPNG_VER #endif @@ -427,10 +464,6 @@ main() # define TIME_T float #endif -/* defined so I can write to a file on gui/windowing platforms */ -/* #define STDERR stderr */ -#define STDERR stdout /* for DOS */ - /* input and output filenames */ static PNG_CONST char *progname = "pngtest" DOT "png"; static PNG_CONST char *inname = "pngtest" DOT "png"; @@ -438,6 +471,7 @@ static PNG_CONST char *outname = "pngout" DOT "png"; static PNG_CONST char *directory_name = "pngcrush" DOT "bak"; static PNG_CONST char *extension = "_C" DOT "png"; + static png_uint_32 width, height; static png_uint_32 measured_idat_length; static int pngcrush_must_exit=0; @@ -465,6 +499,15 @@ char text_keyword[800]; char text_lang[800]; char text_lang_key[800]; #endif +#if (PNG_LIBPNG_VER < 10009) +#undef PNG_iCCP_SUPPORTED +#endif +#ifdef PNG_iCCP_SUPPORTED +int iccp_length = 0; +char *iccp_text; +char *iccp_file; +char iccp_name[80]; +#endif int best; char buffer[256]; @@ -498,8 +541,10 @@ static int verbose=1; static int help=0; static int fix=0; static int things_have_changed=0; +static int global_things_have_changed=0; static int default_compression_window=15; static int force_compression_window=0; +static int compression_mem_level=9; static int final_method=0; static int brute_force=0; static int brute_force_level=0; @@ -562,6 +607,7 @@ static png_infop write_end_info_ptr; static FILE *fpin, *fpout; png_uint_32 measure_idats(FILE *fpin); static int do_color_count; +static int reduction_ok=0; #ifdef PNGCRUSH_COUNT_COLORS int count_colors(FILE *fpin); static int num_rgba, reduce_to_gray, it_is_opaque; @@ 
-571,11 +617,15 @@ png_uint_32 png_measure_idat(png_structp png_ptr); # define MAX_METHODSP1 201 # define DEFAULT_METHODS 10 static png_uint_32 idat_length[MAX_METHODSP1]; -static int filter_method, zlib_level; +static int filter_type, zlib_level; static png_bytep png_row_filters=NULL; static TIME_T t_start, t_stop, t_decode, t_encode, t_misc; +#if (PNG_LIBPNG_VER >= 10000) +static png_uint_32 max_idat_size = 524288; +#else static png_uint_32 max_idat_size = PNG_ZBUF_SIZE; +#endif static png_uint_32 crushed_idat_size = 0x3ffffffL; static int already_crushed = 0; int ia; @@ -622,8 +672,8 @@ png_set_compression_buffer_size(png_structp png_ptr, png_uint_32 size) png_free(png_ptr, png_ptr->zbuf); png_ptr->zbuf=NULL; png_ptr->zbuf_size = (png_size_t)size; png_ptr->zbuf = (png_bytep)png_malloc(png_ptr, size); - if(png_ptr->zbuf) - png_error("Unable to malloc zbuf"); + if(!png_ptr->zbuf) + png_error(png_ptr,"Unable to malloc zbuf"); } #if defined(PNG_UNKNOWN_CHUNKS_SUPPORTED) @@ -1096,7 +1146,7 @@ main(int argc, char *argv[]) else if(!strncmp(argv[i],"-dou",4)) { double_gamma++; - things_have_changed=1; + global_things_have_changed=1; } #endif else if(!strncmp(argv[i],"-d",2)) @@ -1114,7 +1164,7 @@ main(int argc, char *argv[]) extension= argv[names++]; } else if(!strncmp(argv[i],"-force",6)) - things_have_changed=1; + global_things_have_changed=1; else if(!strncmp(argv[i],"-fix",4)) fix++; else if(!strncmp(argv[i],"-f",2)) @@ -1231,12 +1281,46 @@ main(int argc, char *argv[]) help++; verbose++; } + else if(!strncmp(argv[i],"-iccp",5)) + { +#ifdef PNG_iCCP_SUPPORTED + FILE *iccp_fn; + if(iccp_length) + free(iccp_text); + iccp_length=atoi(argv[++i]); + names+=3; + strcpy(iccp_name,argv[++i]); + iccp_file=argv[++i]; + if ((iccp_fn = FOPEN(iccp_file, "rb")) == NULL) + { + fprintf(STDERR, "Could not find file: %s\n", iccp_file); + iccp_length=0; + } + else + { + int ic; + iccp_text=malloc(iccp_length+1); + iccp_text[iccp_length]=(char)0x00; + for (ic=0; ic PNG_ZBUF_SIZE) 
max_idat_size=PNG_ZBUF_SIZE; } else if(!strncmp(argv[i],"-m",2)) { @@ -1269,7 +1353,7 @@ main(int argc, char *argv[]) do_pplt++; BUMP_I; strcpy(pplt_string,argv[i]); - things_have_changed=1; + global_things_have_changed=1; } else if(!strncmp(argv[i],"-p",2)) { @@ -1277,6 +1361,10 @@ main(int argc, char *argv[]) } else if(!strncmp(argv[i],"-q",2)) verbose=0; + else if(!strncmp(argv[i],"-reduce",7)) + { + reduction_ok++; + } #ifdef PNG_gAMA_SUPPORTED else if(!strncmp(argv[i],"-rep",4)) { @@ -1309,7 +1397,7 @@ main(int argc, char *argv[]) force_specified_gamma=atof(argv[i]); #endif } - things_have_changed=1; + global_things_have_changed=1; } #endif #ifdef PNG_pHYs_SUPPORTED @@ -1318,6 +1406,7 @@ main(int argc, char *argv[]) names++; BUMP_I; resolution=atoi(argv[i]); + global_things_have_changed=1; } #endif #ifdef PNGCRUSH_MULTIPLE_ROWS @@ -1448,11 +1537,14 @@ main(int argc, char *argv[]) #endif else if(!strncmp(argv[i],"-version",8)) { - fprintf(STDERR,"libpng "); + fprintf(STDERR, " pngcrush "); + fprintf(STDERR, PNGCRUSH_VERSION ); + fprintf(STDERR,", uses libpng "); fprintf(STDERR, PNG_LIBPNG_VER_STRING ); - fprintf(STDERR,", uses zlib "); + fprintf(STDERR,"and zlib "); fprintf(STDERR, ZLIB_VERSION ); - fprintf(STDERR,"\n"); + fprintf(STDERR, "\n Check http://pmt.sourceforge.net\n"); + fprintf(STDERR, " for the most recent version.\n"); } else if(!strncmp(argv[i],"-v",2)) { @@ -1464,6 +1556,11 @@ main(int argc, char *argv[]) force_compression_window++; names++; } + else if(!strncmp(argv[i],"-zm",3)) + { + compression_mem_level=atoi(argv[++i]); + names++; + } else if(!strncmp(argv[i],"-z",2)) { int lev, strat, filt; @@ -1593,7 +1690,12 @@ main(int argc, char *argv[]) if(verbose > 1) { png_crush_pause(); - fprintf(STDERR, "\noptions:\n"); + fprintf(STDERR, + "\noptions (Note: any option can be spelled out for clarity, e.g.,\n"); + fprintf(STDERR, + " \"pngcrush -dir New -method 7 -remove bkgd *.png\"\n"); + fprintf(STDERR, + " is the same as \"pngcrush -d New -m 7 
-rem bkgd *.png\"):\n\n"); } else fprintf(STDERR, "options:\n"); @@ -1604,7 +1706,11 @@ main(int argc, char *argv[]) fprintf(STDERR, "\n If file has an IDAT greater than this size, it\n"); fprintf(STDERR, - " will be considered to be already crushed.\n\n"); + " will be considered to be already crushed and will\n"); + fprintf(STDERR, + " not be processed, unless you are making other changes\n"); + fprintf(STDERR, + " or the \"-force\" option is present.\n\n"); } fprintf(STDERR, " -brute (Use brute-force, try 114 different methods [11-124])\n"); @@ -1722,6 +1828,16 @@ main(int argc, char *argv[]) fprintf(STDERR, " gAMA chunk, use the '-replace_gamma' option.\n\n"); png_crush_pause(); +#ifdef PNG_iCCP_SUPPORTED + fprintf(STDERR, + " -iccp length \"Profile Name\" iccp_file\n"); + if(verbose > 1) + { + fprintf(STDERR, + "\n file with ICC profile to insert in an iCCP chunk."); + fprintf(STDERR, "\n\n"); + } +#endif #ifdef PNG_iTXt_SUPPORTED fprintf(STDERR, " -itxt b[efore_IDAT]|a[fter_IDAT] \"keyword\" \"text\"\n"); @@ -1763,7 +1879,7 @@ main(int argc, char *argv[]) } fprintf(STDERR, - " -max maximum_IDAT_size [1 through %d]\n",PNG_ZBUF_SIZE); + " -max maximum_IDAT_size [default %d]\n",PNG_ZBUF_SIZE); if(verbose > 1) fprintf(STDERR,"\n"); #if 0 @@ -1791,13 +1907,18 @@ main(int argc, char *argv[]) fprintf(STDERR, "\n Truncates the PLTE. 
Be sure not to truncate it to\n"); fprintf(STDERR, - "\n less than the greatest index present in IDAT.\n\n"); + " less than the greatest index present in IDAT.\n\n"); } fprintf(STDERR, " -q (quiet)\n"); if(verbose > 1) fprintf(STDERR,"\n"); + fprintf(STDERR, + " -reduce (do lossless color type or bit depth reduction)\n"); + if(verbose > 1) + fprintf(STDERR, + "\n (if possible)\n\n"); fprintf(STDERR, " -rem chunkname (or \"alla\" or \"allb\")\n"); if(verbose > 1) @@ -1890,7 +2011,6 @@ main(int argc, char *argv[]) " color type, scaled to the output bit depth.\n\n"); } #endif - fprintf(STDERR, " -v (display more detailed information)\n"); if(verbose > 1) @@ -1899,7 +2019,12 @@ main(int argc, char *argv[]) fprintf(STDERR, " -version (display the pngcrush version)\n"); if(verbose > 1) - fprintf(STDERR,"\n"); + { + fprintf(STDERR, + "\n Look for the most recent version of pngcrush at\n"); + fprintf(STDERR, + " http://pmt.sourceforge.net\n\n"); + } fprintf(STDERR, " -w compression_window_size [32, 16, 8, 4, 2, 1, 512]\n"); if(verbose > 1) @@ -1923,6 +2048,10 @@ main(int argc, char *argv[]) fprintf(STDERR, " '-m method' argument.\n\n"); } + fprintf(STDERR, + " -zmem zlib_compression_mem_level [1-9, default 9]\n"); + if(verbose > 1) + fprintf(STDERR,"\n"); #ifdef PNG_iTXt_SUPPORTED fprintf(STDERR, " -zitxt b[efore_IDAT]|a[fter_IDAT] \"keyword\" \"text\"\n"); @@ -2013,6 +2142,8 @@ main(int argc, char *argv[]) { first_trial = 1; + things_have_changed=global_things_have_changed; + if(png_row_filters != NULL) { free(png_row_filters); png_row_filters=NULL; @@ -2121,7 +2252,7 @@ main(int argc, char *argv[]) if(already_crushed) { fprintf(STDERR, "File has already been crushed: %s\n", inname); - continue; + if(!things_have_changed) continue; } if(verbose > 0) @@ -2142,6 +2273,7 @@ main(int argc, char *argv[]) if (do_color_count) { if (force_output_color_type == 8 && (input_color_type == 2 || + (input_color_type == 3) || input_color_type == 4 || input_color_type == 6)) /* check for 
unused alpha channel or single transparent color */ { @@ -2298,7 +2430,7 @@ main(int argc, char *argv[]) } else { - filter_method=fm[best]; + filter_type=fm[best]; zlib_level=lv[best]; if(zs[best] == 0)z_strategy=Z_DEFAULT_STRATEGY; if(zs[best] == 1)z_strategy=Z_FILTERED; @@ -2317,7 +2449,7 @@ main(int argc, char *argv[]) if((trial == 6 || trial == 9 || trial == 10) && best_of_three != 3) continue; } - filter_method=fm[trial]; + filter_type=fm[trial]; zlib_level=lv[trial]; if(zs[trial] == 0)z_strategy=Z_DEFAULT_STRATEGY; if(zs[trial] == 1)z_strategy=Z_FILTERED; @@ -2325,7 +2457,7 @@ main(int argc, char *argv[]) final_method=trial; if(nosave == 0) P2(" Begin trial %d, filter %d, strategy %d, level %d\n", - trial, filter_method, z_strategy, zlib_level); + trial, filter_type, z_strategy, zlib_level); } P2("prepare to open files.\n"); @@ -2463,7 +2595,7 @@ main(int argc, char *argv[]) } if(nosave == 0) { - if(png_get_compression_buffer_size(write_ptr) < max_idat_size) + if(png_get_compression_buffer_size(write_ptr) != max_idat_size) { P2("reinitializing write zbuf.\n"); png_set_compression_buffer_size(write_ptr, max_idat_size); @@ -2511,6 +2643,9 @@ main(int argc, char *argv[]) png_set_keep_unknown_chunks(write_ptr, HANDLE_CHUNK_IF_SAFE, (png_bytep)NULL, 0); + +/* Process the following chunks as if safe-to-copy since it is known that + recompressing the IDAT chunks has no effect on them */ #if !defined(PNG_cHRM_SUPPORTED) png_set_keep_unknown_chunks(write_ptr, HANDLE_CHUNK_ALWAYS, (png_bytep)png_cHRM, 1); @@ -2593,30 +2728,44 @@ main(int argc, char *argv[]) #endif /* PNG_WRITE_UNKNOWN_CHUNKS_SUPPORTED */ png_debug(0, "Reading info struct\n"); + { + png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + + png_read_data(read_ptr, png_signature, 8); + png_set_sig_bytes(read_ptr, 8); + + if (png_sig_cmp(png_signature, 0, 8)) + { + if (png_sig_cmp(png_signature, 0, 4)) + png_error(read_ptr, "Not a PNG file!"); + else + png_error(read_ptr, "PNG file corrupted by 
ASCII conversion"); + } + } png_read_info(read_ptr, read_info_ptr); #if (PNG_LIBPNG_VER > 90) png_debug(0, "Transferring info struct\n"); { - int interlace_type, compression_type, filter_type; + int interlace_method, compression_method, filter_method; if (png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth, - &color_type, &interlace_type, &compression_type, &filter_type)) + &color_type, &interlace_method, &compression_method, &filter_method)) { int compression_window; int need_expand = 0; - int output_interlace_type=interlace_type; + int output_interlace_method=interlace_method; input_color_type=color_type; input_bit_depth=bit_depth; if(nointerlace) - output_interlace_type=0; + output_interlace_method=0; if(verbose > 1 && first_trial) { fprintf(STDERR, " IHDR chunk data:\n"); fprintf(STDERR, " Width=%ld, height=%ld\n", width, height); fprintf(STDERR, " Bit depth =%d\n", bit_depth); fprintf(STDERR, " Color type=%d\n", color_type); - fprintf(STDERR, " Interlace =%d\n", interlace_type); + fprintf(STDERR, " Interlace =%d\n", interlace_method); } if(output_color_type > 7) @@ -2720,6 +2869,7 @@ main(int argc, char *argv[]) int channels=0; png_set_compression_strategy(write_ptr, z_strategy); + png_set_compression_mem_level(write_ptr, compression_mem_level); if (output_color_type == 0)channels=1; if (output_color_type == 2)channels=3; @@ -2757,10 +2907,10 @@ main(int argc, char *argv[]) fprintf(STDERR, " Setting IHDR\n"); png_set_IHDR(write_ptr, write_info_ptr, width, height, - output_bit_depth, output_color_type, output_interlace_type, - compression_type, filter_type); + output_bit_depth, output_color_type, output_interlace_method, + compression_method, filter_method); - if(output_color_type != input_color_type) things_have_changed++; + if(output_color_type != input_color_type) things_have_changed=1; } } #if defined(PNG_READ_bKGD_SUPPORTED) && defined(PNG_WRITE_bKGD_SUPPORTED) @@ -2953,15 +3103,26 @@ main(int argc, char *argv[]) png_charp name; png_charp 
profile; png_uint_32 proflen; - int compression_type; + int compression_method; - if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_type, + if (png_get_iCCP(read_ptr, read_info_ptr, &name, &compression_method, &profile, &proflen)) { + P1 ("Got iccp chunk, proflen=%lu\n",proflen); if(keep_chunk("iCCP",argv)) - png_set_iCCP(write_ptr, write_info_ptr, name, compression_type, + png_set_iCCP(write_ptr, write_info_ptr, name, compression_method, profile, proflen); + } +#ifdef PNG_iCCP_SUPPORTED + else if (iccp_length) + { + png_set_iCCP(write_ptr, write_info_ptr, iccp_name, 0, + iccp_text, iccp_length); + P1 ("Wrote iccp chunk, proflen=%d\n",iccp_length); + } +#endif + } #endif #if defined(PNG_READ_oFFs_SUPPORTED) && defined(PNG_WRITE_oFFs_SUPPORTED) @@ -3014,9 +3175,17 @@ main(int argc, char *argv[]) { if (png_get_pHYs(read_ptr, read_info_ptr, &res_x, &res_y, &unit_type)) + { + if(res_x == 0 && res_y == 0) + { + if(verbose > 0 && first_trial) + fprintf(STDERR, " Deleting useless pHYs 0 0 chunk\n"); + } + else { if(keep_chunk("pHYs",argv)) - png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type); + png_set_pHYs(write_ptr, write_info_ptr, res_x, res_y, unit_type); + } } } else @@ -3366,12 +3535,12 @@ main(int argc, char *argv[]) { png_set_compression_level(write_ptr, zlib_level); - if (filter_method == 0)png_set_filter(write_ptr,0,PNG_FILTER_NONE); - else if(filter_method == 1)png_set_filter(write_ptr,0,PNG_FILTER_SUB); - else if(filter_method == 2)png_set_filter(write_ptr,0,PNG_FILTER_UP); - else if(filter_method == 3)png_set_filter(write_ptr,0,PNG_FILTER_AVG); - else if(filter_method == 4)png_set_filter(write_ptr,0,PNG_FILTER_PAETH); - else if(filter_method == 5)png_set_filter(write_ptr,0,PNG_ALL_FILTERS); + if (filter_type == 0)png_set_filter(write_ptr,0,PNG_FILTER_NONE); + else if(filter_type == 1)png_set_filter(write_ptr,0,PNG_FILTER_SUB); + else if(filter_type == 2)png_set_filter(write_ptr,0,PNG_FILTER_UP); + else if(filter_type == 
3)png_set_filter(write_ptr,0,PNG_FILTER_AVG); + else if(filter_type == 4)png_set_filter(write_ptr,0,PNG_FILTER_PAETH); + else if(filter_type == 5)png_set_filter(write_ptr,0,PNG_ALL_FILTERS); else png_set_filter(write_ptr,0,PNG_FILTER_NONE); @@ -3464,7 +3633,7 @@ main(int argc, char *argv[]) { /* check for sufficient memory: we need 2*zlib_window - and, if filter_method == 5, 4*rowbytes in separate allocations. + and, if filter_type == 5, 4*rowbytes in separate allocations. If it's not enough we can drop the "average" filter and we can reduce the zlib_window for writing. We can't change the input zlib_window because the input file might have @@ -3766,6 +3935,8 @@ main(int argc, char *argv[]) if(nosave) break; + first_trial=0; + if (nosave == 0) { png_debug(0, "Opening file for length measurement\n"); @@ -3789,11 +3960,10 @@ main(int argc, char *argv[]) { fprintf(STDERR, " IDAT length with method %d (fm %d zl %d zs %d)= %8lu\n", - trial,filter_method,zlib_level,z_strategy,idat_length[trial]); + trial,filter_type,zlib_level,z_strategy,idat_length[trial]); fflush(STDERR); } - first_trial=0; } /* end of trial-loop */ if (fpin) @@ -3847,6 +4017,10 @@ main(int argc, char *argv[]) free(png_row_filters); png_row_filters=NULL; } if(verbose > 0) show_result(); +#ifdef PNG_iCCP_SUPPORTED + if(iccp_length) + free(iccp_text); +#endif if(pngcrush_must_exit) exit(0); return(0); @@ -3874,6 +4048,7 @@ measure_idats(FILE *fpin) #else png_set_read_fn(read_ptr, (png_voidp)fpin, png_default_read_data); #endif + png_set_sig_bytes(read_ptr, 0); measured_idat_length=png_measure_idat(read_ptr); P2("measure_idats: IDAT length=%lu\n",measured_idat_length); @@ -3898,7 +4073,6 @@ png_measure_idat(png_structp png_ptr) /* Copyright (C) 1999, 2000 Glenn Randers-Pehrson (randeg@alum.rpi.edu) See notice in pngcrush.c for conditions of use and distribution */ png_uint_32 sum_idat_length=0; - png_debug(1, "in png_read_info\n"); { png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; @@ 
-3909,7 +4083,7 @@ png_measure_idat(png_structp png_ptr) if (png_sig_cmp(png_signature, 0, 8)) { if (png_sig_cmp(png_signature, 0, 4)) - png_error(png_ptr, "Not a PNG file"); + png_error(png_ptr, "Not a PNG file.."); else png_error(png_ptr, "PNG file corrupted by ASCII conversion"); } @@ -3927,7 +4101,11 @@ png_measure_idat(png_structp png_ptr) PNG_IDAT; PNG_IEND; PNG_IHDR; +#ifdef PNG_iCCP_SUPPORTED PNG_iCCP; +#else + const png_byte png_iCCP[5]={105, 67, 67, 80, '\0'}; +#endif #endif #endif png_byte chunk_name[5]; @@ -3970,6 +4148,7 @@ png_measure_idat(png_structp png_ptr) input_color_type=buffer[9]; } +#ifdef PNG_iCCP_SUPPORTED /* check for bad photoshop iccp chunk */ #ifdef PNG_UINT_IDAT if (png_get_uint_32(chunk_name) == PNG_UINT_iCCP) @@ -3977,6 +4156,12 @@ png_measure_idat(png_structp png_ptr) if (!png_memcmp(chunk_name, png_iCCP, 4)) #endif { + /* Check for bad Photoshop iCCP chunk. Libpng will reject the + * bad chunk because the Adler-32 bytes are missing, but we check + * here to see if it's really the sRGB profile, and if so, set the + * "intent" flag and gamma so pngcrush will write an sRGB chunk + * and a gamma chunk. 
+ */ if (length == 2615) { png_crc_read(png_ptr, buffer, 22); @@ -3997,6 +4182,7 @@ png_measure_idat(png_structp png_ptr) } } } +#endif png_crc_finish(png_ptr, length); @@ -4016,8 +4202,9 @@ count_colors(FILE *fpin) { /* Copyright (C) 2000 Glenn Randers-Pehrson (randeg@alum.rpi.edu) See notice in pngcrush.c for conditions of use and distribution */ - int bit_depth, color_type, interlace_type, filter_type, compression_type; - png_uint_32 rowbytes, channels; + int bit_depth, color_type, interlace_method, filter_method, compression_method; + png_uint_32 rowbytes; + volatile png_uint_32 channels; int i; int pass, num_pass; @@ -4025,6 +4212,7 @@ count_colors(FILE *fpin) volatile int result, hashmiss, hashinserts; png_uint_32 rgba_frequency[257]; + png_uint_32 rgba_hi[257]; /* Actually contains ARGB not RGBA */ #if 0 png_uint_32 rgba_lo[257]; /* Low bytes of ARGB in 16-bit PNGs */ @@ -4053,7 +4241,9 @@ count_colors(FILE *fpin) num_rgba=0; for (i=0; i<257; i++) + { rgba_frequency[i]=0; + } P2("Checking alphas:\n"); png_debug(0, "Allocating read structure\n"); @@ -4075,10 +4265,12 @@ count_colors(FILE *fpin) #ifdef USE_HASHCODE int hash[16385]; +#endif + +#ifdef USE_HASHCODE for (i=0; i<16385; i++) hash[i]=-1; #endif - end_info_ptr = NULL; #if !defined(PNG_NO_STDIO) @@ -4087,6 +4279,20 @@ count_colors(FILE *fpin) png_set_read_fn(read_ptr, (png_voidp)fpin, png_default_read_data); #endif + { + png_byte png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + + png_read_data(read_ptr, png_signature, 8); + png_set_sig_bytes(read_ptr, 8); + + if (png_sig_cmp(png_signature, 0, 8)) + { + if (png_sig_cmp(png_signature, 0, 4)) + png_error(read_ptr, "Not a PNG file."); + else + png_error(read_ptr, "PNG file corrupted by ASCII conversion"); + } + } png_read_info(read_ptr, read_info_ptr); #ifdef PNG_CRC_QUIET_USE @@ -4094,7 +4300,7 @@ count_colors(FILE *fpin) #endif png_get_IHDR(read_ptr, read_info_ptr, &width, &height, &bit_depth, - &color_type, &interlace_type, &compression_type, 
&filter_type); + &color_type, &interlace_method, &compression_method, &filter_method); if (color_type == 2) channels = 3; @@ -4106,11 +4312,11 @@ count_colors(FILE *fpin) channels=1; if(color_type == 0 || color_type == 3 || color_type == 4) - reduce_to_gray = 0; + reduce_to_gray = 1; if(bit_depth == 8) { - if(interlace_type) + if(interlace_method) num_pass=7; else num_pass = 1; @@ -4126,7 +4332,7 @@ count_colors(FILE *fpin) png_uint_32 pass_height, pass_width, y; png_debug(0, "\nBegin Pass\n"); - if (interlace_type) + if (interlace_method) { pass_height = (height - png_pass_ystart[pass] + png_pass_yinc[pass] - 1) / png_pass_yinc[pass]; @@ -4143,7 +4349,8 @@ count_colors(FILE *fpin) { png_uint_32 x; png_read_row(read_ptr, row_buf, (png_bytep)NULL); - if(result < 2 || it_is_opaque || reduce_to_gray) + if(result < 2 || it_is_opaque || + reduce_to_gray) { if(color_type==2) { @@ -4160,6 +4367,11 @@ count_colors(FILE *fpin) if(reduce_to_gray && ((*(rp)) != (*(rp+1)) || (*(rp)) != (*(rp+2)))) reduce_to_gray=0; + + if (result > 1 || !it_is_opaque) + continue; + + #ifdef USE_HASHCODE /* * R G B mask @@ -4242,6 +4454,9 @@ count_colors(FILE *fpin) reduce_to_gray=0; if(it_is_opaque && (*(rp+3)) != 255) it_is_opaque=0; + + if (result > 1) + continue; #ifdef USE_HASHCODE /* * A R G B mask @@ -4384,8 +4599,6 @@ count_colors(FILE *fpin) } else /* other color type */ { - /* to do: check color type 3 for max sample that is present - and reduce palette if possible */ result=2; } } @@ -4436,14 +4649,30 @@ count_colors(FILE *fpin) P2 ("hashcode misses=%d, inserts=%d\n",hashmiss, hashinserts); } - if(reduce_to_gray) - P1 ("The truecolor image is all gray and will be reduced.\n"); if(color_type == 0 || color_type == 2) it_is_opaque=0; - if(it_is_opaque) - P1 ("The image is opaque and the alpha channel will be removed.\n"); + if(reduction_ok) + { + if(reduce_to_gray) + P1 ("The truecolor image is all gray and will be reduced.\n"); + if(it_is_opaque) + P1 ("The image is opaque and the 
alpha channel will be removed.\n"); + } + else + { + if(reduce_to_gray) + P1 ("The truecolor image is all gray and could be reduced.\n"); + if(it_is_opaque) + P1 ("The image is opaque and the alpha channel could be removed.\n"); + if (reduce_to_gray || it_is_opaque) + P1 ("Rerun pngcrush with the \"-reduce\" option to do so.\n"); + reduce_to_gray = 0; + it_is_opaque = 0; + + } P2 ("Finished checking alphas, result=%d\n",result); } + ret=result; return (ret); } diff --git a/pngcrush.h b/pngcrush.h index 33b31e37e..845e52a57 100644 --- a/pngcrush.h +++ b/pngcrush.h @@ -26,6 +26,8 @@ # define PNG_USER_MEM_SUPPORTED #endif +#define MNG_EXTENSIONS_SUPPORTED /* extra filter types */ + #ifndef PNG_NO_LEGACY_SUPPORTED # define PNG_NO_LEGACY_SUPPORTED #endif diff --git a/pngerror.c b/pngerror.c index 3e4210f75..43bf597b3 100644 --- a/pngerror.c +++ b/pngerror.c @@ -1,7 +1,7 @@ /* pngerror.c - stub functions for i/o and memory allocation * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) diff --git a/pnggccrd.c b/pnggccrd.c index ea4f972b5..d6e49b4bd 100644 --- a/pnggccrd.c +++ b/pnggccrd.c @@ -6,14 +6,14 @@ * and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm * for Intel's performance analysis of the MMX vs. non-MMX code. * - * libpng version 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * Copyright (c) 1998, Intel Corporation * * Based on MSVC code contributed by Nirav Chhatrapati, Intel Corp., 1998. * Interface to libpng contributed by Gilles Vollant, 1999. - * GNU C port by Greg Roelofs, 1999. + * GNU C port by Greg Roelofs, 1999-2000. 
* * Lines 2350-4300 converted in place with intel2gas 1.3.1: * @@ -43,8 +43,8 @@ */ /* - * NOTES (mostly by Greg Roelofs) - * ===== + * TEMPORARY PORTING NOTES AND CHANGELOG (mostly by Greg Roelofs) + * ===================================== * * 19991006: * - fixed sign error in post-MMX cleanup code (16- & 32-bit cases) @@ -55,13 +55,13 @@ * - write MMX code for 48-bit case (pixel_bytes == 6) * - figure out what's up with 24-bit case (pixel_bytes == 3): * why subtract 8 from width_mmx in the pass 4/5 case? - * (only width_mmx case) + * (only width_mmx case) (near line 1606) * x [DONE] replace pixel_bytes within each block with the true * constant value (or are compilers smart enough to do that?) * - rewrite all MMX interlacing code so it's aligned with * the *beginning* of the row buffer, not the end. This * would not only allow one to eliminate half of the memory - * writes for odd passes (i.e., pass == odd), it may also + * writes for odd passes (that is, pass == odd), it may also * eliminate some unaligned-data-access exceptions (assuming * there's a penalty for not aligning 64-bit accesses on * 64-bit boundaries). The only catch is that the "leftover" @@ -113,7 +113,7 @@ * * 19991107: * - verified CPUID clobberage: 12-char string constant ("GenuineIntel", - * "AuthenticAMD", etc.) placed in EBX:ECX:EDX. Still need to polish. + * "AuthenticAMD", etc.) placed in ebx:ecx:edx. Still need to polish. * * 19991120: * - made "diff" variable (now "_dif") global to simplify conversion of @@ -123,14 +123,14 @@ * macro determines which is used); original not yet tested. 
* * 20000213: - * - When compiling with gcc, be sure to use -fomit-frame-pointer + * - when compiling with gcc, be sure to use -fomit-frame-pointer * * 20000319: * - fixed a register-name typo in png_do_read_interlace(), default (MMX) case, * pass == 4 or 5, that caused visible corruption of interlaced images * * 20000623: - * - Various problems were reported with gcc 2.95.2 in the Cygwin environment, + * - Various problems were reported with gcc 2.95.2 in the Cygwin environment, * many of the form "forbidden register 0 (ax) was spilled for class AREG." * This is explained at http://gcc.gnu.org/fom_serv/cache/23.html, and * Chuck Wilson supplied a patch involving dummy output registers. See @@ -147,10 +147,78 @@ * pnggccrd.c:1177: more than 10 operands in `asm' * They are all the same problem and can be worked around by using the * global _unmask variable unconditionally, not just in the -fPIC case. - * Apparently earlier versions of gcc also have the problem with more than + * Reportedly earlier versions of gcc also have the problem with more than * 10 operands; they just don't report it. Much strangeness ensues, etc. + * + * 20000729: + * - enabled png_read_filter_row_mmx_up() (shortest remaining unconverted + * MMX routine); began converting png_read_filter_row_mmx_sub() + * - to finish remaining sections: + * - clean up indentation and comments + * - preload local variables + * - add output and input regs (order of former determines numerical + * mapping of latter) + * - avoid all usage of ebx (including bx, bh, bl) register [20000823] + * - remove "$" from addressing of Shift and Mask variables [20000823] + * + * 20000731: + * - global union vars causing segfaults in png_read_filter_row_mmx_sub()? + * + * 20000822: + * - ARGH, stupid png_read_filter_row_mmx_sub() segfault only happens with + * shared-library (-fPIC) version! Code works just fine as part of static + * library. Damn damn damn damn damn, should have tested that sooner. 
+ * ebx is getting clobbered again (explicitly this time); need to save it + * on stack or rewrite asm code to avoid using it altogether. Blargh! + * + * 20000823: + * - first section was trickiest; all remaining sections have ebx -> edx now. + * (-fPIC works again.) Also added missing underscores to various Shift* + * and *Mask* globals and got rid of leading "$" signs. + * + * 20000826: + * - added visual separators to help navigate microscopic printed copies + * (http://pobox.com/~newt/code/gpr-latest.zip, mode 10); started working + * on png_read_filter_row_mmx_avg() + * + * 20000828: + * - finished png_read_filter_row_mmx_avg(): only Paeth left! (930 lines...) + * What the hell, did png_read_filter_row_mmx_paeth(), too. Comments not + * cleaned up/shortened in either routine, but functionality is complete + * and seems to be working fine. + * + * 20000829: + * - ahhh, figured out last(?) bit of gcc/gas asm-fu: if register is listed + * as an input reg (with dummy output variables, etc.), then it *cannot* + * also appear in the clobber list or gcc 2.95.2 will barf. The solution + * is simple enough... + * + * 20000914: + * - bug in png_read_filter_row_mmx_avg(): 16-bit grayscale not handled + * correctly (but 48-bit RGB just fine) + * + * 20000916: + * - fixed bug in png_read_filter_row_mmx_avg(), bpp == 2 case; three errors: + * - "_ShiftBpp.use = 24;" should have been "_ShiftBpp.use = 16;" + * - "_ShiftRem.use = 40;" should have been "_ShiftRem.use = 48;" + * - "psllq _ShiftRem, %%mm2" should have been "psrlq _ShiftRem, %%mm2" + * + * STILL TO DO: + * - test png_do_read_interlace() 64-bit case (pixel_bytes == 8) + * - write MMX code for 48-bit case (pixel_bytes == 6) + * - figure out what's up with 24-bit case (pixel_bytes == 3): + * why subtract 8 from width_mmx in the pass 4/5 case? 
+ * (only width_mmx case) (near line 1606) + * - rewrite all MMX interlacing code so it's aligned with beginning + * of the row buffer, not the end (see 19991007 for details) + * - pick one version of mmxsupport() and get rid of the other + * - add error messages to any remaining bogus default cases + * - enable pixel_depth == 8 cases in png_read_filter_row()? (test speed) + * - add support for runtime enable/disable/query of various MMX routines */ +//#define PNG_DEBUG 2 // GRR + #define PNG_INTERNAL #include "png.h" @@ -161,36 +229,46 @@ int mmxsupport(void); static int mmx_supported = 2; #ifdef PNG_USE_LOCAL_ARRAYS -static const int png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; -static const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; -static const int png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1}; +static const int FARDATA png_pass_start[7] = {0, 4, 0, 2, 0, 1, 0}; +static const int FARDATA png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1}; +static const int FARDATA png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1}; #endif // djgpp, Win32, and Cygwin add their own underscores to global variables, // so define them without: #if defined(__DJGPP__) || defined(WIN32) || defined(__CYGWIN__) -# define _unmask unmask -# define _const4 const4 -# define _const6 const6 -# define _mask8_0 mask8_0 -# define _mask16_1 mask16_1 -# define _mask16_0 mask16_0 -# define _mask24_2 mask24_2 -# define _mask24_1 mask24_1 -# define _mask24_0 mask24_0 -# define _mask32_3 mask32_3 -# define _mask32_2 mask32_2 -# define _mask32_1 mask32_1 -# define _mask32_0 mask32_0 -# define _mask48_5 mask48_5 -# define _mask48_4 mask48_4 -# define _mask48_3 mask48_3 -# define _mask48_2 mask48_2 -# define _mask48_1 mask48_1 -# define _mask48_0 mask48_0 -# define _FullLength FullLength -# define _MMXLength MMXLength -# define _dif dif +# define _unmask unmask +# define _const4 const4 +# define _const6 const6 +# define _mask8_0 mask8_0 +# define _mask16_1 mask16_1 +# define _mask16_0 mask16_0 +# define _mask24_2 mask24_2 +# 
define _mask24_1 mask24_1 +# define _mask24_0 mask24_0 +# define _mask32_3 mask32_3 +# define _mask32_2 mask32_2 +# define _mask32_1 mask32_1 +# define _mask32_0 mask32_0 +# define _mask48_5 mask48_5 +# define _mask48_4 mask48_4 +# define _mask48_3 mask48_3 +# define _mask48_2 mask48_2 +# define _mask48_1 mask48_1 +# define _mask48_0 mask48_0 +# define _FullLength FullLength +# define _MMXLength MMXLength +# define _dif dif +# define _LBCarryMask LBCarryMask +# define _HBClearMask HBClearMask +# define _ActiveMask ActiveMask +# define _ActiveMask2 ActiveMask2 +# define _ActiveMaskEnd ActiveMaskEnd +# define _ShiftBpp ShiftBpp +# define _ShiftRem ShiftRem +# define _patemp patemp +# define _pbtemp pbtemp +# define _pctemp pctemp #endif /* These constants are used in the inlined MMX assembly code. @@ -235,6 +313,9 @@ static unsigned long long _const6 = 0x00000000000000FFLL; static png_uint_32 _FullLength; static png_uint_32 _MMXLength; static int _dif; +static int _patemp; // temp variables for Paeth routine +static int _pbtemp; +static int _pctemp; void /* PRIVATE */ @@ -242,6 +323,14 @@ png_read_filter_row_c(png_structp png_ptr, png_row_infop row_info, png_bytep row, png_bytep prev_row, int filter); + + +//===========================================================================// +// // +// P N G _ C O M B I N E _ R O W // +// // +//===========================================================================// + #if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW) /* Combines the row recently read in with the previous row. 
@@ -266,10 +355,6 @@ png_combine_row(png_structp png_ptr, png_bytep row, int mask) if (mmx_supported == 2) mmx_supported = mmxsupport(); -/* -fprintf(stderr, "GRR DEBUG: png_combine_row() pixel_depth = %d, mask = 0x%02x, unmask = 0x%02x\n", png_ptr->row_info.pixel_depth, mask, ~mask); -fflush(stderr); - */ if (mask == 0xff) { png_memcpy(row, png_ptr->row_buf + 1, @@ -533,9 +618,8 @@ fflush(stderr); "2" (len), // ecx "1" (mask) // edx -// : // clobber list #if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm4", "%mm6", "%mm7" + : "%mm0", "%mm4", "%mm6", "%mm7" // clobber list #endif ); } @@ -652,23 +736,22 @@ fflush(stderr); "end16: \n\t" "EMMS \n\t" // DONE - : "=a" (dummy_value_a), // output regs (dummy) - "=d" (dummy_value_d), + : "=a" (dummy_value_a), // output regs (dummy) "=c" (dummy_value_c), + "=d" (dummy_value_d), "=S" (dummy_value_S), "=D" (dummy_value_D) - : "3" (srcptr), // esi // input regs - "4" (dstptr), // edi - "0" (diff), // eax -// was (unmask) "b" RESERVED // ebx // Global Offset Table idx - "2" (len), // ecx - "1" (mask) // edx + : "0" (diff), // eax // input regs +// was (unmask) " " RESERVED // ebx // Global Offset Table idx + "1" (len), // ecx + "2" (mask), // edx + "3" (srcptr), // esi + "4" (dstptr) // edi -// : // clobber list #if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", - "%mm4", "%mm5", "%mm6", "%mm7" + : "%mm0", "%mm1", "%mm4" // clobber list + , "%mm5", "%mm6", "%mm7" #endif ); } @@ -800,7 +883,7 @@ fflush(stderr); "end24: \n\t" "EMMS \n\t" // DONE - : "=a" (dummy_value_a), // output regs (dummy) + : "=a" (dummy_value_a), // output regs (dummy) "=d" (dummy_value_d), "=c" (dummy_value_c), "=S" (dummy_value_S), @@ -813,10 +896,9 @@ fflush(stderr); "2" (len), // ecx "1" (mask) // edx -// : // clobber list #if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2", - "%mm4", "%mm5", "%mm6", "%mm7" + : "%mm0", "%mm1", "%mm2" // clobber list + , "%mm4", "%mm5", "%mm6", "%mm7" #endif ); } @@ -955,7 +1037,7 @@ fflush(stderr); "end32: \n\t" "EMMS \n\t" // DONE - : "=a" (dummy_value_a), // output regs (dummy) + : "=a" (dummy_value_a), // output regs (dummy) "=d" (dummy_value_d), "=c" (dummy_value_c), "=S" (dummy_value_S), @@ -968,10 +1050,9 @@ fflush(stderr); "2" (len), // ecx "1" (mask) // edx -// : // clobber list #if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2", "%mm3", - "%mm4", "%mm5", "%mm6", "%mm7" + : "%mm0", "%mm1", "%mm2", "%mm3" // clobber list + , "%mm4", "%mm5", "%mm6", "%mm7" #endif ); } @@ -1127,7 +1208,7 @@ fflush(stderr); "end48: \n\t" "EMMS \n\t" // DONE - : "=a" (dummy_value_a), // output regs (dummy) + : "=a" (dummy_value_a), // output regs (dummy) "=d" (dummy_value_d), "=c" (dummy_value_c), "=S" (dummy_value_S), @@ -1140,10 +1221,9 @@ fflush(stderr); "2" (len), // ecx "1" (mask) // edx -// : // clobber list #if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2", "%mm3", - "%mm4", "%mm5", "%mm6", "%mm7" + : "%mm0", "%mm1", "%mm2", "%mm3" // clobber list + , "%mm4", "%mm5", "%mm6", "%mm7" #endif ); } @@ -1216,6 +1296,13 @@ fflush(stderr); + +//===========================================================================// +// // +// P N G _ D O _ R E A D _ I N T E R L A C E // +// // +//===========================================================================// + #if defined(PNG_READ_INTERLACING_SUPPORTED) #if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE) @@ -1227,41 +1314,17 @@ void /* PRIVATE */ png_do_read_interlace(png_row_infop row_info, png_bytep row, int pass, png_uint_32 transformations) { -/* -fprintf(stderr, "GRR DEBUG: entering png_do_read_interlace()\n"); -if (row == NULL) fprintf(stderr, "GRR DEBUG: row == NULL\n"); -if (row_info == NULL) fprintf(stderr, "GRR DEBUG: row_info == NULL\n"); -fflush(stderr); - */ png_debug(1,"in png_do_read_interlace\n"); if (mmx_supported == 2) mmx_supported = mmxsupport(); -/* -{ -fprintf(stderr, "GRR DEBUG: calling mmxsupport()\n"); -fprintf(stderr, "GRR DEBUG: done with mmxsupport() (mmx_supported = %d)\n", mmx_supported); -} - */ -/* -this one happened on first row due to weirdness with mmxsupport(): -if (row == NULL) fprintf(stderr, "GRR DEBUG: now row == NULL!!!\n"); - row was in ebx, and even though nothing touched ebx, it still got wiped... - [weird side effect of CPUID instruction?] 
-if (row_info == NULL) fprintf(stderr, "GRR DEBUG: now row_info == NULL!!!\n"); - */ if (row != NULL && row_info != NULL) { png_uint_32 final_width; final_width = row_info->width * png_pass_inc[pass]; -/* -fprintf(stderr, "GRR DEBUG: png_do_read_interlace() row_info->width = %d, final_width = %d\n", row_info->width, final_width); -fprintf(stderr, "GRR DEBUG: png_do_read_interlace() pixel_depth = %d\n", row_info->pixel_depth); -fflush(stderr); - */ switch (row_info->pixel_depth) { case 1: @@ -1467,6 +1530,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $21, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes @@ -1496,7 +1560,7 @@ fflush(stderr); "jnz .loop3_pass0 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1505,9 +1569,9 @@ fflush(stderr); "0" (width) // ecx // doesn't work "i" (0x0000000000FFFFFFLL) // %1 (a.k.a. 
_const4) -// : // clobber list #if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4" + : "%mm0", "%mm1", "%mm2" // clobber list + , "%mm3", "%mm4" #endif ); } @@ -1516,6 +1580,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $9, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes @@ -1539,7 +1604,7 @@ fflush(stderr); "jnz .loop3_pass2 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1547,9 +1612,8 @@ fflush(stderr); "2" (dp), // edi "0" (width) // ecx -// : // clobber list #if 0 /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2" + : "%mm0", "%mm1", "%mm2" // clobber list #endif ); } @@ -1567,6 +1631,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $3, %%esi \n\t" "subl $9, %%edi \n\t" @@ -1593,7 +1658,7 @@ fflush(stderr); "jnz .loop3_pass4 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1601,9 +1666,9 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2", "%mm3" + : "%mm0", "%mm1" // clobber list + , "%mm2", "%mm3" #endif ); } @@ -1638,6 +1703,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $3, %%esi \n\t" "subl $31, %%edi \n\t" @@ -1665,7 +1731,7 @@ fflush(stderr); "jnz .loop1_pass0 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" 
(dummy_value_S), "=D" (dummy_value_D) @@ -1673,9 +1739,9 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4" + : "%mm0", "%mm1", "%mm2" // clobber list + , "%mm3", "%mm4" #endif ); } @@ -1718,6 +1784,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $3, %%esi \n\t" "subl $15, %%edi \n\t" @@ -1736,7 +1803,7 @@ fflush(stderr); "jnz .loop1_pass2 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1744,9 +1811,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ -1771,6 +1837,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $7, %%esi \n\t" "subl $15, %%edi \n\t" @@ -1788,7 +1855,7 @@ fflush(stderr); "jnz .loop1_pass4 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (none) + : "=c" (dummy_value_c), // output regs (none) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1796,9 +1863,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ -1828,6 +1894,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $2, %%esi \n\t" "subl $30, %%edi \n\t" @@ -1848,7 +1915,7 @@ fflush(stderr); "jnz .loop2_pass0 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output 
regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1856,9 +1923,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ -1887,6 +1953,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $2, %%esi \n\t" "subl $14, %%edi \n\t" @@ -1905,7 +1972,7 @@ fflush(stderr); "jnz .loop2_pass2 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1913,9 +1980,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ -1944,6 +2010,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $2, %%esi \n\t" "subl $6, %%edi \n\t" @@ -1958,7 +2025,7 @@ fflush(stderr); "jnz .loop2_pass4 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -1966,9 +2033,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0" + : "%mm0" // clobber list #endif ); } @@ -1997,21 +2063,12 @@ fflush(stderr); { int width_mmx = ((width >> 1) << 1); width -= width_mmx; // 0,1 pixels => 0,4 bytes -/* -fprintf(stderr, "GRR DEBUG: png_do_read_interlace() pass = %d, width_mmx = %d, width = %d\n", pass, width_mmx, width); -fprintf(stderr, " sptr = 0x%08lx, dp = 0x%08lx\n", (unsigned long)sptr, (unsigned long)dp); -fflush(stderr); - */ if (width_mmx) { int dummy_value_c; // fix 'forbidden register 
spilled' int dummy_value_S; int dummy_value_D; -#ifdef GRR_DEBUG - FILE *junk = fopen("junk.4bytes", "wb"); - if (junk) - fclose(junk); -#endif /* GRR_DEBUG */ + __asm__ __volatile__ ( "subl $4, %%esi \n\t" "subl $60, %%edi \n\t" @@ -2035,7 +2092,7 @@ fflush(stderr); "jnz .loop4_pass0 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -2043,9 +2100,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ -2074,6 +2130,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $4, %%esi \n\t" "subl $28, %%edi \n\t" @@ -2093,7 +2150,7 @@ fflush(stderr); "jnz .loop4_pass2 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -2101,9 +2158,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ -2132,6 +2188,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $4, %%esi \n\t" "subl $12, %%edi \n\t" @@ -2149,7 +2206,7 @@ fflush(stderr); "jnz .loop4_pass4 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -2157,9 +2214,8 @@ fflush(stderr); "2" (dp), // edi "0" (width_mmx) // ecx -// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0", "%mm1" + : "%mm0", "%mm1" // clobber list #endif ); } @@ 
-2181,58 +2237,50 @@ fflush(stderr); } } /* end of pixel_bytes == 4 */ -#define STILL_WORKING_ON_THIS -#ifdef STILL_WORKING_ON_THIS // GRR: should work, but needs testing - // (special 64-bit version of rpng2) - //-------------------------------------------------------------- else if (pixel_bytes == 8) { +// GRR TEST: should work, but needs testing (special 64-bit version of rpng2?) // GRR NOTE: no need to combine passes here! if (((pass == 0) || (pass == 1)) && width) { + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + // source is 8-byte RRGGBBAA // dest is 64-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA ... - int dummy_value_c; // fix 'forbidden register spilled' - int dummy_value_S; - int dummy_value_D; -#ifdef GRR_DEBUG - FILE *junk = fopen("junk.8bytes", "wb"); - if (junk) - fclose(junk); -#endif /* GRR_DEBUG */ - __asm__ __volatile__ ( - "subl $56, %%edi \n\t" // start of last block + __asm__ __volatile__ ( + "subl $56, %%edi \n\t" // start of last block - ".loop8_pass0: \n\t" - "movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0 - "movq %%mm0, (%%edi) \n\t" - "movq %%mm0, 8(%%edi) \n\t" - "movq %%mm0, 16(%%edi) \n\t" - "movq %%mm0, 24(%%edi) \n\t" - "movq %%mm0, 32(%%edi) \n\t" - "movq %%mm0, 40(%%edi) \n\t" - "movq %%mm0, 48(%%edi) \n\t" - "subl $8, %%esi \n\t" - "movq %%mm0, 56(%%edi) \n\t" - "subl $64, %%edi \n\t" - "decl %%ecx \n\t" - "jnz .loop8_pass0 \n\t" - "EMMS \n\t" // DONE + ".loop8_pass0: \n\t" + "movq (%%esi), %%mm0 \n\t" // 7 6 5 4 3 2 1 0 + "movq %%mm0, (%%edi) \n\t" + "movq %%mm0, 8(%%edi) \n\t" + "movq %%mm0, 16(%%edi) \n\t" + "movq %%mm0, 24(%%edi) \n\t" + "movq %%mm0, 32(%%edi) \n\t" + "movq %%mm0, 40(%%edi) \n\t" + "movq %%mm0, 48(%%edi) \n\t" + "subl $8, %%esi \n\t" + "movq %%mm0, 56(%%edi) \n\t" + "subl $64, %%edi \n\t" + "decl %%ecx \n\t" + "jnz .loop8_pass0 \n\t" + "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) - "=S" (dummy_value_S), - "=D" (dummy_value_D) + : "=c" 
(dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "1" (sptr), // esi // input regs - "2" (dp), // edi - "0" (width) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width) // ecx -// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0" + : "%mm0" // clobber list #endif - ); + ); } else if (((pass == 2) || (pass == 3)) && width) { @@ -2245,6 +2293,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $24, %%edi \n\t" // start of last block @@ -2260,7 +2309,7 @@ fflush(stderr); "jnz .loop8_pass2 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -2268,9 +2317,8 @@ fflush(stderr); "2" (dp), // edi "0" (width) // ecx -// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0" + : "%mm0" // clobber list #endif ); } @@ -2286,6 +2334,7 @@ fflush(stderr); int dummy_value_c; // fix 'forbidden register spilled' int dummy_value_S; int dummy_value_D; + __asm__ __volatile__ ( "subl $8, %%edi \n\t" // start of last block @@ -2299,7 +2348,7 @@ fflush(stderr); "jnz .loop8_pass4 \n\t" "EMMS \n\t" // DONE - : "=c" (dummy_value_c), // output regs (dummy) + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) @@ -2307,9 +2356,8 @@ fflush(stderr); "2" (dp), // edi "0" (width) // ecx -// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - : "%mm0" + : "%mm0" // clobber list #endif ); } @@ -2317,8 +2365,6 @@ fflush(stderr); } /* end of pixel_bytes == 8 */ -#endif /* STILL_WORKING_ON_THIS */ - //-------------------------------------------------------------- else if (pixel_bytes == 6) { @@ -2477,19 +2523,30 @@ fflush(stderr); #endif /* PNG_READ_INTERLACING_SUPPORTED */ + + // These variables are 
utilized in the functions below. They are declared // globally here to ensure alignment on 8-byte boundaries. union uAll { long long use; double align; -} LBCarryMask = {0x0101010101010101LL}, - HBClearMask = {0x7f7f7f7f7f7f7f7fLL}, - ActiveMask, ActiveMask2, ActiveMaskEnd, ShiftBpp, ShiftRem; +} _LBCarryMask = {0x0101010101010101LL}, + _HBClearMask = {0x7f7f7f7f7f7f7f7fLL}, + _ActiveMask, _ActiveMask2, _ActiveMaskEnd, _ShiftBpp, _ShiftRem; + + +//===========================================================================// +// // +// P N G _ R E A D _ F I L T E R _ R O W _ M M X _ A V G // +// // +//===========================================================================// + // Optimized code for PNG Average filter decoder -void /* PRIVATE */ + +static void /* PRIVATE */ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, png_bytep prev_row) { @@ -2497,29 +2554,32 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, int dummy_value_c; // fix 'forbidden register 2 (cx) was spilled' error int dummy_value_S; int dummy_value_D; -// int diff; GRR: global now (shortened to dif/_dif) - bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel - _FullLength = row_info->rowbytes; // # of bytes to filter + bpp = (row_info->pixel_depth + 7) >> 3; // get # bytes per pixel + _FullLength = row_info->rowbytes; // # of bytes to filter + __asm__ __volatile__ ( - // Init address pointers and offset -//GRR "movl row, %%edi \n\t" // edi ==> Avg(x) - "xorl %%ebx, %%ebx \n\t" // ebx ==> x + // initialize address pointers and offset +#ifdef __PIC__ + "pushl %%ebx \n\t" // save index to Global Offset Table +#endif +//pre "movl row, %%edi \n\t" // edi: Avg(x) + "xorl %%ebx, %%ebx \n\t" // ebx: x "movl %%edi, %%edx \n\t" -//GRR "movl prev_row, %%esi \n\t" // esi ==> Prior(x) -//GRR "subl bpp, %%edx \n\t" // (bpp is preloaded into ecx) - "subl %%ecx, %%edx \n\t" // edx ==> Raw(x-bpp) +//pre "movl prev_row, %%esi \n\t" // esi: Prior(x) +//pre "subl 
bpp, %%edx \n\t" // (bpp is preloaded into ecx) + "subl %%ecx, %%edx \n\t" // edx: Raw(x-bpp) "xorl %%eax,%%eax \n\t" // Compute the Raw value for the first bpp bytes // Raw(x) = Avg(x) + (Prior(x)/2) "avg_rlp: \n\t" - "movb (%%esi,%%ebx,),%%al \n\t" // Load al with Prior(x) + "movb (%%esi,%%ebx,),%%al \n\t" // load al with Prior(x) "incl %%ebx \n\t" "shrb %%al \n\t" // divide by 2 "addb -1(%%edi,%%ebx,),%%al \n\t" // add Avg(x); -1 to offset inc ebx -//GRR "cmpl bpp, %%ebx \n\t" // (bpp is preloaded into ecx) +//pre "cmpl bpp, %%ebx \n\t" // (bpp is preloaded into ecx) "cmpl %%ecx, %%ebx \n\t" "movb %%al,-1(%%edi,%%ebx,) \n\t" // write Raw(x); -1 to offset inc ebx "jb avg_rlp \n\t" // mov does not affect flags @@ -2529,13 +2589,14 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, "addl %%ebx, _dif \n\t" // add bpp "addl $0xf, _dif \n\t" // add 7+8 to incr past alignment bdry "andl $0xfffffff8, _dif \n\t" // mask to alignment boundary - "subl %%edi, _dif \n\t" // subtract from start => value ebx at alignment - "jz avg_go \n\t" + "subl %%edi, _dif \n\t" // subtract from start => value ebx at + "jz avg_go \n\t" // alignment // fix alignment // Compute the Raw value for the bytes up to the alignment boundary // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2) "xorl %%ecx, %%ecx \n\t" + "avg_lp1: \n\t" "xorl %%eax, %%eax \n\t" "movb (%%esi,%%ebx,), %%cl \n\t" // load cl with Prior(x) @@ -2555,108 +2616,116 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, "andl $0x00000007, %%eax \n\t" // calc bytes over mult of 8 "subl %%eax, %%ecx \n\t" // drop over bytes from original length "movl %%ecx, _MMXLength \n\t" +#ifdef __PIC__ + "popl %%ebx \n\t" // restore index to Global Offset Table +#endif - : "=c" (dummy_value_c), // output regs/vars here, e.g., "=m" (_MMXLength) instead of final instr + : "=c" (dummy_value_c), // output regs (dummy) "=S" (dummy_value_S), "=D" (dummy_value_D) - : "1" (prev_row), // esi // input regs - "2" (row), // 
edi - "0" (bpp) // ecx + : "0" (bpp), // ecx // input regs + "1" (prev_row), // esi + "2" (row) // edi - : "%eax", "%ebx", // clobber list - "%edx" -// GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength) PROBABLY + : "%eax", "%edx" // clobber list +#ifndef __PIC__ + , "%ebx" +#endif + // GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength) + // (seems to work fine without...) ); -#ifdef GRR_GCC_MMX_CONVERTED - // Now do the math for the rest of the row - switch ( bpp ) + // now do the math for the rest of the row + switch (bpp) { case 3: { - ActiveMask.use = 0x0000000000ffffff; - ShiftBpp.use = 24; // == 3 * 8 - ShiftRem.use = 40; // == 64 - 24 - __asm__ ( - // Re-init address pointers and offset - "movq $ActiveMask, %%mm7 \n\t" - "movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary - "movq $LBCarryMask, %%mm5 \n\t" - "movl row, %%edi \n\t" // edi ==> Avg(x) - "movq $HBClearMask, %%mm4 \n\t" - "movl prev_row, %%esi \n\t" // esi ==> Prior(x) - // PRIME the pump (load the first Raw(x-bpp) data set) - "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes - // (we correct position in loop below) - "avg_3lp: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" // Load mm0 with Avg(x) - // Add (Prev_row/2) to Average - "movq %%mm5, %%mm3 \n\t" - "psrlq $ShiftRem, %%mm2 \n\t" // Correct position Raw(x-bpp) data - "movq (%%esi,%%ebx,), %%mm1 \n\t" // Load mm1 with Prior(x) - "movq %%mm7, %%mm6 \n\t" - "pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte - "psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2 - "pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte - // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry - "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys - "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) - "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 - "pand %%mm4, 
%%mm2 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 1 bytes to add to Avg - "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active + _ActiveMask.use = 0x0000000000ffffffLL; + _ShiftBpp.use = 24; // == 3 * 8 + _ShiftRem.use = 40; // == 64 - 24 + + __asm__ __volatile__ ( + // re-init address pointers and offset + "movq _ActiveMask, %%mm7 \n\t" + "movl _dif, %%ecx \n\t" // ecx: x = offset to + "movq _LBCarryMask, %%mm5 \n\t" // alignment boundary +// preload "movl row, %%edi \n\t" // edi: Avg(x) + "movq _HBClearMask, %%mm4 \n\t" +// preload "movl prev_row, %%esi \n\t" // esi: Prior(x) + + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes + // (correct pos. in loop below) + "avg_3lp: \n\t" + "movq (%%edi,%%ecx,), %%mm0 \n\t" // load mm0 with Avg(x) + "movq %%mm5, %%mm3 \n\t" + "psrlq _ShiftRem, %%mm2 \n\t" // correct position Raw(x-bpp) data + "movq (%%esi,%%ecx,), %%mm1 \n\t" // load mm1 with Prior(x) + "movq %%mm7, %%mm6 \n\t" + "pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte + "psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2 + "pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte + "paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte + // add 1st active group (Raw(x-bpp)/2) to average with LBCarry + "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys + "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both + // lsb's were == 1 (only valid for active group) + "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 + "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte + "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 1 bytes to add to Avg + "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active // 
byte - // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry - "psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 3-5 - "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 - "psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly - "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys - "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) - "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 - "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg - "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active + // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry + "psllq _ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 3-5 + "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 + "psllq _ShiftBpp, %%mm2 \n\t" // shift data to pos. 
correctly + "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys + "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both + // lsb's were == 1 (only valid for active group) + "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 + "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte + "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 2 bytes to add to Avg + "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active // byte - // Add 3rd active group (Raw(x-bpp)/2) to Average with LBCarry - "psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover the last two + // add 3rd active group (Raw(x-bpp)/2) to average with _LBCarry + "psllq _ShiftBpp, %%mm6 \n\t" // shift mm6 mask to cover last two // bytes - "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 - "psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly + "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 + "psllq _ShiftBpp, %%mm2 \n\t" // shift data to pos. correctly // Data only needs to be shifted once here to // get the correct x-bpp offset. 
- "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys - "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) - "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 - "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg - "addl $8, %%ebx \n\t" - "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active - // byte - // Now ready to write back to memory - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" - // Move updated Raw(x) to use as Raw(x-bpp) for next loop - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, %%mm2 \n\t" // mov updated Raw(x) to mm2 - "jb avg_3lp \n\t" + "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys + "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both + // lsb's were == 1 (only valid for active group) + "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 + "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte + "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 2 bytes to add to Avg + "addl $8, %%ecx \n\t" + "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active + // byte + // now ready to write back to memory + "movq %%mm0, -8(%%edi,%%ecx,) \n\t" + // move updated Raw(x) to use as Raw(x-bpp) for next loop + "cmpl _MMXLength, %%ecx \n\t" + "movq %%mm0, %%mm2 \n\t" // mov updated Raw(x) to mm2 + "jb avg_3lp \n\t" - : // output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : "S" (prev_row), // esi // input regs - "D" (row) // edi + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi" // clobber list -// GRR: INCLUDE "memory" as clobbered? 
(_dif, _MMXLength) PROBABLY -// , "%mm0", "%mm1", "%mm2", "%mm3", -// "%mm4", "%mm5", "%mm6", "%mm7" + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } break; // end 3 bpp @@ -2664,189 +2733,207 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, case 6: case 4: //case 7: // who wrote this? PNG doesn't support 5 or 7 bytes/pixel - //case 5: + //case 5: // GRR BOGUS { - ActiveMask.use = 0xffffffffffffffff; // use shift below to clear - // appropriate inactive bytes - ShiftBpp.use = bpp << 3; - ShiftRem.use = 64 - ShiftBpp.use; - __asm__ ( - "movq $HBClearMask, %%mm4 \n\t" + _ActiveMask.use = 0xffffffffffffffffLL; // use shift below to clear + // appropriate inactive bytes + _ShiftBpp.use = bpp << 3; + _ShiftRem.use = 64 - _ShiftBpp.use; - // Re-init address pointers and offset - "movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary + __asm__ __volatile__ ( + "movq _HBClearMask, %%mm4 \n\t" - // Load ActiveMask and clear all bytes except for 1st active group - "movq $ActiveMask, %%mm7 \n\t" - "movl row, %%edi \n\t" // edi ==> Avg(x) - "psrlq $ShiftRem, %%mm7 \n\t" - "movl prev_row, %%esi \n\t" // esi ==> Prior(x) + // re-init address pointers and offset + "movl _dif, %%ecx \n\t" // ecx: x = offset to alignment boundary + + // load _ActiveMask and clear all bytes except for 1st active group + "movq _ActiveMask, %%mm7 \n\t" +// preload "movl row, %%edi \n\t" // edi: Avg(x) + "psrlq _ShiftRem, %%mm7 \n\t" +// preload "movl prev_row, %%esi \n\t" // esi: Prior(x) "movq %%mm7, %%mm6 \n\t" - "movq $LBCarryMask, %%mm5 \n\t" - "psllq $ShiftBpp, %%mm6 \n\t" // Create mask for 2nd active group + "movq _LBCarryMask, %%mm5 \n\t" + "psllq _ShiftBpp, %%mm6 \n\t" // create mask for 2nd active group - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes - 
// (we correct position in loop below) + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes + // (we correct pos. in loop below) "avg_4lp: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "psrlq $ShiftRem, %%mm2 \n\t" // shift data to position correctly - "movq (%%esi,%%ebx,), %%mm1 \n\t" - // Add (Prev_row/2) to Average + "movq (%%edi,%%ecx,), %%mm0 \n\t" + "psrlq _ShiftRem, %%mm2 \n\t" // shift data to pos. correctly + "movq (%%esi,%%ecx,), %%mm1 \n\t" + // add (Prev_row/2) to average "movq %%mm5, %%mm3 \n\t" "pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte "psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2 "pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte "paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte - // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry + // add 1st active group (Raw(x-bpp)/2) to average with _LBCarry "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) + // lsb's were == 1 (only valid for active group) "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm7, %%mm2 \n\t" // Leave only Active Group 1 bytes to add to Avg + "pand %%mm7, %%mm2 \n\t" // leave only Active Group 1 bytes to add to Avg "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active // byte - // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry + // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 - "psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly - "addl $8, %%ebx \n\t" + "psllq _ShiftBpp, %%mm2 \n\t" // shift data to pos. 
correctly + "addl $8, %%ecx \n\t" "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) + // lsb's were == 1 (only valid for active group) "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 2 bytes to add to Avg "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active // byte - "cmpl _MMXLength, %%ebx \n\t" - // Now ready to write back to memory - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" - // Prep Raw(x-bpp) for next loop + "cmpl _MMXLength, %%ecx \n\t" + // now ready to write back to memory + "movq %%mm0, -8(%%edi,%%ecx,) \n\t" + // prep Raw(x-bpp) for next loop "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 "jb avg_4lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } break; // end 4,6 bpp case 2: { - ActiveMask.use = 0x000000000000ffff; - ShiftBpp.use = 24; // == 3 * 8 - ShiftRem.use = 40; // == 64 - 24 - __asm__ ( - // Load ActiveMask - "movq $ActiveMask, %%mm7 \n\t" - // Re-init address pointers and offset - "movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary - "movq $LBCarryMask, %%mm5 \n\t" - "movl row, %%edi \n\t" // edi ==> Avg(x) - 
"movq $HBClearMask, %%mm4 \n\t" - "movl prev_row, %%esi \n\t" // esi ==> Prior(x) - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes - // (we correct position in loop below) + _ActiveMask.use = 0x000000000000ffffLL; + _ShiftBpp.use = 16; // == 2 * 8 + _ShiftRem.use = 48; // == 64 - 16 + + __asm__ __volatile__ ( + // load _ActiveMask + "movq _ActiveMask, %%mm7 \n\t" + // re-init address pointers and offset + "movl _dif, %%ecx \n\t" // ecx: x = offset to alignment boundary + "movq _LBCarryMask, %%mm5 \n\t" +// preload "movl row, %%edi \n\t" // edi: Avg(x) + "movq _HBClearMask, %%mm4 \n\t" +// preload "movl prev_row, %%esi \n\t" // esi: Prior(x) + + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes + // (we correct pos. in loop below) "avg_2lp: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "psllq $ShiftRem, %%mm2 \n\t" // shift data to position correctly - "movq (%%esi,%%ebx,), %%mm1 \n\t" - // Add (Prev_row/2) to Average + "movq (%%edi,%%ecx,), %%mm0 \n\t" + "psrlq _ShiftRem, %%mm2 \n\t" // shift data to pos. 
correctly + "movq (%%esi,%%ecx,), %%mm1 \n\t" // (GRR BUGFIX: was psllq) + // add (Prev_row/2) to average "movq %%mm5, %%mm3 \n\t" "pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte "psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2 "pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte "movq %%mm7, %%mm6 \n\t" "paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte - // Add 1st active group (Raw(x-bpp)/2) to Average with LBCarry + + // add 1st active group (Raw(x-bpp)/2) to average with _LBCarry "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) + // lsb's were == 1 (only valid for active group) "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 1 bytes to add to Avg - "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte - // Add 2nd active group (Raw(x-bpp)/2) to Average with LBCarry - "psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 2 & 3 - "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 - "psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly - "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys - "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) - "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 - "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 1 bytes to add to Avg "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte - // Add rdd active group 
(Raw(x-bpp)/2) to Average with LBCarry - "psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 4 & 5 + // add 2nd active group (Raw(x-bpp)/2) to average with _LBCarry + "psllq _ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 2 & 3 "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 - "psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly - // Data only needs to be shifted once here to - // get the correct x-bpp offset. + "psllq _ShiftBpp, %%mm2 \n\t" // shift data to pos. correctly "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) + // lsb's were == 1 (only valid for active group) "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 2 bytes to add to Avg "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte - // Add 4th active group (Raw(x-bpp)/2) to Average with LBCarry - "psllq $ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 6 & 7 + // add 3rd active group (Raw(x-bpp)/2) to average with _LBCarry + "psllq _ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 4 & 5 "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 - "psllq $ShiftBpp, %%mm2 \n\t" // shift data to position correctly - // Data only needs to be shifted once here to - // get the correct x-bpp offset. - "addl $8, %%ebx \n\t" + "psllq _ShiftBpp, %%mm2 \n\t" // shift data to pos. 
correctly "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 (Only valid for active group) + // lsb's were == 1 (only valid for active group) "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte - "pand %%mm6, %%mm2 \n\t" // Leave only Active Group 2 bytes to add to Avg + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 2 bytes to add to Avg "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte - "cmpl _MMXLength, %%ebx \n\t" - // Now ready to write back to memory - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" - // Prep Raw(x-bpp) for next loop + // add 4th active group (Raw(x-bpp)/2) to average with _LBCarry + "psllq _ShiftBpp, %%mm6 \n\t" // shift the mm6 mask to cover bytes 6 & 7 + "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 + "psllq _ShiftBpp, %%mm2 \n\t" // shift data to pos. 
correctly + "addl $8, %%ecx \n\t" + "movq %%mm3, %%mm1 \n\t" // now use mm1 for getting LBCarrys + "pand %%mm2, %%mm1 \n\t" // get LBCarrys for each byte where both + // lsb's were == 1 (only valid for active group) + "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 + "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte + "paddb %%mm1, %%mm2 \n\t" // add LBCarrys to (Raw(x-bpp)/2) for each byte + "pand %%mm6, %%mm2 \n\t" // leave only Active Group 2 bytes to add to Avg + "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) + LBCarrys to Avg for each Active byte + + "cmpl _MMXLength, %%ecx \n\t" + // now ready to write back to memory + "movq %%mm0, -8(%%edi,%%ecx,) \n\t" + // prep Raw(x-bpp) for next loop "movq %%mm0, %%mm2 \n\t" // mov updated Raws to mm2 "jb avg_2lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } break; // end 2 bpp case 1: { - __asm__ ( - // Re-init address pointers and offset - "movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary - "movl row, %%edi \n\t" // edi ==> Avg(x) - "cmpl _FullLength, %%ebx \n\t" // Test if offset at end of array + __asm__ __volatile__ ( + // re-init address pointers and offset +#ifdef __PIC__ + "pushl %%ebx \n\t" // save Global Offset Table index +#endif + "movl _dif, %%ebx \n\t" // ebx: x = offset to alignment boundary +// preload "movl row, %%edi \n\t" // edi: Avg(x) + "cmpl _FullLength, %%ebx \n\t" // test if offset at end of array "jnb avg_1end \n\t" - // Do Paeth decode for remaining bytes - 
"movl prev_row, %%esi \n\t" // esi ==> Prior(x) + // do Paeth decode for remaining bytes +// preload "movl prev_row, %%esi \n\t" // esi: Prior(x) "movl %%edi, %%edx \n\t" - "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below - "subl bpp, %%edx \n\t" // edx ==> Raw(x-bpp) +// preload "subl bpp, %%edx \n\t" // (bpp is preloaded into ecx) + "subl %%ecx, %%edx \n\t" // edx: Raw(x-bpp) + "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx + // in loop below "avg_1lp: \n\t" // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2) "xorl %%eax, %%eax \n\t" @@ -2855,77 +2942,99 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, "addw %%cx, %%ax \n\t" "incl %%ebx \n\t" "shrw %%ax \n\t" // divide by 2 - "addb -1(%%edi,%%ebx,), %%al \n\t" // Add Avg(x); -1 to offset inc ebx - "cmpl _FullLength, %%ebx \n\t" // Check if at end of array - "movb %%al, -1(%%edi,%%ebx,) \n\t" // Write back Raw(x); + "addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset inc ebx + "cmpl _FullLength, %%ebx \n\t" // check if at end of array + "movb %%al, -1(%%edi,%%ebx,) \n\t" // write back Raw(x); // mov does not affect flags; -1 to offset inc ebx "jb avg_1lp \n\t" + "avg_1end: \n\t" +#ifdef __PIC__ + "popl %%ebx \n\t" // Global Offset Table index +#endif - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (bpp), // ecx // input regs + "1" (prev_row), // esi + "2" (row) // edi - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list + : "%eax", "%edx" // clobber list +#ifndef __PIC__ + , "%ebx" +#endif ); } return; // end 1 bpp case 8: { - __asm__ ( - // Re-init address pointers and offset - "movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary - "movq $LBCarryMask, %%mm5 \n\t" - "movl row, %%edi \n\t" // edi ==> Avg(x) - "movq $HBClearMask, 
%%mm4 \n\t" - "movl prev_row, %%esi \n\t" // esi ==> Prior(x) - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm2 \n\t" // Load previous aligned 8 bytes - // (NO NEED to correct position in loop below) + __asm__ __volatile__ ( + // re-init address pointers and offset + "movl _dif, %%ecx \n\t" // ecx: x == offset to alignment + "movq _LBCarryMask, %%mm5 \n\t" // boundary +// preload "movl row, %%edi \n\t" // edi: Avg(x) + "movq _HBClearMask, %%mm4 \n\t" +// preload "movl prev_row, %%esi \n\t" // esi: Prior(x) + + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm2 \n\t" // load previous aligned 8 bytes + // (NO NEED to correct pos. in loop below) + "avg_8lp: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" + "movq (%%edi,%%ecx,), %%mm0 \n\t" "movq %%mm5, %%mm3 \n\t" - "movq (%%esi,%%ebx,), %%mm1 \n\t" - "addl $8, %%ebx \n\t" + "movq (%%esi,%%ecx,), %%mm1 \n\t" + "addl $8, %%ecx \n\t" "pand %%mm1, %%mm3 \n\t" // get lsb for each prev_row byte "psrlq $1, %%mm1 \n\t" // divide prev_row bytes by 2 - "pand %%mm2, %%mm3 \n\t" // get LBCarrys for each byte where both - // lsb's were == 1 + "pand %%mm2, %%mm3 \n\t" // get LBCarrys for each byte + // where both lsb's were == 1 "psrlq $1, %%mm2 \n\t" // divide raw bytes by 2 - "pand %%mm4, %%mm1 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm3, %%mm0 \n\t" // add LBCarrys to Avg for each byte - "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7 of each byte - "paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg for each byte - "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) to Avg for each byte - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" + "pand %%mm4, %%mm1 \n\t" // clear invalid bit 7, each byte + "paddb %%mm3, %%mm0 \n\t" // add LBCarrys to Avg, each byte + "pand %%mm4, %%mm2 \n\t" // clear invalid bit 7, each byte + "paddb %%mm1, %%mm0 \n\t" // add (Prev_row/2) to Avg, each + "paddb %%mm2, %%mm0 \n\t" // add (Raw/2) to Avg for each + 
"cmpl _MMXLength, %%ecx \n\t" + "movq %%mm0, -8(%%edi,%%ecx,) \n\t" "movq %%mm0, %%mm2 \n\t" // reuse as Raw(x-bpp) "jb avg_8lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5" // CHECKASM: clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm5 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2" + , "%mm3", "%mm4", "%mm5" +#endif ); } break; // end 8 bpp - default: // bpp greater than 8 (!= 1,2,3,4,6,8) + default: // bpp greater than 8 (!= 1,2,3,4,[5],6,[7],8) { - GRR: PRINT ERROR HERE: SHOULD NEVER BE REACHED (unless smaller than 1?) + // GRR: PRINT ERROR HERE: SHOULD NEVER BE REACHED + fprintf(stderr, + "libpng: internal logic error (png_read_filter_row_mmx_avg())\n"); - __asm__ ( - "movq $LBCarryMask, %%mm5 \n\t" - // Re-init address pointers and offset - "movl _dif, %%ebx \n\t" // ebx ==> x = offset to alignment boundary - "movl row, %%edi \n\t" // edi ==> Avg(x) - "movq $HBClearMask, %%mm4 \n\t" +#if 0 + __asm__ __volatile__ ( + "movq _LBCarryMask, %%mm5 \n\t" + // re-init address pointers and offset + "movl _dif, %%ebx \n\t" // ebx: x = offset to alignment boundary + "movl row, %%edi \n\t" // edi: Avg(x) + "movq _HBClearMask, %%mm4 \n\t" "movl %%edi, %%edx \n\t" - "movl prev_row, %%esi \n\t" // esi ==> Prior(x) - "subl bpp, %%edx \n\t" // edx ==> Raw(x-bpp) + "movl prev_row, %%esi \n\t" // esi: Prior(x) + "subl bpp, %%edx \n\t" // edx: Raw(x-bpp) "avg_Alp: \n\t" "movq (%%edi,%%ebx,), %%mm0 \n\t" "movq %%mm5, %%mm3 \n\t" @@ -2950,24 +3059,32 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) - : "%ebx", "%edx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", 
"%mm3", "%mm4", "%mm5" // CHECKASM: clobber list + : "%ebx", "%edx", "%edi", "%esi" // CHECKASM: clobber list ); +#endif /* 0 - NEVER REACHED */ } break; - } // end switch ( bpp ) - __asm__ ( - // MMX acceleration complete now do clean-up - // Check if any remaining bytes left to decode - "movl _MMXLength, %%ebx \n\t" // ebx ==> x = offset bytes remaining after MMX - "movl row, %%edi \n\t" // edi ==> Avg(x) - "cmpl _FullLength, %%ebx \n\t" // Test if offset at end of array + } // end switch (bpp) + + __asm__ __volatile__ ( + // MMX acceleration complete; now do clean-up + // check if any remaining bytes left to decode +#ifdef __PIC__ + "pushl %%ebx \n\t" // save index to Global Offset Table +#endif + "movl _MMXLength, %%ebx \n\t" // ebx: x == offset bytes after MMX +//pre "movl row, %%edi \n\t" // edi: Avg(x) + "cmpl _FullLength, %%ebx \n\t" // test if offset at end of array "jnb avg_end \n\t" - // Do Paeth decode for remaining bytes - "movl prev_row, %%esi \n\t" // esi ==> Prior(x) + + // do Avg decode for remaining bytes +//pre "movl prev_row, %%esi \n\t" // esi: Prior(x) "movl %%edi, %%edx \n\t" - "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below - "subl bpp, %%edx \n\t" // edx ==> Raw(x-bpp) +//pre "subl bpp, %%edx \n\t" // (bpp is preloaded into ecx) + "subl %%ecx, %%edx \n\t" // edx: Raw(x-bpp) + "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx below + "avg_lp2: \n\t" // Raw(x) = Avg(x) + ((Raw(x-bpp) + Prior(x))/2) "xorl %%eax, %%eax \n\t" @@ -2976,39 +3093,64 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, "addw %%cx, %%ax \n\t" "incl %%ebx \n\t" "shrw %%ax \n\t" // divide by 2 - "addb -1(%%edi,%%ebx,), %%al \n\t" // Add Avg(x); -1 to offset inc ebx - "cmpl _FullLength, %%ebx \n\t" // Check if at end of array - "movb %%al, -1(%%edi,%%ebx,) \n\t" // Write back Raw(x); - // mov does not affect flags; -1 to offset inc ebx - "jb avg_lp2 \n\t" + "addb -1(%%edi,%%ebx,), %%al \n\t" // add Avg(x); -1 to offset 
inc ebx + "cmpl _FullLength, %%ebx \n\t" // check if at end of array + "movb %%al, -1(%%edi,%%ebx,) \n\t" // write back Raw(x) [mov does not + "jb avg_lp2 \n\t" // affect flags; -1 to offset inc ebx] + "avg_end: \n\t" - "emms \n\t" // End MMX instructions; prep for possible FP instrs. + "EMMS \n\t" // end MMX; prep for poss. FP instrs. +#ifdef __PIC__ + "popl %%ebx \n\t" // restore index to Global Offset Table +#endif - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (bpp), // ecx // input regs + "1" (prev_row), // esi + "2" (row) // edi - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list + : "%eax", "%edx" // clobber list +#ifndef __PIC__ + , "%ebx" +#endif ); -#endif /* GRR_GCC_MMX_CONVERTED */ -} + +} /* end png_read_filter_row_mmx_avg() */ + + + + +//===========================================================================// +// // +// P N G _ R E A D _ F I L T E R _ R O W _ M M X _ P A E T H // +// // +//===========================================================================// // Optimized code for PNG Paeth filter decoder -void /* PRIVATE */ + +static void /* PRIVATE */ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, png_bytep prev_row) { -#ifdef GRR_GCC_MMX_CONVERTED int bpp; - int patemp, pbtemp, pctemp; + int dummy_value_c; // fix 'forbidden register 2 (cx) was spilled' error + int dummy_value_S; + int dummy_value_D; bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel _FullLength = row_info->rowbytes; // # of bytes to filter - __asm__ ( - "xorl %%ebx, %%ebx \n\t" // ebx ==> x offset - "movl row, %%edi \n\t" - "xorl %%edx, %%edx \n\t" // edx ==> x-bpp offset - "movl prev_row, %%esi \n\t" + + __asm__ __volatile__ ( +#ifdef __PIC__ + "pushl %%ebx \n\t" // save index to Global Offset Table +#endif + "xorl %%ebx, 
%%ebx \n\t" // ebx: x offset +//pre "movl row, %%edi \n\t" + "xorl %%edx, %%edx \n\t" // edx: x-bpp offset +//pre "movl prev_row, %%esi \n\t" "xorl %%eax, %%eax \n\t" // Compute the Raw value for the first bpp bytes @@ -3018,7 +3160,8 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "movb (%%edi,%%ebx,), %%al \n\t" "addb (%%esi,%%ebx,), %%al \n\t" "incl %%ebx \n\t" - "cmpl bpp, %%ebx \n\t" +//pre "cmpl bpp, %%ebx \n\t" (bpp is preloaded into ecx) + "cmpl %%ecx, %%ebx \n\t" "movb %%al, -1(%%edi,%%ebx,) \n\t" "jb paeth_rlp \n\t" // get # of bytes to alignment @@ -3030,62 +3173,70 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "subl %%edi, _dif \n\t" // subtract from start ==> value ebx at alignment "jz paeth_go \n\t" // fix alignment + "paeth_lp1: \n\t" "xorl %%eax, %%eax \n\t" // pav = p - a = (a + b - c) - a = b - c "movb (%%esi,%%ebx,), %%al \n\t" // load Prior(x) into al "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp) - "movl %%eax, patemp \n\t" // Save pav for later use + "movl %%eax, _patemp \n\t" // Save pav for later use "xorl %%eax, %%eax \n\t" // pbv = p - b = (a + b - c) - b = a - c "movb (%%edi,%%edx,), %%al \n\t" // load Raw(x-bpp) into al "subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp) "movl %%eax, %%ecx \n\t" // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv - "addl patemp, %%eax \n\t" // pcv = pav + pbv + "addl _patemp, %%eax \n\t" // pcv = pav + pbv // pc = abs(pcv) "testl $0x80000000, %%eax \n\t" "jz paeth_pca \n\t" "negl %%eax \n\t" // reverse sign of neg values + "paeth_pca: \n\t" - "movl %%eax, pctemp \n\t" // save pc for later use + "movl %%eax, _pctemp \n\t" // save pc for later use // pb = abs(pbv) "testl $0x80000000, %%ecx \n\t" "jz paeth_pba \n\t" "negl %%ecx \n\t" // reverse sign of neg values + "paeth_pba: \n\t" - "movl %%ecx, pbtemp \n\t" // save pb for later use + "movl %%ecx, _pbtemp \n\t" // save pb for later 
use // pa = abs(pav) - "movl patemp, %%eax \n\t" + "movl _patemp, %%eax \n\t" "testl $0x80000000, %%eax \n\t" "jz paeth_paa \n\t" "negl %%eax \n\t" // reverse sign of neg values + "paeth_paa: \n\t" - "movl %%eax, patemp \n\t" // save pa for later use + "movl %%eax, _patemp \n\t" // save pa for later use // test if pa <= pb "cmpl %%ecx, %%eax \n\t" "jna paeth_abb \n\t" // pa > pb; now test if pb <= pc - "cmpl pctemp, %%ecx \n\t" + "cmpl _pctemp, %%ecx \n\t" "jna paeth_bbc \n\t" // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp) "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "jmp paeth_paeth \n\t" + "paeth_bbc: \n\t" // pb <= pc; Raw(x) = Paeth(x) + Prior(x) "movb (%%esi,%%ebx,), %%cl \n\t" // load Prior(x) into cl "jmp paeth_paeth \n\t" + "paeth_abb: \n\t" // pa <= pb; now test if pa <= pc - "cmpl pctemp, %%eax \n\t" + "cmpl _pctemp, %%eax \n\t" "jna paeth_abc \n\t" // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp) "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "jmp paeth_paeth \n\t" + "paeth_abc: \n\t" // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp) "movb (%%edi,%%edx,), %%cl \n\t" // load Raw(x-bpp) into cl + "paeth_paeth: \n\t" "incl %%ebx \n\t" "incl %%edx \n\t" @@ -3093,6 +3244,7 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "addb %%cl, -1(%%edi,%%ebx,) \n\t" "cmpl _dif, %%ebx \n\t" "jb paeth_lp1 \n\t" + "paeth_go: \n\t" "movl _FullLength, %%ecx \n\t" "movl %%ecx, %%eax \n\t" @@ -3100,40 +3252,51 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "andl $0x00000007, %%eax \n\t" // calc bytes over mult of 8 "subl %%eax, %%ecx \n\t" // drop over bytes from original length "movl %%ecx, _MMXLength \n\t" +#ifdef __PIC__ + "popl %%ebx \n\t" // restore index to Global Offset Table +#endif - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" 
(dest) + : "0" (bpp), // ecx // input regs + "1" (prev_row), // esi + "2" (row) // edi - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list + : "%eax", "%edx" // clobber list +#ifndef __PIC__ + , "%ebx" +#endif ); - // Now do the math for the rest of the row - switch ( bpp ) + // now do the math for the rest of the row + switch (bpp) { case 3: { - ActiveMask.use = 0x0000000000ffffff; - ActiveMaskEnd.use = 0xffff000000000000; - ShiftBpp.use = 24; // == bpp(3) * 8 - ShiftRem.use = 40; // == 64 - 24 - __asm__ ( - "movl _dif, %%ebx \n\t" - "movl row, %%edi \n\t" - "movl prev_row, %%esi \n\t" + _ActiveMask.use = 0x0000000000ffffffLL; + _ActiveMaskEnd.use = 0xffff000000000000LL; + _ShiftBpp.use = 24; // == bpp(3) * 8 + _ShiftRem.use = 40; // == 64 - 24 + + __asm__ __volatile__ ( + "movl _dif, %%ecx \n\t" +// preload "movl row, %%edi \n\t" +// preload "movl prev_row, %%esi \n\t" "pxor %%mm0, %%mm0 \n\t" - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm1 \n\t" "paeth_3lp: \n\t" - "psrlq $ShiftRem, %%mm1 \n\t" // shift last 3 bytes to 1st 3 bytes - "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) - "punpcklbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a - "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // Prep c=Prior(x-bpp) bytes - "punpcklbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b - "psrlq $ShiftRem, %%mm3 \n\t" // shift last 3 bytes to 1st 3 bytes + "psrlq _ShiftRem, %%mm1 \n\t" // shift last 3 bytes to 1st 3 bytes + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) + "punpcklbw %%mm0, %%mm1 \n\t" // unpack High bytes of a + "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // prep c=Prior(x-bpp) bytes + "punpcklbw %%mm0, %%mm2 \n\t" // unpack High bytes of b + "psrlq _ShiftRem, %%mm3 \n\t" // shift last 3 bytes to 1st 3 bytes // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" - "punpcklbw %%mm0, %%mm3 \n\t" // Unpack 
High bytes of c + "punpcklbw %%mm0, %%mm3 \n\t" // unpack High bytes of c // pbv = p - b = (a + b - c) - b = a - c "movq %%mm1, %%mm5 \n\t" "psubw %%mm3, %%mm4 \n\t" @@ -3145,17 +3308,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 \n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3179,18 +3342,18 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" "packuswb %%mm1, %%mm7 \n\t" - "movq (%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp) - "pand $ActiveMask, %%mm7 \n\t" + "movq (%%esi,%%ecx,), %%mm3 \n\t" // load c=Prior(x-bpp) + "pand _ActiveMask, %%mm7 \n\t" "movq %%mm3, %%mm2 \n\t" // load b=Prior(x) step 1 - "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) - "punpcklbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c - "movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value - "movq %%mm7, %%mm1 \n\t" // Now mm1 will be used as Raw(x-bpp) - // Now do Paeth for 2nd set of bytes (3-5) - "psrlq $ShiftBpp, %%mm2 \n\t" // load b=Prior(x) step 
2 - "punpcklbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a + "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) + "punpcklbw %%mm0, %%mm3 \n\t" // unpack High bytes of c + "movq %%mm7, (%%edi,%%ecx,) \n\t" // write back updated value + "movq %%mm7, %%mm1 \n\t" // now mm1 will be used as Raw(x-bpp) + // now do Paeth for 2nd set of bytes (3-5) + "psrlq _ShiftBpp, %%mm2 \n\t" // load b=Prior(x) step 2 + "punpcklbw %%mm0, %%mm1 \n\t" // unpack High bytes of a "pxor %%mm7, %%mm7 \n\t" - "punpcklbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b + "punpcklbw %%mm0, %%mm2 \n\t" // unpack High bytes of b // pbv = p - b = (a + b - c) - b = a - c "movq %%mm1, %%mm5 \n\t" // pav = p - a = (a + b - c) - a = b - c @@ -3205,17 +3368,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm5, %%mm0 \n\t" // Create mask pbv bytes < 0 - "pcmpgtw %%mm4, %%mm7 \n\t" // Create mask pav bytes < 0 - "pand %%mm5, %%mm0 \n\t" // Only pbv bytes < 0 in mm0 - "pand %%mm4, %%mm7 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm0 \n\t" // create mask pbv bytes < 0 + "pcmpgtw %%mm4, %%mm7 \n\t" // create mask pav bytes < 0 + "pand %%mm5, %%mm0 \n\t" // only pbv bytes < 0 in mm0 + "pand %%mm4, %%mm7 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm0, %%mm5 \n\t" "psubw %%mm7, %%mm4 \n\t" "psubw %%mm0, %%mm5 \n\t" "psubw %%mm7, %%mm4 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm0, %%mm6 \n\t" // test pa <= pb "movq %%mm4, %%mm7 \n\t" @@ -3232,7 +3395,7 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "paddw %%mm2, %%mm0 \n\t" // test ((pa <= pb)? pa:pb) <= pc "pcmpgtw %%mm6, %%mm7 \n\t" // pab > pc? 
- "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) "pand %%mm7, %%mm3 \n\t" "pandn %%mm0, %%mm7 \n\t" "pxor %%mm1, %%mm1 \n\t" @@ -3240,21 +3403,21 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "pxor %%mm0, %%mm0 \n\t" "packuswb %%mm1, %%mm7 \n\t" "movq %%mm2, %%mm3 \n\t" // load c=Prior(x-bpp) step 1 - "pand $ActiveMask, %%mm7 \n\t" - "punpckhbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b - "psllq $ShiftBpp, %%mm7 \n\t" // Shift bytes to 2nd group of 3 bytes + "pand _ActiveMask, %%mm7 \n\t" + "punpckhbw %%mm0, %%mm2 \n\t" // unpack High bytes of b + "psllq _ShiftBpp, %%mm7 \n\t" // shift bytes to 2nd group of 3 bytes // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" - "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) - "psllq $ShiftBpp, %%mm3 \n\t" // load c=Prior(x-bpp) step 2 - "movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value + "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) + "psllq _ShiftBpp, %%mm3 \n\t" // load c=Prior(x-bpp) step 2 + "movq %%mm7, (%%edi,%%ecx,) \n\t" // write back updated value "movq %%mm7, %%mm1 \n\t" - "punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c - "psllq $ShiftBpp, %%mm1 \n\t" // Shift bytes - // Now mm1 will be used as Raw(x-bpp) - // Now do Paeth for 3rd, and final, set of bytes (6-7) + "punpckhbw %%mm0, %%mm3 \n\t" // unpack High bytes of c + "psllq _ShiftBpp, %%mm1 \n\t" // shift bytes + // now mm1 will be used as Raw(x-bpp) + // now do Paeth for 3rd, and final, set of bytes (6-7) "pxor %%mm7, %%mm7 \n\t" - "punpckhbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a + "punpckhbw %%mm0, %%mm1 \n\t" // unpack High bytes of a "psubw %%mm3, %%mm4 \n\t" // pbv = p - b = (a + b - c) - b = a - c "movq %%mm1, %%mm5 \n\t" @@ -3267,17 +3430,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = 
abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm0, %%mm6 \n\t" // test pa <= pb "movq %%mm4, %%mm7 \n\t" @@ -3299,55 +3462,63 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "paddw %%mm3, %%mm7 \n\t" "pxor %%mm1, %%mm1 \n\t" "packuswb %%mm7, %%mm1 \n\t" - // Step ebx to next set of 8 bytes and repeat loop til done - "addl $8, %%ebx \n\t" - "pand $ActiveMaskEnd, %%mm1 \n\t" - "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) + // step ecx to next set of 8 bytes and repeat loop til done + "addl $8, %%ecx \n\t" + "pand _ActiveMaskEnd, %%mm1 \n\t" + "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) - "cmpl _MMXLength, %%ebx \n\t" + "cmpl _MMXLength, %%ecx \n\t" "pxor %%mm0, %%mm0 \n\t" // pxor does not affect flags - "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value + "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value // mm1 will be used as Raw(x-bpp) next loop // mm3 ready to be used as Prior(x-bpp) next loop "jb paeth_3lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input 
regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } - break; + break; // end 3 bpp case 6: //case 7: // GRR BOGUS //case 5: // GRR BOGUS { - ActiveMask.use = 0x00000000ffffffff; - ActiveMask2.use = 0xffffffff00000000; - ShiftBpp.use = bpp << 3; // == bpp * 8 - ShiftRem.use = 64 - ShiftBpp.use; - __asm__ ( - "movl _dif, %%ebx \n\t" - "movl row, %%edi \n\t" - "movl prev_row, %%esi \n\t" - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" + _ActiveMask.use = 0x00000000ffffffffLL; + _ActiveMask2.use = 0xffffffff00000000LL; + _ShiftBpp.use = bpp << 3; // == bpp * 8 + _ShiftRem.use = 64 - _ShiftBpp.use; + + __asm__ __volatile__ ( + "movl _dif, %%ecx \n\t" +// preload "movl row, %%edi \n\t" +// preload "movl prev_row, %%esi \n\t" + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm1 \n\t" "pxor %%mm0, %%mm0 \n\t" + "paeth_6lp: \n\t" - // Must shift to position Raw(x-bpp) data - "psrlq $ShiftRem, %%mm1 \n\t" - // Do first set of 4 bytes - "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes - "punpcklbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a - "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) - "punpcklbw %%mm0, %%mm2 \n\t" // Unpack Low bytes of b - // Must shift to position Prior(x-bpp) data - "psrlq $ShiftRem, %%mm3 \n\t" + // must shift to position Raw(x-bpp) data + "psrlq _ShiftRem, %%mm1 \n\t" + // do first set of 4 bytes + "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes + "punpcklbw %%mm0, %%mm1 \n\t" // unpack Low bytes of a + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) + "punpcklbw %%mm0, %%mm2 
\n\t" // unpack Low bytes of b + // must shift to position Prior(x-bpp) data + "psrlq _ShiftRem, %%mm3 \n\t" // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" - "punpcklbw %%mm0, %%mm3 \n\t" // Unpack Low bytes of c + "punpcklbw %%mm0, %%mm3 \n\t" // unpack Low bytes of c // pbv = p - b = (a + b - c) - b = a - c "movq %%mm1, %%mm5 \n\t" "psubw %%mm3, %%mm4 \n\t" @@ -3358,17 +3529,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 \n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3392,24 +3563,24 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" "packuswb %%mm1, %%mm7 \n\t" - "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp) - "pand $ActiveMask, %%mm7 \n\t" - "psrlq $ShiftRem, %%mm3 \n\t" - "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) step 1 - "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) + "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // load c=Prior(x-bpp) + "pand _ActiveMask, %%mm7 \n\t" + "psrlq _ShiftRem, %%mm3 
\n\t" + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) step 1 + "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor and Raw(x) "movq %%mm2, %%mm6 \n\t" - "movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" - "psllq $ShiftBpp, %%mm6 \n\t" + "movq %%mm7, (%%edi,%%ecx,) \n\t" // write back updated value + "movq -8(%%edi,%%ecx,), %%mm1 \n\t" + "psllq _ShiftBpp, %%mm6 \n\t" "movq %%mm7, %%mm5 \n\t" - "psrlq $ShiftRem, %%mm1 \n\t" + "psrlq _ShiftRem, %%mm1 \n\t" "por %%mm6, %%mm3 \n\t" - "psllq $ShiftBpp, %%mm5 \n\t" - "punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c + "psllq _ShiftBpp, %%mm5 \n\t" + "punpckhbw %%mm0, %%mm3 \n\t" // unpack High bytes of c "por %%mm5, %%mm1 \n\t" - // Do second set of 4 bytes - "punpckhbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b - "punpckhbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a + // do second set of 4 bytes + "punpckhbw %%mm0, %%mm2 \n\t" // unpack High bytes of b + "punpckhbw %%mm0, %%mm1 \n\t" // unpack High bytes of a // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" // pbv = p - b = (a + b - c) - b = a - c @@ -3422,17 +3593,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 \n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" 
// Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3456,44 +3627,51 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "pxor %%mm1, %%mm1 \n\t" "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" - // Step ex to next set of 8 bytes and repeat loop til done - "addl $8, %%ebx \n\t" + // step ecx to next set of 8 bytes and repeat loop til done + "addl $8, %%ecx \n\t" "packuswb %%mm7, %%mm1 \n\t" - "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value + "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) + "cmpl _MMXLength, %%ecx \n\t" + "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value // mm1 will be used as Raw(x-bpp) next loop "jb paeth_6lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } - break; + break; // end 6 bpp case 4: { - ActiveMask.use = 0x00000000ffffffff; - __asm__ ( - "movl _dif, %%ebx \n\t" - "movl row, %%edi \n\t" - "movl prev_row, %%esi \n\t" + _ActiveMask.use = 0x00000000ffffffffLL; + + __asm__ __volatile__ ( + "movl _dif, %%ecx \n\t" +// preload "movl row, %%edi \n\t" +// preload "movl prev_row, %%esi \n\t" "pxor %%mm0, %%mm0 \n\t" - // PRIME the pump (load the first Raw(x-bpp) data set - "movq 
-8(%%edi,%%ebx,), %%mm1 \n\t" // Only time should need to read + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm1 \n\t" // only time should need to read // a=Raw(x-bpp) bytes "paeth_4lp: \n\t" - // Do first set of 4 bytes - "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes - "punpckhbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a - "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) - "punpcklbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b + // do first set of 4 bytes + "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes + "punpckhbw %%mm0, %%mm1 \n\t" // unpack Low bytes of a + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) + "punpcklbw %%mm0, %%mm2 \n\t" // unpack High bytes of b // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" - "punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c + "punpckhbw %%mm0, %%mm3 \n\t" // unpack High bytes of c // pbv = p - b = (a + b - c) - b = a - c "movq %%mm1, %%mm5 \n\t" "psubw %%mm3, %%mm4 \n\t" @@ -3504,17 +3682,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 \n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 
\n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3538,16 +3716,16 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" "packuswb %%mm1, %%mm7 \n\t" - "movq (%%esi,%%ebx,), %%mm3 \n\t" // load c=Prior(x-bpp) - "pand $ActiveMask, %%mm7 \n\t" + "movq (%%esi,%%ecx,), %%mm3 \n\t" // load c=Prior(x-bpp) + "pand _ActiveMask, %%mm7 \n\t" "movq %%mm3, %%mm2 \n\t" // load b=Prior(x) step 1 - "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) - "punpcklbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c - "movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value - "movq %%mm7, %%mm1 \n\t" // Now mm1 will be used as Raw(x-bpp) - // Do second set of 4 bytes - "punpckhbw %%mm0, %%mm2 \n\t" // Unpack Low bytes of b - "punpcklbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a + "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) + "punpcklbw %%mm0, %%mm3 \n\t" // unpack High bytes of c + "movq %%mm7, (%%edi,%%ecx,) \n\t" // write back updated value + "movq %%mm7, %%mm1 \n\t" // now mm1 will be used as Raw(x-bpp) + // do second set of 4 bytes + "punpckhbw %%mm0, %%mm2 \n\t" // unpack Low bytes of b + "punpcklbw %%mm0, %%mm1 \n\t" // unpack Low bytes of a // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" // pbv = p - b = (a + b - c) - b = a - c @@ -3560,17 +3738,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 
\n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3594,43 +3772,51 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "pxor %%mm1, %%mm1 \n\t" "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" - // Step ex to next set of 8 bytes and repeat loop til done - "addl $8, %%ebx \n\t" + // step ecx to next set of 8 bytes and repeat loop til done + "addl $8, %%ecx \n\t" "packuswb %%mm7, %%mm1 \n\t" - "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value + "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add predictor with Raw(x) + "cmpl _MMXLength, %%ecx \n\t" + "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value // mm1 will be used as Raw(x-bpp) next loop "jb paeth_4lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } - break; + break; // end 4 bpp + case 8: // bpp == 8 { - ActiveMask.use = 0x00000000ffffffff; - __asm__ ( - "movl _dif, %%ebx \n\t" - "movl row, %%edi \n\t" - "movl 
prev_row, %%esi \n\t" + _ActiveMask.use = 0x00000000ffffffffLL; + + __asm__ __volatile__ ( + "movl _dif, %%ecx \n\t" +// preload "movl row, %%edi \n\t" +// preload "movl prev_row, %%esi \n\t" "pxor %%mm0, %%mm0 \n\t" - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // Only time should need to read + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%ecx,), %%mm1 \n\t" // only time should need to read // a=Raw(x-bpp) bytes "paeth_8lp: \n\t" - // Do first set of 4 bytes - "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes - "punpcklbw %%mm0, %%mm1 \n\t" // Unpack Low bytes of a - "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) - "punpcklbw %%mm0, %%mm2 \n\t" // Unpack Low bytes of b + // do first set of 4 bytes + "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes + "punpcklbw %%mm0, %%mm1 \n\t" // unpack Low bytes of a + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) + "punpcklbw %%mm0, %%mm2 \n\t" // unpack Low bytes of b // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" - "punpcklbw %%mm0, %%mm3 \n\t" // Unpack Low bytes of c + "punpcklbw %%mm0, %%mm3 \n\t" // unpack Low bytes of c // pbv = p - b = (a + b - c) - b = a - c "movq %%mm1, %%mm5 \n\t" "psubw %%mm3, %%mm4 \n\t" @@ -3641,17 +3827,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 \n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in 
mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3675,17 +3861,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" "packuswb %%mm1, %%mm7 \n\t" - "movq -8(%%esi,%%ebx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes - "pand $ActiveMask, %%mm7 \n\t" - "movq (%%esi,%%ebx,), %%mm2 \n\t" // load b=Prior(x) - "paddb (%%edi,%%ebx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) - "punpckhbw %%mm0, %%mm3 \n\t" // Unpack High bytes of c - "movq %%mm7, (%%edi,%%ebx,) \n\t" // write back updated value - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" // read a=Raw(x-bpp) bytes + "movq -8(%%esi,%%ecx,), %%mm3 \n\t" // read c=Prior(x-bpp) bytes + "pand _ActiveMask, %%mm7 \n\t" + "movq (%%esi,%%ecx,), %%mm2 \n\t" // load b=Prior(x) + "paddb (%%edi,%%ecx,), %%mm7 \n\t" // add Paeth predictor with Raw(x) + "punpckhbw %%mm0, %%mm3 \n\t" // unpack High bytes of c + "movq %%mm7, (%%edi,%%ecx,) \n\t" // write back updated value + "movq -8(%%edi,%%ecx,), %%mm1 \n\t" // read a=Raw(x-bpp) bytes - // Do second set of 4 bytes - "punpckhbw %%mm0, %%mm2 \n\t" // Unpack High bytes of b - "punpckhbw %%mm0, %%mm1 \n\t" // Unpack High bytes of a + // do second set of 4 bytes + "punpckhbw %%mm0, %%mm2 \n\t" // unpack High bytes of b + "punpckhbw %%mm0, %%mm1 \n\t" // unpack High bytes of a // pav = p - a = (a + b - c) - a = b - c "movq %%mm2, %%mm4 \n\t" // pbv = p - b = (a + b - c) - b = a - c @@ -3698,17 +3884,17 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, // pa = abs(p-a) = abs(pav) // pb = abs(p-b) = abs(pbv) // pc = abs(p-c) = abs(pcv) - "pcmpgtw %%mm4, %%mm0 \n\t" // Create 
mask pav bytes < 0 + "pcmpgtw %%mm4, %%mm0 \n\t" // create mask pav bytes < 0 "paddw %%mm5, %%mm6 \n\t" - "pand %%mm4, %%mm0 \n\t" // Only pav bytes < 0 in mm7 - "pcmpgtw %%mm5, %%mm7 \n\t" // Create mask pbv bytes < 0 + "pand %%mm4, %%mm0 \n\t" // only pav bytes < 0 in mm7 + "pcmpgtw %%mm5, %%mm7 \n\t" // create mask pbv bytes < 0 "psubw %%mm0, %%mm4 \n\t" - "pand %%mm5, %%mm7 \n\t" // Only pbv bytes < 0 in mm0 + "pand %%mm5, %%mm7 \n\t" // only pbv bytes < 0 in mm0 "psubw %%mm0, %%mm4 \n\t" "psubw %%mm7, %%mm5 \n\t" "pxor %%mm0, %%mm0 \n\t" - "pcmpgtw %%mm6, %%mm0 \n\t" // Create mask pcv bytes < 0 - "pand %%mm6, %%mm0 \n\t" // Only pav bytes < 0 in mm7 + "pcmpgtw %%mm6, %%mm0 \n\t" // create mask pcv bytes < 0 + "pand %%mm6, %%mm0 \n\t" // only pav bytes < 0 in mm7 "psubw %%mm7, %%mm5 \n\t" "psubw %%mm0, %%mm6 \n\t" // test pa <= pb @@ -3732,94 +3918,113 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "pxor %%mm1, %%mm1 \n\t" "paddw %%mm3, %%mm7 \n\t" "pxor %%mm0, %%mm0 \n\t" - // Step ex to next set of 8 bytes and repeat loop til done - "addl $8, %%ebx \n\t" + // step ecx to next set of 8 bytes and repeat loop til done + "addl $8, %%ecx \n\t" "packuswb %%mm7, %%mm1 \n\t" - "paddb -8(%%edi,%%ebx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm1, -8(%%edi,%%ebx,) \n\t" // write back updated value + "paddb -8(%%edi,%%ecx,), %%mm1 \n\t" // add Paeth predictor with Raw(x) + "cmpl _MMXLength, %%ecx \n\t" + "movq %%mm1, -8(%%edi,%%ecx,) \n\t" // write back updated value // mm1 will be used as Raw(x-bpp) next loop "jb paeth_8lp \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=S" (dummy_value_S), // output regs (dummy) + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (prev_row), // esi // input regs + "1" (row) // edi - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: 
clobber list + : "%ecx" // clobber list +#if 0 /* %mm0, ..., %mm7 not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } - break; + break; // end 8 bpp case 1: // bpp = 1 case 2: // bpp = 2 default: // bpp > 8 { - __asm__ ( + __asm__ __volatile__ ( +#ifdef __PIC__ + "pushl %%ebx \n\t" // save Global Offset Table index +#endif "movl _dif, %%ebx \n\t" "cmpl _FullLength, %%ebx \n\t" "jnb paeth_dend \n\t" - "movl row, %%edi \n\t" - "movl prev_row, %%esi \n\t" - // Do Paeth decode for remaining bytes + +// preload "movl row, %%edi \n\t" +// preload "movl prev_row, %%esi \n\t" + // do Paeth decode for remaining bytes "movl %%ebx, %%edx \n\t" - "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below - "subl bpp, %%edx \n\t" // Set edx = ebx - bpp +// preload "subl bpp, %%edx \n\t" // (bpp is preloaded into ecx) + "subl %%ecx, %%edx \n\t" // edx = ebx - bpp + "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx + "paeth_dlp: \n\t" "xorl %%eax, %%eax \n\t" // pav = p - a = (a + b - c) - a = b - c "movb (%%esi,%%ebx,), %%al \n\t" // load Prior(x) into al "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp) - "movl %%eax, patemp \n\t" // Save pav for later use + "movl %%eax, _patemp \n\t" // Save pav for later use "xorl %%eax, %%eax \n\t" // pbv = p - b = (a + b - c) - b = a - c "movb (%%edi,%%edx,), %%al \n\t" // load Raw(x-bpp) into al "subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp) "movl %%eax, %%ecx \n\t" // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv - "addl patemp, %%eax \n\t" // pcv = pav + pbv + "addl _patemp, %%eax \n\t" // pcv = pav + pbv // pc = abs(pcv) "testl $0x80000000, %%eax \n\t" "jz paeth_dpca \n\t" "negl %%eax \n\t" // reverse sign of neg values + "paeth_dpca: \n\t" - "movl %%eax, pctemp \n\t" // save pc for later use + "movl %%eax, _pctemp \n\t" // save pc for later use // pb = abs(pbv) 
"testl $0x80000000, %%ecx \n\t" "jz paeth_dpba \n\t" "negl %%ecx \n\t" // reverse sign of neg values + "paeth_dpba: \n\t" - "movl %%ecx, pbtemp \n\t" // save pb for later use + "movl %%ecx, _pbtemp \n\t" // save pb for later use // pa = abs(pav) - "movl patemp, %%eax \n\t" + "movl _patemp, %%eax \n\t" "testl $0x80000000, %%eax \n\t" "jz paeth_dpaa \n\t" "negl %%eax \n\t" // reverse sign of neg values + "paeth_dpaa: \n\t" - "movl %%eax, patemp \n\t" // save pa for later use + "movl %%eax, _patemp \n\t" // save pa for later use // test if pa <= pb "cmpl %%ecx, %%eax \n\t" "jna paeth_dabb \n\t" // pa > pb; now test if pb <= pc - "cmpl pctemp, %%ecx \n\t" + "cmpl _pctemp, %%ecx \n\t" "jna paeth_dbbc \n\t" // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp) "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "jmp paeth_dpaeth \n\t" + "paeth_dbbc: \n\t" // pb <= pc; Raw(x) = Paeth(x) + Prior(x) "movb (%%esi,%%ebx,), %%cl \n\t" // load Prior(x) into cl "jmp paeth_dpaeth \n\t" + "paeth_dabb: \n\t" // pa <= pb; now test if pa <= pc - "cmpl pctemp, %%eax \n\t" + "cmpl _pctemp, %%eax \n\t" "jna paeth_dabc \n\t" // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp) "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "jmp paeth_dpaeth \n\t" + "paeth_dabc: \n\t" // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp) "movb (%%edi,%%edx,), %%cl \n\t" // load Raw(x-bpp) into cl + "paeth_dpaeth: \n\t" "incl %%ebx \n\t" "incl %%edx \n\t" @@ -3827,85 +4032,110 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "addb %%cl, -1(%%edi,%%ebx,) \n\t" "cmpl _FullLength, %%ebx \n\t" "jb paeth_dlp \n\t" + "paeth_dend: \n\t" +#ifdef __PIC__ + "popl %%ebx \n\t" // index to Global Offset Table +#endif - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (bpp), // ecx // input regs + "1" (prev_row), // 
esi + "2" (row) // edi - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list + : "%eax", "%edx" // clobber list +#ifndef __PIC__ + , "%ebx" +#endif ); } return; // No need to go further with this one - } // end switch ( bpp ) - __asm__ ( - // MMX acceleration complete now do clean-up - // Check if any remaining bytes left to decode + + } // end switch (bpp) + + __asm__ __volatile__ ( + // MMX acceleration complete; now do clean-up + // check if any remaining bytes left to decode +#ifdef __PIC__ + "pushl %%ebx \n\t" // save index to Global Offset Table +#endif "movl _MMXLength, %%ebx \n\t" "cmpl _FullLength, %%ebx \n\t" "jnb paeth_end \n\t" - "movl row, %%edi \n\t" - "movl prev_row, %%esi \n\t" - // Do Paeth decode for remaining bytes +//pre "movl row, %%edi \n\t" +//pre "movl prev_row, %%esi \n\t" + // do Paeth decode for remaining bytes "movl %%ebx, %%edx \n\t" - "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx in loop below - "subl bpp, %%edx \n\t" // Set edx = ebx - bpp +//pre "subl bpp, %%edx \n\t" // (bpp is preloaded into ecx) + "subl %%ecx, %%edx \n\t" // edx = ebx - bpp + "xorl %%ecx, %%ecx \n\t" // zero ecx before using cl & cx below + "paeth_lp2: \n\t" "xorl %%eax, %%eax \n\t" // pav = p - a = (a + b - c) - a = b - c "movb (%%esi,%%ebx,), %%al \n\t" // load Prior(x) into al "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp) - "movl %%eax, patemp \n\t" // Save pav for later use + "movl %%eax, _patemp \n\t" // Save pav for later use "xorl %%eax, %%eax \n\t" // pbv = p - b = (a + b - c) - b = a - c "movb (%%edi,%%edx,), %%al \n\t" // load Raw(x-bpp) into al "subl %%ecx, %%eax \n\t" // subtract Prior(x-bpp) "movl %%eax, %%ecx \n\t" // pcv = p - c = (a + b - c) -c = (a - c) + (b - c) = pav + pbv - "addl patemp, %%eax \n\t" // pcv = pav + pbv + "addl _patemp, %%eax \n\t" // pcv = pav + pbv // pc = abs(pcv) "testl $0x80000000, %%eax \n\t" "jz paeth_pca2 \n\t" "negl %%eax 
\n\t" // reverse sign of neg values + "paeth_pca2: \n\t" - "movl %%eax, pctemp \n\t" // save pc for later use + "movl %%eax, _pctemp \n\t" // save pc for later use // pb = abs(pbv) "testl $0x80000000, %%ecx \n\t" "jz paeth_pba2 \n\t" "negl %%ecx \n\t" // reverse sign of neg values + "paeth_pba2: \n\t" - "movl %%ecx, pbtemp \n\t" // save pb for later use + "movl %%ecx, _pbtemp \n\t" // save pb for later use // pa = abs(pav) - "movl patemp, %%eax \n\t" + "movl _patemp, %%eax \n\t" "testl $0x80000000, %%eax \n\t" "jz paeth_paa2 \n\t" "negl %%eax \n\t" // reverse sign of neg values + "paeth_paa2: \n\t" - "movl %%eax, patemp \n\t" // save pa for later use + "movl %%eax, _patemp \n\t" // save pa for later use // test if pa <= pb "cmpl %%ecx, %%eax \n\t" "jna paeth_abb2 \n\t" // pa > pb; now test if pb <= pc - "cmpl pctemp, %%ecx \n\t" + "cmpl _pctemp, %%ecx \n\t" "jna paeth_bbc2 \n\t" // pb > pc; Raw(x) = Paeth(x) + Prior(x-bpp) "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "jmp paeth_paeth2 \n\t" + "paeth_bbc2: \n\t" // pb <= pc; Raw(x) = Paeth(x) + Prior(x) "movb (%%esi,%%ebx,), %%cl \n\t" // load Prior(x) into cl "jmp paeth_paeth2 \n\t" + "paeth_abb2: \n\t" // pa <= pb; now test if pa <= pc - "cmpl pctemp, %%eax \n\t" + "cmpl _pctemp, %%eax \n\t" "jna paeth_abc2 \n\t" // pa > pc; Raw(x) = Paeth(x) + Prior(x-bpp) "movb (%%esi,%%edx,), %%cl \n\t" // load Prior(x-bpp) into cl "jmp paeth_paeth2 \n\t" + "paeth_abc2: \n\t" // pa <= pc; Raw(x) = Paeth(x) + Raw(x-bpp) "movb (%%edi,%%edx,), %%cl \n\t" // load Raw(x-bpp) into cl + "paeth_paeth2: \n\t" "incl %%ebx \n\t" "incl %%edx \n\t" @@ -3913,491 +4143,602 @@ png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row, "addb %%cl, -1(%%edi,%%ebx,) \n\t" "cmpl _FullLength, %%ebx \n\t" "jb paeth_lp2 \n\t" + "paeth_end: \n\t" - "emms \n\t" // End MMX instructions; prep for possible FP instrs. + "EMMS \n\t" // end MMX; prep for poss. FP instrs. 
+#ifdef __PIC__ + "popl %%ebx \n\t" // restore index to Global Offset Table +#endif - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (bpp), // ecx // input regs + "1" (prev_row), // esi + "2" (row) // edi - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list + : "%eax", "%edx" // clobber list (no input regs!) +#ifndef __PIC__ + , "%ebx" +#endif ); -#endif /* GRR_GCC_MMX_CONVERTED */ -} + +} /* end png_read_filter_row_mmx_paeth() */ + + + + +//===========================================================================// +// // +// P N G _ R E A D _ F I L T E R _ R O W _ M M X _ S U B // +// // +//===========================================================================// // Optimized code for PNG Sub filter decoder -void /* PRIVATE */ + +static void /* PRIVATE */ png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row) { -#ifdef GRR_GCC_MMX_CONVERTED int bpp; + int dummy_value_a; + int dummy_value_D; - bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel - _FullLength = row_info->rowbytes - bpp; // # of bytes to filter - __asm__ ( - "movl row, %%edi \n\t" + bpp = (row_info->pixel_depth + 7) >> 3; // calc number of bytes per pixel + _FullLength = row_info->rowbytes - bpp; // number of bytes to filter + + __asm__ __volatile__ ( +//pre "movl row, %%edi \n\t" "movl %%edi, %%esi \n\t" // lp = row - "addl bpp, %%edi \n\t" // rp = row + bpp - "xorl %%eax, %%eax \n\t" +//pre "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp +//irr "xorl %%eax, %%eax \n\t" // get # of bytes to alignment "movl %%edi, _dif \n\t" // take start of row "addl $0xf, _dif \n\t" // add 7 + 8 to incr past - // alignment boundary - "xorl %%ebx, %%ebx \n\t" + // alignment boundary + "xorl %%ecx, %%ecx \n\t" "andl $0xfffffff8, _dif \n\t" // mask to 
alignment boundary "subl %%edi, _dif \n\t" // subtract from start ==> value - // ebx at alignment - "jz sub_go \n\t" - // fix alignment - "sub_lp1: \n\t" - "movb (%%esi,%%ebx,), %%al \n\t" - "addb %%al, (%%edi,%%ebx,) \n\t" - "incl %%ebx \n\t" - "cmpl _dif, %%ebx \n\t" + "jz sub_go \n\t" // ecx at alignment + + "sub_lp1: \n\t" // fix alignment + "movb (%%esi,%%ecx,), %%al \n\t" + "addb %%al, (%%edi,%%ecx,) \n\t" + "incl %%ecx \n\t" + "cmpl _dif, %%ecx \n\t" "jb sub_lp1 \n\t" + "sub_go: \n\t" - "movl _FullLength, %%ecx \n\t" - "movl %%ecx, %%edx \n\t" - "subl %%ebx, %%edx \n\t" // subtract alignment fix + "movl _FullLength, %%eax \n\t" + "movl %%eax, %%edx \n\t" + "subl %%ecx, %%edx \n\t" // subtract alignment fix "andl $0x00000007, %%edx \n\t" // calc bytes over mult of 8 - "subl %%edx, %%ecx \n\t" // drop over bytes from length - "movl %%ecx, _MMXLength \n\t" + "subl %%edx, %%eax \n\t" // drop over bytes from length + "movl %%eax, _MMXLength \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (bpp), // eax // input regs + "1" (row) // edi - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi" // CHECKASM: clobber list + : "%ebx", "%ecx", "%edx" // clobber list + , "%esi" + +#if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); - // Now do the math for the rest of the row - switch ( bpp ) + // now do the math for the rest of the row + switch (bpp) { - case 3: - { - ActiveMask.use = 0x0000ffffff000000; - ShiftBpp.use = 24; // == 3 * 8 - ShiftRem.use = 40; // == 64 - 24 - __asm__ ( - "movl row, %%edi \n\t" - "movq $ActiveMask, %%mm7 \n\t" // Load ActiveMask for 2nd active byte group - "movl %%edi, %%esi \n\t" // lp = row - "addl bpp, %%edi \n\t" // rp = row + bpp - "movq %%mm7, %%mm6 \n\t" - "movl _dif, %%ebx \n\t" - "psllq $ShiftBpp, %%mm6 \n\t" // Move mask in mm6 to cover 3rd active - // byte group - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" - "sub_3lp: \n\t" - "psrlq $ShiftRem, %%mm1 \n\t" // Shift data for adding 1st bpp bytes - // no need for mask; shift clears inactive bytes - // Add 1st active group - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "paddb %%mm1, %%mm0 \n\t" - // Add 2nd active group - "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 - "psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly - "pand %%mm7, %%mm1 \n\t" // mask to use only 2nd active group - "paddb %%mm1, %%mm0 \n\t" - // Add 3rd active group - "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 - "psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly - "pand %%mm6, %%mm1 \n\t" // mask to use only 3rd active group - "addl $8, %%ebx \n\t" - "paddb %%mm1, %%mm0 \n\t" - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // Write updated Raws back to array - // Prep for doing 1st add at top of loop - "movq %%mm0, %%mm1 \n\t" - "jb sub_3lp \n\t" + case 3: + { + _ActiveMask.use = 0x0000ffffff000000LL; + _ShiftBpp.use = 24; // == 3 * 8 + _ShiftRem.use = 40; // == 64 - 24 - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + __asm__ __volatile__ ( +// preload "movl row, %%edi \n\t" + "movq 
_ActiveMask, %%mm7 \n\t" // load _ActiveMask for 2nd + // active byte group + "movl %%edi, %%esi \n\t" // lp = row +// preload "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp + "movq %%mm7, %%mm6 \n\t" + "movl _dif, %%edx \n\t" + "psllq _ShiftBpp, %%mm6 \n\t" // move mask in mm6 to cover + // 3rd active byte group + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%edx,), %%mm1 \n\t" - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + "sub_3lp: \n\t" // shift data for adding first + "psrlq _ShiftRem, %%mm1 \n\t" // bpp bytes (no need for mask; + // shift clears inactive bytes) + // add 1st active group + "movq (%%edi,%%edx,), %%mm0 \n\t" + "paddb %%mm1, %%mm0 \n\t" - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm6", "%mm7" // CHECKASM: clobber list + // add 2nd active group + "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 + "psllq _ShiftBpp, %%mm1 \n\t" // shift data to pos. correctly + "pand %%mm7, %%mm1 \n\t" // mask to use 2nd active group + "paddb %%mm1, %%mm0 \n\t" + + // add 3rd active group + "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 + "psllq _ShiftBpp, %%mm1 \n\t" // shift data to pos. correctly + "pand %%mm6, %%mm1 \n\t" // mask to use 3rd active group + "addl $8, %%edx \n\t" + "paddb %%mm1, %%mm0 \n\t" + + "cmpl _MMXLength, %%edx \n\t" + "movq %%mm0, -8(%%edi,%%edx,) \n\t" // write updated Raws to array + "movq %%mm0, %%mm1 \n\t" // prep 1st add at top of loop + "jb sub_3lp \n\t" + + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 + + : "0" (bpp), // eax // input regs + "1" (row) // edi + + : "%edx", "%esi" // clobber list +#if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm6", "%mm7" +#endif ); } break; case 1: { - // Placed here just in case this is a duplicate of the - // non-MMX code for the SUB filter in png_read_filter_row above - // - // png_bytep rp; - // png_bytep lp; - // png_uint_32 i; - // bpp = (row_info->pixel_depth + 7) >> 3; - // for (i = (png_uint_32)bpp, rp = row + bpp, lp = row; - // i < row_info->rowbytes; i++, rp++, lp++) - // { - // *rp = (png_byte)(((int)(*rp) + (int)(*lp)) & 0xff); - // } - __asm__ ( - "movl _dif, %%ebx \n\t" - "movl row, %%edi \n\t" - "cmpl _FullLength, %%ebx \n\t" + __asm__ __volatile__ ( + "movl _dif, %%edx \n\t" +// preload "movl row, %%edi \n\t" + "cmpl _FullLength, %%edx \n\t" "jnb sub_1end \n\t" "movl %%edi, %%esi \n\t" // lp = row "xorl %%eax, %%eax \n\t" - "addl bpp, %%edi \n\t" // rp = row + bpp +// preload "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp + "sub_1lp: \n\t" - "movb (%%esi,%%ebx,), %%al \n\t" - "addb %%al, (%%edi,%%ebx,) \n\t" - "incl %%ebx \n\t" - "cmpl _FullLength, %%ebx \n\t" + "movb (%%esi,%%edx,), %%al \n\t" + "addb %%al, (%%edi,%%edx,) \n\t" + "incl %%edx \n\t" + "cmpl _FullLength, %%edx \n\t" "jb sub_1lp \n\t" + "sub_1end: \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "0" (bpp), // eax // input regs + "1" (row) // edi - : "%eax", "%ebx", "%edi", "%esi" // CHECKASM: clobber list + : "%edx", "%esi" // clobber list ); } return; case 6: - case 7: case 4: - case 5: + //case 7: // GRR BOGUS + //case 5: // GRR BOGUS { - ShiftBpp.use = bpp << 3; - ShiftRem.use = 64 - ShiftBpp.use; - __asm__ ( - "movl row, %%edi \n\t" - "movl _dif, %%ebx \n\t" - "movl %%edi, %%esi \n\t" // lp = row - "addl bpp, %%edi \n\t" // rp = row + bpp - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm1 
\n\t" - "sub_4lp: \n\t" - "psrlq $ShiftRem, %%mm1 \n\t" // Shift data for adding 1st bpp bytes - // no need for mask; shift clears inactive bytes - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "paddb %%mm1, %%mm0 \n\t" - // Add 2nd active group - "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 - "psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly - // there is no need for any mask - // since shift clears inactive bits/bytes - "addl $8, %%ebx \n\t" - "paddb %%mm1, %%mm0 \n\t" - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" - "movq %%mm0, %%mm1 \n\t" // Prep for doing 1st add at top of loop - "jb sub_4lp \n\t" + _ShiftBpp.use = bpp << 3; + _ShiftRem.use = 64 - _ShiftBpp.use; - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + __asm__ __volatile__ ( +// preload "movl row, %%edi \n\t" + "movl _dif, %%edx \n\t" + "movl %%edi, %%esi \n\t" // lp = row +// preload "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%edx,), %%mm1 \n\t" - : "%ebx", "%edi", "%esi", "%mm0", "%mm1" // CHECKASM: clobber list + "sub_4lp: \n\t" // shift data for adding first + "psrlq _ShiftRem, %%mm1 \n\t" // bpp bytes (no need for mask; + // shift clears inactive bytes) + "movq (%%edi,%%edx,), %%mm0 \n\t" + "paddb %%mm1, %%mm0 \n\t" + + // add 2nd active group + "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 + "psllq _ShiftBpp, %%mm1 \n\t" // shift data to pos. correctly + "addl $8, %%edx \n\t" + "paddb %%mm1, %%mm0 \n\t" + + "cmpl _MMXLength, %%edx \n\t" + "movq %%mm0, -8(%%edi,%%edx,) \n\t" + "movq %%mm0, %%mm1 \n\t" // prep 1st add at top of loop + "jb sub_4lp \n\t" + + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 + + : "0" (bpp), // eax // input regs + "1" (row) // edi + + : "%edx", "%esi" // clobber list +#if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1" +#endif ); } break; case 2: { - ActiveMask.use = 0x00000000ffff0000; - ShiftBpp.use = 16; // == 2 * 8 - ShiftRem.use = 48; // == 64 - 16 - __asm__ ( - "movq $ActiveMask, %%mm7 \n\t" // Load ActiveMask for 2nd active byte group - "movl _dif, %%ebx \n\t" - "movq %%mm7, %%mm6 \n\t" - "movl row, %%edi \n\t" - "psllq $ShiftBpp, %%mm6 \n\t" // Move mask in mm6 to cover 3rd active - // byte group - "movl %%edi, %%esi \n\t" // lp = row - "movq %%mm6, %%mm5 \n\t" - "addl bpp, %%edi \n\t" // rp = row + bpp - "psllq $ShiftBpp, %%mm5 \n\t" // Move mask in mm5 to cover 4th active - // byte group - // PRIME the pump (load the first Raw(x-bpp) data set - "movq -8(%%edi,%%ebx,), %%mm1 \n\t" - "sub_2lp: \n\t" - // Add 1st active group - "psrlq $ShiftRem, %%mm1 \n\t" // Shift data for adding 1st bpp bytes - // no need for mask; shift clears inactive - // bytes - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "paddb %%mm1, %%mm0 \n\t" - // Add 2nd active group - "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 - "psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly - "pand %%mm7, %%mm1 \n\t" // mask to use only 2nd active group - "paddb %%mm1, %%mm0 \n\t" - // Add 3rd active group - "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 - "psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly - "pand %%mm6, %%mm1 \n\t" // mask to use only 3rd active group - "paddb %%mm1, %%mm0 \n\t" - // Add 4th active group - "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 - "psllq $ShiftBpp, %%mm1 \n\t" // shift data to position correctly - "pand %%mm5, %%mm1 \n\t" // mask to use only 4th active group - "addl $8, %%ebx \n\t" - "paddb %%mm1, %%mm0 \n\t" - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // Write updated Raws back to array - "movq %%mm0, %%mm1 \n\t" // Prep for doing 1st add at top of loop - "jb sub_2lp \n\t" + _ActiveMask.use = 0x00000000ffff0000LL; + _ShiftBpp.use = 16; // == 2 * 8 + 
_ShiftRem.use = 48; // == 64 - 16 - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + __asm__ __volatile__ ( + "movq _ActiveMask, %%mm7 \n\t" // load _ActiveMask for 2nd + // active byte group + "movl _dif, %%edx \n\t" + "movq %%mm7, %%mm6 \n\t" +// preload "movl row, %%edi \n\t" + "psllq _ShiftBpp, %%mm6 \n\t" // move mask in mm6 to cover + // 3rd active byte group + "movl %%edi, %%esi \n\t" // lp = row + "movq %%mm6, %%mm5 \n\t" +// preload "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp + "psllq _ShiftBpp, %%mm5 \n\t" // move mask in mm5 to cover + // 4th active byte group + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%edx,), %%mm1 \n\t" - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + "sub_2lp: \n\t" // shift data for adding first + "psrlq _ShiftRem, %%mm1 \n\t" // bpp bytes (no need for mask; + // shift clears inactive bytes) + // add 1st active group + "movq (%%edi,%%edx,), %%mm0 \n\t" + "paddb %%mm1, %%mm0 \n\t" - : "%ebx", "%edi", "%esi", "%mm0", "%mm1", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + // add 2nd active group + "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 + "psllq _ShiftBpp, %%mm1 \n\t" // shift data to pos. correctly + "pand %%mm7, %%mm1 \n\t" // mask to use 2nd active group + "paddb %%mm1, %%mm0 \n\t" + + // add 3rd active group + "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 + "psllq _ShiftBpp, %%mm1 \n\t" // shift data to pos. correctly + "pand %%mm6, %%mm1 \n\t" // mask to use 3rd active group + "paddb %%mm1, %%mm0 \n\t" + + // add 4th active group + "movq %%mm0, %%mm1 \n\t" // mov updated Raws to mm1 + "psllq _ShiftBpp, %%mm1 \n\t" // shift data to pos. 
correctly + "pand %%mm5, %%mm1 \n\t" // mask to use 4th active group + "addl $8, %%edx \n\t" + "paddb %%mm1, %%mm0 \n\t" + "cmpl _MMXLength, %%edx \n\t" + "movq %%mm0, -8(%%edi,%%edx,) \n\t" // write updated Raws to array + "movq %%mm0, %%mm1 \n\t" // prep 1st add at top of loop + "jb sub_2lp \n\t" + + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 + + : "0" (bpp), // eax // input regs + "1" (row) // edi + + : "%edx", "%esi" // clobber list +#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm5", "%mm6", "%mm7" +#endif ); } break; + case 8: { - __asm__ ( - "movl row, %%edi \n\t" - "movl _dif, %%ebx \n\t" - "movl %%edi, %%esi \n\t" // lp = row - "addl bpp, %%edi \n\t" // rp = row + bpp - "movl _MMXLength, %%ecx \n\t" - "movq -8(%%edi,%%ebx,), %%mm7 \n\t" // PRIME the pump (load the first - // Raw(x-bpp) data set - "andl $0x0000003f, %%ecx \n\t" // calc bytes over mult of 64 - "sub_8lp: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" // Load Sub(x) for 1st 8 bytes - "paddb %%mm7, %%mm0 \n\t" - "movq 8(%%edi,%%ebx,), %%mm1 \n\t" // Load Sub(x) for 2nd 8 bytes - "movq %%mm0, (%%edi,%%ebx,) \n\t" // Write Raw(x) for 1st 8 bytes - // Now mm0 will be used as Raw(x-bpp) for - // the 2nd group of 8 bytes. This will be - // repeated for each group of 8 bytes with - // the 8th group being used as the Raw(x-bpp) - // for the 1st group of the next loop. 
- "paddb %%mm0, %%mm1 \n\t" - "movq 16(%%edi,%%ebx,), %%mm2 \n\t" // Load Sub(x) for 3rd 8 bytes - "movq %%mm1, 8(%%edi,%%ebx,) \n\t" // Write Raw(x) for 2nd 8 bytes - "paddb %%mm1, %%mm2 \n\t" - "movq 24(%%edi,%%ebx,), %%mm3 \n\t" // Load Sub(x) for 4th 8 bytes - "movq %%mm2, 16(%%edi,%%ebx,) \n\t" // Write Raw(x) for 3rd 8 bytes - "paddb %%mm2, %%mm3 \n\t" - "movq 32(%%edi,%%ebx,), %%mm4 \n\t" // Load Sub(x) for 5th 8 bytes - "movq %%mm3, 24(%%edi,%%ebx,) \n\t" // Write Raw(x) for 4th 8 bytes - "paddb %%mm3, %%mm4 \n\t" - "movq 40(%%edi,%%ebx,), %%mm5 \n\t" // Load Sub(x) for 6th 8 bytes - "movq %%mm4, 32(%%edi,%%ebx,) \n\t" // Write Raw(x) for 5th 8 bytes - "paddb %%mm4, %%mm5 \n\t" - "movq 48(%%edi,%%ebx,), %%mm6 \n\t" // Load Sub(x) for 7th 8 bytes - "movq %%mm5, 40(%%edi,%%ebx,) \n\t" // Write Raw(x) for 6th 8 bytes - "paddb %%mm5, %%mm6 \n\t" - "movq 56(%%edi,%%ebx,), %%mm7 \n\t" // Load Sub(x) for 8th 8 bytes - "movq %%mm6, 48(%%edi,%%ebx,) \n\t" // Write Raw(x) for 7th 8 bytes - "addl $64, %%ebx \n\t" - "paddb %%mm6, %%mm7 \n\t" - "cmpl %%ecx, %%ebx \n\t" - "movq %%mm7, -8(%%edi,%%ebx,) \n\t" // Write Raw(x) for 8th 8 bytes - "jb sub_8lp \n\t" - "cmpl _MMXLength, %%ebx \n\t" - "jnb sub_8lt8 \n\t" - "sub_8lpA: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "addl $8, %%ebx \n\t" - "paddb %%mm7, %%mm0 \n\t" - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // use -8 to offset early add to ebx - "movq %%mm0, %%mm7 \n\t" // Move calculated Raw(x) data to mm1 to - // be the new Raw(x-bpp) for the next loop - "jb sub_8lpA \n\t" - "sub_8lt8: \n\t" + __asm__ __volatile__ ( +// preload "movl row, %%edi \n\t" + "movl _dif, %%edx \n\t" + "movl %%edi, %%esi \n\t" // lp = row +// preload "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp + "movl _MMXLength, %%ecx \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + // prime the pump: load the first Raw(x-bpp) data set + "movq -8(%%edi,%%edx,), %%mm7 \n\t" + "andl 
$0x0000003f, %%ecx \n\t" // calc bytes over mult of 64 - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + "sub_8lp: \n\t" + "movq (%%edi,%%edx,), %%mm0 \n\t" // load Sub(x) for 1st 8 bytes + "paddb %%mm7, %%mm0 \n\t" + "movq 8(%%edi,%%edx,), %%mm1 \n\t" // load Sub(x) for 2nd 8 bytes + "movq %%mm0, (%%edi,%%edx,) \n\t" // write Raw(x) for 1st 8 bytes - : "%ebx", "%ecx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + // Now mm0 will be used as Raw(x-bpp) for the 2nd group of 8 bytes. + // This will be repeated for each group of 8 bytes with the 8th + // group being used as the Raw(x-bpp) for the 1st group of the + // next loop. + + "paddb %%mm0, %%mm1 \n\t" + "movq 16(%%edi,%%edx,), %%mm2 \n\t" // load Sub(x) for 3rd 8 bytes + "movq %%mm1, 8(%%edi,%%edx,) \n\t" // write Raw(x) for 2nd 8 bytes + "paddb %%mm1, %%mm2 \n\t" + "movq 24(%%edi,%%edx,), %%mm3 \n\t" // load Sub(x) for 4th 8 bytes + "movq %%mm2, 16(%%edi,%%edx,) \n\t" // write Raw(x) for 3rd 8 bytes + "paddb %%mm2, %%mm3 \n\t" + "movq 32(%%edi,%%edx,), %%mm4 \n\t" // load Sub(x) for 5th 8 bytes + "movq %%mm3, 24(%%edi,%%edx,) \n\t" // write Raw(x) for 4th 8 bytes + "paddb %%mm3, %%mm4 \n\t" + "movq 40(%%edi,%%edx,), %%mm5 \n\t" // load Sub(x) for 6th 8 bytes + "movq %%mm4, 32(%%edi,%%edx,) \n\t" // write Raw(x) for 5th 8 bytes + "paddb %%mm4, %%mm5 \n\t" + "movq 48(%%edi,%%edx,), %%mm6 \n\t" // load Sub(x) for 7th 8 bytes + "movq %%mm5, 40(%%edi,%%edx,) \n\t" // write Raw(x) for 6th 8 bytes + "paddb %%mm5, %%mm6 \n\t" + "movq 56(%%edi,%%edx,), %%mm7 \n\t" // load Sub(x) for 8th 8 bytes + "movq %%mm6, 48(%%edi,%%edx,) \n\t" // write Raw(x) for 7th 8 bytes + "addl $64, %%edx \n\t" + "paddb %%mm6, %%mm7 \n\t" + "cmpl %%ecx, %%edx \n\t" + "movq %%mm7, -8(%%edi,%%edx,) \n\t" // write Raw(x) for 8th 8 bytes + "jb sub_8lp \n\t" + + "cmpl _MMXLength, %%edx \n\t" + "jnb sub_8lt8 \n\t" + + "sub_8lpA: \n\t" + "movq (%%edi,%%edx,), %%mm0 \n\t" 
+ "addl $8, %%edx \n\t" + "paddb %%mm7, %%mm0 \n\t" + "cmpl _MMXLength, %%edx \n\t" + "movq %%mm0, -8(%%edi,%%edx,) \n\t" // -8 to offset early addl edx + "movq %%mm0, %%mm7 \n\t" // move calculated Raw(x) data + // to mm1 to be new Raw(x-bpp) + // for next loop + "jb sub_8lpA \n\t" + + "sub_8lt8: \n\t" + + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 + + : "0" (bpp), // eax // input regs + "1" (row) // edi + + : "%ecx", "%edx", "%esi" // clobber list +#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); } break; - default: // bpp greater than 8 bytes + default: // bpp greater than 8 bytes GRR BOGUS { - __asm__ ( - "movl _dif, %%ebx \n\t" - "movl row, %%edi \n\t" - "movl %%edi, %%esi \n\t" // lp = row - "addl bpp, %%edi \n\t" // rp = row + bpp - "sub_Alp: \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "movq (%%esi,%%ebx,), %%mm1 \n\t" - "addl $8, %%ebx \n\t" - "paddb %%mm1, %%mm0 \n\t" - "cmpl _MMXLength, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // mov does not affect flags; -8 to offset - // add ebx - "jb sub_Alp \n\t" + __asm__ __volatile__ ( + "movl _dif, %%edx \n\t" +// preload "movl row, %%edi \n\t" + "movl %%edi, %%esi \n\t" // lp = row +// preload "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + "sub_Alp: \n\t" + "movq (%%edi,%%edx,), %%mm0 \n\t" + "movq (%%esi,%%edx,), %%mm1 \n\t" + "addl $8, %%edx \n\t" + "paddb %%mm1, %%mm0 \n\t" + "cmpl _MMXLength, %%edx \n\t" + "movq %%mm0, -8(%%edi,%%edx,) \n\t" // mov does not affect flags; + // -8 to offset addl edx + "jb sub_Alp \n\t" - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 - : "%ebx", "%edi", "%esi", "%mm0", "%mm1" // CHECKASM: clobber list + : "0" (bpp), // eax // input regs + "1" 
(row) // edi + + : "%edx", "%esi" // clobber list +#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1" +#endif ); } break; - } // end switch ( bpp ) + } // end switch (bpp) - __asm__ ( - "movl _MMXLength, %%ebx \n\t" - "movl row, %%edi \n\t" - "cmpl _FullLength, %%ebx \n\t" - "jnb sub_end \n\t" - "movl %%edi, %%esi \n\t" // lp = row - "xorl %%eax, %%eax \n\t" - "addl bpp, %%edi \n\t" // rp = row + bpp - "sub_lp2: \n\t" - "movb (%%esi,%%ebx,), %%al \n\t" - "addb %%al, (%%edi,%%ebx,) \n\t" - "incl %%ebx \n\t" - "cmpl _FullLength, %%ebx \n\t" - "jb sub_lp2 \n\t" - "sub_end: \n\t" - "emms \n\t" // end MMX instructions + __asm__ __volatile__ ( + "movl _MMXLength, %%edx \n\t" +//pre "movl row, %%edi \n\t" + "cmpl _FullLength, %%edx \n\t" + "jnb sub_end \n\t" - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + "movl %%edi, %%esi \n\t" // lp = row +//pre "movl bpp, %%eax \n\t" + "addl %%eax, %%edi \n\t" // rp = row + bpp + "xorl %%eax, %%eax \n\t" - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + "sub_lp2: \n\t" + "movb (%%esi,%%edx,), %%al \n\t" + "addb %%al, (%%edi,%%edx,) \n\t" + "incl %%edx \n\t" + "cmpl _FullLength, %%edx \n\t" + "jb sub_lp2 \n\t" - : "%eax", "%ebx", "%edi", "%esi" // CHECKASM: clobber list + "sub_end: \n\t" + "EMMS \n\t" // end MMX instructions + + : "=a" (dummy_value_a), // 0 // output regs (dummy) + "=D" (dummy_value_D) // 1 + + : "0" (bpp), // eax // input regs + "1" (row) // edi + + : "%edx", "%esi" // clobber list ); -#endif /* GRR_GCC_MMX_CONVERTED */ -} + +} // end of png_read_filter_row_mmx_sub() + + + + +//===========================================================================// +// // +// P N G _ R E A D _ F I L T E R _ R O W _ M M X _ U P // +// // +//===========================================================================// // Optimized code for PNG Up filter decoder -void /* PRIVATE */ + +static void /* PRIVATE */ png_read_filter_row_mmx_up(png_row_infop 
row_info, png_bytep row, png_bytep prev_row) { -#ifdef GRR_GCC_MMX_CONVERTED png_uint_32 len; + int dummy_value_d; // fix 'forbidden register 3 (dx) was spilled' error + int dummy_value_S; + int dummy_value_D; - len = row_info->rowbytes; // # of bytes to filter - __asm__ ( - "movl row, %%edi \n\t" + len = row_info->rowbytes; // number of bytes to filter + + __asm__ __volatile__ ( +//pre "movl row, %%edi \n\t" // get # of bytes to alignment - "movl %%edi, %%ecx \n\t" - "xorl %%ebx, %%ebx \n\t" - "addl $0x7, %%ecx \n\t" - "xorl %%eax, %%eax \n\t" - "andl $0xfffffff8, %%ecx \n\t" - "movl prev_row, %%esi \n\t" - "subl %%edi, %%ecx \n\t" - "jz up_go \n\t" - // fix alignment - "up_lp1: \n\t" - "movb (%%edi,%%ebx,), %%al \n\t" - "addb (%%esi,%%ebx,), %%al \n\t" - "incl %%ebx \n\t" - "cmpl %%ecx, %%ebx \n\t" - "movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to offset inc ebx - "jb up_lp1 \n\t" - "up_go: \n\t" - "movl len, %%ecx \n\t" - "movl %%ecx, %%edx \n\t" - "subl %%ebx, %%edx \n\t" // subtract alignment fix - "andl $0x0000003f, %%edx \n\t" // calc bytes over mult of 64 - "subl %%edx, %%ecx \n\t" // drop over bytes from length - // Unrolled loop - use all MMX registers and interleave to reduce + "movl %%edi, %%ecx \n\t" + "xorl %%ebx, %%ebx \n\t" + "addl $0x7, %%ecx \n\t" + "xorl %%eax, %%eax \n\t" + "andl $0xfffffff8, %%ecx \n\t" +//pre "movl prev_row, %%esi \n\t" + "subl %%edi, %%ecx \n\t" + "jz up_go \n\t" + + "up_lp1: \n\t" // fix alignment + "movb (%%edi,%%ebx,), %%al \n\t" + "addb (%%esi,%%ebx,), %%al \n\t" + "incl %%ebx \n\t" + "cmpl %%ecx, %%ebx \n\t" + "movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to + "jb up_lp1 \n\t" // offset incl ebx + + "up_go: \n\t" +//pre "movl len, %%edx \n\t" + "movl %%edx, %%ecx \n\t" + "subl %%ebx, %%edx \n\t" // subtract alignment fix + "andl $0x0000003f, %%edx \n\t" // calc bytes over mult of 64 + "subl %%edx, %%ecx \n\t" // drop over bytes from length + + // unrolled loop - use all MMX 
registers and interleave to reduce // number of branch instructions (loops) and reduce partial stalls - "up_loop: \n\t" - "movq (%%esi,%%ebx,), %%mm1 \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "movq 8(%%esi,%%ebx,), %%mm3 \n\t" - "paddb %%mm1, %%mm0 \n\t" - "movq 8(%%edi,%%ebx,), %%mm2 \n\t" - "movq %%mm0, (%%edi,%%ebx,) \n\t" - "paddb %%mm3, %%mm2 \n\t" + "up_loop: \n\t" + "movq (%%esi,%%ebx,), %%mm1 \n\t" + "movq (%%edi,%%ebx,), %%mm0 \n\t" + "movq 8(%%esi,%%ebx,), %%mm3 \n\t" + "paddb %%mm1, %%mm0 \n\t" + "movq 8(%%edi,%%ebx,), %%mm2 \n\t" + "movq %%mm0, (%%edi,%%ebx,) \n\t" + "paddb %%mm3, %%mm2 \n\t" "movq 16(%%esi,%%ebx,), %%mm5 \n\t" - "movq %%mm2, 8(%%edi,%%ebx,) \n\t" + "movq %%mm2, 8(%%edi,%%ebx,) \n\t" "movq 16(%%edi,%%ebx,), %%mm4 \n\t" "movq 24(%%esi,%%ebx,), %%mm7 \n\t" - "paddb %%mm5, %%mm4 \n\t" + "paddb %%mm5, %%mm4 \n\t" "movq 24(%%edi,%%ebx,), %%mm6 \n\t" "movq %%mm4, 16(%%edi,%%ebx,) \n\t" - "paddb %%mm7, %%mm6 \n\t" + "paddb %%mm7, %%mm6 \n\t" "movq 32(%%esi,%%ebx,), %%mm1 \n\t" "movq %%mm6, 24(%%edi,%%ebx,) \n\t" "movq 32(%%edi,%%ebx,), %%mm0 \n\t" "movq 40(%%esi,%%ebx,), %%mm3 \n\t" - "paddb %%mm1, %%mm0 \n\t" + "paddb %%mm1, %%mm0 \n\t" "movq 40(%%edi,%%ebx,), %%mm2 \n\t" "movq %%mm0, 32(%%edi,%%ebx,) \n\t" - "paddb %%mm3, %%mm2 \n\t" + "paddb %%mm3, %%mm2 \n\t" "movq 48(%%esi,%%ebx,), %%mm5 \n\t" "movq %%mm2, 40(%%edi,%%ebx,) \n\t" "movq 48(%%edi,%%ebx,), %%mm4 \n\t" "movq 56(%%esi,%%ebx,), %%mm7 \n\t" - "paddb %%mm5, %%mm4 \n\t" + "paddb %%mm5, %%mm4 \n\t" "movq 56(%%edi,%%ebx,), %%mm6 \n\t" "movq %%mm4, 48(%%edi,%%ebx,) \n\t" - "addl $64, %%ebx \n\t" - "paddb %%mm7, %%mm6 \n\t" - "cmpl %%ecx, %%ebx \n\t" + "addl $64, %%ebx \n\t" + "paddb %%mm7, %%mm6 \n\t" + "cmpl %%ecx, %%ebx \n\t" "movq %%mm6, -8(%%edi,%%ebx,) \n\t" // (+56)movq does not affect flags; - // -8 to offset add ebx - "jb up_loop \n\t" + "jb up_loop \n\t" // -8 to offset addl ebx - "cmpl $0, %%edx \n\t" // Test for bytes over mult of 64 - "jz up_end \n\t" + "cmpl $0, %%edx 
\n\t" // test for bytes over mult of 64 + "jz up_end \n\t" + "cmpl $8, %%edx \n\t" // test for less than 8 bytes + "jb up_lt8 \n\t" // [added by lcreeve@netins.net] - // 2 lines added by lcreeve@netins.net - // (mail 11 Jul 98 in png-implement list) - "cmpl $8, %%edx \n\t" //test for less than 8 bytes - "jb up_lt8 \n\t" + "addl %%edx, %%ecx \n\t" + "andl $0x00000007, %%edx \n\t" // calc bytes over mult of 8 + "subl %%edx, %%ecx \n\t" // drop over bytes from length + "jz up_lt8 \n\t" + "up_lpA: \n\t" // use MMX regs to update 8 bytes sim. + "movq (%%esi,%%ebx,), %%mm1 \n\t" + "movq (%%edi,%%ebx,), %%mm0 \n\t" + "addl $8, %%ebx \n\t" + "paddb %%mm1, %%mm0 \n\t" + "cmpl %%ecx, %%ebx \n\t" + "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // movq does not affect flags; -8 to + "jb up_lpA \n\t" // offset add ebx + "cmpl $0, %%edx \n\t" // test for bytes over mult of 8 + "jz up_end \n\t" - "addl %%edx, %%ecx \n\t" - "andl $0x00000007, %%edx \n\t" // calc bytes over mult of 8 - "subl %%edx, %%ecx \n\t" // drop over bytes from length - "jz up_lt8 \n\t" - // Loop using MMX registers mm0 & mm1 to update 8 bytes simultaneously - "up_lpA: \n\t" - "movq (%%esi,%%ebx,), %%mm1 \n\t" - "movq (%%edi,%%ebx,), %%mm0 \n\t" - "addl $8, %%ebx \n\t" - "paddb %%mm1, %%mm0 \n\t" - "cmpl %%ecx, %%ebx \n\t" - "movq %%mm0, -8(%%edi,%%ebx,) \n\t" // movq does not affect flags; -8 to offset add ebx - "jb up_lpA \n\t" - "cmpl $0, %%edx \n\t" // Test for bytes over mult of 8 - "jz up_end \n\t" - "up_lt8: \n\t" - "xorl %%eax, %%eax \n\t" - "addl %%edx, %%ecx \n\t" // move over byte count into counter - // Loop using x86 registers to update remaining bytes - "up_lp2: \n\t" - "movb (%%edi,%%ebx,), %%al \n\t" - "addb (%%esi,%%ebx,), %%al \n\t" - "incl %%ebx \n\t" - "cmpl %%ecx, %%ebx \n\t" - "movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to offset inc ebx - "jb up_lp2 \n\t" - "up_end: \n\t" - // Conversion of filtered row completed - "emms \n\t" // End MMX instructions; prep for possible FP 
instrs. + "up_lt8: \n\t" + "xorl %%eax, %%eax \n\t" + "addl %%edx, %%ecx \n\t" // move over byte count into counter - : // FIXASM: output regs/vars go here, e.g.: "=m" (memory_var) + "up_lp2: \n\t" // use x86 regs for remaining bytes + "movb (%%edi,%%ebx,), %%al \n\t" + "addb (%%esi,%%ebx,), %%al \n\t" + "incl %%ebx \n\t" + "cmpl %%ecx, %%ebx \n\t" + "movb %%al, -1(%%edi,%%ebx,) \n\t" // mov does not affect flags; -1 to + "jb up_lp2 \n\t" // offset inc ebx - : // FIXASM: input regs, e.g.: "c" (count), "S" (src), "D" (dest) + "up_end: \n\t" + "EMMS \n\t" // conversion of filtered row complete - : "%eax", "%ebx", "%ecx", "%edx", "%edi", "%esi", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" // CHECKASM: clobber list + : "=d" (dummy_value_d), // 0 // output regs (dummy) + "=S" (dummy_value_S), // 1 + "=D" (dummy_value_D) // 2 + + : "0" (len), // edx // input regs + "1" (prev_row), // esi + "2" (row) // edi + + : "%eax", "%ebx", "%ecx" // clobber list (no input regs!) + +#if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ + , "%mm0", "%mm1", "%mm2", "%mm3" + , "%mm4", "%mm5", "%mm6", "%mm7" +#endif ); -#endif /* GRR_GCC_MMX_CONVERTED */ -} +} // end of png_read_filter_row_mmx_up() + + + + +//===========================================================================// +// // +// P N G _ R E A D _ F I L T E R _ R O W // +// // +//===========================================================================// #if defined(PNG_HAVE_ASSEMBLER_READ_FILTER_ROW) @@ -4410,17 +4751,21 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep #ifdef PNG_DEBUG char filnm[6]; #endif - #define UseMMX 1 + +#define UseMMX 1 + +#define UseMMX_sub 1 // GRR: converted 20000730 +#define UseMMX_up 1 // GRR: converted 20000729 +#define UseMMX_avg 1 // GRR: converted 20000828 (+ 16-bit bugfix 20000916) +#define UseMMX_paeth 1 // GRR: converted 20000828 if (mmx_supported == 2) mmx_supported = mmxsupport(); -#ifdef GRR_GCC_MMX_CONVERTED if (!mmx_supported) -#endif { png_read_filter_row_c(png_ptr, row_info, row, prev_row, filter); - return ; + return; } #ifdef PNG_DEBUG @@ -4432,23 +4777,24 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep #endif switch (filter) { - case 0: sprintf(filnm, "None "); + case 0: sprintf(filnm, "none"); break; - case 1: sprintf(filnm, "Sub "); + case 1: sprintf(filnm, "sub"); break; - case 2: sprintf(filnm, "Up "); + case 2: sprintf(filnm, "up"); break; - case 3: sprintf(filnm, "Avg "); + case 3: sprintf(filnm, "avg"); break; case 4: sprintf(filnm, "Paeth"); break; - default: sprintf(filnm, "Unknw"); + default: sprintf(filnm, "unknw"); break; } - png_debug2(0,"row=%5d, %s, ", png_ptr->row_number, filnm); - png_debug2(0, "pd=%2d, b=%d, ", (int)row_info->pixel_depth, + png_debug2(0, "row_number=%5ld, %5s, ", png_ptr->row_number, filnm); + png_debug1(0, "row=0x%08lx, ", (unsigned long)row); + png_debug2(0, "pixdepth=%2d, bytes=%d, ", (int)row_info->pixel_depth, (int)((row_info->pixel_depth + 7) 
>> 3)); - png_debug1(0,"len=%8d, ", row_info->rowbytes); + png_debug1(0,"rowbytes=%8ld\n", row_info->rowbytes); #endif switch (filter) @@ -4457,7 +4803,7 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep break; case PNG_FILTER_VALUE_SUB: -#if (UseMMX == 1) +#if (UseMMX_sub == 1) if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128)) { png_read_filter_row_mmx_sub(row_info, row); @@ -4476,11 +4822,11 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff); rp++; } - } //end !UseMMX + } //end !UseMMX_sub break; case PNG_FILTER_VALUE_UP: -#if (UseMMX == 1) +#if (UseMMX_up == 1) if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128)) { png_read_filter_row_mmx_up(row_info, row, prev_row); @@ -4496,11 +4842,11 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep { *rp = (png_byte)(((int)(*rp) + (int)(*pp)) & 0xff); } - } //end !UseMMX + } //end !UseMMX_up break; case PNG_FILTER_VALUE_AVG: -#if (UseMMX == 1) +#if (UseMMX_avg == 1) if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128)) { png_read_filter_row_mmx_avg(row_info, row, prev_row); @@ -4528,11 +4874,11 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep ((int)(*pp++ + *lp++) >> 1)) & 0xff); rp++; } - } //end !UseMMX + } //end !UseMMX_avg break; case PNG_FILTER_VALUE_PAETH: -#if (UseMMX == 1) +#if (UseMMX_paeth == 1) if ((row_info->pixel_depth > 8) && (row_info->rowbytes >= 128)) { png_read_filter_row_mmx_paeth(row_info, row, prev_row); @@ -4589,7 +4935,7 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep *rp = (png_byte)(((int)(*rp) + p) & 0xff); rp++; } - } //end !UseMMX + } //end !UseMMX_paeth break; default: @@ -4602,6 +4948,14 @@ png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep #endif /* PNG_HAVE_ASSEMBLER_READ_FILTER_ROW */ + + 
+//===========================================================================// +// // +// M M X S U P P O R T // +// // +//===========================================================================// + // GRR NOTES: (1) the following code assumes 386 or better (pushfl/popfl) // (2) all instructions compile with gcc 2.7.2.3 and later // (3) the function is moved down here to prevent gcc from @@ -4618,7 +4972,7 @@ int mmxsupport(void) { int mmx_supported_local = 0; - __asm__ ( + __asm__ __volatile__ ( // ".byte 0x66 \n\t" // convert 16-bit pushf to 32-bit pushfd // "pushf \n\t" // save Eflag to stack "pushfl \n\t" // save Eflag to stack @@ -4670,11 +5024,13 @@ int mmxsupport(void) return mmx_supported_local; } + #else /* !ORIG_THAT_USED_TO_CLOBBER_EBX */ + int mmxsupport(void) { - __asm__ ( + __asm__ __volatile__ ( "pushl %%ebx \n\t" // ebx gets clobbered by CPUID instruction "pushl %%ecx \n\t" // so does ecx... "pushl %%edx \n\t" // ...and edx (but ecx & edx safe on Linux) @@ -4740,5 +5096,3 @@ int mmxsupport(void) #endif /* ?ORIG_THAT_USED_TO_CLOBBER_EBX */ #endif /* PNG_ASSEMBLER_CODE_SUPPORTED && PNG_USE_PNGGCCRD */ - - diff --git a/pngget.c b/pngget.c index 266dc3b17..42ba52897 100644 --- a/pngget.c +++ b/pngget.c @@ -1,7 +1,7 @@ /* pngget.c - retrieval of values from info struct * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) diff --git a/pngmem.c b/pngmem.c index 22c77beed..6a0474f84 100644 --- a/pngmem.c +++ b/pngmem.c @@ -1,7 +1,7 @@ /* pngmem.c - stub functions for memory allocation * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) diff 
--git a/pngpread.c b/pngpread.c index 6dfd01803..0e564666b 100644 --- a/pngpread.c +++ b/pngpread.c @@ -1,7 +1,7 @@ /* pngpread.c - read a png file in push mode * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -894,25 +894,25 @@ png_read_push_finish_row(png_structp png_ptr) /* arrays to facilitate easy interlacing - use pass (0 - 6) as index */ /* start of interlace block */ - const int png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; + const int FARDATA png_pass_start[] = {0, 4, 0, 2, 0, 1, 0}; /* offset to next interlace block */ - const int png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; + const int FARDATA png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; /* start of interlace block in the y direction */ - const int png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1}; + const int FARDATA png_pass_ystart[] = {0, 0, 4, 0, 2, 0, 1}; /* offset to next interlace block in the y direction */ - const int png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2}; + const int FARDATA png_pass_yinc[] = {8, 8, 8, 4, 4, 2, 2}; /* Width of interlace block. This is not currently used - if you need * it, uncomment it here and in png.h - const int png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; + const int FARDATA png_pass_width[] = {8, 4, 4, 2, 2, 1, 1}; */ /* Height of interlace block. 
This is not currently used - if you need * it, uncomment it here and in png.h - const int png_pass_height[] = {8, 8, 4, 4, 2, 2, 1}; + const int FARDATA png_pass_height[] = {8, 8, 4, 4, 2, 2, 1}; */ #endif @@ -1429,7 +1429,8 @@ png_progressive_combine_row (png_structp png_ptr, png_bytep old_row, png_bytep new_row) { #ifdef PNG_USE_LOCAL_ARRAYS - const int png_pass_dsp_mask[7] = {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff}; + const int FARDATA png_pass_dsp_mask[7] = + {0xff, 0x0f, 0xff, 0x33, 0xff, 0x55, 0xff}; #endif if (new_row != NULL) /* new_row must == png_ptr->row_buf here. */ png_combine_row(png_ptr, old_row, png_pass_dsp_mask[png_ptr->pass]); diff --git a/pngread.c b/pngread.c index d1cbc512d..af88de5ec 100644 --- a/pngread.c +++ b/pngread.c @@ -1,7 +1,7 @@ /* pngread.c - read a PNG file * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -440,6 +440,9 @@ png_read_update_info(png_structp png_ptr, png_infop info_ptr) /* save jump buffer and error functions */ if (!(png_ptr->flags & PNG_FLAG_ROW_INIT)) png_read_start_row(png_ptr); + else + png_warning(png_ptr, + "Ignoring extra png_read_update_info() call; row buffer not reallocated"); png_read_transform_info(png_ptr, info_ptr); } @@ -698,7 +701,7 @@ png_read_row(png_structp png_ptr, png_bytep row, png_bytep dsp_row) * not called png_set_interlace_handling(), the display_row buffer will * be ignored, so pass NULL to it. * - * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.8 + * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.9beta2 */ void PNGAPI @@ -747,7 +750,7 @@ png_read_rows(png_structp png_ptr, png_bytepp row, * only call this function once. If you desire to have an image for * each pass of a interlaced image, use png_read_rows() instead. 
* - * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.8 + * [*] png_handle_alpha() does not exist yet, as of libpng version 1.0.9beta2 */ void PNGAPI png_read_image(png_structp png_ptr, png_bytepp image) diff --git a/pngrio.c b/pngrio.c index b6e592254..5cd3ddc06 100644 --- a/pngrio.c +++ b/pngrio.c @@ -1,7 +1,7 @@ /* pngrio.c - functions for data input * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) diff --git a/pngrtran.c b/pngrtran.c index da75f0a85..57391bac4 100644 --- a/pngrtran.c +++ b/pngrtran.c @@ -1,7 +1,7 @@ /* pngrtran.c - transforms the data in a row for PNG readers * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -1090,7 +1090,12 @@ png_read_transform_info(png_structp png_ptr, png_infop info_ptr) if ((png_ptr->transformations & PNG_FILLER) && ((info_ptr->color_type == PNG_COLOR_TYPE_RGB) || (info_ptr->color_type == PNG_COLOR_TYPE_GRAY))) + { info_ptr->channels++; +#if 0 /* if adding a true alpha channel not just filler */ + info_ptr->color_type |= PNG_COLOR_MASK_ALPHA; +#endif + } #endif #if defined(PNG_USER_TRANSFORM_PTR_SUPPORTED) && \ diff --git a/pngrutil.c b/pngrutil.c index fc6242050..e87ee5357 100644 --- a/pngrutil.c +++ b/pngrutil.c @@ -1,7 +1,7 @@ /* pngrutil.c - utilities to read a PNG file * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -976,6 +976,8 @@ png_handle_iCCP(png_structp png_ptr, 
png_infop info_ptr, png_uint_32 length) png_byte compression_type; png_charp profile; png_uint_32 skip = 0; + png_uint_32 profile_size = 0; + png_uint_32 profile_length = 0; png_size_t slength, prefix_length, data_length; png_debug(1, "in png_handle_iCCP\n"); @@ -1027,22 +1029,43 @@ png_handle_iCCP(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) /* there should be at least one zero (the compression type byte) following the separator, and we should be on it */ - if (*profile || profile >= chunkdata + slength) + if ( profile >= chunkdata + slength) { png_free(png_ptr, chunkdata); - png_warning(png_ptr, "malformed iCCP chunk"); + png_warning(png_ptr, "Malformed iCCP chunk"); return; } /* compression_type should always be zero */ compression_type = *profile++; + if (compression_type) + { + png_warning(png_ptr, "Ignoring nonzero compression type in iCCP chunk"); + compression_type=0x00; /* Reset it to zero (libpng-1.0.6 through 1.0.8 + wrote nonzero) */ + } prefix_length = profile - chunkdata; chunkdata = png_decompress_chunk(png_ptr, compression_type, chunkdata, slength, prefix_length, &data_length); + profile_length = data_length - prefix_length; + profile_size = ((*(chunkdata+prefix_length))<<24) | + ((*(chunkdata+prefix_length+1))<<16) | + ((*(chunkdata+prefix_length+2))<< 8) | + ((*(chunkdata+prefix_length+3)) ); + + if(profile_size < profile_length) + profile_length = profile_size; + + if(profile_size > profile_length) + { + png_warning(png_ptr, "Ignoring truncated iCCP profile.\n"); + return; + } + png_set_iCCP(png_ptr, info_ptr, chunkdata, compression_type, - chunkdata + prefix_length, data_length); + chunkdata + prefix_length, data_length-prefix_length); png_free(png_ptr, chunkdata); } #endif /* PNG_READ_iCCP_SUPPORTED */ @@ -1336,7 +1359,6 @@ png_handle_bKGD(png_structp png_ptr, png_infop info_ptr, png_uint_32 length) if(buf[0] > info_ptr->num_palette) { png_warning(png_ptr, "Incorrect bKGD chunk index value"); - png_crc_finish(png_ptr, length); 
return; } png_ptr->background.red = diff --git a/pngset.c b/pngset.c index 60923288d..114012656 100644 --- a/pngset.c +++ b/pngset.c @@ -1,7 +1,7 @@ /* pngset.c - storage of image information into info struct * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -741,10 +741,27 @@ png_set_unknown_chunk_location(png_structp png_ptr, png_infop info_ptr, void PNGAPI png_permit_empty_plte (png_structp png_ptr, int empty_plte_permitted) { - png_debug(1, "in png_permit_empty_plte\n"); + /* This function is deprecated in favor of png_permit_mng_features() + and will be removed from libpng-2.0.0 */ + png_debug(1, "in png_permit_empty_plte, DEPRECATED.\n"); if (png_ptr == NULL) return; - png_ptr->empty_plte_permitted=(png_byte)empty_plte_permitted; + png_ptr->mng_features_permitted = (png_byte) + ((png_ptr->mng_features_permitted & (~(PNG_FLAG_MNG_EMPTY_PLTE))) | + ((empty_plte_permitted & PNG_FLAG_MNG_EMPTY_PLTE))); +} +#endif + +#if defined(PNG_MNG_FEATURES_SUPPORTED) +png_uint_32 PNGAPI +png_permit_mng_features (png_structp png_ptr, png_uint_32 mng_features) +{ + png_debug(1, "in png_permit_mng_features\n"); + if (png_ptr == NULL) + return (png_uint_32)0; + png_ptr->mng_features_permitted = + (png_byte)(mng_features & PNG_ALL_MNG_FEATURES); + return (png_uint_32)png_ptr->mng_features_permitted; } #endif @@ -825,6 +842,8 @@ png_set_compression_buffer_size(png_structp png_ptr, png_uint_32 size) png_free(png_ptr, png_ptr->zbuf); png_ptr->zbuf_size = (png_size_t)size; png_ptr->zbuf = (png_bytep)png_malloc(png_ptr, size); + if(!png_ptr->zbuf) + png_error(png_ptr,"Unable to malloc zbuf"); png_ptr->zstream.next_out = png_ptr->zbuf; png_ptr->zstream.avail_out = (uInt)png_ptr->zbuf_size; } diff --git a/pngtrans.c b/pngtrans.c index b974a66ae..140905c6f 100644 --- 
a/pngtrans.c +++ b/pngtrans.c @@ -1,7 +1,7 @@ /* pngtrans.c - transforms the data in a row (used by both readers and writers) * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) diff --git a/pngvcrd.c b/pngvcrd.c index ed09aaea8..4b85a1fd7 100644 --- a/pngvcrd.c +++ b/pngvcrd.c @@ -2,7 +2,7 @@ * * For Intel x86 CPU and Microsoft Visual C++ compiler * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * Copyright (c) 1998, Intel Corporation @@ -10,6 +10,8 @@ * Contributed by Nirav Chhatrapati, Intel Corporation, 1998 * Interface to libpng contributed by Gilles Vollant, 1999 * + * [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916] + * */ #define PNG_INTERNAL @@ -2117,8 +2119,8 @@ davg4lp: case 2: { ActiveMask.use = 0x000000000000ffff; - ShiftBpp.use = 24; // == 3 * 8 - ShiftRem.use = 40; // == 64 - 24 + ShiftBpp.use = 16; // == 2 * 8 [BUGFIX] + ShiftRem.use = 48; // == 64 - 16 [BUGFIX] _asm { // Load ActiveMask movq mm7, ActiveMask @@ -2133,7 +2135,7 @@ davg4lp: // (we correct position in loop below) davg2lp: movq mm0, [edi + ebx] - psllq mm2, ShiftRem // shift data to position correctly + psrlq mm2, ShiftRem // shift data to position correctly [BUGFIX] movq mm1, [esi + ebx] // Add (Prev_row/2) to Average movq mm3, mm5 diff --git a/pngwio.c b/pngwio.c index c70062426..d62fe85ba 100644 --- a/pngwio.c +++ b/pngwio.c @@ -1,7 +1,7 @@ /* pngwio.c - functions for data output * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 
1996, 1997 Andreas Dilger) diff --git a/pngwrite.c b/pngwrite.c index 167e77056..ba79c7b83 100644 --- a/pngwrite.c +++ b/pngwrite.c @@ -1,7 +1,7 @@ /* pngwrite.c - general routines to write a PNG file * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -964,7 +964,6 @@ void PNGAPI png_set_filter(png_structp png_ptr, int method, int filters) { png_debug(1, "in png_set_filter\n"); - /* We allow 'method' only for future expansion of the base filter method. */ if (method == PNG_FILTER_TYPE_BASE) { switch (filters & (PNG_ALL_FILTERS | 0x07)) diff --git a/pngwtran.c b/pngwtran.c index 257ccd13c..d0f5dd7fb 100644 --- a/pngwtran.c +++ b/pngwtran.c @@ -1,7 +1,7 @@ /* pngwtran.c - transforms the data in a row for PNG writers * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) diff --git a/pngwutil.c b/pngwutil.c index fb702c986..ef81008d6 100644 --- a/pngwutil.c +++ b/pngwutil.c @@ -1,7 +1,7 @@ /* pngwutil.c - utilities to write a PNG file * - * libpng 1.0.8 - July 24, 2000 + * libpng 1.0.9beta2 - November 19, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * (Version 0.96 Copyright (c) 1996, 1997 Andreas Dilger) @@ -514,8 +514,8 @@ png_write_PLTE(png_structp png_ptr, png_colorp palette, png_uint_32 num_pal) png_debug(1, "in png_write_PLTE\n"); if (( -#ifdef PNG_WRITE_EMPTY_PLTE_SUPPORTED - !png_ptr->empty_plte_permitted && +#if defined(PNG_MNG_FEATURES_SUPPORTED) + !(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE) && #endif num_pal == 0) || num_pal > 256) { @@ -670,6 
+670,7 @@ png_write_iCCP(png_structp png_ptr, png_charp name, int compression_type, /* make sure we include the NULL after the name and the compression type */ png_write_chunk_start(png_ptr, (png_bytep)png_iCCP, (png_uint_32)name_len+profile_len+2); + new_name[name_len+1]=0x00; png_write_chunk_data(png_ptr, (png_bytep)new_name, name_len + 2); if (profile_len) @@ -996,9 +997,9 @@ png_write_bKGD(png_structp png_ptr, png_color_16p back, int color_type) if (color_type == PNG_COLOR_TYPE_PALETTE) { if ( -#ifdef PNG_WRITE_EMPTY_PLTE_SUPPORTED - (!png_ptr->empty_plte_permitted || - (png_ptr->empty_plte_permitted && png_ptr->num_palette)) && +#if defined(PNG_MNG_FEATURES_SUPPORTED) + (png_ptr->num_palette || + (!(png_ptr->mng_features_permitted & PNG_FLAG_MNG_EMPTY_PLTE))) && #endif back->index > png_ptr->num_palette) { @@ -1083,7 +1084,7 @@ png_check_keyword(png_structp png_ptr, png_charp key, png_charpp new_key) png_debug1(2, "Keyword to be checked is '%s'\n", key); - *new_key = (png_charp)png_malloc(png_ptr, (png_uint_32)(key_len + 1)); + *new_key = (png_charp)png_malloc(png_ptr, (png_uint_32)(key_len + 2)); /* Replace non-printing characters with a blank and print a warning */ for (kp = key, dp = *new_key; *kp != '\0'; kp++, dp++) @@ -2553,6 +2554,7 @@ png_write_filtered_row(png_structp png_ptr, png_bytep filtered_row) png_debug(1, "in png_write_filtered_row\n"); png_debug1(2, "filter = %d\n", filtered_row[0]); /* set up the zlib input buffer */ + png_ptr->zstream.next_in = filtered_row; png_ptr->zstream.avail_in = (uInt)png_ptr->row_info.rowbytes + 1; /* repeat until we have compressed all the data */