[libpng17] Combined sub_row, up_row, avg_row, and paeth_row buffers into a
single try_row buffer and, in cases where two or more of those are
being tested, a second tst_row buffer.  This improves CPU speed
over that achieved by libpng-1.7.0beta49.
This commit is contained in:
Glenn Randers-Pehrson 2015-02-15 12:44:16 -06:00
parent 7e56f5858d
commit f1e4acb5b1
5 changed files with 110 additions and 83 deletions

View File

@ -1,5 +1,5 @@
Libpng 1.7.0beta50 - February 11, 2015 Libpng 1.7.0beta50 - February 15, 2015
This is not intended to be a public release. It will be replaced This is not intended to be a public release. It will be replaced
within a few weeks by a public version or by another test version. within a few weeks by a public version or by another test version.
@ -708,7 +708,11 @@ Version 1.7.0beta49 [February 11, 2015]
Consolidated redundant code in pngwutil.c Consolidated redundant code in pngwutil.c
Deal with integer overflow of sum in pngwutil.c Deal with integer overflow of sum in pngwutil.c
Version 1.7.0beta50 [February 11, 2015] Version 1.7.0beta50 [February 15, 2015]
Combined sub_row, up_row, avg_row, and paeth_row buffers into a
single try_row buffer and in cases where two or more of those are
being tested, a second tst_row buffer. This improves CPU speed
over that achieved by libpng-1.7.0beta49.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit (subscription required; visit

View File

@ -4997,7 +4997,11 @@ Version 1.7.0beta49 [February 11, 2015]
Consolidated redundant code in pngwutil.c Consolidated redundant code in pngwutil.c
Deal with integer overflow of sum in pngwutil.c Deal with integer overflow of sum in pngwutil.c
Version 1.7.0beta50 [February 11, 2015] Version 1.7.0beta50 [February 15, 2015]
Combined sub_row, up_row, avg_row, and paeth_row buffers into a
single try_row buffer and in cases where two or more of those are
being tested, a second tst_row buffer. This improves CPU speed
over that achieved by libpng-1.7.0beta49.
Send comments/corrections/commendations to png-mng-implement at lists.sf.net Send comments/corrections/commendations to png-mng-implement at lists.sf.net
(subscription required; visit (subscription required; visit

View File

@ -341,8 +341,9 @@ struct png_struct_def
size_t big_row_buf_size; /* Actual size of both */ size_t big_row_buf_size; /* Actual size of both */
#endif #endif
#ifdef PNG_WRITE_SUPPORTED #ifdef PNG_WRITE_FILTER_SUPPORTED
png_bytep try_row; /* buffer to save trial row when filtering */ png_bytep try_row; /* buffer to save trial row when filtering */
png_bytep tst_row; /* buffer to save best trial row when filtering */
#endif #endif
/* UNKNOWN CHUNK HANDLING */ /* UNKNOWN CHUNK HANDLING */

View File

@ -943,8 +943,10 @@ png_write_destroy(png_structrp png_ptr)
#ifdef PNG_WRITE_FILTER_SUPPORTED #ifdef PNG_WRITE_FILTER_SUPPORTED
png_free(png_ptr, png_ptr->prev_row); png_free(png_ptr, png_ptr->prev_row);
png_free(png_ptr, png_ptr->try_row); png_free(png_ptr, png_ptr->try_row);
png_free(png_ptr, png_ptr->tst_row);
png_ptr->prev_row = NULL; png_ptr->prev_row = NULL;
png_ptr->try_row = NULL; png_ptr->try_row = NULL;
png_ptr->tst_row = NULL;
#endif #endif
#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED #ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED

View File

@ -1957,7 +1957,24 @@ png_write_alloc_filter_row_buffers(png_structrp png_ptr, int filters)
if (((filters & (PNG_FILTER_SUB | PNG_FILTER_UP | PNG_FILTER_AVG | if (((filters & (PNG_FILTER_SUB | PNG_FILTER_UP | PNG_FILTER_AVG |
PNG_FILTER_PAETH)) != 0) && png_ptr->try_row == NULL) PNG_FILTER_PAETH)) != 0) && png_ptr->try_row == NULL)
{ {
int num_filters = 0;
png_ptr->try_row = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size)); png_ptr->try_row = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size));
if (filters & PNG_FILTER_SUB)
num_filters++;
if (filters & PNG_FILTER_UP)
num_filters++;
if (filters & PNG_FILTER_AVG)
num_filters++;
if (filters & PNG_FILTER_PAETH)
num_filters++;
if (num_filters > 1)
png_ptr->tst_row = png_voidcast(png_bytep, png_malloc(png_ptr, buf_size));
} }
} }
#endif /* WRITE_FILTER */ #endif /* WRITE_FILTER */
@ -2352,22 +2369,21 @@ png_setup_sub_row(png_structrp png_ptr, const png_uint_32 bpp,
for (lp = png_ptr->row_buf + 1; i < row_bytes; for (lp = png_ptr->row_buf + 1; i < row_bytes;
i++, rp++, lp++, dp++) i++, rp++, lp++, dp++)
{
v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
if (lmins != PNG_SIZE_MAX)
{ {
png_size_t old_sum = sum; png_size_t old_sum = sum;
v = *dp = (png_byte)(((int)*rp - (int)*lp) & 0xff);
sum += (v < 128) ? v : 256 - v; sum += (v < 128) ? v : 256 - v;
if (sum < old_sum) /* overflow happened */ if (sum < old_sum) /* overflow happened */
return (PNG_SIZE_MAX - 1); {
sum = PNG_SIZE_MAX - 1;
break;
}
if (sum > lmins) /* We are already worse, don't continue. */ if (sum > lmins) /* We are already worse, don't continue. */
break; break;
} }
}
return (sum); return (sum);
} }
@ -2384,21 +2400,21 @@ png_setup_up_row(png_structrp png_ptr, const png_size_t row_bytes,
for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1, for (i = 0, rp = png_ptr->row_buf + 1, dp = png_ptr->try_row + 1,
pp = png_ptr->prev_row + 1; i < row_bytes; pp = png_ptr->prev_row + 1; i < row_bytes;
i++, rp++, pp++, dp++) i++, rp++, pp++, dp++)
{
v = *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
if (lmins != PNG_SIZE_MAX)
{ {
png_size_t old_sum = sum; png_size_t old_sum = sum;
v = *dp = (png_byte)(((int)*rp - (int)*pp) & 0xff);
sum += (v < 128) ? v : 256 - v; sum += (v < 128) ? v : 256 - v;
if (sum < old_sum) /* overflow happened */ if (sum < old_sum) /* overflow happened */
return (PNG_SIZE_MAX - 1); {
sum = PNG_SIZE_MAX - 1;
break;
}
if (sum > lmins) /* We are already worse, don't continue. */ if (sum > lmins) /* We are already worse, don't continue. */
break; break;
} }
}
return (sum); return (sum);
} }
@ -2422,22 +2438,22 @@ png_setup_avg_row(png_structrp png_ptr, const png_uint_32 bpp,
for (lp = png_ptr->row_buf + 1; i < row_bytes; i++) for (lp = png_ptr->row_buf + 1; i < row_bytes; i++)
{ {
png_size_t old_sum = sum;
v = *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2)) v = *dp++ = (png_byte)(((int)*rp++ - (((int)*pp++ + (int)*lp++) / 2))
& 0xff); & 0xff);
if (lmins != PNG_SIZE_MAX)
{
png_size_t old_sum = sum;
sum += (v < 128) ? v : 256 - v; sum += (v < 128) ? v : 256 - v;
if (sum < old_sum) /* overflow happened */ if (sum < old_sum) /* overflow happened */
return (PNG_SIZE_MAX - 1); {
sum = PNG_SIZE_MAX - 1;
break;
}
if (sum > lmins) /* We are already worse, don't continue. */ if (sum > lmins) /* We are already worse, don't continue. */
break; break;
} }
}
return (sum); return (sum);
} }
@ -2463,6 +2479,7 @@ png_setup_paeth_row(png_structrp png_ptr, const png_uint_32 bpp,
i++) i++)
{ {
int a, b, c, pa, pb, pc, p; int a, b, c, pa, pb, pc, p;
png_size_t old_sum = sum;
b = *pp++; b = *pp++;
c = *cp++; c = *cp++;
@ -2485,19 +2502,17 @@ png_setup_paeth_row(png_structrp png_ptr, const png_uint_32 bpp,
v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff); v = *dp++ = (png_byte)(((int)*rp++ - p) & 0xff);
if (lmins != PNG_SIZE_MAX)
{
png_size_t old_sum = sum;
sum += (v < 128) ? v : 256 - v; sum += (v < 128) ? v : 256 - v;
if (sum < old_sum) /* overflow happened */ if (sum < old_sum) /* overflow happened */
return (PNG_SIZE_MAX - 1); {
sum = PNG_SIZE_MAX - 1;
break;
}
if (sum > lmins) /* We are already worse, don't continue. */ if (sum > lmins) /* We are already worse, don't continue. */
break; break;
} }
}
return (sum); return (sum);
} }
@ -2516,9 +2531,9 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
#ifndef PNG_WRITE_FILTER_SUPPORTED #ifndef PNG_WRITE_FILTER_SUPPORTED
png_write_filtered_row(png_ptr, png_ptr->row_buf, row_info->rowbytes+1); png_write_filtered_row(png_ptr, png_ptr->row_buf, row_info->rowbytes+1);
#else #else
png_byte best_filter_value = PNG_FILTER_VALUE_NONE;
png_byte filter_to_do = png_ptr->do_filter; png_byte filter_to_do = png_ptr->do_filter;
png_bytep row_buf; png_bytep row_buf;
png_bytep best_row;
png_uint_32 bpp; png_uint_32 bpp;
#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED #ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
int num_p_filters = png_ptr->num_prev_filters; int num_p_filters = png_ptr->num_prev_filters;
@ -2570,6 +2585,8 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
/* We don't need to test the 'no filter' case if this is the only filter /* We don't need to test the 'no filter' case if this is the only filter
* that has been chosen, as it doesn't actually do anything to the data. * that has been chosen, as it doesn't actually do anything to the data.
*/ */
best_row = png_ptr->row_buf;
if ((filter_to_do & PNG_FILTER_NONE) != 0 && filter_to_do != PNG_FILTER_NONE) if ((filter_to_do & PNG_FILTER_NONE) != 0 && filter_to_do != PNG_FILTER_NONE)
{ {
png_bytep rp; png_bytep rp;
@ -2623,6 +2640,7 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
} }
#endif #endif
mins = sum; mins = sum;
best_row = png_ptr->row_buf;
} }
/* Sub filter */ /* Sub filter */
@ -2630,8 +2648,8 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
/* It's the only filter so no testing is needed */ /* It's the only filter so no testing is needed */
{ {
png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB; png_ptr->try_row[0] = PNG_FILTER_VALUE_SUB;
(void) png_setup_sub_row(png_ptr, bpp, row_bytes, PNG_SIZE_MAX); (void) png_setup_sub_row(png_ptr, bpp, row_bytes, mins);
best_filter_value = PNG_FILTER_VALUE_SUB; best_row = png_ptr->try_row;
} }
else if ((filter_to_do & PNG_FILTER_SUB) != 0) else if ((filter_to_do & PNG_FILTER_SUB) != 0)
@ -2718,7 +2736,12 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if (sum < mins) if (sum < mins)
{ {
mins = sum; mins = sum;
best_filter_value = PNG_FILTER_VALUE_SUB; best_row = png_ptr->try_row;
if (png_ptr->tst_row != NULL)
{
png_ptr->try_row = png_ptr->tst_row;
png_ptr->tst_row = best_row;
}
} }
} }
@ -2726,8 +2749,8 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if (filter_to_do == PNG_FILTER_UP) if (filter_to_do == PNG_FILTER_UP)
{ {
png_ptr->try_row[0] = PNG_FILTER_VALUE_UP; png_ptr->try_row[0] = PNG_FILTER_VALUE_UP;
(void) png_setup_up_row(png_ptr, row_bytes, PNG_SIZE_MAX); (void) png_setup_up_row(png_ptr, row_bytes, mins);
best_filter_value = PNG_FILTER_VALUE_UP; best_row = png_ptr->try_row;
} }
else if ((filter_to_do & PNG_FILTER_UP) != 0) else if ((filter_to_do & PNG_FILTER_UP) != 0)
@ -2810,7 +2833,12 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if (sum < mins) if (sum < mins)
{ {
mins = sum; mins = sum;
best_filter_value = PNG_FILTER_VALUE_UP; best_row = png_ptr->try_row;
if (png_ptr->tst_row != NULL)
{
png_ptr->try_row = png_ptr->tst_row;
png_ptr->tst_row = best_row;
}
} }
} }
@ -2818,8 +2846,8 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if (filter_to_do == PNG_FILTER_AVG) if (filter_to_do == PNG_FILTER_AVG)
{ {
png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG; png_ptr->try_row[0] = PNG_FILTER_VALUE_AVG;
(void) png_setup_avg_row(png_ptr, bpp, row_bytes, PNG_SIZE_MAX); (void) png_setup_avg_row(png_ptr, bpp, row_bytes, mins);
best_filter_value = PNG_FILTER_VALUE_AVG; best_row = png_ptr->try_row;
} }
else if ((filter_to_do & PNG_FILTER_AVG) != 0) else if ((filter_to_do & PNG_FILTER_AVG) != 0)
@ -2901,7 +2929,12 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if (sum < mins) if (sum < mins)
{ {
mins = sum; mins = sum;
best_filter_value = PNG_FILTER_VALUE_AVG; best_row = png_ptr->try_row;
if (png_ptr->tst_row != NULL)
{
png_ptr->try_row = png_ptr->tst_row;
png_ptr->tst_row = best_row;
}
} }
} }
@ -2909,8 +2942,8 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if ((filter_to_do == PNG_FILTER_PAETH) != 0) if ((filter_to_do == PNG_FILTER_PAETH) != 0)
{ {
png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH; png_ptr->try_row[0] = PNG_FILTER_VALUE_PAETH;
(void) png_setup_paeth_row(png_ptr, bpp, row_bytes, PNG_SIZE_MAX); (void) png_setup_paeth_row(png_ptr, bpp, row_bytes, mins);
best_filter_value = PNG_FILTER_VALUE_PAETH; best_row = png_ptr->try_row;
} }
else if ((filter_to_do & PNG_FILTER_PAETH) != 0) else if ((filter_to_do & PNG_FILTER_PAETH) != 0)
@ -2991,35 +3024,18 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
if (sum < mins) if (sum < mins)
{ {
best_filter_value = PNG_FILTER_VALUE_PAETH; mins = sum;
best_row = png_ptr->try_row;
if (png_ptr->tst_row != NULL)
{
png_ptr->try_row = png_ptr->tst_row;
png_ptr->tst_row = best_row;
}
} }
} }
/* Do the actual writing of the filtered row data from the chosen filter. */ /* Do the actual writing of the filtered row data from the chosen filter. */
if (best_filter_value == PNG_FILTER_VALUE_NONE) png_write_filtered_row(png_ptr, best_row, row_info->rowbytes+1);
png_write_filtered_row(png_ptr, png_ptr->row_buf, row_info->rowbytes+1);
else
{
if (best_filter_value != png_ptr->try_row[0])
{
png_ptr->try_row[0] = best_filter_value;
if (best_filter_value == PNG_FILTER_VALUE_SUB)
(void) png_setup_sub_row(png_ptr, bpp, row_bytes, PNG_SIZE_MAX);
if (best_filter_value == PNG_FILTER_VALUE_UP)
(void) png_setup_up_row(png_ptr, row_bytes, PNG_SIZE_MAX);
if (best_filter_value == PNG_FILTER_VALUE_AVG)
(void) png_setup_avg_row(png_ptr, bpp, row_bytes, PNG_SIZE_MAX);
if (best_filter_value == PNG_FILTER_VALUE_PAETH)
(void) png_setup_paeth_row(png_ptr, bpp, row_bytes, PNG_SIZE_MAX);
}
png_write_filtered_row(png_ptr, png_ptr->try_row, row_info->rowbytes+1);
}
#ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED #ifdef PNG_WRITE_WEIGHTED_FILTER_SUPPORTED
/* Save the type of filter we picked this time for future calculations */ /* Save the type of filter we picked this time for future calculations */
@ -3032,7 +3048,7 @@ png_write_find_filter(png_structrp png_ptr, png_row_infop row_info)
png_ptr->prev_filters[j] = png_ptr->prev_filters[j - 1]; png_ptr->prev_filters[j] = png_ptr->prev_filters[j - 1];
} }
png_ptr->prev_filters[j] = best_filter_value; png_ptr->prev_filters[j] = best_row[0];
} }
#endif /* WRITE_WEIGHTED_FILTER */ #endif /* WRITE_WEIGHTED_FILTER */
#endif /* WRITE_FILTER */ #endif /* WRITE_FILTER */