* and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm
* for Intel's performance analysis of the MMX vs. non-MMX code.
*
- * libpng version 1.2.5rc3 - September 18, 2002
+ * libpng version 1.2.7 - September 12, 2004
* For conditions of distribution and use, see copyright notice in png.h
- * Copyright (c) 1998-2002 Glenn Randers-Pehrson
+ * Copyright (c) 1998-2004 Glenn Randers-Pehrson
* Copyright (c) 1998, Intel Corporation
*
* Based on MSVC code contributed by Nirav Chhatrapati, Intel Corp., 1998.
* 20020304:
* - eliminated incorrect use of width_mmx in pixel_bytes == 8 case
*
+ * 20040724:
+ * - more tinkering with the clobber lists at lines 4529 and 5033, to get
+ * this file to compile with gcc-3.4.
+ *
* STILL TO DO:
* - test png_do_read_interlace() 64-bit case (pixel_bytes == 8)
* - write MMX code for 48-bit case (pixel_bytes == 6)
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
if (_mmx_supported == 2) {
+#if !defined(PNG_1_0_X)
/* this should have happened in png_init_mmx_flags() already */
png_warning(png_ptr, "asm_flags may not have been initialized");
+#endif
png_mmx_support();
}
#endif
{
png_debug(2,"mask == 0xff: doing single png_memcpy()\n");
png_memcpy(row, png_ptr->row_buf + 1,
- (png_size_t)((png_ptr->width * png_ptr->row_info.pixel_depth + 7) >> 3));
+ (png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,png_ptr->width));
}
else /* (png_combine_row() is never called with mask == 0) */
{
"=S" (dummy_value_S),
"=D" (dummy_value_D)
- : "1" (sptr), /* esi // input regs */
- "2" (dp), /* edi */
- "0" (width) /* ecx */
-/* doesn't work "i" (0x0000000000FFFFFFLL) // %1 (a.k.a. _const4) */
+ : "1" (sptr), // esi // input regs
+ "2" (dp), // edi
+ "0" (width), // ecx
+ "rim" (_const4) // %1(?) (0x0000000000FFFFFFLL)
#if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1", "%mm2" /* clobber list */
"=S" (dummy_value_S),
"=D" (dummy_value_D)
- : "1" (sptr), /* esi // input regs */
- "2" (dp), /* edi */
- "0" (width) /* ecx */
+ : "1" (sptr), // esi // input regs
+ "2" (dp), // edi
+ "0" (width), // ecx
+ "rim" (_const4) // (0x0000000000FFFFFFLL)
#if 0 /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1", "%mm2" /* clobber list */
"=S" (dummy_value_S),
"=D" (dummy_value_D)
- : "1" (sptr), /* esi // input regs */
- "2" (dp), /* edi */
- "0" (width_mmx) /* ecx */
+ : "1" (sptr), // esi // input regs
+ "2" (dp), // edi
+ "0" (width_mmx), // ecx
+ "rim" (_const4), // 0x0000000000FFFFFFLL
+ "rim" (_const6) // 0x00000000000000FFLL
#if 0 /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */
: "%mm0", "%mm1" /* clobber list */
} /* end switch (row_info->pixel_depth) */
row_info->width = final_width;
- row_info->rowbytes = ((final_width *
- (png_uint_32)row_info->pixel_depth + 7) >> 3);
+
+ row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
}
} /* end png_do_read_interlace() */
: "0" (bpp), /* eax // input regs */
"1" (row) /* edi */
- : "%ebx", "%ecx", "%edx" /* clobber list */
- , "%esi"
+ : "%esi", "%ecx", "%edx" // clobber list
#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
, "%mm0", "%mm1", "%mm2", "%mm3"
"1" (prev_row), /* esi */
"2" (row) /* edi */
- : "%eax", "%ebx", "%ecx" /* clobber list (no input regs!) */
+ : "%eax", "%ecx" // clobber list (no input regs!)
+#ifndef __PIC__
+ , "%ebx"
+#endif
#if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */
, "%mm0", "%mm1", "%mm2", "%mm3"
{
#if defined(PNG_MMX_CODE_SUPPORTED)
__asm__ __volatile__ (
- "pushl %%ebx \n\t" /* ebx gets clobbered by CPUID instruction */
- "pushl %%ecx \n\t" /* so does ecx... */
- "pushl %%edx \n\t" /* ...and edx (but ecx & edx safe on Linux) */
-/* ".byte 0x66 \n\t" // convert 16-bit pushf to 32-bit pushfd */
-/* "pushf \n\t" // 16-bit pushf */
- "pushfl \n\t" /* save Eflag to stack */
- "popl %%eax \n\t" /* get Eflag from stack into eax */
- "movl %%eax, %%ecx \n\t" /* make another copy of Eflag in ecx */
- "xorl $0x200000, %%eax \n\t" /* toggle ID bit in Eflag (i.e., bit 21) */
- "pushl %%eax \n\t" /* save modified Eflag back to stack */
-/* ".byte 0x66 \n\t" // convert 16-bit popf to 32-bit popfd */
-/* "popf \n\t" // 16-bit popf */
- "popfl \n\t" /* restore modified value to Eflag reg */
- "pushfl \n\t" /* save Eflag to stack */
- "popl %%eax \n\t" /* get Eflag from stack */
- "pushl %%ecx \n\t" /* save original Eflag to stack */
- "popfl \n\t" /* restore original Eflag */
- "xorl %%ecx, %%eax \n\t" /* compare new Eflag with original Eflag */
- "jz 0f \n\t" /* if same, CPUID instr. is not supported */
-
- "xorl %%eax, %%eax \n\t" /* set eax to zero */
-/* ".byte 0x0f, 0xa2 \n\t" // CPUID instruction (two-byte opcode) */
- "cpuid \n\t" /* get the CPU identification info */
- "cmpl $1, %%eax \n\t" /* make sure eax return non-zero value */
- "jl 0f \n\t" /* if eax is zero, MMX is not supported */
-
- "xorl %%eax, %%eax \n\t" /* set eax to zero and... */
- "incl %%eax \n\t" /* ...increment eax to 1. This pair is */
- /* faster than the instruction "mov eax, 1" */
- "cpuid \n\t" /* get the CPU identification info again */
- "andl $0x800000, %%edx \n\t" /* mask out all bits but MMX bit (23) */
- "cmpl $0, %%edx \n\t" /* 0 = MMX not supported */
- "jz 0f \n\t" /* non-zero = yes, MMX IS supported */
-
- "movl $1, %%eax \n\t" /* set return value to 1 */
- "jmp 1f \n\t" /* DONE: have MMX support */
-
- "0: \n\t" /* .NOT_SUPPORTED: target label for jump instructions */
- "movl $0, %%eax \n\t" /* set return value to 0 */
- "1: \n\t" /* .RETURN: target label for jump instructions */
- "movl %%eax, _mmx_supported \n\t" /* save in global static variable, too */
- "popl %%edx \n\t" /* restore edx */
- "popl %%ecx \n\t" /* restore ecx */
- "popl %%ebx \n\t" /* restore ebx */
-
-/* "ret \n\t" // DONE: no MMX support */
- /* (fall through to standard C "ret") */
-
- : /* output list (none) */
-
- : /* any variables used on input (none) */
-
- : "%eax" /* clobber list */
-/* , "%ebx", "%ecx", "%edx" // GRR: we handle these manually */
-/* , "memory" // if write to a variable gcc thought was in a reg */
-/* , "cc" // "condition codes" (flag bits) */
+ "pushl %%ebx \n\t" // ebx gets clobbered by CPUID instruction
+ "pushl %%ecx \n\t" // so does ecx...
+ "pushl %%edx \n\t" // ...and edx (but ecx & edx safe on Linux)
+// ".byte 0x66 \n\t" // convert 16-bit pushf to 32-bit pushfd
+// "pushf \n\t" // 16-bit pushf
+ "pushfl \n\t" // save Eflag to stack
+ "popl %%eax \n\t" // get Eflag from stack into eax
+ "movl %%eax, %%ecx \n\t" // make another copy of Eflag in ecx
+ "xorl $0x200000, %%eax \n\t" // toggle ID bit in Eflag (i.e., bit 21)
+ "pushl %%eax \n\t" // save modified Eflag back to stack
+// ".byte 0x66 \n\t" // convert 16-bit popf to 32-bit popfd
+// "popf \n\t" // 16-bit popf
+ "popfl \n\t" // restore modified value to Eflag reg
+ "pushfl \n\t" // save Eflag to stack
+ "popl %%eax \n\t" // get Eflag from stack
+ "pushl %%ecx \n\t" // save original Eflag to stack
+ "popfl \n\t" // restore original Eflag
+ "xorl %%ecx, %%eax \n\t" // compare new Eflag with original Eflag
+ "jz 0f \n\t" // if same, CPUID instr. is not supported
+
+ "xorl %%eax, %%eax \n\t" // set eax to zero
+// ".byte 0x0f, 0xa2 \n\t" // CPUID instruction (two-byte opcode)
+ "cpuid \n\t" // get the CPU identification info
+ "cmpl $1, %%eax \n\t" // make sure CPUID returned a non-zero value in eax
+ "jl 0f \n\t" // if eax is zero, MMX is not supported
+
+ "xorl %%eax, %%eax \n\t" // set eax to zero and...
+ "incl %%eax \n\t" // ...increment eax to 1. This pair is
+ // faster than the instruction "mov eax, 1"
+ "cpuid \n\t" // get the CPU identification info again
+ "andl $0x800000, %%edx \n\t" // mask out all bits but MMX bit (23)
+ "cmpl $0, %%edx \n\t" // 0 = MMX not supported
+ "jz 0f \n\t" // non-zero = yes, MMX IS supported
+
+ "movl $1, %%eax \n\t" // set return value to 1
+ "jmp 1f \n\t" // DONE: have MMX support
+
+ "0: \n\t" // .NOT_SUPPORTED: target label for jump instructions
+ "movl $0, %%eax \n\t" // set return value to 0
+ "1: \n\t" // .RETURN: target label for jump instructions
+ "movl %%eax, _mmx_supported \n\t" // save in global static variable, too
+ "popl %%edx \n\t" // restore edx
+ "popl %%ecx \n\t" // restore ecx
+ "popl %%ebx \n\t" // restore ebx
+
+// "ret \n\t" // DONE: no MMX support
+ // (fall through to standard C "ret")
+
+ : // output list (none)
+
+ : // any variables used on input (none)
+
+ : "%eax" // clobber list
+// , "%ebx", "%ecx", "%edx" // GRR: we handle these manually
+// , "memory" // if write to a variable gcc thought was in a reg
+// , "cc" // "condition codes" (flag bits)
);
#else
_mmx_supported = 0;