]> git.saurik.com Git - wxWidgets.git/blame_incremental - utils/Install/sfxzip/crc_i386.S
post wxUniv merge fix
[wxWidgets.git] / utils / Install / sfxzip / crc_i386.S
... / ...
CommitLineData
1/*
2 * crc_i386.S, optimized CRC calculation function for Zip and UnZip, not
3 * copyrighted by Paul Kienitz and Christian Spieler. Last revised 12 Oct 97.
4 *
5 * GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm
6 * => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66)
7 *
8 * SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide
9 * aligned reads for most of the data from buffer), can be
10 * disabled by defining the macro NO_32_BIT_LOADS
11 *
12 * SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized
13 * CPUs (like the Pentium Pro, Pentium II, and probably some
14 * Pentium clones). This optimization is controlled by the
15 * preprocessor switch "__686" and is disabled by default.
16 * (This default is based on the assumption that most users
17 * do not yet work on a Pentium Pro or Pentium II machine ...)
18 *
19 * FLAT memory model assumed. Calling interface:
20 * - args are pushed onto the stack from right to left,
21 * - return value is given in the EAX register,
22 * - all other registers (with exception of EFLAGS) are preserved. (With
23 * GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving
24 * them nevertheless adds only 4 single byte instructions.)
25 *
26 * This source generates the function
27 * ulg crc32(ulg crc, ZCONST uch *buf, ulg len).
28 *
29 * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
30 * This results in shorter code at the expense of reduced performance.
31 */
32
33/* This file is NOT used in conjunction with zlib. */
34#ifndef USE_ZLIB
35
36/* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
37 * external symbols with an underline character '_'.
38 */
39#if defined(NO_UNDERLINE) || defined(__ELF__) /* same mapping is applied whenever __ELF__ is defined */
40# define _crc32 crc32 /* label exported by this file */
41# define _get_crc_table get_crc_table /* external routine called from _crc32 */
42#endif /* NO_UNDERLINE || __ELF__ */
43/* Loop heads are aligned to 16 bytes, padded with NOPs (0x90). The
44 * operand of plain .align is a power of two on some gas targets but a
45 * byte count on others (e.g. i386 ELF, SVR4), so .p2align is used: its
46 * operand is always log2(alignment). Predefine ALIGNMENT to override. */
47#ifndef ALIGNMENT
48# define ALIGNMENT .p2align 4,0x90
49#endif
50
51#if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386) /* any of these enables the code below */
52
53/* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas).
54 * Warning: it uses the AT&T syntax: mov source,dest
55 * This file is optional. If you want to use the C version instead,
56 * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string.
57 */
58
59 .file "crc_i386.S"
60
61#if defined(NO_STD_STACKFRAME) /* explicit opt-out wins, even if USE_STD_STACKFRAME is also set */
62# undef USE_STD_STACKFRAME /* the macro tested further down; a no-op if it was not defined */
63#else
64 /* The default is to use standard stack frame entry, because it
65 * results in smaller code!
66 */
67# ifndef USE_STD_STACKFRAME
68# define USE_STD_STACKFRAME
69# endif
70#endif
71
72#ifdef USE_STD_STACKFRAME /* arguments are addressed through the %ebp frame pointer */
73# define _STD_ENTRY pushl %ebp ; movl %esp,%ebp
74# define arg1 8(%ebp) /* above the saved %ebp (4 bytes) and the return address (4 bytes) */
75# define arg2 12(%ebp)
76# define arg3 16(%ebp)
77# define _STD_LEAVE popl %ebp
78#else /* !USE_STD_STACKFRAME */
79# define _STD_ENTRY
80# define arg1 24(%esp) /* valid only after the five pushl instructions in _crc32: 5*4 bytes + return address */
81# define arg2 28(%esp)
82# define arg3 32(%esp)
83# define _STD_LEAVE
84#endif /* ?USE_STD_STACKFRAME */
85
86/*
87 * These two (three) macros make up the loop body of the CRC32 cruncher.
88 * registers modified:
89 * eax : crc value "c"
90 * esi : pointer to next data byte (or lword) "buf++"; advanced by Do_CRC_byte and Do_CRC_lword, untouched by Do_CRC
91 * registers read:
92 * edi : pointer to base of crc_table array (indexed with scale factor 4)
93 * scratch registers:
94 * ebx : index into crc_table array
95 * (requires upper three bytes = 0 when __686 is undefined, because movb replaces only %bl; the __686 form zero-extends with movzbl)
96 */
97#ifndef __686 /* optimize for 386, 486, Pentium */
98#define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\
99 movb %al, %bl ;/* tmp = c & 0xFF */\
100 shrl $8, %eax ;/* c = (c >> 8) */\
101 xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */
102#else /* __686 : optimize for Pentium Pro and compatible CPUs */
103#define Do_CRC /* c = (c >> 8) ^ table[c & 0xFF] */\
104 movzbl %al, %ebx ;/* tmp = c & 0xFF */\
105 shrl $8, %eax ;/* c = (c >> 8) */\
106 xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */
107#endif /* ?__686 */
108
109#define Do_CRC_byte /* c = (c >> 8) ^ table[(c^*buf++)&0xFF]; consumes one byte at (%esi) */\
110 xorb (%esi), %al ;/* low byte of c ^= *buf */\
111 incl %esi ;/* buf++ */\
112 Do_CRC /* table step on the mixed low byte */
113
114#ifndef NO_32_BIT_LOADS /* the lword variant is only defined when 32-bit loads are allowed */
115#define Do_CRC_lword /* one 32-bit load followed by four table steps; the unrolled loop in _crc32 spells out this same sequence */\
116 xorl (%esi), %eax ;/* c ^= *(ulg *)buf */\
117 addl $4, %esi ;/* ((ulg *)buf)++ */\
118 Do_CRC /* 1st byte (lowest address; x86 loads are little-endian) */ \
119 Do_CRC /* 2nd byte */ \
120 Do_CRC /* 3rd byte */ \
121 Do_CRC /* 4th byte */
122#endif /* !NO_32_BIT_LOADS */
123
124
125 .text
126 /* Entry point: result is returned in %eax; %edi, %esi, %ebx, %edx and %ecx are pushed on entry and popped before ret. */
127 .globl _crc32
128 /* Returns 0 when buf is NULL; otherwise c = ~crc is updated over len bytes and ~c is returned. */
129_crc32: /* ulg crc32(ulg crc, uch *buf, extent len) */
130 _STD_ENTRY
131 pushl %edi /* edi will hold the crc_table base */
132 pushl %esi /* esi will hold the buf cursor */
133 pushl %ebx /* ebx is the table index scratch register */
134 pushl %edx /* edx keeps len % 8 across the unrolled loop */
135 pushl %ecx /* ecx is the loop counter */
136
137 movl arg2, %esi /* 2nd arg: uch *buf */
138 subl %eax, %eax /* > if (!buf) */
139 testl %esi, %esi /* > return 0; */
140 jz .L_fine /* > else { */
141 call _get_crc_table /* defined elsewhere; its %eax result is used as the table base */
142 movl %eax, %edi /* edi = crc_table, read by Do_CRC */
143 movl arg1, %eax /* 1st arg: ulg crc */
144#ifndef __686
145 subl %ebx, %ebx /* ebx=0; bl usable as dword */
146#endif
147 movl arg3, %ecx /* 3rd arg: extent len */
148 notl %eax /* > c = ~crc; */
149
150#ifndef NO_UNROLLED_LOOPS
151# ifndef NO_32_BIT_LOADS
152 testl %ecx, %ecx /* len == 0: nothing to align or process */
153 jz .L_bail
154 /* Assert now have positive length */
155.L_align_loop: /* byte-wise until buf is 4-byte aligned or len runs out */
156 testl $3, %esi /* Align buf on lword boundary */
157 jz .L_aligned_now
158 Do_CRC_byte
159 decl %ecx
160 jnz .L_align_loop
161.L_aligned_now: /* also reached with ecx == 0; the zero checks below then lead to .L_bail */
162# endif /* !NO_32_BIT_LOADS */
163 movl %ecx, %edx /* save len in edx */
164 andl $7, %edx /* edx = len % 8 */
165 shrl $3, %ecx /* ecx = len / 8 */
166 jz .L_No_Eights /* fewer than 8 bytes left: skip the unrolled loop */
167/* align loop head at start of 486 internal cache line !! */
168 ALIGNMENT
169.L_Next_Eight: /* each pass consumes 8 bytes */
170# ifndef NO_32_BIT_LOADS
171 /* Do_CRC_lword */
172 xorl (%esi), %eax ;/* c ^= *(ulg *)buf */
173 addl $4, %esi ;/* ((ulg *)buf)++ */
174 Do_CRC
175 Do_CRC
176 Do_CRC
177 Do_CRC
178 /* Do_CRC_lword */
179 xorl (%esi), %eax ;/* c ^= *(ulg *)buf */
180 addl $4, %esi ;/* ((ulg *)buf)++ */
181 Do_CRC
182 Do_CRC
183 Do_CRC
184 Do_CRC
185# else /* NO_32_BIT_LOADS */
186 Do_CRC_byte
187 Do_CRC_byte
188 Do_CRC_byte
189 Do_CRC_byte
190 Do_CRC_byte
191 Do_CRC_byte
192 Do_CRC_byte
193 Do_CRC_byte
194# endif /* ?NO_32_BIT_LOADS */
195 decl %ecx /* one group of eight done */
196 jnz .L_Next_Eight
197
198.L_No_Eights:
199 movl %edx, %ecx /* ecx = leftover byte count (len % 8) */
200#endif /* NO_UNROLLED_LOOPS */
201
202#ifndef NO_JECXZ_SUPPORT
203 jecxz .L_bail /* > if (len) */
204#else
205 testl %ecx, %ecx /* > if (len) */
206 jz .L_bail
207#endif
208/* align loop head at start of 486 internal cache line !! */
209 ALIGNMENT
210.L_loupe: /* > do { */
211 Do_CRC_byte /* c = CRC32(c, *buf++); */
212 decl %ecx /* > } while (--len); */
213 jnz .L_loupe
214
215.L_bail: /* > } */
216 notl %eax /* > return ~c; */
217.L_fine: /* the NULL-buf path joins here with eax == 0, skipping the final inversion */
218 popl %ecx /* restore registers in reverse push order */
219 popl %edx
220 popl %ebx
221 popl %esi
222 popl %edi
223 _STD_LEAVE
224 ret
225
226#else /* not an i386 target: stop at preprocessing time with an explicit diagnostic */
227#error this asm version is for 386 only
228#endif /* i386 || _i386 || _I386 || __i386 */
229
230#endif /* !USE_ZLIB */