]>
Commit | Line | Data |
---|---|---|
f6bcfd97 BP |
1 | /* |
2 | * crc_i386.S, optimized CRC calculation function for Zip and UnZip, not | |
3 | * copyrighted by Paul Kienitz and Christian Spieler. Last revised 12 Oct 97. | |
4 | * | |
5 | * GRR 961110: incorporated Scott Field optimizations from win32/crc_i386.asm | |
6 | * => overall 6% speedup in "unzip -tq" on 9MB zipfile (486-66) | |
7 | * | |
8 | * SPC 970402: revised for Rodney Brown's optimizations (32-bit-wide | |
9 | * aligned reads for most of the data from buffer), can be | |
10 | * disabled by defining the macro NO_32_BIT_LOADS | |
11 | * | |
12 | * SPC 971012: added Rodney Brown's additional tweaks for 32-bit-optimized | |
13 | * CPUs (like the Pentium Pro, Pentium II, and probably some | |
14 | * Pentium clones). This optimization is controlled by the | |
15 | * preprocessor switch "__686" and is disabled by default. | |
16 | * (This default is based on the assumption that most users | |
17 | * do not yet work on a Pentium Pro or Pentium II machine ...) | |
18 | * | |
19 | * FLAT memory model assumed. Calling interface: | |
20 | * - args are pushed onto the stack from right to left, | |
21 | * - return value is given in the EAX register, | |
22 | * - all other registers (with exception of EFLAGS) are preserved. (With | |
23 | * GNU C 2.7.x, %edx and %ecx are `scratch' registers, but preserving | |
24 | * them nevertheless adds only 4 single byte instructions.) | |
25 | * | |
26 | * This source generates the function | |
27 | * ulg crc32(ulg crc, ZCONST uch *buf, ulg len). | |
28 | * | |
29 | * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. | |
30 | * This results in shorter code at the expense of reduced performance. | |
31 | */ | |
32 | ||
33 | /* This file is NOT used in conjunction with zlib. */ | |
34 | #ifndef USE_ZLIB | |
35 | ||
36 | /* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix | |
37 | * external symbols with an underline character '_'. __ELF__ is treated | |
38 | * the same way: both select the unprefixed names crc32/get_crc_table. */ | |
39 | #if defined(NO_UNDERLINE) || defined(__ELF__) | |
40 | # define _crc32 crc32 | |
41 | # define _get_crc_table get_crc_table | |
42 | #endif | |
43 | /* Pad loop heads to 16 bytes with NOPs (0x90); predefine ALIGNMENT to override. gas reads the | |
44 | * .align operand as log2 on i386 a.out, but as a byte count on i386 ELF (as SVR4 as does). */ | |
45 | #if !defined(ALIGNMENT) && defined(__ELF__) | |
46 | # define ALIGNMENT .align 16,0x90 | |
47 | #elif !defined(ALIGNMENT) | |
48 | # define ALIGNMENT .align 4,0x90 | |
49 | #endif | |
50 | ||
51 | #if defined(i386) || defined(_i386) || defined(_I386) || defined(__i386) | |
52 | ||
53 | /* This version is for 386 Unix, OS/2, MSDOS in 32 bit mode (gcc & gas). | |
54 | * Warning: it uses the AT&T syntax: mov source,dest | |
55 | * This file is only optional. If you want to use the C version, | |
56 | * remove -DASM_CRC from CFLAGS in Makefile and set OBJA to an empty string. | |
57 | */ | |
58 | ||
59 | .file "crc_i386.S" | |
60 | ||
61 | #if defined(NO_STD_STACKFRAME) && defined(USE_STD_STACKFRAME) | |
62 | # undef USE_STD_STACKFRAME /* NO_STD_STACKFRAME overrides a preset USE_STD_STACKFRAME */ | |
63 | #else | |
64 | /* The default is to use standard stack frame entry, because it | |
65 | * results in smaller code! NOTE(review): NO_STD_STACKFRAME alone also lands here. | |
66 | */ | |
67 | # ifndef USE_STD_STACKFRAME | |
68 | # define USE_STD_STACKFRAME | |
69 | # endif | |
70 | #endif | |
71 | ||
72 | #ifdef USE_STD_STACKFRAME /* args addressed via ebp */ | |
73 | # define _STD_ENTRY pushl %ebp ; movl %esp,%ebp | |
74 | # define arg1 8(%ebp) /* above saved ebp (4) + return address (4) */ | |
75 | # define arg2 12(%ebp) | |
76 | # define arg3 16(%ebp) | |
77 | # define _STD_LEAVE popl %ebp | |
78 | #else /* !USE_STD_STACKFRAME: args addressed via esp, valid only after the 5 pushl in _crc32 */ | |
79 | # define _STD_ENTRY | |
80 | # define arg1 24(%esp) /* above 5 saved registers (20) + return address (4) */ | |
81 | # define arg2 28(%esp) | |
82 | # define arg3 32(%esp) | |
83 | # define _STD_LEAVE | |
84 | #endif /* ?USE_STD_STACKFRAME */ | |
85 | ||
86 | /* | |
87 | * Do_CRC, Do_CRC_byte and (unless NO_32_BIT_LOADS) Do_CRC_lword make up the CRC32 loop body. | |
88 | * registers modified: | |
89 | * eax : crc value "c" | |
90 | * esi : pointer to next data byte (or lword) "buf++" | |
91 | * registers read: | |
92 | * edi : pointer to base of crc_table array (4-byte entries) | |
93 | * scratch registers: | |
94 | * ebx : index into crc_table array | |
95 | * (requires upper three bytes = 0 when __686 is undefined, since only bl is written) | |
96 | */ | |
97 | #ifndef __686 /* optimize for 386, 486, Pentium */ | |
98 | #define Do_CRC /* one table step: c = (c >> 8) ^ table[c & 0xFF] */\ | |
99 | movb %al, %bl ;/* tmp = c & 0xFF (rest of ebx must already be 0) */\ | |
100 | shrl $8, %eax ;/* c = (c >> 8) */\ | |
101 | xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */ | |
102 | #else /* __686 : optimize for Pentium Pro and compatible CPUs */ | |
103 | #define Do_CRC /* one table step: c = (c >> 8) ^ table[c & 0xFF] */\ | |
104 | movzbl %al, %ebx ;/* tmp = c & 0xFF, zero-extended into all of ebx */\ | |
105 | shrl $8, %eax ;/* c = (c >> 8) */\ | |
106 | xorl (%edi, %ebx, 4), %eax ;/* c ^= table[tmp] */ | |
107 | #endif /* ?__686 */ | |
108 | ||
109 | #define Do_CRC_byte /* one input byte: c = (c >> 8) ^ table[(c^*buf++)&0xFF] */\ | |
110 | xorb (%esi), %al ;/* c ^= *buf */\ | |
111 | incl %esi ;/* buf++ */\ | |
112 | Do_CRC | |
113 | ||
114 | #ifndef NO_32_BIT_LOADS | |
115 | #define Do_CRC_lword /* four input bytes via one 32-bit load, then four table steps */\ | |
116 | xorl (%esi), %eax ;/* c ^= *(ulg *)buf */\ | |
117 | addl $4, %esi ;/* ((ulg *)buf)++ */\ | |
118 | Do_CRC \ | |
119 | Do_CRC \ | |
120 | Do_CRC \ | |
121 | Do_CRC | |
122 | #endif /* !NO_32_BIT_LOADS */ | |
123 | ||
124 | ||
125 | .text | |
126 | ||
127 | .globl _crc32 | |
128 | ||
129 | _crc32: /* ulg crc32(ulg crc, uch *buf, extent len): result in eax, 0 if buf is NULL */ | |
130 | _STD_ENTRY /* sets up the ebp frame when USE_STD_STACKFRAME is defined, else empty */ | |
131 | pushl %edi /* save every register used below; only eax is not restored */ | |
132 | pushl %esi | |
133 | pushl %ebx | |
134 | pushl %edx | |
135 | pushl %ecx | |
136 | ||
137 | movl arg2, %esi /* 2nd arg: uch *buf */ | |
138 | subl %eax, %eax /* > if (!buf) (eax = 0 is the return value) */ | |
139 | testl %esi, %esi /* > return 0; */ | |
140 | jz .L_fine /* > else { */ | |
141 | call _get_crc_table /* returns the table base in eax */ | |
142 | movl %eax, %edi /* edi = crc_table base, as the Do_CRC macros expect */ | |
143 | movl arg1, %eax /* 1st arg: ulg crc */ | |
144 | #ifndef __686 | |
145 | subl %ebx, %ebx /* ebx=0; bl usable as dword */ | |
146 | #endif | |
147 | movl arg3, %ecx /* 3rd arg: extent len */ | |
148 | notl %eax /* > c = ~crc; */ | |
149 | ||
150 | #ifndef NO_UNROLLED_LOOPS | |
151 | # ifndef NO_32_BIT_LOADS | |
152 | testl %ecx, %ecx /* len == 0: nothing to process */ | |
153 | jz .L_bail | |
154 | /* len > 0 from here on: consume single bytes until buf is 4-byte aligned */ | |
155 | .L_align_loop: | |
156 | testl $3, %esi /* Align buf on lword boundary */ | |
157 | jz .L_aligned_now | |
158 | Do_CRC_byte | |
159 | decl %ecx /* --len */ | |
160 | jnz .L_align_loop /* len exhausted: fall through, the loops below see 0 and are skipped */ | |
161 | .L_aligned_now: | |
162 | # endif /* !NO_32_BIT_LOADS */ | |
163 | movl %ecx, %edx /* save len in edx */ | |
164 | andl $7, %edx /* edx = len % 8 */ | |
165 | shrl $3, %ecx /* ecx = len / 8 */ | |
166 | jz .L_No_Eights /* fewer than 8 bytes left: tail loop only */ | |
167 | /* align loop head at start of 486 internal cache line !! */ | |
168 | ALIGNMENT | |
169 | .L_Next_Eight: /* each pass consumes 8 bytes */ | |
170 | # ifndef NO_32_BIT_LOADS | |
171 | /* Do_CRC_lword, written out inline: bytes 0-3 of the group */ | |
172 | xorl (%esi), %eax ;/* c ^= *(ulg *)buf */ | |
173 | addl $4, %esi ;/* ((ulg *)buf)++ */ | |
174 | Do_CRC | |
175 | Do_CRC | |
176 | Do_CRC | |
177 | Do_CRC | |
178 | /* Do_CRC_lword, written out inline: bytes 4-7 of the group */ | |
179 | xorl (%esi), %eax ;/* c ^= *(ulg *)buf */ | |
180 | addl $4, %esi ;/* ((ulg *)buf)++ */ | |
181 | Do_CRC | |
182 | Do_CRC | |
183 | Do_CRC | |
184 | Do_CRC | |
185 | # else /* NO_32_BIT_LOADS */ | |
186 | Do_CRC_byte | |
187 | Do_CRC_byte | |
188 | Do_CRC_byte | |
189 | Do_CRC_byte | |
190 | Do_CRC_byte | |
191 | Do_CRC_byte | |
192 | Do_CRC_byte | |
193 | Do_CRC_byte | |
194 | # endif /* ?NO_32_BIT_LOADS */ | |
195 | decl %ecx /* one 8-byte group done */ | |
196 | jnz .L_Next_Eight | |
197 | ||
198 | .L_No_Eights: | |
199 | movl %edx, %ecx /* ecx = leftover byte count (len % 8) */ | |
200 | #endif /* NO_UNROLLED_LOOPS */ | |
201 | ||
202 | #ifndef NO_JECXZ_SUPPORT | |
203 | jecxz .L_bail /* > if (len) */ | |
204 | #else | |
205 | testl %ecx, %ecx /* > if (len) */ | |
206 | jz .L_bail | |
207 | #endif | |
208 | /* align loop head at start of 486 internal cache line !! */ | |
209 | ALIGNMENT | |
210 | .L_loupe: /* > do { */ | |
211 | Do_CRC_byte /* c = CRC32(c, *buf++); */ | |
212 | decl %ecx /* > } while (--len); */ | |
213 | jnz .L_loupe | |
214 | ||
215 | .L_bail: /* > } */ | |
216 | notl %eax /* > return ~c; */ | |
217 | .L_fine: /* common exit; the NULL-buf path arrives here with eax = 0 */ | |
218 | popl %ecx /* restore in reverse push order */ | |
219 | popl %edx | |
220 | popl %ebx | |
221 | popl %esi | |
222 | popl %edi | |
223 | _STD_LEAVE /* pops ebp when the standard frame is in use, else empty */ | |
224 | ret | |
225 | ||
226 | #else | |
227 | # error this asm version is for 386 only | |
228 | #endif /* i386 || _i386 || _I386 || __i386 */ | |
229 | ||
230 | #endif /* !USE_ZLIB */ |