]>
Commit | Line | Data |
---|---|---|
f6bcfd97 BP |
1 | /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm |
2 | * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler. | |
3 | * Last revised: 22-Mai-1998 | |
4 | * | |
5 | * Originally coded (in crc_i386.asm) and put into the public domain | |
6 | * by Paul Kienitz and Christian Spieler. | |
7 | * | |
8 | * Revised 06-Oct-96, Scott Field (sfield@microsoft.com) | |
9 | * fixed to assemble with masm by not using .model directive which makes | |
10 | * assumptions about segment alignment. Also, | |
11 | * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather | |
12 | * than lodsb, and other misc. changes resulting in the following performance | |
13 | * increases: | |
14 | * | |
15 | * unrolled loops NO_UNROLLED_LOOPS | |
16 | * *8 >8 <8 *8 >8 <8 | |
17 | * | |
18 | * +54% +42% +35% +82% +52% +25% | |
19 | * | |
20 | * first item in each table is input buffer length, even multiple of 8 | |
21 | * second item in each table is input buffer length, > 8 | |
22 | * third item in each table is input buffer length, < 8 | |
23 | * | |
24 | * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) | |
25 | * Incorporated Rodney Brown's 32-bit-reads optimization as found in the | |
26 | * UNIX AS source crc_i386.S. This new code can be disabled by defining | |
27 | * the macro symbol NO_32_BIT_LOADS. | |
28 | * | |
29 | * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) | |
30 | * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs | |
31 | * (like the Pentium Pro, Pentium II, and probably some Pentium clones). | |
32 | * This optimization is controlled by the macro symbol __686 and is disabled | |
33 | * by default. (This default is based on the assumption that most users | |
34 | * do not yet work on a Pentium Pro or Pentium II machine ...) | |
35 | * | |
36 | * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++ | |
37 | * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic, | |
38 | * confirmed correct working with MS VC++ (32-bit). | |
39 | * | |
40 | * Revised 22-Mai-98, Peter Kunath, Chr. Spieler : The 16-Nov-97 revision broke | |
41 | * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its | |
42 | * own __asm {...} construct. For MSVC, a "#pragma warning" was added to | |
43 | * shut up the "no return value" warning message. | |
44 | * | |
45 | * FLAT memory model assumed. | |
46 | * | |
47 | * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. | |
48 | * This results in shorter code at the expense of reduced performance. | |
49 | * | |
50 | */ | |
51 | ||
52 | #include "zip.h" | |
53 | ||
54 | #ifndef USE_ZLIB | |
55 | ||
56 | #ifndef ZCONST /* fall back to plain "const" when nothing defined ZCONST before this point */ | |
57 | # define ZCONST const | |
58 | #endif | |
59 | ||
60 | /* Select whether the following inline-assembler code is supported.
 * MSC_INLINE_ASM_32BIT_SUPPORT gets defined for
 *   - Microsoft C with _MSC_VER >= 700 and an x86 target with _M_IX86 >= 300,
 *   - Borland C with __BORLANDC__ >= 452.
 * NOTE(review): the Borland threshold is written as decimal 452; if
 * __BORLANDC__ is encoded in hex (e.g. 0x0452), this test admits older
 * versions than intended - confirm against the Borland documentation. */ | |
61 | #if (defined(_MSC_VER) && _MSC_VER >= 700) | |
62 | #if (defined(_M_IX86) && _M_IX86 >= 300) | |
63 | # define MSC_INLINE_ASM_32BIT_SUPPORT | |
64 | /* Disable warning for no return value, typical of asm functions (the result is left in eax) */ | |
65 | # pragma warning( disable : 4035 ) | |
66 | #endif | |
67 | #endif | |
68 | ||
69 | #if (defined(__BORLANDC__) && __BORLANDC__ >= 452) | |
70 | # define MSC_INLINE_ASM_32BIT_SUPPORT | |
71 | #endif | |
72 | ||
73 | #ifdef MSC_INLINE_ASM_32BIT_SUPPORT | |
74 | /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */ | |
75 | ||
76 | /* | |
77 | * These two (three) macros make up the loop body of the CRC32 cruncher. Do_CRC is the table step for one byte: ebx = al; eax = (eax >> 8) ^ crc_table[ebx]. | |
78 | * registers modified: | |
79 | * eax : crc value "c" | |
80 | * esi : pointer to next data byte (or dword) "buf++" (advanced by Do_CRC_byte / Do_CRC_dword, not by Do_CRC itself) | |
81 | * registers read: | |
82 | * edi : pointer to base of crc_table array (entries are addressed as dwords: [edi+ebx*4]) | |
83 | * scratch registers: | |
84 | * ebx : index into crc_table array | |
85 | * (requires upper three bytes = 0 when __686 is undefined, because that variant writes only bl; the __686 variant zero-extends al into ebx with movzx instead) | |
86 | * Every instruction sits in its own __asm {...} construct; the file header records that this was needed for MSVC 5.0. */ | |
87 | #ifndef __686 | |
88 | #define Do_CRC { \ | |
89 | __asm { mov bl, al }; \ | |
90 | __asm { shr eax, 8 }; \ | |
91 | __asm { xor eax, [edi+ebx*4] }; } | |
92 | #else /* __686 */ | |
93 | #define Do_CRC { \ | |
94 | __asm { movzx ebx, al }; \ | |
95 | __asm { shr eax, 8 }; \ | |
96 | __asm { xor eax, [edi+ebx*4] }; } | |
97 | #endif /* ?__686 */ | |
98 | ||
99 | #define Do_CRC_byte { /* c = CRC32(c, *buf++): fold one input byte into eax */ \ | |
100 | __asm { xor al, byte ptr [esi] }; /* mix the byte at esi into the low byte of the crc */ \ | |
101 | __asm { inc esi }; /* advance the buffer pointer by one byte */ \ | |
102 | Do_CRC; } /* one table step for that byte */ | |
103 | ||
104 | #ifndef NO_32_BIT_LOADS /* Do_CRC_dword exists only when 32-bit loads are enabled */ | |
105 | #define Do_CRC_dword { /* fold four input bytes into eax with a single 32-bit load */ \ | |
106 | __asm { xor eax, dword ptr [esi] }; /* mix the dword at esi into the crc */ \ | |
107 | __asm { add esi, 4 }; /* advance the buffer pointer by four bytes */ \ | |
108 | Do_CRC; /* four table steps, one per byte that was mixed in */ \ | |
109 | Do_CRC; \ | |
110 | Do_CRC; \ | |
111 | Do_CRC; } | |
112 | #endif /* !NO_32_BIT_LOADS */ | |
113 | ||
114 | /* ========================================================================= */ | |
115 | ulg crc32(crc, buf, len) | |
116 | ulg crc; /* crc shift register */ | |
117 | ZCONST uch *buf; /* pointer to bytes to pump through */ | |
118 | extent len; /* number of bytes in buf[] */ | |
119 | /* Run a set of bytes through the crc shift register. If buf is a NULL | |
120 | pointer, then initialize the crc shift register contents instead | |
121 | (0 is returned in that case). Otherwise the function computes c = ~crc,
      applies c = CRC32(c, *buf++) to each of the len input bytes and
      returns ~c.

      The body is inline assembler and leaves the result in eax; a C-level
      return statement is compiled in only when NEED_RETURN is defined.

      Register roles inside the function:
        eax : running crc value c (also receives the get_crc_table() result)
        esi : buf, advanced as bytes are consumed
        edi : base of the crc table, taken from get_crc_table()
        ecx : loop counter (bytes, or groups of eight bytes)
        edx : len % 8, the tail left over after the unrolled loop
        ebx : table index scratch (cleared once unless __686 is defined)
      edx and ecx are pushed on entry and popped at "fine".

      Compile-time variants:
        NO_UNROLLED_LOOPS : drop the eight-bytes-per-iteration loop
        NO_32_BIT_LOADS   : drop the dword-alignment prologue and use eight
                            Do_CRC_byte steps instead of two Do_CRC_dword
        NO_JECXZ_SUPPORT  : use test + jz in place of jecxz
        __686             : Do_CRC uses movzx, so ebx needs no clearing

      NOTE(review): ebx, esi and edi are modified without an explicit
      save; presumably the compiler preserves them for functions that
      contain inline assembler - confirm for every supported compiler. */ | |
122 | { | |
123 | __asm { | |
124 | push edx /* saved here, restored at "fine" */ | |
125 | push ecx /* saved here, restored at "fine" */ | |
126 | ||
127 | mov esi,buf ;/* 2nd arg: uch *buf */ | |
128 | sub eax,eax ;/*> if (!buf) (eax = 0 is the return value) */ | |
129 | test esi,esi ;/*> return 0; */ | |
130 | jz fine ;/*> else { */ | |
131 | ||
132 | call get_crc_table /* result comes back in eax */ | |
133 | mov edi,eax /* edi = base of crc table for Do_CRC */ | |
134 | mov eax,crc ;/* 1st arg: ulg crc */ | |
135 | #ifndef __686 | |
136 | sub ebx,ebx ;/* ebx=0; => bl usable as a dword */ | |
137 | #endif | |
138 | mov ecx,len ;/* 3rd arg: extent len (loaded after the call) */ | |
139 | not eax ;/*> c = ~crc; */ | |
140 | ||
141 | #ifndef NO_UNROLLED_LOOPS | |
142 | # ifndef NO_32_BIT_LOADS | |
143 | test ecx,ecx /* len == 0: nothing to process */ | |
144 | je bail | |
145 | align_loop: | |
146 | test esi,3 ;/* align buf pointer on next */ | |
147 | jz aligned_now ;/* dword boundary */ | |
148 | } | |
149 | Do_CRC_byte ; /* consume one leading byte while buf is not dword-aligned */ | |
150 | __asm { | |
151 | dec ecx /* one byte less to go */ | |
152 | jnz align_loop /* falls through with ecx == 0 when the input ends first */ | |
153 | aligned_now: | |
154 | # endif /* !NO_32_BIT_LOADS */ | |
155 | mov edx,ecx ;/* save len in edx */ | |
156 | and edx,000000007H ;/* edx = len % 8 */ | |
157 | shr ecx,3 ;/* ecx = len / 8 */ | |
158 | jz No_Eights /* fewer than eight bytes left: skip the unrolled loop */ | |
159 | ; align loop head at start of 486 internal cache line !! | |
160 | align 16 | |
161 | Next_Eight: | |
162 | } | |
163 | # ifndef NO_32_BIT_LOADS | |
164 | Do_CRC_dword ; /* eight bytes per iteration, as two dwords */ | |
165 | Do_CRC_dword ; | |
166 | # else /* NO_32_BIT_LOADS */ | |
167 | Do_CRC_byte ; /* eight bytes per iteration, one byte at a time */ | |
168 | Do_CRC_byte ; | |
169 | Do_CRC_byte ; | |
170 | Do_CRC_byte ; | |
171 | Do_CRC_byte ; | |
172 | Do_CRC_byte ; | |
173 | Do_CRC_byte ; | |
174 | Do_CRC_byte ; | |
175 | # endif /* ?NO_32_BIT_LOADS */ | |
176 | __asm { | |
177 | dec ecx /* one group of eight bytes done */ | |
178 | jnz Next_Eight | |
179 | No_Eights: | |
180 | mov ecx,edx /* ecx = len % 8, the bytes still to process */ | |
181 | ||
182 | #endif /* NO_UNROLLED_LOOPS */ | |
183 | #ifndef NO_JECXZ_SUPPORT | |
184 | jecxz bail ;/*> if (len) */ | |
185 | #else | |
186 | test ecx,ecx ;/*> if (len) */ | |
187 | jz bail | |
188 | #endif | |
189 | ; align loop head at start of 486 internal cache line !! | |
190 | align 16 | |
191 | loupe: ;/*> do { */ | |
192 | } | |
193 | Do_CRC_byte ;/* c = CRC32(c, *buf++); */ | |
194 | __asm { | |
195 | dec ecx ;/*> } while (--len); */ | |
196 | jnz loupe | |
197 | ||
198 | bail: ;/*> } */ | |
199 | not eax ;/*> return ~c; */ | |
200 | fine: | |
201 | pop ecx /* undo the pushes from function entry */ | |
202 | pop edx | |
203 | } | |
204 | #ifdef NEED_RETURN | |
205 | return _EAX; /* explicit return of the value left in eax */ | |
206 | #endif | |
207 | } | |
208 | #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */ | |
209 | #if (defined(_MSC_VER) && _MSC_VER >= 700) | |
210 | #if (defined(_M_IX86) && _M_IX86 >= 300) | |
211 | /* Re-enable the missing-return-value warning (4035); same compiler test as the one that disabled it near the top of the file */ | |
212 | # pragma warning( default : 4035 ) | |
213 | #endif | |
214 | #endif | |
215 | #endif /* !USE_ZLIB */ |