]>
Commit | Line | Data |
---|---|---|
1 | ; crc_i386.asm, optimized CRC calculation function for Zip and UnZip, not | |
2 | ; copyrighted by Paul Kienitz and Christian Spieler. Last revised 25 Mar 98. | |
3 | ; | |
4 | ; Revised 06-Oct-96, Scott Field (sfield@microsoft.com) | |
5 | ; fixed to assemble with masm by not using .model directive which makes | |
6 | ; assumptions about segment alignment. Also, | |
7 | ; avoid using loop, and j[e]cxz where possible. Use mov + inc, rather | |
8 | ; than lodsb, and other misc. changes resulting in the following performance | |
9 | ; increases: | |
10 | ; | |
11 | ; unrolled loops NO_UNROLLED_LOOPS | |
12 | ; *8 >8 <8 *8 >8 <8 | |
13 | ; | |
14 | ; +54% +42% +35% +82% +52% +25% | |
15 | ; | |
16 | ; first item in each table: input buffer length is an even multiple of 8 | |
17 | ; second item in each table: input buffer length is > 8 | |
18 | ; third item in each table: input buffer length is < 8 | |
19 | ; | |
20 | ; Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) | |
21 | ; Incorporated Rodney Brown's 32-bit-reads optimization as found in the | |
22 | ; UNIX AS source crc_i386.S. This new code can be disabled by defining | |
23 | ; the macro symbol NO_32_BIT_LOADS. | |
24 | ; | |
25 | ; Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) | |
26 | ; Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs | |
27 | ; (like the Pentium Pro, Pentium II, and probably some Pentium clones). | |
28 | ; This optimization is controlled by the macro symbol __686 and is disabled | |
29 | ; by default. (This default is based on the assumption that most users | |
30 | ; do not yet work on a Pentium Pro or Pentium II machine ...) | |
31 | ; | |
32 | ; FLAT memory model assumed. | |
33 | ; | |
34 | ; The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. | |
35 | ; This results in shorter code at the expense of reduced performance. | |
36 | ; | |
37 | ; Revised 25-Mar-98, Cosmin Truta (cosmint@cs.ubbcluj.ro) | |
38 | ; Working without .model directive caused tasm32 version 5.0 to produce | |
39 | ; bad object code. The optimized alignments can be optionally disabled | |
40 | ; by defining NO_ALIGN, thus allowing the use of .model flat. There is no need | |
41 | ; to define this macro when using other versions of tasm. | |
42 | ; | |
43 | ;============================================================================== | |
44 | ; | |
45 | ; Do NOT assemble this source if external crc32 routine from zlib gets used. | |
46 | ; | |
47 | IFNDEF USE_ZLIB | |
48 | ; Everything from here to the closing "ENDIF ; !USE_ZLIB" is skipped if USE_ZLIB is defined. | |
49 | .386p | |
50 | name crc_i386 | |
51 ||
52 | IFDEF NO_ALIGN | |
53 | .model flat | |
54 | ENDIF | |
55 ||
56 | extrn _get_crc_table:near ; C: ZCONST ulg near *get_crc_table(void); _crc32 takes the table base from eax | |
57 | ||
58 | ; | |
59 | IFNDEF NO_STD_STACKFRAME | |
60 | ; Default variant: STD_ENTRY sets up a conventional ebp stack frame and | |
61 | ; STD_LEAVE restores ebp; Arg1..Arg3 are then addressed relative to ebp. | |
62 | ; This variant is the default because it results in smaller code. | |
63 | STD_ENTRY MACRO | |
64 | push ebp | |
65 | mov ebp,esp | |
66 | ENDM | |
67 ||
68 | Arg1 EQU 08H[ebp] | |
69 | Arg2 EQU 0CH[ebp] | |
70 | Arg3 EQU 10H[ebp] | |
71 ||
72 | STD_LEAVE MACRO | |
73 | pop ebp | |
74 | ENDM | |
75 ||
76 | ELSE ; NO_STD_STACKFRAME: entry/exit macros are empty, Arg1..Arg3 are esp-relative | |
77 ||
78 | STD_ENTRY MACRO | |
79 | ENDM | |
80 ||
81 | Arg1 EQU 18H[esp] | |
82 | Arg2 EQU 1CH[esp] | |
83 | Arg3 EQU 20H[esp] | |
84 ||
85 | STD_LEAVE MACRO | |
86 | ENDM | |
87 ||
88 | ENDIF ; ?NO_STD_STACKFRAME (18H = 4-byte return address + the five 4-byte pushes done in _crc32) | |
89 | ||
90 | ; Do_CRC, Do_CRC_byte and (unless NO_32_BIT_LOADS is defined) Do_CRC_dword make up the loop body of the CRC32 cruncher. | |
91 | ; registers modified: | |
92 | ; eax : running crc value "c" | |
93 | ; esi : pointer to next data byte (or dword) "buf++"; advanced by 1 or 4 | |
94 | ; registers read: | |
95 | ; edi : pointer to base of crc_table array (dword entries, indexed as [edi+ebx*4]) | |
96 | ; scratch registers: | |
97 | ; ebx : index into crc_table array | |
98 | ; (upper three bytes must already be 0 when __686 is undefined, since only bl is written) | |
99 | IFNDEF __686 ; optimize for 386, 486, Pentium | |
100 | Do_CRC MACRO | |
101 | mov bl,al ; tmp = c & 0xFF (relies on the upper bytes of ebx being 0) | |
102 | shr eax,8 ; c = (c >> 8) | |
103 | xor eax,[edi+ebx*4] ; ^ table[tmp] | |
104 | ENDM | |
105 | ELSE ; __686 : optimize for Pentium Pro, Pentium II and compatible CPUs | |
106 | Do_CRC MACRO | |
107 | movzx ebx,al ; tmp = c & 0xFF (zero-extended, so ebx needs no pre-clearing) | |
108 | shr eax,8 ; c = (c >> 8) | |
109 | xor eax,[edi+ebx*4] ; ^ table[tmp] | |
110 | ENDM | |
111 | ENDIF ; ?__686 | |
112 | Do_CRC_byte MACRO | |
113 | xor al, byte ptr [esi] ; c ^= *buf (one input byte into the low byte of c) | |
114 | inc esi ; buf++ | |
115 | Do_CRC ; c = (c >> 8) ^ table[c & 0xFF] | |
116 | ENDM | |
117 | IFNDEF NO_32_BIT_LOADS | |
118 | Do_CRC_dword MACRO | |
119 | xor eax, dword ptr [esi] ; c ^= *(ulg *)buf (four input bytes at once; four Do_CRC steps follow) | |
120 | add esi, 4 ; ((ulg *)buf)++ | |
121 | Do_CRC | |
122 | Do_CRC | |
123 | Do_CRC | |
124 | Do_CRC | |
125 | ENDM | |
126 | ENDIF ; !NO_32_BIT_LOADS | |
127 | ||
128 | IFNDEF NO_ALIGN | |
129 | _TEXT segment use32 para public 'CODE' | |
130 | ELSE | |
131 | _TEXT segment use32 | |
132 | ENDIF | |
133 | assume CS: _TEXT | |
134 ||
135 | public _crc32 | |
136 | _crc32 proc near ; C: ulg crc32(ulg crc, ZCONST uch *buf, extent len); result is returned in eax | |
137 | STD_ENTRY | |
138 | push edi | |
139 | push esi | |
140 | push ebx | |
141 | push edx | |
142 | push ecx | |
143 ||
144 | mov esi,Arg2 ; esi = buf (2nd arg: uch *buf) | |
145 | sub eax,eax ;> if (!buf) (eax = 0 is the value returned in this case) | |
146 | test esi,esi ;> return 0; | |
147 | jz fine ;> else { ("fine" lies after the final not, so the 0 is returned unchanged) | |
148 ||
149 | call _get_crc_table | |
150 | mov edi,eax | |
151 | mov eax,Arg1 ; eax = crc (1st arg: ulg crc); loaded only after the call has returned its result in eax | |
152 | IFNDEF __686 | |
153 | sub ebx,ebx ; ebx=0; lets Do_CRC write only bl and still use ebx as a dword index | |
154 | ENDIF | |
155 | mov ecx,Arg3 ; ecx = len (3rd arg: extent len) | |
156 | not eax ;> c = ~crc; | |
157 ||
158 | IFNDEF NO_UNROLLED_LOOPS | |
159 | IFNDEF NO_32_BIT_LOADS | |
160 | test ecx,ecx | |
161 | je bail | |
162 | align_loop: | |
163 | test esi,3 ; process single bytes until buf is on a | |
164 | jz SHORT aligned_now ; dword boundary (or until len reaches 0) | |
165 | Do_CRC_byte | |
166 | dec ecx | |
167 | jnz align_loop | |
168 | aligned_now: | |
169 | ENDIF ; !NO_32_BIT_LOADS | |
170 | mov edx,ecx ; save remaining len in edx | |
171 | and edx,000000007H ; edx = len % 8 (tail bytes left for the byte loop below) | |
172 | shr ecx,3 ; ecx = len / 8 (number of 8-byte groups; ZF set if there are none) | |
173 | jz SHORT No_Eights | |
174 | IFNDEF NO_ALIGN | |
175 | ; align loop head at start of 486 internal cache line !! | |
176 | align 16 | |
177 | ENDIF | |
178 | Next_Eight: | |
179 | IFNDEF NO_32_BIT_LOADS | |
180 | Do_CRC_dword | |
181 | Do_CRC_dword | |
182 | ELSE ; NO_32_BIT_LOADS | |
183 | Do_CRC_byte | |
184 | Do_CRC_byte | |
185 | Do_CRC_byte | |
186 | Do_CRC_byte | |
187 | Do_CRC_byte | |
188 | Do_CRC_byte | |
189 | Do_CRC_byte | |
190 | Do_CRC_byte | |
191 | ENDIF ; ?NO_32_BIT_LOADS | |
192 | dec ecx | |
193 | jnz Next_Eight | |
194 | No_Eights: | |
195 | mov ecx,edx | |
196 ||
197 | ENDIF ; !NO_UNROLLED_LOOPS | |
198 | IFNDEF NO_JECXZ_SUPPORT | |
199 | jecxz bail ;> if (len) (skip the byte loop when no bytes are left) | |
200 | ELSE | |
201 | test ecx,ecx ;> if (len) (same check without using jecxz) | |
202 | jz SHORT bail | |
203 | ENDIF | |
204 | IFNDEF NO_ALIGN | |
205 | ; align loop head at start of 486 internal cache line !! | |
206 | align 16 | |
207 | ENDIF | |
208 | loupe: ;> do { | |
209 | Do_CRC_byte ; c = CRC32(c, *buf++); | |
210 | dec ecx ;> } while (--len); | |
211 | jnz loupe | |
212 ||
213 | bail: ;> } | |
214 | not eax ;> return ~c; | |
215 | fine: | |
216 | pop ecx | |
217 | pop edx | |
218 | pop ebx | |
219 | pop esi | |
220 | pop edi | |
221 | STD_LEAVE | |
222 | ret | |
223 | _crc32 endp | |
224 ||
225 | _TEXT ends | |
226 | ; | |
227 | ENDIF ; !USE_ZLIB | |
228 | ; | |
229 | end |