1 ; crc_i386.asm, optimized CRC calculation function for Zip and UnZip, not
2 ; copyrighted by Paul Kienitz and Christian Spieler. Last revised 25 Mar 98.
4 ; Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
5 ; fixed to assemble with masm by not using the .model directive, which makes
6 ; assumptions about segment alignment. Also,
7 ; avoid using loop, and j[e]cxz where possible. Use mov + inc, rather
8 ; than lodsb, and other misc. changes resulting in the following performance increases:
11 ;     unrolled loops            NO_UNROLLED_LOOPS
14 ;     +54%  +42%  +35%          +82%  +52%  +25%
16 ; first item in each table is input buffer length, even multiple of 8
17 ; second item in each table is input buffer length, > 8
18 ; third item in each table is input buffer length, < 8
20 ; Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
21 ; Incorporated Rodney Brown's 32-bit-reads optimization as found in the
22 ; UNIX AS source crc_i386.S. This new code can be disabled by defining
23 ; the macro symbol NO_32_BIT_LOADS.
25 ; Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
26 ; Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
27 ; (like the Pentium Pro, Pentium II, and probably some Pentium clones).
28 ; This optimization is controlled by the macro symbol __686 and is disabled
29 ; by default. (This default is based on the assumption that most users
30 ; do not yet work on a Pentium Pro or Pentium II machine ...)
32 ; FLAT memory model assumed.
34 ; The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
35 ; This results in shorter code at the expense of reduced performance.
37 ; Revised 25-Mar-98, Cosmin Truta (cosmint@cs.ubbcluj.ro)
38 ; Working without .model directive caused tasm32 version 5.0 to produce
39 ; bad object code. The optimized alignments can be optionally disabled
40 ; by defining NO_ALIGN, which allows .model flat to be used. There is no need
41 ; to define this macro when using another version of tasm.
43 ;==============================================================================
45 ; Do NOT assemble this source if the external crc32 routine from zlib is used.
56 extrn _get_crc_table:near ; ZCONST ulg near *get_crc_table(void);
59 IFNDEF NO_STD_STACKFRAME
60 ; Use a `standard' stack frame setup on routine entry and exit.
61 ; Actually, this option is set as default, because it results
76 ELSE ; NO_STD_STACKFRAME
88 ENDIF ; ?NO_STD_STACKFRAME
90 ; These two (three) macros make up the loop body of the CRC32 cruncher.
93 ; esi : pointer to next data byte (or dword) "buf++"
95 ; edi : pointer to base of crc_table array
97 ; ebx : index into crc_table array
98 ; (requires upper three bytes = 0 when __686 is undefined)
99 IFNDEF __686 ; optimize for 386, 486, Pentium
101 mov bl,al ; tmp = c & 0xFF
102 shr eax,8 ; c = (c >> 8)
103 xor eax,[edi+ebx*4] ; ^ table[tmp]
105 ELSE ; __686 : optimize for Pentium Pro, Pentium II and compatible CPUs
107 movzx ebx,al ; tmp = c & 0xFF
108 shr eax,8 ; c = (c >> 8)
109 xor eax,[edi+ebx*4] ; ^ table[tmp]
113 xor al, byte ptr [esi] ; c ^= *buf
115 Do_CRC ; c = (c >> 8) ^ table[c & 0xFF]
117 IFNDEF NO_32_BIT_LOADS
119 xor eax, dword ptr [esi] ; c ^= *(ulg *)buf
120 add esi, 4 ; ((ulg *)buf)++
126 ENDIF ; !NO_32_BIT_LOADS
129 _TEXT segment use32 para public 'CODE'
136 _crc32 proc near ; ulg crc32(ulg crc, ZCONST uch *buf, extent len)
144 mov esi,Arg2 ; 2nd arg: uch *buf
145 sub eax,eax ;> if (!buf)
146 test esi,esi ;> return 0;
151 mov eax,Arg1 ; 1st arg: ulg crc
153 sub ebx,ebx ; ebx=0; make bl usable as a dword
155 mov ecx,Arg3 ; 3rd arg: extent len
158 IFNDEF NO_UNROLLED_LOOPS
159 IFNDEF NO_32_BIT_LOADS
163 test esi,3 ; align buf pointer on next
164 jz SHORT aligned_now ; dword boundary
169 ENDIF ; !NO_32_BIT_LOADS
170 mov edx,ecx ; save len in edx
171 and edx,000000007H ; edx = len % 8
172 shr ecx,3 ; ecx = len / 8
175 ; align loop head at start of 486 internal cache line !!
179 IFNDEF NO_32_BIT_LOADS
182 ELSE ; NO_32_BIT_LOADS
191 ENDIF ; ?NO_32_BIT_LOADS
197 ENDIF ; NO_UNROLLED_LOOPS
198 IFNDEF NO_JECXZ_SUPPORT
199 jecxz bail ;> if (len)
201 test ecx,ecx ;> if (len)
205 ; align loop head at start of 486 internal cache line !!
209 Do_CRC_byte ; c = CRC32(c, *buf++);
210 dec ecx ;> } while (--len);
214 not eax ;> return ~c;