]> git.saurik.com Git - wxWidgets.git/blame - utils/Install/sfxzip/crc_i386.asm
post wxUniv merge fix
[wxWidgets.git] / utils / Install / sfxzip / crc_i386.asm
CommitLineData
f6bcfd97
BP
; crc_i386.asm, optimized CRC calculation function for Zip and UnZip, not
; copyrighted by Paul Kienitz and Christian Spieler.  Last revised 25 Mar 98.
;
; Revised 06-Oct-96, Scott Field (sfield@microsoft.com)
;   fixed to assemble with masm by not using .model directive which makes
;   assumptions about segment alignment.  Also,
;   avoid using loop, and j[e]cxz where possible.  Use mov + inc, rather
;   than lodsb, and other misc. changes resulting in the following performance
;   increases:
;
;      unrolled loops                NO_UNROLLED_LOOPS
;      *8      >8      <8            *8      >8      <8
;
;      +54%    +42%    +35%          +82%    +52%    +25%
;
;   first item in each table is input buffer length, even multiple of 8
;   second item in each table is input buffer length, > 8
;   third item in each table is input buffer length, < 8
;
; Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
;   Incorporated Rodney Brown's 32-bit-reads optimization as found in the
;   UNIX AS source crc_i386.S.  This new code can be disabled by defining
;   the macro symbol NO_32_BIT_LOADS.
;
; Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au)
;   Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs
;   (like the Pentium Pro, Pentium II, and probably some Pentium clones).
;   This optimization is controlled by the macro symbol __686 and is disabled
;   by default.  (This default is based on the assumption that most users
;   do not yet work on a Pentium Pro or Pentium II machine ...)
;
; FLAT memory model assumed.
;
; The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS.
; This results in shorter code at the expense of reduced performance.
;
; Revised 25-Mar-98, Cosmin Truta (cosmint@cs.ubbcluj.ro)
;   Working without the .model directive caused tasm32 version 5.0 to produce
;   bad object code.  The optimized alignments can be optionally disabled
;   by defining NO_ALIGN, which allows .model flat to be used.  There is no
;   need to define this macro if using another version of tasm.
;
;==============================================================================
;
; Do NOT assemble this source if the external crc32 routine from zlib is used.
;
; Assembler setup.  Everything from here down to the matching
; "ENDIF ; !USE_ZLIB" near the end of the file is skipped when USE_ZLIB is
; defined.
        IFNDEF USE_ZLIB
;
        .386p
        name    crc_i386

; .model flat is only emitted when NO_ALIGN is defined; otherwise the code
; segment is declared by hand further down.
        IFDEF NO_ALIGN
        .model flat
        ENDIF

; C prototype: ZCONST ulg near *get_crc_table(void);
; _crc32 calls this and uses the value returned in eax as the base address
; of the CRC lookup table.
extrn   _get_crc_table:near

;
; Stack-frame helpers for _crc32.
;
; STD_ENTRY / STD_LEAVE build and tear down the optional EBP frame.
; Arg1..Arg3 are textual aliases for the three stack-passed arguments of
; _crc32.  Their offsets depend on which variant is assembled:
;   - default:           EBP-relative.  08H skips the saved EBP and the
;                        return address.
;   - NO_STD_STACKFRAME: ESP-relative.  18H skips the return address plus
;                        the five registers _crc32 pushes on entry, so the
;                        aliases are only valid after those five pushes and
;                        while ESP is otherwise unchanged.
;
        IFNDEF NO_STD_STACKFRAME
        ; Use a `standard' stack frame setup on routine entry and exit.
        ; Actually, this option is set as default, because it results
        ; in smaller code !!
STD_ENTRY       MACRO
                push    ebp
                mov     ebp,esp
        ENDM

        Arg1    EQU     08H[ebp]
        Arg2    EQU     0CH[ebp]
        Arg3    EQU     10H[ebp]

STD_LEAVE       MACRO
                pop     ebp
        ENDM

        ELSE    ; NO_STD_STACKFRAME

STD_ENTRY       MACRO
        ENDM

        Arg1    EQU     18H[esp]
        Arg2    EQU     1CH[esp]
        Arg3    EQU     20H[esp]

STD_LEAVE       MACRO
        ENDM

        ENDIF   ; ?NO_STD_STACKFRAME

; These two (three) macros make up the loop body of the CRC32 cruncher.
; registers modified:
;   eax  : crc value "c"
;   esi  : pointer to next data byte (or dword) "buf++"
; registers read:
;   edi  : pointer to base of crc_table array
; scratch registers:
;   ebx  : index into crc_table array
;          (requires upper three bytes = 0 when __686 is undefined)
;
; Do_CRC performs one table step: c = (c >> 8) ^ crc_table[c & 0FFH].
; Table entries are addressed as dwords (index scaled by 4).  The two
; variants differ only in how the index reaches ebx:
;   - without __686, only bl is written, so the rest of ebx must already be
;     zero when the macro is expanded;
;   - with __686, movzx writes all of ebx, so there is no such precondition.
        IFNDEF  __686   ; optimize for 386, 486, Pentium
Do_CRC  MACRO
                mov     bl,al                ; tmp = c & 0xFF
                shr     eax,8                ; c = (c >> 8)
                xor     eax,[edi+ebx*4]      ;  ^ table[tmp]
        ENDM
        ELSE    ; __686 : optimize for Pentium Pro, Pentium II and compatible CPUs
Do_CRC  MACRO
                movzx   ebx,al               ; tmp = c & 0xFF
                shr     eax,8                ; c = (c >> 8)
                xor     eax,[edi+ebx*4]      ;  ^ table[tmp]
        ENDM
        ENDIF   ; ?__686
; Do_CRC_byte: fold one input byte into the running CRC.
;   in/out: eax = c, esi = buf (advanced by 1)
;   uses:   edi, ebx as described for Do_CRC; flags are modified.
Do_CRC_byte     MACRO
                xor     al, byte ptr [esi]   ; c ^= *buf
                inc     esi                  ; buf++
                Do_CRC                       ; c = (c >> 8) ^ table[c & 0xFF]
        ENDM
; Do_CRC_dword: fold four input bytes into the running CRC with one 32-bit
; load followed by four Do_CRC table steps.  Each Do_CRC consumes the low
; byte of eax, so the byte at the lowest address is processed first.
;   in/out: eax = c, esi = buf (advanced by 4)
;   uses:   edi, ebx as described for Do_CRC; flags are modified.
; Only available when NO_32_BIT_LOADS is not defined.
        IFNDEF  NO_32_BIT_LOADS
Do_CRC_dword    MACRO
                xor     eax, dword ptr [esi] ; c ^= *(ulg *)buf
                add     esi, 4               ; ((ulg *)buf)++
                Do_CRC
                Do_CRC
                Do_CRC
                Do_CRC
        ENDM
        ENDIF   ; !NO_32_BIT_LOADS

; Code segment holding _crc32.  It is declared paragraph-aligned by hand
; unless NO_ALIGN is defined.
        IFNDEF NO_ALIGN
_TEXT   segment use32 para public 'CODE'
        ELSE
_TEXT   segment use32
        ENDIF
        assume  CS: _TEXT

;------------------------------------------------------------------------------
; ulg crc32(ulg crc, ZCONST uch *buf, extent len)
;
; Arguments (on the stack, reached through Arg1..Arg3):
;   Arg1 = crc   running CRC value
;   Arg2 = buf   pointer to the data; may be NULL
;   Arg3 = len   number of bytes at buf
; Result:
;   eax = 0 when buf is NULL; otherwise ~crc is run through the table-driven
;         loop for len bytes and complemented again before returning.
; Registers:
;   edi, esi, ebx, edx and ecx are pushed on entry and popped on exit.
;   Flags are modified.
; Calls:
;   _get_crc_table (only when buf is non-NULL); its eax result becomes edi.
;
; Register roles after setup:
;   eax = c (working CRC)       esi = buf (advances through the data)
;   edi = crc_table base        ebx = table index scratch (see Do_CRC)
;   ecx = loop counter          edx = len % 8 while the unrolled loop runs
;
; Build-time switches:
;   NO_UNROLLED_LOOPS  omit the 8-bytes-per-iteration loop; only the
;                      byte-at-a-time loop remains.
;   NO_32_BIT_LOADS    unrolled loop uses eight Do_CRC_byte instead of two
;                      Do_CRC_dword, and the dword pre-alignment is omitted.
;   NO_JECXZ_SUPPORT   use test/jz instead of jecxz.
;   NO_ALIGN           omit the "align 16" in front of the loop heads.
;   __686              Do_CRC uses movzx, so ebx need not be pre-cleared.
;------------------------------------------------------------------------------
        public  _crc32
_crc32  proc    near  ; ulg crc32(ulg crc, ZCONST uch *buf, extent len)
        STD_ENTRY
        ; Exactly five pushes: the ESP-relative Arg offsets used with
        ; NO_STD_STACKFRAME (18H, 1CH, 20H) depend on this count.
        push    edi
        push    esi
        push    ebx
        push    edx
        push    ecx

        mov     esi,Arg2             ; 2nd arg: uch *buf
        sub     eax,eax              ;> if (!buf)
        test    esi,esi              ;>   return 0;
        jz      fine                 ;> else {

        call    _get_crc_table
        mov     edi,eax              ; edi = base of the CRC table
        mov     eax,Arg1             ; 1st arg: ulg crc
        IFNDEF __686
        sub     ebx,ebx              ; ebx=0; make bl usable as a dword
        ENDIF
        mov     ecx,Arg3             ; 3rd arg: extent len
        not     eax                  ;>   c = ~crc;

        IFNDEF NO_UNROLLED_LOOPS
        IFNDEF NO_32_BIT_LOADS
        ; Process single bytes until buf is dword-aligned or len runs out.
        test    ecx,ecx
        je      bail
align_loop:
        test    esi,3                ; align buf pointer on next
        jz      SHORT aligned_now    ;  dword boundary
        Do_CRC_byte
        dec     ecx
        jnz     align_loop
aligned_now:
        ENDIF ; !NO_32_BIT_LOADS
        mov     edx,ecx              ; save len in edx
        and     edx,000000007H       ; edx = len % 8
        shr     ecx,3                ; ecx = len / 8
        jz      SHORT No_Eights
        IFNDEF NO_ALIGN
; align loop head at start of 486 internal cache line !!
        align   16
        ENDIF
Next_Eight:                          ; eight bytes per iteration
        IFNDEF NO_32_BIT_LOADS
        Do_CRC_dword
        Do_CRC_dword
        ELSE   ; NO_32_BIT_LOADS
        Do_CRC_byte
        Do_CRC_byte
        Do_CRC_byte
        Do_CRC_byte
        Do_CRC_byte
        Do_CRC_byte
        Do_CRC_byte
        Do_CRC_byte
        ENDIF  ; ?NO_32_BIT_LOADS
        dec     ecx
        jnz     Next_Eight
No_Eights:
        mov     ecx,edx              ; ecx = remaining 0..7 bytes

        ENDIF ; NO_UNROLLED_LOOPS
        IFNDEF NO_JECXZ_SUPPORT
        jecxz   bail                 ;>   if (len)
        ELSE
        test    ecx,ecx              ;>   if (len)
        jz      SHORT bail
        ENDIF
        IFNDEF NO_ALIGN
; align loop head at start of 486 internal cache line !!
        align   16
        ENDIF
loupe:                               ;>     do {
        Do_CRC_byte                  ;        c = CRC32(c, *buf++);
        dec     ecx                  ;>     } while (--len);
        jnz     loupe

bail:                                ;> }
        not     eax                  ;> return ~c;
fine:                                ; NULL-buf path joins here with eax = 0
        pop     ecx
        pop     edx
        pop     ebx
        pop     esi
        pop     edi
        STD_LEAVE
        ret
_crc32  endp

_TEXT   ends
;
; Closes the IFNDEF USE_ZLIB opened ahead of the .386p directive.
        ENDIF ; !USE_ZLIB
;
end