| 1 | /* crc_i386.c -- Microsoft 32-bit C/C++ adaptation of crc_i386.asm |
| 2 | * Created by Rodney Brown from crc_i386.asm, modified by Chr. Spieler. |
| 3 | * Last revised: 22-Mai-1998 |
| 4 | * |
| 5 | * Original coded (in crc_i386.asm) and put into the public domain |
| 6 | * by Paul Kienitz and Christian Spieler. |
| 7 | * |
| 8 | * Revised 06-Oct-96, Scott Field (sfield@microsoft.com) |
| 9 | * fixed to assemble with masm by not using .model directive which makes |
| 10 | * assumptions about segment alignment. Also, |
| 11 | * avoid using loop, and j[e]cxz where possible. Use mov + inc, rather |
| 12 | * than lodsb, and other misc. changes resulting in the following performance |
| 13 | * increases: |
| 14 | * |
| 15 | * unrolled loops NO_UNROLLED_LOOPS |
| 16 | * *8 >8 <8 *8 >8 <8 |
| 17 | * |
| 18 | * +54% +42% +35% +82% +52% +25% |
| 19 | * |
| 20 | * first item in each table is input buffer length, even multiple of 8 |
| 21 | * second item in each table is input buffer length, > 8 |
| 22 | * third item in each table is input buffer length, < 8 |
| 23 | * |
| 24 | * Revised 02-Apr-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) |
| 25 | * Incorporated Rodney Brown's 32-bit-reads optimization as found in the |
| 26 | * UNIX AS source crc_i386.S. This new code can be disabled by defining |
| 27 | * the macro symbol NO_32_BIT_LOADS. |
| 28 | * |
| 29 | * Revised 12-Oct-97, Chr. Spieler, based on Rodney Brown (rdb@cmutual.com.au) |
| 30 | * Incorporated Rodney Brown's additional tweaks for 32-bit-optimized CPUs |
| 31 | * (like the Pentium Pro, Pentium II, and probably some Pentium clones). |
| 32 | * This optimization is controlled by the macro symbol __686 and is disabled |
| 33 | * by default. (This default is based on the assumption that most users |
| 34 | * do not yet work on a Pentium Pro or Pentium II machine ...) |
| 35 | * |
| 36 | * Revised 16-Nov-97, Chr. Spieler: Made code compatible with Borland C++ |
| 37 | * 32-bit, removed unneeded kludge for potentially unknown movzx mnemonic, |
| 38 | * confirmed correct working with MS VC++ (32-bit). |
| 39 | * |
| 40 | * Revised 22-Mai-98, Peter Kunath, Chr. Spieler : The 16-Nov-97 revision broke |
| 41 | * MSVC 5.0. Inside preprocessor macros, each instruction is enclosed in its |
| 42 | * own __asm {...} construct. For MSVC, a "#pragma warning" was added to |
| 43 | * shut up the "no return value" warning message. |
| 44 | * |
| 45 | * FLAT memory model assumed. |
| 46 | * |
| 47 | * The loop unrolling can be disabled by defining the macro NO_UNROLLED_LOOPS. |
| 48 | * This results in shorter code at the expense of reduced performance. |
| 49 | * |
| 50 | */ |
| 51 | |
| 52 | #include "zip.h" |
| 53 | |
| 54 | #ifndef USE_ZLIB |
| 55 | |
/* Fall back to plain "const" when no earlier header has defined ZCONST. */
#if !defined(ZCONST)
#  define ZCONST const
#endif
| 59 | |
/*
 * Select whether the following inline-assembler code is supported.
 * MSC_INLINE_ASM_32BIT_SUPPORT gets defined for the compilers accepted
 * below; everything between here and the matching #endif further down
 * is compiled only when it is defined.
 */
#if (defined(_MSC_VER) && _MSC_VER >= 700)
#if (defined(_M_IX86) && _M_IX86 >= 300)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
   /* Disable warning for no return value, typical of asm functions */
#  pragma warning( disable : 4035 )
#endif
#endif

/*
 * __BORLANDC__ carries the compiler version as a hexadecimal value
 * (0x0452 is Borland C++ 4.0).  The threshold used to be written as the
 * decimal number 452 (== 0x01C4), which every Borland C++ release
 * satisfies, including the 16-bit-only 2.x/3.x compilers that cannot
 * build this flat-model 32-bit code.
 */
#if (defined(__BORLANDC__) && __BORLANDC__ >= 0x0452)
#  define MSC_INLINE_ASM_32BIT_SUPPORT
#endif
| 72 | |
| 73 | #ifdef MSC_INLINE_ASM_32BIT_SUPPORT |
| 74 | /* This code is intended for Microsoft C/C++ (32-bit) compatible compilers. */ |
| 75 | |
/*
 * Building blocks for the loop body of the CRC32 cruncher.
 *
 * Register roles shared by these macros:
 *   eax   crc value "c"                                   (modified)
 *   esi   pointer to the next data byte or dword, "buf++" (modified)
 *   edi   pointer to the base of the crc_table array      (read only)
 *   ebx   scratch: index into the crc_table array
 *         (when __686 is undefined only bl is written, so the upper
 *          three bytes of ebx have to be zero on entry)
 *
 * Do_CRC folds the low byte of eax into the crc:
 *   c = crc_table[c & 0xff] ^ (c >> 8)
 * crc_table entries are addressed as 4-byte elements ([edi+ebx*4]).
 *
 * Every instruction sits in its own __asm {...} construct; the file
 * history records that other layouts broke MSVC 5.0 inside macros.
 */
#ifdef __686
#define Do_CRC \
{ \
  __asm { movzx ebx, al }; \
  __asm { shr   eax, 8 }; \
  __asm { xor   eax, [edi+ebx*4] }; \
}
#else /* !__686 */
#define Do_CRC \
{ \
  __asm { mov   bl, al }; \
  __asm { shr   eax, 8 }; \
  __asm { xor   eax, [edi+ebx*4] }; \
}
#endif /* ?__686 */
| 98 | |
/*
 * Do_CRC_byte: c = CRC32(c, *buf++)
 * Xors the byte at [esi] into al, steps esi forward by one and then lets
 * Do_CRC fold that byte into the crc held in eax.
 */
#define Do_CRC_byte \
{ \
  __asm { xor al, byte ptr [esi] }; \
  __asm { inc esi }; \
  Do_CRC; \
}
| 103 | |
#if !defined(NO_32_BIT_LOADS)
/*
 * Do_CRC_dword: consume four input bytes with a single 32-bit load.
 * Xors the dword at [esi] into eax, advances esi by four and then runs
 * Do_CRC once for each of the four bytes.
 */
#define Do_CRC_dword \
{ \
  __asm { xor eax, dword ptr [esi] }; \
  __asm { add esi, 4 }; \
  Do_CRC; \
  Do_CRC; \
  Do_CRC; \
  Do_CRC; \
}
#endif /* !NO_32_BIT_LOADS */
| 113 | |
| 114 | /* ========================================================================= */ |
| 115 | ulg crc32(crc, buf, len) |
| 116 | ulg crc; /* crc shift register */ |
| 117 | ZCONST uch *buf; /* pointer to bytes to pump through */ |
| 118 | extent len; /* number of bytes in buf[] */ |
| 119 | /* Run a set of bytes through the crc shift register. If buf is a NULL |
| 120 | pointer, then initialize the crc shift register contents instead. |
| 121 | Return the current crc in either case. */ |
| 122 | { |
| 123 | __asm { |
| 124 | push edx |
| 125 | push ecx |
| 126 | |
| 127 | mov esi,buf ;/* 2nd arg: uch *buf */ |
| 128 | sub eax,eax ;/*> if (!buf) */ |
| 129 | test esi,esi ;/*> return 0; */ |
| 130 | jz fine ;/*> else { */ |
| 131 | |
| 132 | call get_crc_table |
| 133 | mov edi,eax |
| 134 | mov eax,crc ;/* 1st arg: ulg crc */ |
| 135 | #ifndef __686 |
| 136 | sub ebx,ebx ;/* ebx=0; => bl usable as a dword */ |
| 137 | #endif |
| 138 | mov ecx,len ;/* 3rd arg: extent len */ |
| 139 | not eax ;/*> c = ~crc; */ |
| 140 | |
| 141 | #ifndef NO_UNROLLED_LOOPS |
| 142 | # ifndef NO_32_BIT_LOADS |
| 143 | test ecx,ecx |
| 144 | je bail |
| 145 | align_loop: |
| 146 | test esi,3 ;/* align buf pointer on next */ |
| 147 | jz aligned_now ;/* dword boundary */ |
| 148 | } |
| 149 | Do_CRC_byte ; |
| 150 | __asm { |
| 151 | dec ecx |
| 152 | jnz align_loop |
| 153 | aligned_now: |
| 154 | # endif /* !NO_32_BIT_LOADS */ |
| 155 | mov edx,ecx ;/* save len in edx */ |
| 156 | and edx,000000007H ;/* edx = len % 8 */ |
| 157 | shr ecx,3 ;/* ecx = len / 8 */ |
| 158 | jz No_Eights |
| 159 | ; align loop head at start of 486 internal cache line !! |
| 160 | align 16 |
| 161 | Next_Eight: |
| 162 | } |
| 163 | # ifndef NO_32_BIT_LOADS |
| 164 | Do_CRC_dword ; |
| 165 | Do_CRC_dword ; |
| 166 | # else /* NO_32_BIT_LOADS */ |
| 167 | Do_CRC_byte ; |
| 168 | Do_CRC_byte ; |
| 169 | Do_CRC_byte ; |
| 170 | Do_CRC_byte ; |
| 171 | Do_CRC_byte ; |
| 172 | Do_CRC_byte ; |
| 173 | Do_CRC_byte ; |
| 174 | Do_CRC_byte ; |
| 175 | # endif /* ?NO_32_BIT_LOADS */ |
| 176 | __asm { |
| 177 | dec ecx |
| 178 | jnz Next_Eight |
| 179 | No_Eights: |
| 180 | mov ecx,edx |
| 181 | |
| 182 | #endif /* NO_UNROLLED_LOOPS */ |
| 183 | #ifndef NO_JECXZ_SUPPORT |
| 184 | jecxz bail ;/*> if (len) */ |
| 185 | #else |
| 186 | test ecx,ecx ;/*> if (len) */ |
| 187 | jz bail |
| 188 | #endif |
| 189 | ; align loop head at start of 486 internal cache line !! |
| 190 | align 16 |
| 191 | loupe: ;/*> do { */ |
| 192 | } |
| 193 | Do_CRC_byte ;/* c = CRC32(c, *buf++); */ |
| 194 | __asm { |
| 195 | dec ecx ;/*> } while (--len); */ |
| 196 | jnz loupe |
| 197 | |
| 198 | bail: ;/*> } */ |
| 199 | not eax ;/*> return ~c; */ |
| 200 | fine: |
| 201 | pop ecx |
| 202 | pop edx |
| 203 | } |
| 204 | #ifdef NEED_RETURN |
| 205 | return _EAX; |
| 206 | #endif |
| 207 | } |
| 208 | #endif /* MSC_INLINE_ASM_32BIT_SUPPORT */ |
#if defined(_MSC_VER) && _MSC_VER >= 700 && defined(_M_IX86) && _M_IX86 >= 300
   /* Put the "no return value" warning (C4035), switched off for the
      inline-assembler function above, back to its default setting. */
#  pragma warning( default : 4035 )
#endif
| 215 | #endif /* !USE_ZLIB */ |