]>
Commit | Line | Data |
---|---|---|
1 | /* Copyright (C) 1991, 1993, 1995, 1997, 1998 Free Software Foundation, Inc. | |
2 | Contributed by Torbjorn Granlund (tege@sics.se). | |
3 | ||
4 | NOTE: The canonical source of this file is maintained with the GNU C Library. | |
5 | Bugs can be reported to bug-glibc@prep.ai.mit.edu. | |
6 | ||
7 | This program is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by the | |
9 | Free Software Foundation; either version 2, or (at your option) any | |
10 | later version. | |
11 | ||
12 | This program is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with this program; if not, write to the Free Software | |
19 | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | |
20 | USA. */ | |
21 | ||
22 | #ifdef HAVE_CONFIG_H | |
23 | # include "config.h" | |
24 | #endif | |
25 | ||
26 | #undef __ptr_t | |
27 | #if defined __cplusplus || (defined __STDC__ && __STDC__) | |
28 | # define __ptr_t void * | |
29 | #else /* Not C++ or ANSI C. */ | |
30 | # undef const | |
31 | # define const | |
32 | # define __ptr_t char * | |
33 | #endif /* C++ or ANSI C. */ | |
34 | ||
35 | #ifndef __P | |
36 | # if defined __GNUC__ || (defined __STDC__ && __STDC__) | |
37 | # define __P(args) args | |
38 | # else | |
39 | # define __P(args) () | |
40 | # endif /* GCC. */ | |
41 | #endif /* Not __P. */ | |
42 | ||
43 | #if defined HAVE_STRING_H || defined _LIBC | |
44 | # include <string.h> | |
45 | #endif | |
46 | ||
47 | #undef memcmp | |
48 | ||
49 | #ifdef _LIBC | |
50 | ||
51 | # include <memcopy.h> | |
52 | # include <endian.h> | |
53 | ||
54 | # if __BYTE_ORDER == __BIG_ENDIAN | |
55 | # define WORDS_BIGENDIAN | |
56 | # endif | |
57 | ||
58 | #else /* Not in the GNU C library. */ | |
59 | ||
60 | # include <sys/types.h> | |
61 | ||
62 | /* Type to use for aligned memory operations. | |
63 | This should normally be the biggest type supported by a single load | |
64 | and store. Must be an unsigned type. */ | |
65 | # define op_t unsigned long int | |
66 | # define OPSIZ (sizeof(op_t)) | |
67 | ||
68 | /* Threshold value for when to enter the unrolled loops. */ | |
69 | # define OP_T_THRES 16 | |
70 | ||
71 | /* Type to use for unaligned operations. */ | |
72 | typedef unsigned char byte; | |
73 | ||
74 | # ifndef WORDS_BIGENDIAN | |
75 | # define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2))) | |
76 | # else | |
77 | # define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2))) | |
78 | # endif | |
79 | ||
80 | #endif /* In the GNU C library. */ | |
81 | ||
82 | #ifdef WORDS_BIGENDIAN | |
83 | # define CMP_LT_OR_GT(a, b) ((a) > (b) ? 1 : -1) | |
84 | #else | |
85 | # define CMP_LT_OR_GT(a, b) memcmp_bytes ((a), (b)) | |
86 | #endif | |
87 | ||
88 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE! */ | |
89 | ||
90 | /* The strategy of this memcmp is: | |
91 | ||
92 | 1. Compare bytes until one of the block pointers is aligned. | |
93 | ||
94 | 2. Compare using memcmp_common_alignment or | |
95 | memcmp_not_common_alignment, regarding the alignment of the other | |
96 | block after the initial byte operations. The maximum number of | |
97 | full words (of type op_t) are compared in this way. | |
98 | ||
99 | 3. Compare the few remaining bytes. */ | |
100 | ||
101 | #ifndef WORDS_BIGENDIAN | |
102 | /* memcmp_bytes -- Compare A and B bytewise in the byte order of the machine. | |
103 | A and B are known to be different. | |
104 | This is needed only on little-endian machines. */ | |
105 | ||
106 | static int memcmp_bytes __P((op_t, op_t)); | |
107 | ||
108 | # ifdef __GNUC__ | |
109 | __inline | |
110 | # endif | |
111 | static int | |
112 | memcmp_bytes (long unsigned int a, long unsigned int b) | |
113 | { | |
114 | long int srcp1 = (long int) &a; | |
115 | long int srcp2 = (long int) &b; | |
116 | op_t a0, b0; | |
117 | ||
118 | do | |
119 | { | |
120 | a0 = ((byte *) srcp1)[0]; | |
121 | b0 = ((byte *) srcp2)[0]; | |
122 | srcp1 += 1; | |
123 | srcp2 += 1; | |
124 | } | |
125 | while (a0 == b0); | |
126 | return a0 - b0; | |
127 | } | |
128 | #endif | |
129 | ||
130 | static int memcmp_common_alignment __P((long, long, size_t)); | |
131 | ||
132 | /* memcmp_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN `op_t' | |
133 | objects (not LEN bytes!). Both SRCP1 and SRCP2 should be aligned for | |
134 | memory operations on `op_t's. */ | |
135 | #ifdef __GNUC__ | |
136 | __inline | |
137 | #endif | |
138 | static int | |
139 | memcmp_common_alignment (long int srcp1, long int srcp2, size_t len) | |
140 | { | |
141 | op_t a0, a1; | |
142 | op_t b0, b1; | |
143 | ||
144 | switch (len % 4) | |
145 | { | |
146 | default: /* Avoid warning about uninitialized local variables. */ | |
147 | case 2: | |
148 | a0 = ((op_t *) srcp1)[0]; | |
149 | b0 = ((op_t *) srcp2)[0]; | |
150 | srcp1 -= 2 * OPSIZ; | |
151 | srcp2 -= 2 * OPSIZ; | |
152 | len += 2; | |
153 | goto do1; | |
154 | case 3: | |
155 | a1 = ((op_t *) srcp1)[0]; | |
156 | b1 = ((op_t *) srcp2)[0]; | |
157 | srcp1 -= OPSIZ; | |
158 | srcp2 -= OPSIZ; | |
159 | len += 1; | |
160 | goto do2; | |
161 | case 0: | |
162 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
163 | return 0; | |
164 | a0 = ((op_t *) srcp1)[0]; | |
165 | b0 = ((op_t *) srcp2)[0]; | |
166 | goto do3; | |
167 | case 1: | |
168 | a1 = ((op_t *) srcp1)[0]; | |
169 | b1 = ((op_t *) srcp2)[0]; | |
170 | srcp1 += OPSIZ; | |
171 | srcp2 += OPSIZ; | |
172 | len -= 1; | |
173 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
174 | goto do0; | |
175 | /* Fall through. */ | |
176 | } | |
177 | ||
178 | do | |
179 | { | |
180 | a0 = ((op_t *) srcp1)[0]; | |
181 | b0 = ((op_t *) srcp2)[0]; | |
182 | if (a1 != b1) | |
183 | return CMP_LT_OR_GT (a1, b1); | |
184 | ||
185 | do3: | |
186 | a1 = ((op_t *) srcp1)[1]; | |
187 | b1 = ((op_t *) srcp2)[1]; | |
188 | if (a0 != b0) | |
189 | return CMP_LT_OR_GT (a0, b0); | |
190 | ||
191 | do2: | |
192 | a0 = ((op_t *) srcp1)[2]; | |
193 | b0 = ((op_t *) srcp2)[2]; | |
194 | if (a1 != b1) | |
195 | return CMP_LT_OR_GT (a1, b1); | |
196 | ||
197 | do1: | |
198 | a1 = ((op_t *) srcp1)[3]; | |
199 | b1 = ((op_t *) srcp2)[3]; | |
200 | if (a0 != b0) | |
201 | return CMP_LT_OR_GT (a0, b0); | |
202 | ||
203 | srcp1 += 4 * OPSIZ; | |
204 | srcp2 += 4 * OPSIZ; | |
205 | len -= 4; | |
206 | } | |
207 | while (len != 0); | |
208 | ||
209 | /* This is the right position for do0. Please don't move | |
210 | it into the loop. */ | |
211 | do0: | |
212 | if (a1 != b1) | |
213 | return CMP_LT_OR_GT (a1, b1); | |
214 | return 0; | |
215 | } | |
216 | ||
217 | static int memcmp_not_common_alignment __P((long, long, size_t)); | |
218 | ||
219 | /* memcmp_not_common_alignment -- Compare blocks at SRCP1 and SRCP2 with LEN | |
220 | `op_t' objects (not LEN bytes!). SRCP2 should be aligned for memory | |
221 | operations on `op_t', but SRCP1 *should be unaligned*. */ | |
222 | #ifdef __GNUC__ | |
223 | __inline | |
224 | #endif | |
225 | static int | |
226 | memcmp_not_common_alignment (long int srcp1, long int srcp2, size_t len) | |
227 | { | |
228 | op_t a0, a1, a2, a3; | |
229 | op_t b0, b1, b2, b3; | |
230 | op_t x; | |
231 | int shl, shr; | |
232 | ||
233 | /* Calculate how to shift a word read at the memory operation | |
234 | aligned srcp1 to make it aligned for comparison. */ | |
235 | ||
236 | shl = 8 * (srcp1 % OPSIZ); | |
237 | shr = 8 * OPSIZ - shl; | |
238 | ||
239 | /* Make SRCP1 aligned by rounding it down to the beginning of the `op_t' | |
240 | it points in the middle of. */ | |
241 | srcp1 &= -OPSIZ; | |
242 | ||
243 | switch (len % 4) | |
244 | { | |
245 | default: /* Avoid warning about uninitialized local variables. */ | |
246 | case 2: | |
247 | a1 = ((op_t *) srcp1)[0]; | |
248 | a2 = ((op_t *) srcp1)[1]; | |
249 | b2 = ((op_t *) srcp2)[0]; | |
250 | srcp1 -= 1 * OPSIZ; | |
251 | srcp2 -= 2 * OPSIZ; | |
252 | len += 2; | |
253 | goto do1; | |
254 | case 3: | |
255 | a0 = ((op_t *) srcp1)[0]; | |
256 | a1 = ((op_t *) srcp1)[1]; | |
257 | b1 = ((op_t *) srcp2)[0]; | |
258 | srcp2 -= 1 * OPSIZ; | |
259 | len += 1; | |
260 | goto do2; | |
261 | case 0: | |
262 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
263 | return 0; | |
264 | a3 = ((op_t *) srcp1)[0]; | |
265 | a0 = ((op_t *) srcp1)[1]; | |
266 | b0 = ((op_t *) srcp2)[0]; | |
267 | srcp1 += 1 * OPSIZ; | |
268 | goto do3; | |
269 | case 1: | |
270 | a2 = ((op_t *) srcp1)[0]; | |
271 | a3 = ((op_t *) srcp1)[1]; | |
272 | b3 = ((op_t *) srcp2)[0]; | |
273 | srcp1 += 2 * OPSIZ; | |
274 | srcp2 += 1 * OPSIZ; | |
275 | len -= 1; | |
276 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) | |
277 | goto do0; | |
278 | /* Fall through. */ | |
279 | } | |
280 | ||
281 | do | |
282 | { | |
283 | a0 = ((op_t *) srcp1)[0]; | |
284 | b0 = ((op_t *) srcp2)[0]; | |
285 | x = MERGE(a2, shl, a3, shr); | |
286 | if (x != b3) | |
287 | return CMP_LT_OR_GT (x, b3); | |
288 | ||
289 | do3: | |
290 | a1 = ((op_t *) srcp1)[1]; | |
291 | b1 = ((op_t *) srcp2)[1]; | |
292 | x = MERGE(a3, shl, a0, shr); | |
293 | if (x != b0) | |
294 | return CMP_LT_OR_GT (x, b0); | |
295 | ||
296 | do2: | |
297 | a2 = ((op_t *) srcp1)[2]; | |
298 | b2 = ((op_t *) srcp2)[2]; | |
299 | x = MERGE(a0, shl, a1, shr); | |
300 | if (x != b1) | |
301 | return CMP_LT_OR_GT (x, b1); | |
302 | ||
303 | do1: | |
304 | a3 = ((op_t *) srcp1)[3]; | |
305 | b3 = ((op_t *) srcp2)[3]; | |
306 | x = MERGE(a1, shl, a2, shr); | |
307 | if (x != b2) | |
308 | return CMP_LT_OR_GT (x, b2); | |
309 | ||
310 | srcp1 += 4 * OPSIZ; | |
311 | srcp2 += 4 * OPSIZ; | |
312 | len -= 4; | |
313 | } | |
314 | while (len != 0); | |
315 | ||
316 | /* This is the right position for do0. Please don't move | |
317 | it into the loop. */ | |
318 | do0: | |
319 | x = MERGE(a2, shl, a3, shr); | |
320 | if (x != b3) | |
321 | return CMP_LT_OR_GT (x, b3); | |
322 | return 0; | |
323 | } | |
324 | ||
325 | int | |
326 | rpl_memcmp (const void *s1, const void *s2, size_t len) | |
327 | { | |
328 | op_t a0; | |
329 | op_t b0; | |
330 | long int srcp1 = (long int) s1; | |
331 | long int srcp2 = (long int) s2; | |
332 | op_t res; | |
333 | ||
334 | if (len >= OP_T_THRES) | |
335 | { | |
336 | /* There are at least some bytes to compare. No need to test | |
337 | for LEN == 0 in this alignment loop. */ | |
338 | while (srcp2 % OPSIZ != 0) | |
339 | { | |
340 | a0 = ((byte *) srcp1)[0]; | |
341 | b0 = ((byte *) srcp2)[0]; | |
342 | srcp1 += 1; | |
343 | srcp2 += 1; | |
344 | res = a0 - b0; | |
345 | if (res != 0) | |
346 | return res; | |
347 | len -= 1; | |
348 | } | |
349 | ||
350 | /* SRCP2 is now aligned for memory operations on `op_t'. | |
351 | SRCP1 alignment determines if we can do a simple, | |
352 | aligned compare or need to shuffle bits. */ | |
353 | ||
354 | if (srcp1 % OPSIZ == 0) | |
355 | res = memcmp_common_alignment (srcp1, srcp2, len / OPSIZ); | |
356 | else | |
357 | res = memcmp_not_common_alignment (srcp1, srcp2, len / OPSIZ); | |
358 | if (res != 0) | |
359 | return res; | |
360 | ||
361 | /* Number of bytes remaining in the interval [0..OPSIZ-1]. */ | |
362 | srcp1 += len & -OPSIZ; | |
363 | srcp2 += len & -OPSIZ; | |
364 | len %= OPSIZ; | |
365 | } | |
366 | ||
367 | /* There are just a few bytes to compare. Use byte memory operations. */ | |
368 | while (len != 0) | |
369 | { | |
370 | a0 = ((byte *) srcp1)[0]; | |
371 | b0 = ((byte *) srcp2)[0]; | |
372 | srcp1 += 1; | |
373 | srcp2 += 1; | |
374 | res = a0 - b0; | |
375 | if (res != 0) | |
376 | return res; | |
377 | len -= 1; | |
378 | } | |
379 | ||
380 | return 0; | |
381 | } | |
382 | ||
383 | #ifdef weak_alias | |
384 | # undef bcmp | |
385 | weak_alias (memcmp, bcmp) | |
386 | #endif |