]> git.saurik.com Git - apple/boot.git/blob - i386/libsaio/hfs_compare.c
boot-132.tar.gz
[apple/boot.git] / i386 / libsaio / hfs_compare.c
1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30 #include <sl.h>
31 #include "hfs_CaseTables.h"
32
33 #if ! UNCOMPRESSED
34
35 static unsigned short *
36 UncompressStructure(struct compressed_block *bp, int count, int size)
37 {
38 unsigned short *out = malloc(size);
39 unsigned short *op = out;
40 unsigned short data;
41 int i, j;
42
43 for (i=0; i<count; i++, bp++) {
44 data = bp->data;
45 for (j=0; j<bp->count; j++) {
46 *op++ = data;
47 if (bp->type == kTypeAscending) data++;
48 else if (bp->type == kTypeAscending256) data += 256;
49 }
50 }
51 return out;
52 }
53
54 static void
55 InitCompareTables(void)
56 {
57 if (gCompareTable == 0) {
58 gCompareTable = UncompressStructure(gCompareTableCompressed,
59 kCompareTableNBlocks, kCompareTableDataSize);
60 gLowerCaseTable = UncompressStructure(gLowerCaseTableCompressed,
61 kLowerCaseTableNBlocks, kLowerCaseTableDataSize);
62 }
63 }
64
65 #endif /* ! UNCOMPRESSED */
66
67 //_______________________________________________________________________
68 //
69 // Routine: FastRelString
70 //
71 // Output: returns -1 if str1 < str2
72 // returns 1 if str1 > str2
73 // return 0 if equal
74 //
75 //_______________________________________________________________________
76
77 int32_t FastRelString(u_int8_t * str1, u_int8_t * str2)
78 {
79 int32_t bestGuess;
80 u_int8_t length, length2;
81
82 #if ! UNCOMPRESED
83 InitCompareTables();
84 #endif
85
86 length = *(str1++);
87 length2 = *(str2++);
88
89 if (length == length2)
90 bestGuess = 0;
91 else if (length < length2)
92 bestGuess = -1;
93 else
94 {
95 bestGuess = 1;
96 length = length2;
97 }
98
99 while (length--)
100 {
101 u_int32_t aChar, bChar;
102
103 aChar = *(str1++);
104 bChar = *(str2++);
105
106 if (aChar != bChar) /* If they don't match exacly, do case conversion */
107 {
108 u_int16_t aSortWord, bSortWord;
109
110 aSortWord = gCompareTable[aChar];
111 bSortWord = gCompareTable[bChar];
112
113 if (aSortWord > bSortWord)
114 return 1;
115
116 if (aSortWord < bSortWord)
117 return -1;
118 }
119
120 /*
121 * If characters match exactly, then go on to next character
122 * immediately without doing any extra work.
123 */
124 }
125
126 /* if you got to here, then return bestGuess */
127 return bestGuess;
128 }
129
130
131 //
132 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
133 //
134 // IF RESULT
135 // --------------------------
136 // str1 < str2 => -1
137 // str1 = str2 => 0
138 // str1 > str2 => +1
139 //
140 // The lower case table starts with 256 entries (one for each of the upper bytes
141 // of the original Unicode char). If that entry is zero, then all characters with
142 // that upper byte are already case folded. If the entry is non-zero, then it is
143 // the _index_ (not byte offset) of the start of the sub-table for the characters
144 // with that upper byte. All ignorable characters are folded to the value zero.
145 //
146 // In pseudocode:
147 //
148 // Let c = source Unicode character
149 // Let table[] = lower case table
150 //
151 // lower = table[highbyte(c)]
152 // if (lower == 0)
153 // lower = c
154 // else
155 // lower = table[lower+lowbyte(c)]
156 //
157 // if (lower == 0)
158 // ignore this character
159 //
160 // To handle ignorable characters, we now need a loop to find the next valid character.
161 // Also, we can't pre-compute the number of characters to compare; the string length might
162 // be larger than the number of non-ignorable characters. Further, we must be able to handle
163 // ignorable characters at any point in the string, including as the first or last characters.
164 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
165 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
166 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
167 // an invalid Unicode character).
168 //
169 // Pseudocode:
170 //
171 // while (1) {
172 // c1 = GetNextValidChar(str1) // returns zero if at end of string
173 // c2 = GetNextValidChar(str2)
174 //
175 // if (c1 != c2) break // found a difference
176 //
177 // if (c1 == 0) // reached end of string on both strings at once?
178 // return 0; // yes, so strings are equal
179 // }
180 //
181 // // When we get here, c1 != c2. So, we just need to determine which one is less.
182 // if (c1 < c2)
183 // return -1;
184 // else
185 // return 1;
186 //
187
188 int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
189 u_int16_t * str2, register u_int32_t length2 )
190 {
191 register u_int16_t c1,c2;
192 register u_int16_t temp;
193
194 #if ! UNCOMPRESSED
195 InitCompareTables();
196 #endif
197
198 while (1) {
199 /* Set default values for c1, c2 in case there are no more valid chars */
200 c1 = 0;
201 c2 = 0;
202
203 /* Find next non-ignorable char from str1, or zero if no more */
204 while (length1 && c1 == 0) {
205 c1 = SWAP_BE16(*(str1++));
206 --length1;
207 if ((temp = gLowerCaseTable[c1>>8]) != 0) // is there a subtable for this upper byte?
208 c1 = gLowerCaseTable[temp + (c1 & 0x00FF)]; // yes, so fold the char
209 }
210
211 /* Find next non-ignorable char from str2, or zero if no more */
212 while (length2 && c2 == 0) {
213 c2 = SWAP_BE16(*(str2++));
214 --length2;
215 if ((temp = gLowerCaseTable[c2>>8]) != 0) // is there a subtable for this upper byte?
216 c2 = gLowerCaseTable[temp + (c2 & 0x00FF)]; // yes, so fold the char
217 }
218
219 if (c1 != c2) /* found a difference, so stop looping */
220 break;
221
222 if (c1 == 0) /* did we reach the end of both strings at the same time? */
223 return 0; /* yes, so strings are equal */
224 }
225
226 if (c1 < c2)
227 return -1;
228 else
229 return 1;
230 }
231
232
233 //
234 // BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
235 // Compared using a 16-bit binary comparison (no case folding)
236 //
237 int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
238 u_int16_t * str2, u_int32_t length2)
239 {
240 register u_int16_t c1, c2;
241 int32_t bestGuess;
242 u_int32_t length;
243
244 bestGuess = 0;
245
246 if (length1 < length2) {
247 length = length1;
248 --bestGuess;
249 } else if (length1 > length2) {
250 length = length2;
251 ++bestGuess;
252 } else {
253 length = length1;
254 }
255
256 while (length--) {
257 c1 = *(str1++);
258 c2 = *(str2++);
259
260 if (c1 > c2)
261 return (1);
262 if (c1 < c2)
263 return (-1);
264 }
265
266 return (bestGuess);
267 }
268
269
270 /*
271 * UTF-8 (UCS Transformation Format)
272 *
273 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
274 * requires a maximum of three 3 bytes per UCS-2 character. Only the
275 * shortest encoding required to represent the significant UCS-2 bits
276 * is legal.
277 *
278 * UTF-8 Multibyte Codes
279 *
280 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
281 * -------------------------------------------------------------------
282 * 1 7 0x0000 0x007F 0xxxxxxx
283 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
284 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
285 * -------------------------------------------------------------------
286 */
287
288
289 /*
290 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
291 * null terminated UTF-8 string at utf8p.
292 *
293 * ucslen is the number of UCS-2 input characters (not bytes)
294 * bufsize is the size of the output buffer in bytes
295 */
296 void
297 utf_encodestr( const u_int16_t * ucsp, int ucslen,
298 u_int8_t * utf8p, u_int32_t bufsize, int byte_order )
299 {
300 u_int8_t *bufend;
301 u_int16_t ucs_ch;
302
303 bufend = utf8p + bufsize;
304
305 while (ucslen-- > 0) {
306 if (byte_order == OSBigEndian)
307 ucs_ch = SWAP_BE16(*ucsp++);
308 else
309 ucs_ch = SWAP_LE16(*ucsp++);
310
311 if (ucs_ch < 0x0080) {
312 if (utf8p >= bufend)
313 break;
314 if (ucs_ch == '\0')
315 continue; /* skip over embedded NULLs */
316 *utf8p++ = ucs_ch;
317
318 } else if (ucs_ch < 0x800) {
319 if ((utf8p + 1) >= bufend)
320 break;
321 *utf8p++ = (ucs_ch >> 6) | 0xc0;
322 *utf8p++ = (ucs_ch & 0x3f) | 0x80;
323
324 } else {
325 if ((utf8p + 2) >= bufend)
326 break;
327 *utf8p++ = (ucs_ch >> 12) | 0xe0;
328 *utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
329 *utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
330 }
331 }
332
333 *utf8p = '\0';
334 }
335
336
337 /*
338 * utf_decodestr - Decodes the null terminated UTF-8 string at
339 * utf8p into a UCS-2 (Unicode) string at ucsp.
340 *
341 * ucslen is the number of UCS-2 output characters (not bytes)
342 * bufsize is the size of the output buffer in bytes
343 */
344 void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize, int byte_order)
345 {
346 u_int16_t *bufstart;
347 u_int16_t *bufend;
348 u_int16_t ucs_ch;
349 u_int8_t byte;
350
351 bufstart = ucsp;
352 bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
353
354 while ((byte = *utf8p++) != '\0') {
355 if (ucsp >= bufend)
356 break;
357
358 /* check for ascii */
359 if (byte < 0x80) {
360 ucs_ch = byte;
361
362 if (byte_order == OSBigEndian)
363 *ucsp++ = SWAP_BE16(ucs_ch);
364 else
365 *ucsp++ = SWAP_LE16(ucs_ch);
366
367 continue;
368 }
369
370 switch (byte & 0xf0) {
371 /* 2 byte sequence*/
372 case 0xc0:
373 case 0xd0:
374 /* extract bits 6 - 10 from first byte */
375 ucs_ch = (byte & 0x1F) << 6;
376 break;
377 /* 3 byte sequence*/
378 case 0xe0:
379 /* extract bits 12 - 15 from first byte */
380 ucs_ch = (byte & 0x0F) << 6;
381
382 /* extract bits 6 - 11 from second byte */
383 if (((byte = *utf8p++) & 0xc0) != 0x80)
384 goto stop;
385
386 ucs_ch += (byte & 0x3F);
387 ucs_ch <<= 6;
388 break;
389 default:
390 goto stop;
391 }
392
393 /* extract bits 0 - 5 from final byte */
394 if (((byte = *utf8p++) & 0xc0) != 0x80)
395 goto stop;
396 ucs_ch += (byte & 0x3F);
397
398 if (byte_order == OSBigEndian)
399 *ucsp++ = SWAP_BE16(ucs_ch);
400 else
401 *ucsp++ = SWAP_LE16(ucs_ch);
402 }
403 stop:
404 if (byte_order == OSBigEndian)
405 *ucslen = SWAP_BE16(ucsp - bufstart);
406 else
407 *ucslen = SWAP_LE16(ucsp - bufstart);
408 }