]> git.saurik.com Git - apple/bootx.git/blob - bootx.tproj/fs.subproj/HFSCompare.c
d159c5efb364757bde21f75b4da522edefb9d8fa
[apple/bootx.git] / bootx.tproj / fs.subproj / HFSCompare.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * HFSCompare.c - Functions for working with and comparing HFS nams.
27 *
28 * Copyright (c) 1999-2000 Apple Computer, Inc.
29 *
30 * DRI: Josh de Cesare
31 */
32
33 #include <sl.h>
34 #include "CaseTables.h"
35
36
37 //_______________________________________________________________________
38 //
39 // Routine: FastRelString
40 //
41 // Output: returns -1 if str1 < str2
42 // returns 1 if str1 > str2
43 // return 0 if equal
44 //
45 //_______________________________________________________________________
46
47 int32_t FastRelString(char *str1, char *str2 )
48 {
49 int32_t bestGuess;
50 u_int8_t length, length2;
51
52
53 length = *(str1++);
54 length2 = *(str2++);
55
56 if (length == length2)
57 bestGuess = 0;
58 else if (length < length2)
59 bestGuess = -1;
60 else
61 {
62 bestGuess = 1;
63 length = length2;
64 }
65
66 while (length--)
67 {
68 u_int32_t aChar, bChar;
69
70 aChar = *(str1++);
71 bChar = *(str2++);
72
73 if (aChar != bChar) /* If they don't match exacly, do case conversion */
74 {
75 u_int16_t aSortWord, bSortWord;
76
77 aSortWord = gCompareTable[aChar];
78 bSortWord = gCompareTable[bChar];
79
80 if (aSortWord > bSortWord)
81 return 1;
82
83 if (aSortWord < bSortWord)
84 return -1;
85 }
86
87 /*
88 * If characters match exactly, then go on to next character
89 * immediately without doing any extra work.
90 */
91 }
92
93 /* if you got to here, then return bestGuess */
94 return bestGuess;
95 }
96
97
98
99 //
100 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
101 //
102 // IF RESULT
103 // --------------------------
104 // str1 < str2 => -1
105 // str1 = str2 => 0
106 // str1 > str2 => +1
107 //
108 // The lower case table starts with 256 entries (one for each of the upper bytes
109 // of the original Unicode char). If that entry is zero, then all characters with
110 // that upper byte are already case folded. If the entry is non-zero, then it is
111 // the _index_ (not byte offset) of the start of the sub-table for the characters
112 // with that upper byte. All ignorable characters are folded to the value zero.
113 //
114 // In pseudocode:
115 //
116 // Let c = source Unicode character
117 // Let table[] = lower case table
118 //
119 // lower = table[highbyte(c)]
120 // if (lower == 0)
121 // lower = c
122 // else
123 // lower = table[lower+lowbyte(c)]
124 //
125 // if (lower == 0)
126 // ignore this character
127 //
128 // To handle ignorable characters, we now need a loop to find the next valid character.
129 // Also, we can't pre-compute the number of characters to compare; the string length might
130 // be larger than the number of non-ignorable characters. Further, we must be able to handle
131 // ignorable characters at any point in the string, including as the first or last characters.
132 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
133 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
134 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
135 // an invalid Unicode character).
136 //
137 // Pseudocode:
138 //
139 // while (1) {
140 // c1 = GetNextValidChar(str1) // returns zero if at end of string
141 // c2 = GetNextValidChar(str2)
142 //
143 // if (c1 != c2) break // found a difference
144 //
145 // if (c1 == 0) // reached end of string on both strings at once?
146 // return 0; // yes, so strings are equal
147 // }
148 //
149 // // When we get here, c1 != c2. So, we just need to determine which one is less.
150 // if (c1 < c2)
151 // return -1;
152 // else
153 // return 1;
154 //
155
156 int32_t FastUnicodeCompare (u_int16_t *str1, register u_int32_t length1,
157 u_int16_t *str2, register u_int32_t length2)
158 {
159 register u_int16_t c1,c2;
160 register u_int16_t temp;
161
162 while (1) {
163 /* Set default values for c1, c2 in case there are no more valid chars */
164 c1 = 0;
165 c2 = 0;
166
167 /* Find next non-ignorable char from str1, or zero if no more */
168 while (length1 && c1 == 0) {
169 c1 = *(str1++);
170 --length1;
171 if ((temp = gLowerCaseTable[c1>>8]) != 0) // is there a subtable for this upper byte?
172 c1 = gLowerCaseTable[temp + (c1 & 0x00FF)]; // yes, so fold the char
173 }
174
175
176 /* Find next non-ignorable char from str2, or zero if no more */
177 while (length2 && c2 == 0) {
178 c2 = *(str2++);
179 --length2;
180 if ((temp = gLowerCaseTable[c2>>8]) != 0) // is there a subtable for this upper byte?
181 c2 = gLowerCaseTable[temp + (c2 & 0x00FF)]; // yes, so fold the char
182 }
183
184 if (c1 != c2) /* found a difference, so stop looping */
185 break;
186
187 if (c1 == 0) /* did we reach the end of both strings at the same time? */
188 return 0; /* yes, so strings are equal */
189 }
190
191 if (c1 < c2)
192 return -1;
193 else
194 return 1;
195 }
196
197 //
198 // BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
199 // Compared using a 16-bit binary comparison (no case folding)
200 //
201 int32_t BinaryUnicodeCompare (u_int16_t * str1, u_int32_t length1,
202 u_int16_t * str2, u_int32_t length2)
203 {
204 register u_int16_t c1, c2;
205 int32_t bestGuess;
206 u_int32_t length;
207
208 bestGuess = 0;
209
210 if (length1 < length2) {
211 length = length1;
212 --bestGuess;
213 } else if (length1 > length2) {
214 length = length2;
215 ++bestGuess;
216 } else {
217 length = length1;
218 }
219
220 while (length--) {
221 c1 = *(str1++);
222 c2 = *(str2++);
223
224 if (c1 > c2)
225 return (1);
226 if (c1 < c2)
227 return (-1);
228 }
229
230 return (bestGuess);
231 }
232
233 /*
234 * UTF-8 (UCS Transformation Format)
235 *
236 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
237 * requires a maximum of three 3 bytes per UCS-2 character. Only the
238 * shortest encoding required to represent the significant UCS-2 bits
239 * is legal.
240 *
241 * UTF-8 Multibyte Codes
242 *
243 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
244 * -------------------------------------------------------------------
245 * 1 7 0x0000 0x007F 0xxxxxxx
246 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
247 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
248 * -------------------------------------------------------------------
249 */
250
251
252 /*
253 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
254 * null terminated UTF-8 string at utf8p.
255 *
256 * ucslen is the number of UCS-2 input characters (not bytes)
257 * bufsize is the size of the output buffer in bytes
258 */
259 void
260 utf_encodestr(const u_int16_t *ucsp, int ucslen, u_int8_t *utf8p, u_int32_t bufsize)
261 {
262 u_int8_t *bufend;
263 u_int16_t ucs_ch;
264
265 bufend = utf8p + bufsize;
266
267 while (ucslen-- > 0) {
268 ucs_ch = *ucsp++;
269
270 if (ucs_ch < 0x0080) {
271 if (utf8p >= bufend)
272 break;
273 if (ucs_ch == '\0')
274 continue; /* skip over embedded NULLs */
275 *utf8p++ = ucs_ch;
276
277 } else if (ucs_ch < 0x800) {
278 if ((utf8p + 1) >= bufend)
279 break;
280 *utf8p++ = (ucs_ch >> 6) | 0xc0;
281 *utf8p++ = (ucs_ch & 0x3f) | 0x80;
282
283 } else {
284 if ((utf8p + 2) >= bufend)
285 break;
286 *utf8p++ = (ucs_ch >> 12) | 0xe0;
287 *utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
288 *utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
289 }
290
291 }
292
293 *utf8p = '\0';
294 }
295
296
297 /*
298 * utf_decodestr - Decodes the null terminated UTF-8 string at
299 * utf8p into a UCS-2 (Unicode) string at ucsp.
300 *
301 * ucslen is the number of UCS-2 output characters (not bytes)
302 * bufsize is the size of the output buffer in bytes
303 */
304 void
305 utf_decodestr(const u_int8_t *utf8p, u_int16_t *ucsp, u_int16_t *ucslen, u_int32_t bufsize)
306 {
307 u_int16_t *bufstart;
308 u_int16_t *bufend;
309 u_int16_t ucs_ch;
310 u_int8_t byte;
311
312 bufstart = ucsp;
313 bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
314
315 while ((byte = *utf8p++) != '\0') {
316 if (ucsp >= bufend)
317 break;
318
319 /* check for ascii */
320 if (byte < 0x80) {
321 ucs_ch = byte;
322
323 *ucsp++ = ucs_ch;
324 continue;
325 }
326
327 switch (byte & 0xf0) {
328 /* 2 byte sequence*/
329 case 0xc0:
330 case 0xd0:
331 /* extract bits 6 - 10 from first byte */
332 ucs_ch = (byte & 0x1F) << 6;
333 break;
334 /* 3 byte sequence*/
335 case 0xe0:
336 /* extract bits 12 - 15 from first byte */
337 ucs_ch = (byte & 0x0F) << 6;
338
339 /* extract bits 6 - 11 from second byte */
340 if (((byte = *utf8p++) & 0xc0) != 0x80)
341 goto stop;
342
343 ucs_ch += (byte & 0x3F);
344 ucs_ch <<= 6;
345 break;
346 default:
347 goto stop;
348 }
349
350 /* extract bits 0 - 5 from final byte */
351 if (((byte = *utf8p++) & 0xc0) != 0x80)
352 goto stop;
353 ucs_ch += (byte & 0x3F);
354
355 *ucsp++ = ucs_ch;
356 }
357 stop:
358 *ucslen = ucsp - bufstart;
359 }