]> git.saurik.com Git - apple/bootx.git/blob - bootx.tproj/fs.subproj/HFSCompare.c
24efaef9c5f79aceee58ec7c0c42393f8e3344c5
[apple/bootx.git] / bootx.tproj / fs.subproj / HFSCompare.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30 #include <sl.h>
31 #include "CaseTables.h"
32
33
34 //_______________________________________________________________________
35 //
36 // Routine: FastRelString
37 //
38 // Output: returns -1 if str1 < str2
39 // returns 1 if str1 > str2
40 // return 0 if equal
41 //
42 //_______________________________________________________________________
43
44 int32_t FastRelString(char *str1, char *str2 )
45 {
46 int32_t bestGuess;
47 u_int8_t length, length2;
48
49
50 length = *(str1++);
51 length2 = *(str2++);
52
53 if (length == length2)
54 bestGuess = 0;
55 else if (length < length2)
56 bestGuess = -1;
57 else
58 {
59 bestGuess = 1;
60 length = length2;
61 }
62
63 while (length--)
64 {
65 u_int32_t aChar, bChar;
66
67 aChar = *(str1++);
68 bChar = *(str2++);
69
70 if (aChar != bChar) /* If they don't match exacly, do case conversion */
71 {
72 u_int16_t aSortWord, bSortWord;
73
74 aSortWord = gCompareTable[aChar];
75 bSortWord = gCompareTable[bChar];
76
77 if (aSortWord > bSortWord)
78 return 1;
79
80 if (aSortWord < bSortWord)
81 return -1;
82 }
83
84 /*
85 * If characters match exactly, then go on to next character
86 * immediately without doing any extra work.
87 */
88 }
89
90 /* if you got to here, then return bestGuess */
91 return bestGuess;
92 }
93
94
95
96 //
97 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
98 //
99 // IF RESULT
100 // --------------------------
101 // str1 < str2 => -1
102 // str1 = str2 => 0
103 // str1 > str2 => +1
104 //
105 // The lower case table starts with 256 entries (one for each of the upper bytes
106 // of the original Unicode char). If that entry is zero, then all characters with
107 // that upper byte are already case folded. If the entry is non-zero, then it is
108 // the _index_ (not byte offset) of the start of the sub-table for the characters
109 // with that upper byte. All ignorable characters are folded to the value zero.
110 //
111 // In pseudocode:
112 //
113 // Let c = source Unicode character
114 // Let table[] = lower case table
115 //
116 // lower = table[highbyte(c)]
117 // if (lower == 0)
118 // lower = c
119 // else
120 // lower = table[lower+lowbyte(c)]
121 //
122 // if (lower == 0)
123 // ignore this character
124 //
125 // To handle ignorable characters, we now need a loop to find the next valid character.
126 // Also, we can't pre-compute the number of characters to compare; the string length might
127 // be larger than the number of non-ignorable characters. Further, we must be able to handle
128 // ignorable characters at any point in the string, including as the first or last characters.
129 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
130 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
131 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
132 // an invalid Unicode character).
133 //
134 // Pseudocode:
135 //
136 // while (1) {
137 // c1 = GetNextValidChar(str1) // returns zero if at end of string
138 // c2 = GetNextValidChar(str2)
139 //
140 // if (c1 != c2) break // found a difference
141 //
142 // if (c1 == 0) // reached end of string on both strings at once?
143 // return 0; // yes, so strings are equal
144 // }
145 //
146 // // When we get here, c1 != c2. So, we just need to determine which one is less.
147 // if (c1 < c2)
148 // return -1;
149 // else
150 // return 1;
151 //
152
153 int32_t FastUnicodeCompare (u_int16_t *str1, register u_int32_t length1,
154 u_int16_t *str2, register u_int32_t length2)
155 {
156 register u_int16_t c1,c2;
157 register u_int16_t temp;
158
159 while (1) {
160 /* Set default values for c1, c2 in case there are no more valid chars */
161 c1 = 0;
162 c2 = 0;
163
164 /* Find next non-ignorable char from str1, or zero if no more */
165 while (length1 && c1 == 0) {
166 c1 = *(str1++);
167 --length1;
168 if ((temp = gLowerCaseTable[c1>>8]) != 0) // is there a subtable for this upper byte?
169 c1 = gLowerCaseTable[temp + (c1 & 0x00FF)]; // yes, so fold the char
170 }
171
172
173 /* Find next non-ignorable char from str2, or zero if no more */
174 while (length2 && c2 == 0) {
175 c2 = *(str2++);
176 --length2;
177 if ((temp = gLowerCaseTable[c2>>8]) != 0) // is there a subtable for this upper byte?
178 c2 = gLowerCaseTable[temp + (c2 & 0x00FF)]; // yes, so fold the char
179 }
180
181 if (c1 != c2) /* found a difference, so stop looping */
182 break;
183
184 if (c1 == 0) /* did we reach the end of both strings at the same time? */
185 return 0; /* yes, so strings are equal */
186 }
187
188 if (c1 < c2)
189 return -1;
190 else
191 return 1;
192 }
193
194
195 /*
196 * UTF-8 (UCS Transformation Format)
197 *
198 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
199 * requires a maximum of three 3 bytes per UCS-2 character. Only the
200 * shortest encoding required to represent the significant UCS-2 bits
201 * is legal.
202 *
203 * UTF-8 Multibyte Codes
204 *
205 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
206 * -------------------------------------------------------------------
207 * 1 7 0x0000 0x007F 0xxxxxxx
208 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
209 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
210 * -------------------------------------------------------------------
211 */
212
213
214 /*
215 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
216 * null terminated UTF-8 string at utf8p.
217 *
218 * ucslen is the number of UCS-2 input characters (not bytes)
219 * bufsize is the size of the output buffer in bytes
220 */
221 void
222 utf_encodestr(const u_int16_t *ucsp, int ucslen, u_int8_t *utf8p, u_int32_t bufsize)
223 {
224 u_int8_t *bufend;
225 u_int16_t ucs_ch;
226
227 bufend = utf8p + bufsize;
228
229 while (ucslen-- > 0) {
230 ucs_ch = *ucsp++;
231
232 if (ucs_ch < 0x0080) {
233 if (utf8p >= bufend)
234 break;
235 if (ucs_ch == '\0')
236 continue; /* skip over embedded NULLs */
237 *utf8p++ = ucs_ch;
238
239 } else if (ucs_ch < 0x800) {
240 if ((utf8p + 1) >= bufend)
241 break;
242 *utf8p++ = (ucs_ch >> 6) | 0xc0;
243 *utf8p++ = (ucs_ch & 0x3f) | 0x80;
244
245 } else {
246 if ((utf8p + 2) >= bufend)
247 break;
248 *utf8p++ = (ucs_ch >> 12) | 0xe0;
249 *utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
250 *utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
251 }
252
253 }
254
255 *utf8p = '\0';
256 }
257
258
259 /*
260 * utf_decodestr - Decodes the null terminated UTF-8 string at
261 * utf8p into a UCS-2 (Unicode) string at ucsp.
262 *
263 * ucslen is the number of UCS-2 output characters (not bytes)
264 * bufsize is the size of the output buffer in bytes
265 */
266 void
267 utf_decodestr(const u_int8_t *utf8p, u_int16_t *ucsp, u_int16_t *ucslen, u_int32_t bufsize)
268 {
269 u_int16_t *bufstart;
270 u_int16_t *bufend;
271 u_int16_t ucs_ch;
272 u_int8_t byte;
273
274 bufstart = ucsp;
275 bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
276
277 while ((byte = *utf8p++) != '\0') {
278 if (ucsp >= bufend)
279 break;
280
281 /* check for ascii */
282 if (byte < 0x80) {
283 ucs_ch = byte;
284
285 *ucsp++ = ucs_ch;
286 continue;
287 }
288
289 switch (byte & 0xf0) {
290 /* 2 byte sequence*/
291 case 0xc0:
292 case 0xd0:
293 /* extract bits 6 - 10 from first byte */
294 ucs_ch = (byte & 0x1F) << 6;
295 break;
296 /* 3 byte sequence*/
297 case 0xe0:
298 /* extract bits 12 - 15 from first byte */
299 ucs_ch = (byte & 0x0F) << 6;
300
301 /* extract bits 6 - 11 from second byte */
302 if (((byte = *utf8p++) & 0xc0) != 0x80)
303 goto stop;
304
305 ucs_ch += (byte & 0x3F);
306 ucs_ch <<= 6;
307 break;
308 default:
309 goto stop;
310 }
311
312 /* extract bits 0 - 5 from final byte */
313 if (((byte = *utf8p++) & 0xc0) != 0x80)
314 goto stop;
315 ucs_ch += (byte & 0x3F);
316
317 *ucsp++ = ucs_ch;
318 }
319 stop:
320 *ucslen = ucsp - bufstart;
321 }