]> git.saurik.com Git - apple/boot.git/blob - i386/libsaio/hfs_compare.c
boot-111.1.tar.gz
[apple/boot.git] / i386 / libsaio / hfs_compare.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
24 *
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
26 *
27 * DRI: Josh de Cesare
28 */
29
30 #include <sl.h>
31 #include "hfs_CaseTables.h"
32
33 //_______________________________________________________________________
34 //
35 // Routine: FastRelString
36 //
37 // Output: returns -1 if str1 < str2
38 // returns 1 if str1 > str2
39 // return 0 if equal
40 //
41 //_______________________________________________________________________
42
43 int32_t FastRelString(char * str1, char * str2)
44 {
45 int32_t bestGuess;
46 u_int8_t length, length2;
47
48 length = *(str1++);
49 length2 = *(str2++);
50
51 if (length == length2)
52 bestGuess = 0;
53 else if (length < length2)
54 bestGuess = -1;
55 else
56 {
57 bestGuess = 1;
58 length = length2;
59 }
60
61 while (length--)
62 {
63 u_int32_t aChar, bChar;
64
65 aChar = *(str1++);
66 bChar = *(str2++);
67
68 if (aChar != bChar) /* If they don't match exacly, do case conversion */
69 {
70 u_int16_t aSortWord, bSortWord;
71
72 aSortWord = gCompareTable[aChar];
73 bSortWord = gCompareTable[bChar];
74
75 if (aSortWord > bSortWord)
76 return 1;
77
78 if (aSortWord < bSortWord)
79 return -1;
80 }
81
82 /*
83 * If characters match exactly, then go on to next character
84 * immediately without doing any extra work.
85 */
86 }
87
88 /* if you got to here, then return bestGuess */
89 return bestGuess;
90 }
91
92
93 //
94 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
95 //
96 // IF RESULT
97 // --------------------------
98 // str1 < str2 => -1
99 // str1 = str2 => 0
100 // str1 > str2 => +1
101 //
102 // The lower case table starts with 256 entries (one for each of the upper bytes
103 // of the original Unicode char). If that entry is zero, then all characters with
104 // that upper byte are already case folded. If the entry is non-zero, then it is
105 // the _index_ (not byte offset) of the start of the sub-table for the characters
106 // with that upper byte. All ignorable characters are folded to the value zero.
107 //
108 // In pseudocode:
109 //
110 // Let c = source Unicode character
111 // Let table[] = lower case table
112 //
113 // lower = table[highbyte(c)]
114 // if (lower == 0)
115 // lower = c
116 // else
117 // lower = table[lower+lowbyte(c)]
118 //
119 // if (lower == 0)
120 // ignore this character
121 //
122 // To handle ignorable characters, we now need a loop to find the next valid character.
123 // Also, we can't pre-compute the number of characters to compare; the string length might
124 // be larger than the number of non-ignorable characters. Further, we must be able to handle
125 // ignorable characters at any point in the string, including as the first or last characters.
126 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
127 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
128 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
129 // an invalid Unicode character).
130 //
131 // Pseudocode:
132 //
133 // while (1) {
134 // c1 = GetNextValidChar(str1) // returns zero if at end of string
135 // c2 = GetNextValidChar(str2)
136 //
137 // if (c1 != c2) break // found a difference
138 //
139 // if (c1 == 0) // reached end of string on both strings at once?
140 // return 0; // yes, so strings are equal
141 // }
142 //
143 // // When we get here, c1 != c2. So, we just need to determine which one is less.
144 // if (c1 < c2)
145 // return -1;
146 // else
147 // return 1;
148 //
149
150 int32_t FastUnicodeCompare( u_int16_t * str1, register u_int32_t length1,
151 u_int16_t * str2, register u_int32_t length2 )
152 {
153 register u_int16_t c1,c2;
154 register u_int16_t temp;
155
156 while (1) {
157 /* Set default values for c1, c2 in case there are no more valid chars */
158 c1 = 0;
159 c2 = 0;
160
161 /* Find next non-ignorable char from str1, or zero if no more */
162 while (length1 && c1 == 0) {
163 c1 = SWAP_BE16(*(str1++));
164 --length1;
165 if ((temp = gLowerCaseTable[c1>>8]) != 0) // is there a subtable for this upper byte?
166 c1 = gLowerCaseTable[temp + (c1 & 0x00FF)]; // yes, so fold the char
167 }
168
169 /* Find next non-ignorable char from str2, or zero if no more */
170 while (length2 && c2 == 0) {
171 c2 = SWAP_BE16(*(str2++));
172 --length2;
173 if ((temp = gLowerCaseTable[c2>>8]) != 0) // is there a subtable for this upper byte?
174 c2 = gLowerCaseTable[temp + (c2 & 0x00FF)]; // yes, so fold the char
175 }
176
177 if (c1 != c2) /* found a difference, so stop looping */
178 break;
179
180 if (c1 == 0) /* did we reach the end of both strings at the same time? */
181 return 0; /* yes, so strings are equal */
182 }
183
184 if (c1 < c2)
185 return -1;
186 else
187 return 1;
188 }
189
190
191 /*
192 * UTF-8 (UCS Transformation Format)
193 *
194 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
195 * requires a maximum of three 3 bytes per UCS-2 character. Only the
196 * shortest encoding required to represent the significant UCS-2 bits
197 * is legal.
198 *
199 * UTF-8 Multibyte Codes
200 *
201 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
202 * -------------------------------------------------------------------
203 * 1 7 0x0000 0x007F 0xxxxxxx
204 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
205 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
206 * -------------------------------------------------------------------
207 */
208
209
210 /*
211 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
212 * null terminated UTF-8 string at utf8p.
213 *
214 * ucslen is the number of UCS-2 input characters (not bytes)
215 * bufsize is the size of the output buffer in bytes
216 */
217 void
218 utf_encodestr( const u_int16_t * ucsp, int ucslen,
219 u_int8_t * utf8p, u_int32_t bufsize )
220 {
221 u_int8_t *bufend;
222 u_int16_t ucs_ch;
223
224 bufend = utf8p + bufsize;
225
226 while (ucslen-- > 0) {
227 ucs_ch = SWAP_BE16(*ucsp++);
228
229 if (ucs_ch < 0x0080) {
230 if (utf8p >= bufend)
231 break;
232 if (ucs_ch == '\0')
233 continue; /* skip over embedded NULLs */
234 *utf8p++ = ucs_ch;
235
236 } else if (ucs_ch < 0x800) {
237 if ((utf8p + 1) >= bufend)
238 break;
239 *utf8p++ = (ucs_ch >> 6) | 0xc0;
240 *utf8p++ = (ucs_ch & 0x3f) | 0x80;
241
242 } else {
243 if ((utf8p + 2) >= bufend)
244 break;
245 *utf8p++ = (ucs_ch >> 12) | 0xe0;
246 *utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80;
247 *utf8p++ = ((ucs_ch) & 0x3f) | 0x80;
248 }
249 }
250
251 *utf8p = '\0';
252 }
253
254
255 /*
256 * utf_decodestr - Decodes the null terminated UTF-8 string at
257 * utf8p into a UCS-2 (Unicode) string at ucsp.
258 *
259 * ucslen is the number of UCS-2 output characters (not bytes)
260 * bufsize is the size of the output buffer in bytes
261 */
262 void utf_decodestr(const u_int8_t * utf8p, u_int16_t * ucsp, u_int16_t * ucslen, u_int32_t bufsize)
263 {
264 u_int16_t *bufstart;
265 u_int16_t *bufend;
266 u_int16_t ucs_ch;
267 u_int8_t byte;
268
269 bufstart = ucsp;
270 bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize);
271
272 while ((byte = *utf8p++) != '\0') {
273 if (ucsp >= bufend)
274 break;
275
276 /* check for ascii */
277 if (byte < 0x80) {
278 ucs_ch = byte;
279
280 *ucsp++ = SWAP_BE16(ucs_ch);
281 continue;
282 }
283
284 switch (byte & 0xf0) {
285 /* 2 byte sequence*/
286 case 0xc0:
287 case 0xd0:
288 /* extract bits 6 - 10 from first byte */
289 ucs_ch = (byte & 0x1F) << 6;
290 break;
291 /* 3 byte sequence*/
292 case 0xe0:
293 /* extract bits 12 - 15 from first byte */
294 ucs_ch = (byte & 0x0F) << 6;
295
296 /* extract bits 6 - 11 from second byte */
297 if (((byte = *utf8p++) & 0xc0) != 0x80)
298 goto stop;
299
300 ucs_ch += (byte & 0x3F);
301 ucs_ch <<= 6;
302 break;
303 default:
304 goto stop;
305 }
306
307 /* extract bits 0 - 5 from final byte */
308 if (((byte = *utf8p++) & 0xc0) != 0x80)
309 goto stop;
310 ucs_ch += (byte & 0x3F);
311
312 *ucsp++ = SWAP_BE16(ucs_ch);
313 }
314 stop:
315 *ucslen = SWAP_BE16(ucsp - bufstart);
316 }