]>
Commit | Line | Data |
---|---|---|
04fee52e A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * The contents of this file constitute Original Code as defined in and | |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
11 | * | |
12 | * This Original Code and all software distributed under the License are | |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the | |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
19 | * | |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | * HFSCompare.c - Functions for working with and comparing HFS nams. | |
24 | * | |
25 | * Copyright (c) 1999-2000 Apple Computer, Inc. | |
26 | * | |
27 | * DRI: Josh de Cesare | |
28 | */ | |
29 | ||
30 | #include <sl.h> | |
31 | #include "CaseTables.h" | |
32 | ||
33 | ||
34 | //_______________________________________________________________________ | |
35 | // | |
36 | // Routine: FastRelString | |
37 | // | |
38 | // Output: returns -1 if str1 < str2 | |
39 | // returns 1 if str1 > str2 | |
40 | // return 0 if equal | |
41 | // | |
42 | //_______________________________________________________________________ | |
43 | ||
44 | int32_t FastRelString(char *str1, char *str2 ) | |
45 | { | |
46 | int32_t bestGuess; | |
47 | u_int8_t length, length2; | |
48 | ||
49 | ||
50 | length = *(str1++); | |
51 | length2 = *(str2++); | |
52 | ||
53 | if (length == length2) | |
54 | bestGuess = 0; | |
55 | else if (length < length2) | |
56 | bestGuess = -1; | |
57 | else | |
58 | { | |
59 | bestGuess = 1; | |
60 | length = length2; | |
61 | } | |
62 | ||
63 | while (length--) | |
64 | { | |
65 | u_int32_t aChar, bChar; | |
66 | ||
67 | aChar = *(str1++); | |
68 | bChar = *(str2++); | |
69 | ||
70 | if (aChar != bChar) /* If they don't match exacly, do case conversion */ | |
71 | { | |
72 | u_int16_t aSortWord, bSortWord; | |
73 | ||
74 | aSortWord = gCompareTable[aChar]; | |
75 | bSortWord = gCompareTable[bChar]; | |
76 | ||
77 | if (aSortWord > bSortWord) | |
78 | return 1; | |
79 | ||
80 | if (aSortWord < bSortWord) | |
81 | return -1; | |
82 | } | |
83 | ||
84 | /* | |
85 | * If characters match exactly, then go on to next character | |
86 | * immediately without doing any extra work. | |
87 | */ | |
88 | } | |
89 | ||
90 | /* if you got to here, then return bestGuess */ | |
91 | return bestGuess; | |
92 | } | |
93 | ||
94 | ||
95 | ||
96 | // | |
97 | // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering | |
98 | // | |
99 | // IF RESULT | |
100 | // -------------------------- | |
101 | // str1 < str2 => -1 | |
102 | // str1 = str2 => 0 | |
103 | // str1 > str2 => +1 | |
104 | // | |
105 | // The lower case table starts with 256 entries (one for each of the upper bytes | |
106 | // of the original Unicode char). If that entry is zero, then all characters with | |
107 | // that upper byte are already case folded. If the entry is non-zero, then it is | |
108 | // the _index_ (not byte offset) of the start of the sub-table for the characters | |
109 | // with that upper byte. All ignorable characters are folded to the value zero. | |
110 | // | |
111 | // In pseudocode: | |
112 | // | |
113 | // Let c = source Unicode character | |
114 | // Let table[] = lower case table | |
115 | // | |
116 | // lower = table[highbyte(c)] | |
117 | // if (lower == 0) | |
118 | // lower = c | |
119 | // else | |
120 | // lower = table[lower+lowbyte(c)] | |
121 | // | |
122 | // if (lower == 0) | |
123 | // ignore this character | |
124 | // | |
125 | // To handle ignorable characters, we now need a loop to find the next valid character. | |
126 | // Also, we can't pre-compute the number of characters to compare; the string length might | |
127 | // be larger than the number of non-ignorable characters. Further, we must be able to handle | |
128 | // ignorable characters at any point in the string, including as the first or last characters. | |
129 | // We use a zero value as a sentinel to detect both end-of-string and ignorable characters. | |
130 | // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename, | |
131 | // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is | |
132 | // an invalid Unicode character). | |
133 | // | |
134 | // Pseudocode: | |
135 | // | |
136 | // while (1) { | |
137 | // c1 = GetNextValidChar(str1) // returns zero if at end of string | |
138 | // c2 = GetNextValidChar(str2) | |
139 | // | |
140 | // if (c1 != c2) break // found a difference | |
141 | // | |
142 | // if (c1 == 0) // reached end of string on both strings at once? | |
143 | // return 0; // yes, so strings are equal | |
144 | // } | |
145 | // | |
146 | // // When we get here, c1 != c2. So, we just need to determine which one is less. | |
147 | // if (c1 < c2) | |
148 | // return -1; | |
149 | // else | |
150 | // return 1; | |
151 | // | |
152 | ||
153 | int32_t FastUnicodeCompare (u_int16_t *str1, register u_int32_t length1, | |
154 | u_int16_t *str2, register u_int32_t length2) | |
155 | { | |
156 | register u_int16_t c1,c2; | |
157 | register u_int16_t temp; | |
158 | ||
159 | while (1) { | |
160 | /* Set default values for c1, c2 in case there are no more valid chars */ | |
161 | c1 = 0; | |
162 | c2 = 0; | |
163 | ||
164 | /* Find next non-ignorable char from str1, or zero if no more */ | |
165 | while (length1 && c1 == 0) { | |
166 | c1 = *(str1++); | |
167 | --length1; | |
168 | if ((temp = gLowerCaseTable[c1>>8]) != 0) // is there a subtable for this upper byte? | |
169 | c1 = gLowerCaseTable[temp + (c1 & 0x00FF)]; // yes, so fold the char | |
170 | } | |
171 | ||
172 | ||
173 | /* Find next non-ignorable char from str2, or zero if no more */ | |
174 | while (length2 && c2 == 0) { | |
175 | c2 = *(str2++); | |
176 | --length2; | |
177 | if ((temp = gLowerCaseTable[c2>>8]) != 0) // is there a subtable for this upper byte? | |
178 | c2 = gLowerCaseTable[temp + (c2 & 0x00FF)]; // yes, so fold the char | |
179 | } | |
180 | ||
181 | if (c1 != c2) /* found a difference, so stop looping */ | |
182 | break; | |
183 | ||
184 | if (c1 == 0) /* did we reach the end of both strings at the same time? */ | |
185 | return 0; /* yes, so strings are equal */ | |
186 | } | |
187 | ||
188 | if (c1 < c2) | |
189 | return -1; | |
190 | else | |
191 | return 1; | |
192 | } | |
193 | ||
194 | ||
195 | /* | |
196 | * UTF-8 (UCS Transformation Format) | |
197 | * | |
198 | * The following subset of UTF-8 is used to encode UCS-2 filenames. It | |
199 | * requires a maximum of three 3 bytes per UCS-2 character. Only the | |
200 | * shortest encoding required to represent the significant UCS-2 bits | |
201 | * is legal. | |
202 | * | |
203 | * UTF-8 Multibyte Codes | |
204 | * | |
205 | * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary) | |
206 | * ------------------------------------------------------------------- | |
207 | * 1 7 0x0000 0x007F 0xxxxxxx | |
208 | * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx | |
209 | * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx | |
210 | * ------------------------------------------------------------------- | |
211 | */ | |
212 | ||
213 | ||
214 | /* | |
215 | * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a | |
216 | * null terminated UTF-8 string at utf8p. | |
217 | * | |
218 | * ucslen is the number of UCS-2 input characters (not bytes) | |
219 | * bufsize is the size of the output buffer in bytes | |
220 | */ | |
221 | void | |
222 | utf_encodestr(const u_int16_t *ucsp, int ucslen, u_int8_t *utf8p, u_int32_t bufsize) | |
223 | { | |
224 | u_int8_t *bufend; | |
225 | u_int16_t ucs_ch; | |
226 | ||
227 | bufend = utf8p + bufsize; | |
228 | ||
229 | while (ucslen-- > 0) { | |
230 | ucs_ch = *ucsp++; | |
231 | ||
232 | if (ucs_ch < 0x0080) { | |
233 | if (utf8p >= bufend) | |
234 | break; | |
235 | if (ucs_ch == '\0') | |
236 | continue; /* skip over embedded NULLs */ | |
237 | *utf8p++ = ucs_ch; | |
238 | ||
239 | } else if (ucs_ch < 0x800) { | |
240 | if ((utf8p + 1) >= bufend) | |
241 | break; | |
242 | *utf8p++ = (ucs_ch >> 6) | 0xc0; | |
243 | *utf8p++ = (ucs_ch & 0x3f) | 0x80; | |
244 | ||
245 | } else { | |
246 | if ((utf8p + 2) >= bufend) | |
247 | break; | |
248 | *utf8p++ = (ucs_ch >> 12) | 0xe0; | |
249 | *utf8p++ = ((ucs_ch >> 6) & 0x3f) | 0x80; | |
250 | *utf8p++ = ((ucs_ch) & 0x3f) | 0x80; | |
251 | } | |
252 | ||
253 | } | |
254 | ||
255 | *utf8p = '\0'; | |
256 | } | |
257 | ||
258 | ||
259 | /* | |
260 | * utf_decodestr - Decodes the null terminated UTF-8 string at | |
261 | * utf8p into a UCS-2 (Unicode) string at ucsp. | |
262 | * | |
263 | * ucslen is the number of UCS-2 output characters (not bytes) | |
264 | * bufsize is the size of the output buffer in bytes | |
265 | */ | |
266 | void | |
267 | utf_decodestr(const u_int8_t *utf8p, u_int16_t *ucsp, u_int16_t *ucslen, u_int32_t bufsize) | |
268 | { | |
269 | u_int16_t *bufstart; | |
270 | u_int16_t *bufend; | |
271 | u_int16_t ucs_ch; | |
272 | u_int8_t byte; | |
273 | ||
274 | bufstart = ucsp; | |
275 | bufend = (u_int16_t *)((u_int8_t *)ucsp + bufsize); | |
276 | ||
277 | while ((byte = *utf8p++) != '\0') { | |
278 | if (ucsp >= bufend) | |
279 | break; | |
280 | ||
281 | /* check for ascii */ | |
282 | if (byte < 0x80) { | |
283 | ucs_ch = byte; | |
284 | ||
285 | *ucsp++ = ucs_ch; | |
286 | continue; | |
287 | } | |
288 | ||
289 | switch (byte & 0xf0) { | |
290 | /* 2 byte sequence*/ | |
291 | case 0xc0: | |
292 | case 0xd0: | |
293 | /* extract bits 6 - 10 from first byte */ | |
294 | ucs_ch = (byte & 0x1F) << 6; | |
295 | break; | |
296 | /* 3 byte sequence*/ | |
297 | case 0xe0: | |
298 | /* extract bits 12 - 15 from first byte */ | |
299 | ucs_ch = (byte & 0x0F) << 6; | |
300 | ||
301 | /* extract bits 6 - 11 from second byte */ | |
302 | if (((byte = *utf8p++) & 0xc0) != 0x80) | |
303 | goto stop; | |
304 | ||
305 | ucs_ch += (byte & 0x3F); | |
306 | ucs_ch <<= 6; | |
307 | break; | |
308 | default: | |
309 | goto stop; | |
310 | } | |
311 | ||
312 | /* extract bits 0 - 5 from final byte */ | |
313 | if (((byte = *utf8p++) & 0xc0) != 0x80) | |
314 | goto stop; | |
315 | ucs_ch += (byte & 0x3F); | |
316 | ||
317 | *ucsp++ = ucs_ch; | |
318 | } | |
319 | stop: | |
320 | *ucslen = ucsp - bufstart; | |
321 | } |