]>
Commit | Line | Data |
---|---|---|
1c79356b A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * The contents of this file constitute Original Code as defined in and | |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
11 | * | |
12 | * This Original Code and all software distributed under the License are | |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the | |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
19 | * | |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | File: UnicodeWrappers.c | |
24 | ||
25 | Contains: Wrapper routines for Unicode conversion and comparison. | |
26 | ||
27 | */ | |
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/utfconv.h>

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"
36 | ||
/* Bounds on the length of a filename extension; the dot is not counted. */
enum {
	kMinFileExtensionChars = 1,
	kMaxFileExtensionChars = 5
};


/* Non-zero when c is an ASCII digit or an ASCII letter of either case. */
#define EXTENSIONCHAR(c)	(((c) >= 0x30 && (c) <= 0x39) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x61 && (c) <= 0x7A))


/* Non-zero when c is a hexadecimal digit; only upper-case A-F are accepted. */
#define IsHexDigit(c)		(((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F') || \
				 ((c) >= (UInt8) '0' && (c) <= (UInt8) '9'))
50 | ||
51 | ||
52 | static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr ); | |
53 | ||
54 | static void GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr ); | |
55 | ||
56 | static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr ); | |
57 | ||
58 | ||
59 | ||
60 | /* | |
61 | * Convert file ID into a hexidecimal string with no leading zeros | |
62 | */ | |
63 | static void | |
64 | GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr ) | |
65 | { | |
66 | SInt32 i, b; | |
67 | UInt8 *translate = (UInt8 *) "0123456789ABCDEF"; | |
68 | UInt8 c; | |
69 | ||
70 | fileIDStr[0] = '#'; | |
71 | ||
72 | for ( i = 0, b = 28; b >= 0; b -= 4 ) { | |
73 | c = *(translate + ((fileID >> b) & 0x0000000F)); | |
74 | ||
75 | /* if its not a leading zero add it to our string */ | |
76 | if ( (c != (UInt8) '0') || (i > 1) || (b == 0) ) | |
77 | fileIDStr[++i] = c; | |
78 | } | |
79 | ||
80 | fileIDStr[++i] = '\0'; | |
81 | } | |
82 | ||
83 | ||
84 | /* | |
85 | * Get filename extension (if any) as a C string | |
86 | */ | |
87 | static void | |
88 | GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr) | |
89 | { | |
90 | UInt32 i; | |
91 | UniChar c; | |
92 | UInt16 extChars; /* number of extension chars (excluding dot) */ | |
93 | UInt16 maxExtChars; | |
94 | Boolean foundExtension; | |
95 | ||
96 | extStr[0] = '\0'; /* assume there's no extension */ | |
97 | ||
98 | if ( length < 3 ) | |
99 | return; /* "x.y" is smallest possible extension */ | |
100 | ||
101 | if ( length < (kMaxFileExtensionChars + 2) ) | |
102 | maxExtChars = length - 2; /* save room for prefix + dot */ | |
103 | else | |
104 | maxExtChars = kMaxFileExtensionChars; | |
105 | ||
106 | i = length; | |
107 | extChars = 0; | |
108 | foundExtension = false; | |
109 | ||
110 | while ( extChars <= maxExtChars ) { | |
111 | c = unicodeStr[--i]; | |
112 | ||
113 | /* look for leading dot */ | |
114 | if ( c == (UniChar) '.' ) { | |
115 | if ( extChars > 0 ) /* cannot end with a dot */ | |
116 | foundExtension = true; | |
117 | break; | |
118 | } | |
119 | ||
120 | if ( EXTENSIONCHAR(c) ) | |
121 | ++extChars; | |
122 | else | |
123 | break; | |
124 | } | |
125 | ||
126 | /* if we found one then copy it */ | |
127 | if ( foundExtension ) { | |
128 | UInt8 *extStrPtr = extStr; | |
129 | const UniChar *unicodeStrPtr = &unicodeStr[i]; | |
130 | ||
131 | for ( i = 0; i <= extChars; ++i ) | |
132 | *(extStrPtr++) = (UInt8) *(unicodeStrPtr++); | |
133 | extStr[extChars + 1] = '\0'; /* terminate extension + dot */ | |
134 | } | |
135 | } | |
136 | ||
137 | ||
138 | ||
139 | /* | |
140 | * Count filename extension characters (if any) | |
141 | */ | |
142 | static UInt32 | |
143 | CountFilenameExtensionChars( const unsigned char * filename, UInt32 length ) | |
144 | { | |
145 | UInt32 i; | |
146 | UniChar c; | |
147 | UInt32 extChars; /* number of extension chars (excluding dot) */ | |
148 | UInt16 maxExtChars; | |
149 | Boolean foundExtension; | |
150 | ||
1c79356b A |
151 | if ( length < 3 ) |
152 | return 0; /* "x.y" is smallest possible extension */ | |
153 | ||
154 | if ( length < (kMaxFileExtensionChars + 2) ) | |
155 | maxExtChars = length - 2; /* save room for prefix + dot */ | |
156 | else | |
157 | maxExtChars = kMaxFileExtensionChars; | |
158 | ||
159 | extChars = 0; /* assume there's no extension */ | |
160 | i = length - 1; /* index to last ascii character */ | |
161 | foundExtension = false; | |
162 | ||
163 | while ( extChars <= maxExtChars ) { | |
164 | c = filename[i--]; | |
165 | ||
166 | /* look for leading dot */ | |
167 | if ( c == (UInt8) '.' ) { | |
168 | if ( extChars > 0 ) /* cannot end with a dot */ | |
169 | return (extChars); | |
170 | ||
171 | break; | |
172 | } | |
173 | ||
174 | if ( EXTENSIONCHAR(c) ) | |
175 | ++extChars; | |
176 | else | |
177 | break; | |
178 | } | |
179 | ||
180 | return 0; | |
181 | } | |
182 | ||
183 | ||
184 | /* | |
185 | * extract the file id from a mangled name | |
186 | */ | |
187 | HFSCatalogNodeID | |
188 | GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength) | |
189 | { | |
190 | short extChars; | |
191 | short i; | |
192 | UInt8 c; | |
193 | ||
194 | *prefixLength = 0; | |
195 | ||
196 | if ( filename == NULL ) | |
197 | return 0; | |
198 | ||
1c79356b A |
199 | if ( length < 28 ) |
200 | return 0; /* too small to have been mangled */ | |
201 | ||
202 | /* big enough for a file ID (#10) and an extension (.x) ? */ | |
203 | if ( length > 5 ) | |
204 | extChars = CountFilenameExtensionChars(filename, length); | |
205 | else | |
206 | extChars = 0; | |
207 | ||
208 | /* skip over dot plus extension characters */ | |
209 | if ( extChars > 0 ) | |
210 | length -= (extChars + 1); | |
211 | ||
212 | /* scan for file id digits */ | |
213 | for ( i = length - 1; i >= 0; --i) { | |
214 | c = filename[i]; | |
215 | ||
216 | /* look for file ID marker */ | |
217 | if ( c == '#' ) { | |
218 | if ( (length - i) < 3 ) | |
219 | break; /* too small to be a file ID */ | |
220 | ||
221 | *prefixLength = i; | |
222 | return HexStringToInteger(length - i - 1, &filename[i+1]); | |
223 | } | |
224 | ||
225 | if ( !IsHexDigit(c) ) | |
226 | break; /* file ID string must have hex digits */ | |
227 | } | |
228 | ||
229 | return 0; | |
230 | } | |
231 | ||
232 | ||
233 | ||
234 | static UInt32 | |
235 | HexStringToInteger(UInt32 length, const UInt8 *hexStr) | |
236 | { | |
237 | UInt32 value; | |
238 | short i; | |
239 | UInt8 c; | |
240 | const UInt8 *p; | |
241 | ||
242 | value = 0; | |
243 | p = hexStr; | |
244 | ||
245 | for ( i = 0; i < length; ++i ) { | |
246 | c = *p++; | |
247 | ||
248 | if (c >= '0' && c <= '9') { | |
249 | value = value << 4; | |
250 | value += (UInt32) c - (UInt32) '0'; | |
251 | } else if (c >= 'A' && c <= 'F') { | |
252 | value = value << 4; | |
253 | value += 10 + ((unsigned int) c - (unsigned int) 'A'); | |
254 | } else { | |
255 | return 0; /* bad character */ | |
256 | } | |
257 | } | |
258 | ||
259 | return value; | |
260 | } | |
261 | ||
262 | ||
263 | /* | |
264 | * Routine: FastRelString | |
265 | * | |
266 | * Output: returns -1 if str1 < str2 | |
267 | * returns 1 if str1 > str2 | |
268 | * return 0 if equal | |
269 | * | |
270 | */ | |
1c79356b A |
271 | SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 ) |
272 | { | |
273 | UInt16* compareTable; | |
274 | SInt32 bestGuess; | |
275 | UInt8 length, length2; | |
276 | UInt8 delta; | |
277 | ||
278 | delta = 0; | |
279 | length = *(str1++); | |
280 | length2 = *(str2++); | |
281 | ||
282 | if (length == length2) | |
283 | bestGuess = 0; | |
284 | else if (length < length2) | |
285 | { | |
286 | bestGuess = -1; | |
287 | delta = length2 - length; | |
288 | } | |
289 | else | |
290 | { | |
291 | bestGuess = 1; | |
292 | length = length2; | |
293 | } | |
294 | ||
295 | compareTable = (UInt16*) gCompareTable; | |
296 | ||
297 | while (length--) | |
298 | { | |
299 | UInt8 aChar, bChar; | |
300 | ||
301 | aChar = *(str1++); | |
302 | bChar = *(str2++); | |
303 | ||
304 | if (aChar != bChar) // If they don't match exacly, do case conversion | |
305 | { | |
306 | UInt16 aSortWord, bSortWord; | |
307 | ||
308 | aSortWord = compareTable[aChar]; | |
309 | bSortWord = compareTable[bChar]; | |
310 | ||
311 | if (aSortWord > bSortWord) | |
312 | return 1; | |
313 | ||
314 | if (aSortWord < bSortWord) | |
315 | return -1; | |
316 | } | |
317 | ||
318 | // If characters match exactly, then go on to next character immediately without | |
319 | // doing any extra work. | |
320 | } | |
321 | ||
322 | // if you got to here, then return bestGuess | |
323 | return bestGuess; | |
324 | } | |
325 | ||
326 | ||
327 | ||
328 | // | |
329 | // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering | |
330 | // | |
331 | // IF RESULT | |
332 | // -------------------------- | |
333 | // str1 < str2 => -1 | |
334 | // str1 = str2 => 0 | |
335 | // str1 > str2 => +1 | |
336 | // | |
337 | // The lower case table starts with 256 entries (one for each of the upper bytes | |
338 | // of the original Unicode char). If that entry is zero, then all characters with | |
339 | // that upper byte are already case folded. If the entry is non-zero, then it is | |
340 | // the _index_ (not byte offset) of the start of the sub-table for the characters | |
341 | // with that upper byte. All ignorable characters are folded to the value zero. | |
342 | // | |
343 | // In pseudocode: | |
344 | // | |
345 | // Let c = source Unicode character | |
346 | // Let table[] = lower case table | |
347 | // | |
348 | // lower = table[highbyte(c)] | |
349 | // if (lower == 0) | |
350 | // lower = c | |
351 | // else | |
352 | // lower = table[lower+lowbyte(c)] | |
353 | // | |
354 | // if (lower == 0) | |
355 | // ignore this character | |
356 | // | |
357 | // To handle ignorable characters, we now need a loop to find the next valid character. | |
358 | // Also, we can't pre-compute the number of characters to compare; the string length might | |
359 | // be larger than the number of non-ignorable characters. Further, we must be able to handle | |
360 | // ignorable characters at any point in the string, including as the first or last characters. | |
361 | // We use a zero value as a sentinel to detect both end-of-string and ignorable characters. | |
362 | // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename, | |
363 | // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is | |
364 | // an invalid Unicode character). | |
365 | // | |
366 | // Pseudocode: | |
367 | // | |
368 | // while (1) { | |
369 | // c1 = GetNextValidChar(str1) // returns zero if at end of string | |
370 | // c2 = GetNextValidChar(str2) | |
371 | // | |
372 | // if (c1 != c2) break // found a difference | |
373 | // | |
374 | // if (c1 == 0) // reached end of string on both strings at once? | |
375 | // return 0; // yes, so strings are equal | |
376 | // } | |
377 | // | |
378 | // // When we get here, c1 != c2. So, we just need to determine which one is less. | |
379 | // if (c1 < c2) | |
380 | // return -1; | |
381 | // else | |
382 | // return 1; | |
383 | // | |
384 | ||
1c79356b A |
385 | SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1, |
386 | register ConstUniCharArrayPtr str2, register ItemCount length2) | |
387 | { | |
388 | register UInt16 c1,c2; | |
389 | register UInt16 temp; | |
390 | register UInt16* lowerCaseTable; | |
391 | ||
392 | lowerCaseTable = (UInt16*) gLowerCaseTable; | |
393 | ||
394 | while (1) { | |
395 | /* Set default values for c1, c2 in case there are no more valid chars */ | |
396 | c1 = 0; | |
397 | c2 = 0; | |
398 | ||
399 | /* Find next non-ignorable char from str1, or zero if no more */ | |
400 | while (length1 && c1 == 0) { | |
401 | c1 = *(str1++); | |
402 | --length1; | |
403 | /* check for basic latin first */ | |
404 | if (c1 < 0x0100) { | |
405 | c1 = gLatinCaseFold[c1]; | |
406 | break; | |
407 | } | |
408 | /* case fold if neccessary */ | |
409 | if ((temp = lowerCaseTable[c1>>8]) != 0) | |
410 | c1 = lowerCaseTable[temp + (c1 & 0x00FF)]; | |
411 | } | |
412 | ||
413 | ||
414 | /* Find next non-ignorable char from str2, or zero if no more */ | |
415 | while (length2 && c2 == 0) { | |
416 | c2 = *(str2++); | |
417 | --length2; | |
418 | /* check for basic latin first */ | |
419 | if (c2 < 0x0100) { | |
420 | c2 = gLatinCaseFold[c2]; | |
421 | break; | |
422 | } | |
423 | /* case fold if neccessary */ | |
424 | if ((temp = lowerCaseTable[c2>>8]) != 0) | |
425 | c2 = lowerCaseTable[temp + (c2 & 0x00FF)]; | |
426 | } | |
427 | ||
428 | if (c1 != c2) // found a difference, so stop looping | |
429 | break; | |
430 | ||
431 | if (c1 == 0) // did we reach the end of both strings at the same time? | |
432 | return 0; // yes, so strings are equal | |
433 | } | |
434 | ||
435 | if (c1 < c2) | |
436 | return -1; | |
437 | else | |
438 | return 1; | |
439 | } | |
440 | ||
441 | ||
442 | OSErr | |
443 | ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen, | |
444 | ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid) | |
445 | { | |
446 | ByteCount subMaxLen; | |
447 | size_t utf8len; | |
448 | char fileIDStr[15]; | |
449 | char extStr[15]; | |
450 | ||
451 | GetFileIDString(cnid, fileIDStr); | |
452 | GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr); | |
453 | ||
454 | /* remove extension chars from source */ | |
455 | srcLen -= strlen(extStr) * sizeof(UniChar); | |
456 | subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr)); | |
457 | ||
458 | (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0); | |
459 | ||
460 | strcat(dstStr, fileIDStr); | |
461 | strcat(dstStr, extStr); | |
462 | *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr)); | |
463 | ||
464 | return noErr; | |
465 | } | |
466 |