]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
90c08d466ac411ea912321f45fd655fa7b9c20b6
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 File: UnicodeWrappers.c
24
25 Contains: Wrapper routines for Unicode conversion and comparison.
26
27 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/utfconv.h>

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"
36
/*
 * Limits on the number of characters in a filename extension.
 * Neither limit counts the leading dot.
 */
enum {
	kMinFileExtensionChars = 1,
	kMaxFileExtensionChars = 5
};


/*
 * Non-zero when c is one of the code points allowed in a filename
 * extension: 0x61-0x7A (a-z), 0x41-0x5A (A-Z) or 0x30-0x39 (0-9).
 * The argument is evaluated more than once, so it must not have
 * side effects.
 */
#define EXTENSIONCHAR(c)	(((c) >= 0x61 && (c) <= 0x7A) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x30 && (c) <= 0x39))


/*
 * Non-zero when c is an upper-case hexadecimal digit ('0'-'9' or 'A'-'F').
 * Lower-case 'a'-'f' are not accepted.  The argument is evaluated more
 * than once, so it must not have side effects.
 */
#define IsHexDigit(c)		(((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
				 ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
50
51
/*
 * Forward declarations for file-local helpers that are defined further
 * down in this file.
 */

/* Copies the extension (dot included) of a Unicode filename into extStr as a C string. */
static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );

/* Formats fileID as '#' followed by upper-case hex digits into fileIDStr. */
static void GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );

/* Parses length upper-case hex digits from hexStr; returns 0 on a bad character. */
static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
57
58
59
60 /*
61 * Convert file ID into a hexidecimal string with no leading zeros
62 */
63 static void
64 GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
65 {
66 SInt32 i, b;
67 UInt8 *translate = (UInt8 *) "0123456789ABCDEF";
68 UInt8 c;
69
70 fileIDStr[0] = '#';
71
72 for ( i = 0, b = 28; b >= 0; b -= 4 ) {
73 c = *(translate + ((fileID >> b) & 0x0000000F));
74
75 /* if its not a leading zero add it to our string */
76 if ( (c != (UInt8) '0') || (i > 1) || (b == 0) )
77 fileIDStr[++i] = c;
78 }
79
80 fileIDStr[++i] = '\0';
81 }
82
83
84 /*
85 * Get filename extension (if any) as a C string
86 */
87 static void
88 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
89 {
90 UInt32 i;
91 UniChar c;
92 UInt16 extChars; /* number of extension chars (excluding dot) */
93 UInt16 maxExtChars;
94 Boolean foundExtension;
95
96 extStr[0] = '\0'; /* assume there's no extension */
97
98 if ( length < 3 )
99 return; /* "x.y" is smallest possible extension */
100
101 if ( length < (kMaxFileExtensionChars + 2) )
102 maxExtChars = length - 2; /* save room for prefix + dot */
103 else
104 maxExtChars = kMaxFileExtensionChars;
105
106 i = length;
107 extChars = 0;
108 foundExtension = false;
109
110 while ( extChars <= maxExtChars ) {
111 c = unicodeStr[--i];
112
113 /* look for leading dot */
114 if ( c == (UniChar) '.' ) {
115 if ( extChars > 0 ) /* cannot end with a dot */
116 foundExtension = true;
117 break;
118 }
119
120 if ( EXTENSIONCHAR(c) )
121 ++extChars;
122 else
123 break;
124 }
125
126 /* if we found one then copy it */
127 if ( foundExtension ) {
128 UInt8 *extStrPtr = extStr;
129 const UniChar *unicodeStrPtr = &unicodeStr[i];
130
131 for ( i = 0; i <= extChars; ++i )
132 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
133 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
134 }
135 }
136
137
138
139 /*
140 * Count filename extension characters (if any)
141 */
142 static UInt32
143 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
144 {
145 UInt32 i;
146 UniChar c;
147 UInt32 extChars; /* number of extension chars (excluding dot) */
148 UInt16 maxExtChars;
149 Boolean foundExtension;
150
151 if (length == kUndefinedStrLen)
152 length = strlen(filename);
153
154 if ( length < 3 )
155 return 0; /* "x.y" is smallest possible extension */
156
157 if ( length < (kMaxFileExtensionChars + 2) )
158 maxExtChars = length - 2; /* save room for prefix + dot */
159 else
160 maxExtChars = kMaxFileExtensionChars;
161
162 extChars = 0; /* assume there's no extension */
163 i = length - 1; /* index to last ascii character */
164 foundExtension = false;
165
166 while ( extChars <= maxExtChars ) {
167 c = filename[i--];
168
169 /* look for leading dot */
170 if ( c == (UInt8) '.' ) {
171 if ( extChars > 0 ) /* cannot end with a dot */
172 return (extChars);
173
174 break;
175 }
176
177 if ( EXTENSIONCHAR(c) )
178 ++extChars;
179 else
180 break;
181 }
182
183 return 0;
184 }
185
186
187 /*
188 * extract the file id from a mangled name
189 */
190 HFSCatalogNodeID
191 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
192 {
193 short extChars;
194 short i;
195 UInt8 c;
196
197 *prefixLength = 0;
198
199 if ( filename == NULL )
200 return 0;
201
202 if (length == kUndefinedStrLen)
203 length = strlen(filename);
204
205 if ( length < 28 )
206 return 0; /* too small to have been mangled */
207
208 /* big enough for a file ID (#10) and an extension (.x) ? */
209 if ( length > 5 )
210 extChars = CountFilenameExtensionChars(filename, length);
211 else
212 extChars = 0;
213
214 /* skip over dot plus extension characters */
215 if ( extChars > 0 )
216 length -= (extChars + 1);
217
218 /* scan for file id digits */
219 for ( i = length - 1; i >= 0; --i) {
220 c = filename[i];
221
222 /* look for file ID marker */
223 if ( c == '#' ) {
224 if ( (length - i) < 3 )
225 break; /* too small to be a file ID */
226
227 *prefixLength = i;
228 return HexStringToInteger(length - i - 1, &filename[i+1]);
229 }
230
231 if ( !IsHexDigit(c) )
232 break; /* file ID string must have hex digits */
233 }
234
235 return 0;
236 }
237
238
239
240 static UInt32
241 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
242 {
243 UInt32 value;
244 short i;
245 UInt8 c;
246 const UInt8 *p;
247
248 value = 0;
249 p = hexStr;
250
251 for ( i = 0; i < length; ++i ) {
252 c = *p++;
253
254 if (c >= '0' && c <= '9') {
255 value = value << 4;
256 value += (UInt32) c - (UInt32) '0';
257 } else if (c >= 'A' && c <= 'F') {
258 value = value << 4;
259 value += 10 + ((unsigned int) c - (unsigned int) 'A');
260 } else {
261 return 0; /* bad character */
262 }
263 }
264
265 return value;
266 }
267
268
269 /*
270 * Routine: FastRelString
271 *
272 * Output: returns -1 if str1 < str2
273 * returns 1 if str1 > str2
274 * return 0 if equal
275 *
276 */
277 extern unsigned short gCompareTable[];
278
279 SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
280 {
281 UInt16* compareTable;
282 SInt32 bestGuess;
283 UInt8 length, length2;
284 UInt8 delta;
285
286 delta = 0;
287 length = *(str1++);
288 length2 = *(str2++);
289
290 if (length == length2)
291 bestGuess = 0;
292 else if (length < length2)
293 {
294 bestGuess = -1;
295 delta = length2 - length;
296 }
297 else
298 {
299 bestGuess = 1;
300 length = length2;
301 }
302
303 compareTable = (UInt16*) gCompareTable;
304
305 while (length--)
306 {
307 UInt8 aChar, bChar;
308
309 aChar = *(str1++);
310 bChar = *(str2++);
311
312 if (aChar != bChar) // If they don't match exacly, do case conversion
313 {
314 UInt16 aSortWord, bSortWord;
315
316 aSortWord = compareTable[aChar];
317 bSortWord = compareTable[bChar];
318
319 if (aSortWord > bSortWord)
320 return 1;
321
322 if (aSortWord < bSortWord)
323 return -1;
324 }
325
326 // If characters match exactly, then go on to next character immediately without
327 // doing any extra work.
328 }
329
330 // if you got to here, then return bestGuess
331 return bestGuess;
332 }
333
334
335
336 //
337 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
338 //
339 // IF RESULT
340 // --------------------------
341 // str1 < str2 => -1
342 // str1 = str2 => 0
343 // str1 > str2 => +1
344 //
345 // The lower case table starts with 256 entries (one for each of the upper bytes
346 // of the original Unicode char). If that entry is zero, then all characters with
347 // that upper byte are already case folded. If the entry is non-zero, then it is
348 // the _index_ (not byte offset) of the start of the sub-table for the characters
349 // with that upper byte. All ignorable characters are folded to the value zero.
350 //
351 // In pseudocode:
352 //
353 // Let c = source Unicode character
354 // Let table[] = lower case table
355 //
356 // lower = table[highbyte(c)]
357 // if (lower == 0)
358 // lower = c
359 // else
360 // lower = table[lower+lowbyte(c)]
361 //
362 // if (lower == 0)
363 // ignore this character
364 //
365 // To handle ignorable characters, we now need a loop to find the next valid character.
366 // Also, we can't pre-compute the number of characters to compare; the string length might
367 // be larger than the number of non-ignorable characters. Further, we must be able to handle
368 // ignorable characters at any point in the string, including as the first or last characters.
369 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
370 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
371 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
372 // an invalid Unicode character).
373 //
374 // Pseudocode:
375 //
376 // while (1) {
377 // c1 = GetNextValidChar(str1) // returns zero if at end of string
378 // c2 = GetNextValidChar(str2)
379 //
380 // if (c1 != c2) break // found a difference
381 //
382 // if (c1 == 0) // reached end of string on both strings at once?
383 // return 0; // yes, so strings are equal
384 // }
385 //
386 // // When we get here, c1 != c2. So, we just need to determine which one is less.
387 // if (c1 < c2)
388 // return -1;
389 // else
390 // return 1;
391 //
392
393 extern UInt16 gLowerCaseTable[];
394 extern UInt16 gLatinCaseFold[];
395
396 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
397 register ConstUniCharArrayPtr str2, register ItemCount length2)
398 {
399 register UInt16 c1,c2;
400 register UInt16 temp;
401 register UInt16* lowerCaseTable;
402
403 lowerCaseTable = (UInt16*) gLowerCaseTable;
404
405 while (1) {
406 /* Set default values for c1, c2 in case there are no more valid chars */
407 c1 = 0;
408 c2 = 0;
409
410 /* Find next non-ignorable char from str1, or zero if no more */
411 while (length1 && c1 == 0) {
412 c1 = *(str1++);
413 --length1;
414 /* check for basic latin first */
415 if (c1 < 0x0100) {
416 c1 = gLatinCaseFold[c1];
417 break;
418 }
419 /* case fold if neccessary */
420 if ((temp = lowerCaseTable[c1>>8]) != 0)
421 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
422 }
423
424
425 /* Find next non-ignorable char from str2, or zero if no more */
426 while (length2 && c2 == 0) {
427 c2 = *(str2++);
428 --length2;
429 /* check for basic latin first */
430 if (c2 < 0x0100) {
431 c2 = gLatinCaseFold[c2];
432 break;
433 }
434 /* case fold if neccessary */
435 if ((temp = lowerCaseTable[c2>>8]) != 0)
436 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
437 }
438
439 if (c1 != c2) // found a difference, so stop looping
440 break;
441
442 if (c1 == 0) // did we reach the end of both strings at the same time?
443 return 0; // yes, so strings are equal
444 }
445
446 if (c1 < c2)
447 return -1;
448 else
449 return 1;
450 }
451
452
453 OSErr
454 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
455 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
456 {
457 ByteCount subMaxLen;
458 size_t utf8len;
459 char fileIDStr[15];
460 char extStr[15];
461
462 GetFileIDString(cnid, fileIDStr);
463 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
464
465 /* remove extension chars from source */
466 srcLen -= strlen(extStr) * sizeof(UniChar);
467 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
468
469 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
470
471 strcat(dstStr, fileIDStr);
472 strcat(dstStr, extStr);
473 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
474
475 return noErr;
476 }
477