]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
efd8b8a4cc27a3376eee0c872a9e998f3e23b7bd
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 File: UnicodeWrappers.c
27
28 Contains: Wrapper routines for Unicode conversion and comparison.
29
30 */
31 #include <sys/param.h>
32 #include <sys/utfconv.h>
33
34 #include "../../hfs_macos_defs.h"
35 #include "UCStringCompareData.h"
36
37 #include "../headers/FileMgrInternal.h"
38 #include "../headers/HFSUnicodeWrappers.h"
39
40 enum {
41 kMinFileExtensionChars = 1, /* does not include dot */
42 kMaxFileExtensionChars = 5 /* does not include dot */
43 };
44
45
46 #define EXTENSIONCHAR(c) (((c) >= 0x61 && (c) <= 0x7A) || \
47 ((c) >= 0x41 && (c) <= 0x5A) || \
48 ((c) >= 0x30 && (c) <= 0x39))
49
50
51 #define IsHexDigit(c) (((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
52 ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
53
54
55 static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
56
57 static void GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );
58
59 static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
60
61
62
63 /*
64 * Convert file ID into a hexidecimal string with no leading zeros
65 */
66 static void
67 GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
68 {
69 SInt32 i, b;
70 UInt8 *translate = (UInt8 *) "0123456789ABCDEF";
71 UInt8 c;
72
73 fileIDStr[0] = '#';
74
75 for ( i = 0, b = 28; b >= 0; b -= 4 ) {
76 c = *(translate + ((fileID >> b) & 0x0000000F));
77
78 /* if its not a leading zero add it to our string */
79 if ( (c != (UInt8) '0') || (i > 1) || (b == 0) )
80 fileIDStr[++i] = c;
81 }
82
83 fileIDStr[++i] = '\0';
84 }
85
86
87 /*
88 * Get filename extension (if any) as a C string
89 */
90 static void
91 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
92 {
93 UInt32 i;
94 UniChar c;
95 UInt16 extChars; /* number of extension chars (excluding dot) */
96 UInt16 maxExtChars;
97 Boolean foundExtension;
98
99 extStr[0] = '\0'; /* assume there's no extension */
100
101 if ( length < 3 )
102 return; /* "x.y" is smallest possible extension */
103
104 if ( length < (kMaxFileExtensionChars + 2) )
105 maxExtChars = length - 2; /* save room for prefix + dot */
106 else
107 maxExtChars = kMaxFileExtensionChars;
108
109 i = length;
110 extChars = 0;
111 foundExtension = false;
112
113 while ( extChars <= maxExtChars ) {
114 c = unicodeStr[--i];
115
116 /* look for leading dot */
117 if ( c == (UniChar) '.' ) {
118 if ( extChars > 0 ) /* cannot end with a dot */
119 foundExtension = true;
120 break;
121 }
122
123 if ( EXTENSIONCHAR(c) )
124 ++extChars;
125 else
126 break;
127 }
128
129 /* if we found one then copy it */
130 if ( foundExtension ) {
131 UInt8 *extStrPtr = extStr;
132 const UniChar *unicodeStrPtr = &unicodeStr[i];
133
134 for ( i = 0; i <= extChars; ++i )
135 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
136 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
137 }
138 }
139
140
141
142 /*
143 * Count filename extension characters (if any)
144 */
145 static UInt32
146 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
147 {
148 UInt32 i;
149 UniChar c;
150 UInt32 extChars; /* number of extension chars (excluding dot) */
151 UInt16 maxExtChars;
152 Boolean foundExtension;
153
154 if ( length < 3 )
155 return 0; /* "x.y" is smallest possible extension */
156
157 if ( length < (kMaxFileExtensionChars + 2) )
158 maxExtChars = length - 2; /* save room for prefix + dot */
159 else
160 maxExtChars = kMaxFileExtensionChars;
161
162 extChars = 0; /* assume there's no extension */
163 i = length - 1; /* index to last ascii character */
164 foundExtension = false;
165
166 while ( extChars <= maxExtChars ) {
167 c = filename[i--];
168
169 /* look for leading dot */
170 if ( c == (UInt8) '.' ) {
171 if ( extChars > 0 ) /* cannot end with a dot */
172 return (extChars);
173
174 break;
175 }
176
177 if ( EXTENSIONCHAR(c) )
178 ++extChars;
179 else
180 break;
181 }
182
183 return 0;
184 }
185
186
187 /*
188 * extract the file id from a mangled name
189 */
190 HFSCatalogNodeID
191 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
192 {
193 short extChars;
194 short i;
195 UInt8 c;
196
197 *prefixLength = 0;
198
199 if ( filename == NULL )
200 return 0;
201
202 if ( length < 28 )
203 return 0; /* too small to have been mangled */
204
205 /* big enough for a file ID (#10) and an extension (.x) ? */
206 if ( length > 5 )
207 extChars = CountFilenameExtensionChars(filename, length);
208 else
209 extChars = 0;
210
211 /* skip over dot plus extension characters */
212 if ( extChars > 0 )
213 length -= (extChars + 1);
214
215 /* scan for file id digits */
216 for ( i = length - 1; i >= 0; --i) {
217 c = filename[i];
218
219 /* look for file ID marker */
220 if ( c == '#' ) {
221 if ( (length - i) < 3 )
222 break; /* too small to be a file ID */
223
224 *prefixLength = i;
225 return HexStringToInteger(length - i - 1, &filename[i+1]);
226 }
227
228 if ( !IsHexDigit(c) )
229 break; /* file ID string must have hex digits */
230 }
231
232 return 0;
233 }
234
235
236
237 static UInt32
238 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
239 {
240 UInt32 value;
241 short i;
242 UInt8 c;
243 const UInt8 *p;
244
245 value = 0;
246 p = hexStr;
247
248 for ( i = 0; i < length; ++i ) {
249 c = *p++;
250
251 if (c >= '0' && c <= '9') {
252 value = value << 4;
253 value += (UInt32) c - (UInt32) '0';
254 } else if (c >= 'A' && c <= 'F') {
255 value = value << 4;
256 value += 10 + ((unsigned int) c - (unsigned int) 'A');
257 } else {
258 return 0; /* bad character */
259 }
260 }
261
262 return value;
263 }
264
265
266 /*
267 * Routine: FastRelString
268 *
269 * Output: returns -1 if str1 < str2
270 * returns 1 if str1 > str2
271 * return 0 if equal
272 *
273 */
274 SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
275 {
276 UInt16* compareTable;
277 SInt32 bestGuess;
278 UInt8 length, length2;
279 UInt8 delta;
280
281 delta = 0;
282 length = *(str1++);
283 length2 = *(str2++);
284
285 if (length == length2)
286 bestGuess = 0;
287 else if (length < length2)
288 {
289 bestGuess = -1;
290 delta = length2 - length;
291 }
292 else
293 {
294 bestGuess = 1;
295 length = length2;
296 }
297
298 compareTable = (UInt16*) gCompareTable;
299
300 while (length--)
301 {
302 UInt8 aChar, bChar;
303
304 aChar = *(str1++);
305 bChar = *(str2++);
306
307 if (aChar != bChar) // If they don't match exacly, do case conversion
308 {
309 UInt16 aSortWord, bSortWord;
310
311 aSortWord = compareTable[aChar];
312 bSortWord = compareTable[bChar];
313
314 if (aSortWord > bSortWord)
315 return 1;
316
317 if (aSortWord < bSortWord)
318 return -1;
319 }
320
321 // If characters match exactly, then go on to next character immediately without
322 // doing any extra work.
323 }
324
325 // if you got to here, then return bestGuess
326 return bestGuess;
327 }
328
329
330
331 //
332 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
333 //
334 // IF RESULT
335 // --------------------------
336 // str1 < str2 => -1
337 // str1 = str2 => 0
338 // str1 > str2 => +1
339 //
340 // The lower case table starts with 256 entries (one for each of the upper bytes
341 // of the original Unicode char). If that entry is zero, then all characters with
342 // that upper byte are already case folded. If the entry is non-zero, then it is
343 // the _index_ (not byte offset) of the start of the sub-table for the characters
344 // with that upper byte. All ignorable characters are folded to the value zero.
345 //
346 // In pseudocode:
347 //
348 // Let c = source Unicode character
349 // Let table[] = lower case table
350 //
351 // lower = table[highbyte(c)]
352 // if (lower == 0)
353 // lower = c
354 // else
355 // lower = table[lower+lowbyte(c)]
356 //
357 // if (lower == 0)
358 // ignore this character
359 //
360 // To handle ignorable characters, we now need a loop to find the next valid character.
361 // Also, we can't pre-compute the number of characters to compare; the string length might
362 // be larger than the number of non-ignorable characters. Further, we must be able to handle
363 // ignorable characters at any point in the string, including as the first or last characters.
364 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
365 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
366 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
367 // an invalid Unicode character).
368 //
369 // Pseudocode:
370 //
371 // while (1) {
372 // c1 = GetNextValidChar(str1) // returns zero if at end of string
373 // c2 = GetNextValidChar(str2)
374 //
375 // if (c1 != c2) break // found a difference
376 //
377 // if (c1 == 0) // reached end of string on both strings at once?
378 // return 0; // yes, so strings are equal
379 // }
380 //
381 // // When we get here, c1 != c2. So, we just need to determine which one is less.
382 // if (c1 < c2)
383 // return -1;
384 // else
385 // return 1;
386 //
387
388 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
389 register ConstUniCharArrayPtr str2, register ItemCount length2)
390 {
391 register UInt16 c1,c2;
392 register UInt16 temp;
393 register UInt16* lowerCaseTable;
394
395 lowerCaseTable = (UInt16*) gLowerCaseTable;
396
397 while (1) {
398 /* Set default values for c1, c2 in case there are no more valid chars */
399 c1 = 0;
400 c2 = 0;
401
402 /* Find next non-ignorable char from str1, or zero if no more */
403 while (length1 && c1 == 0) {
404 c1 = *(str1++);
405 --length1;
406 /* check for basic latin first */
407 if (c1 < 0x0100) {
408 c1 = gLatinCaseFold[c1];
409 break;
410 }
411 /* case fold if neccessary */
412 if ((temp = lowerCaseTable[c1>>8]) != 0)
413 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
414 }
415
416
417 /* Find next non-ignorable char from str2, or zero if no more */
418 while (length2 && c2 == 0) {
419 c2 = *(str2++);
420 --length2;
421 /* check for basic latin first */
422 if (c2 < 0x0100) {
423 c2 = gLatinCaseFold[c2];
424 break;
425 }
426 /* case fold if neccessary */
427 if ((temp = lowerCaseTable[c2>>8]) != 0)
428 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
429 }
430
431 if (c1 != c2) // found a difference, so stop looping
432 break;
433
434 if (c1 == 0) // did we reach the end of both strings at the same time?
435 return 0; // yes, so strings are equal
436 }
437
438 if (c1 < c2)
439 return -1;
440 else
441 return 1;
442 }
443
444
445 OSErr
446 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
447 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
448 {
449 ByteCount subMaxLen;
450 size_t utf8len;
451 char fileIDStr[15];
452 char extStr[15];
453
454 GetFileIDString(cnid, fileIDStr);
455 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
456
457 /* remove extension chars from source */
458 srcLen -= strlen(extStr) * sizeof(UniChar);
459 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
460
461 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
462
463 strcat(dstStr, fileIDStr);
464 strcat(dstStr, extStr);
465 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
466
467 return noErr;
468 }
469