]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
xnu-792.tar.gz
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 File: UnicodeWrappers.c
24
25 Contains: Wrapper routines for Unicode conversion and comparison.
26
27 */
28 #include <sys/param.h>
29 #include <sys/utfconv.h>
30
31 #include "../../hfs_macos_defs.h"
32 #include "UCStringCompareData.h"
33
34 #include "../headers/FileMgrInternal.h"
35 #include "../headers/HFSUnicodeWrappers.h"
36
/*
 * Limits on the number of characters in a filename extension.
 * Neither value counts the leading dot.
 */
enum {
	kMinFileExtensionChars	= 1,
	kMaxFileExtensionChars	= 5
};
41
42
/*
 * Non-zero when c is an ASCII letter or decimal digit -- the characters this
 * file accepts inside a filename extension.  The argument is expanded several
 * times, so it must not have side effects.
 */
#define EXTENSIONCHAR(c)	(((c) >= 'a' && (c) <= 'z') || \
				 ((c) >= 'A' && (c) <= 'Z') || \
				 ((c) >= '0' && (c) <= '9'))
46
47
/*
 * Non-zero when c is '0'-'9' or an upper-case 'A'-'F'.  Lower-case hex
 * letters are not accepted.  The argument is expanded several times, so it
 * must not have side effects.
 */
#define IsHexDigit(c)		(((c) >= '0' && (c) <= '9') || \
				 ((c) >= 'A' && (c) <= 'F'))
50
51
52 static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
53
54
55 static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
56
57
58 /*
59 * Get filename extension (if any) as a C string
60 */
61 static void
62 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
63 {
64 UInt32 i;
65 UniChar c;
66 UInt16 extChars; /* number of extension chars (excluding dot) */
67 UInt16 maxExtChars;
68 Boolean foundExtension;
69
70 extStr[0] = '\0'; /* assume there's no extension */
71
72 if ( length < 3 )
73 return; /* "x.y" is smallest possible extension */
74
75 if ( length < (kMaxFileExtensionChars + 2) )
76 maxExtChars = length - 2; /* save room for prefix + dot */
77 else
78 maxExtChars = kMaxFileExtensionChars;
79
80 i = length;
81 extChars = 0;
82 foundExtension = false;
83
84 while ( extChars <= maxExtChars ) {
85 c = unicodeStr[--i];
86
87 /* look for leading dot */
88 if ( c == (UniChar) '.' ) {
89 if ( extChars > 0 ) /* cannot end with a dot */
90 foundExtension = true;
91 break;
92 }
93
94 if ( EXTENSIONCHAR(c) )
95 ++extChars;
96 else
97 break;
98 }
99
100 /* if we found one then copy it */
101 if ( foundExtension ) {
102 UInt8 *extStrPtr = extStr;
103 const UniChar *unicodeStrPtr = &unicodeStr[i];
104
105 for ( i = 0; i <= extChars; ++i )
106 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
107 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
108 }
109 }
110
111
112
113 /*
114 * Count filename extension characters (if any)
115 */
116 static UInt32
117 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
118 {
119 UInt32 i;
120 UniChar c;
121 UInt32 extChars; /* number of extension chars (excluding dot) */
122 UInt16 maxExtChars;
123 Boolean foundExtension;
124
125 if ( length < 3 )
126 return 0; /* "x.y" is smallest possible extension */
127
128 if ( length < (kMaxFileExtensionChars + 2) )
129 maxExtChars = length - 2; /* save room for prefix + dot */
130 else
131 maxExtChars = kMaxFileExtensionChars;
132
133 extChars = 0; /* assume there's no extension */
134 i = length - 1; /* index to last ascii character */
135 foundExtension = false;
136
137 while ( extChars <= maxExtChars ) {
138 c = filename[i--];
139
140 /* look for leading dot */
141 if ( c == (UInt8) '.' ) {
142 if ( extChars > 0 ) /* cannot end with a dot */
143 return (extChars);
144
145 break;
146 }
147
148 if ( EXTENSIONCHAR(c) )
149 ++extChars;
150 else
151 break;
152 }
153
154 return 0;
155 }
156
157
158 /*
159 * extract the file id from a mangled name
160 */
161 HFSCatalogNodeID
162 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
163 {
164 short extChars;
165 short i;
166 UInt8 c;
167
168 *prefixLength = 0;
169
170 if ( filename == NULL )
171 return 0;
172
173 if ( length < 28 )
174 return 0; /* too small to have been mangled */
175
176 /* big enough for a file ID (#10) and an extension (.x) ? */
177 if ( length > 5 )
178 extChars = CountFilenameExtensionChars(filename, length);
179 else
180 extChars = 0;
181
182 /* skip over dot plus extension characters */
183 if ( extChars > 0 )
184 length -= (extChars + 1);
185
186 /* scan for file id digits */
187 for ( i = length - 1; i >= 0; --i) {
188 c = filename[i];
189
190 /* look for file ID marker */
191 if ( c == '#' ) {
192 if ( (length - i) < 3 )
193 break; /* too small to be a file ID */
194
195 *prefixLength = i;
196 return HexStringToInteger(length - i - 1, &filename[i+1]);
197 }
198
199 if ( !IsHexDigit(c) )
200 break; /* file ID string must have hex digits */
201 }
202
203 return 0;
204 }
205
206
207
208 static UInt32
209 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
210 {
211 UInt32 value;
212 UInt32 i;
213 UInt8 c;
214 const UInt8 *p;
215
216 value = 0;
217 p = hexStr;
218
219 for ( i = 0; i < length; ++i ) {
220 c = *p++;
221
222 if (c >= '0' && c <= '9') {
223 value = value << 4;
224 value += (UInt32) c - (UInt32) '0';
225 } else if (c >= 'A' && c <= 'F') {
226 value = value << 4;
227 value += 10 + ((unsigned int) c - (unsigned int) 'A');
228 } else {
229 return 0; /* bad character */
230 }
231 }
232
233 return value;
234 }
235
236
237 /*
238 * Routine: FastRelString
239 *
240 * Output: returns -1 if str1 < str2
241 * returns 1 if str1 > str2
242 * return 0 if equal
243 *
244 */
245 SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
246 {
247 UInt16* compareTable;
248 SInt32 bestGuess;
249 UInt8 length, length2;
250 UInt8 delta;
251
252 delta = 0;
253 length = *(str1++);
254 length2 = *(str2++);
255
256 if (length == length2)
257 bestGuess = 0;
258 else if (length < length2)
259 {
260 bestGuess = -1;
261 delta = length2 - length;
262 }
263 else
264 {
265 bestGuess = 1;
266 length = length2;
267 }
268
269 compareTable = (UInt16*) gCompareTable;
270
271 while (length--)
272 {
273 UInt8 aChar, bChar;
274
275 aChar = *(str1++);
276 bChar = *(str2++);
277
278 if (aChar != bChar) // If they don't match exacly, do case conversion
279 {
280 UInt16 aSortWord, bSortWord;
281
282 aSortWord = compareTable[aChar];
283 bSortWord = compareTable[bChar];
284
285 if (aSortWord > bSortWord)
286 return 1;
287
288 if (aSortWord < bSortWord)
289 return -1;
290 }
291
292 // If characters match exactly, then go on to next character immediately without
293 // doing any extra work.
294 }
295
296 // if you got to here, then return bestGuess
297 return bestGuess;
298 }
299
300
301
302 //
303 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
304 //
305 // IF RESULT
306 // --------------------------
307 // str1 < str2 => -1
308 // str1 = str2 => 0
309 // str1 > str2 => +1
310 //
311 // The lower case table starts with 256 entries (one for each of the upper bytes
312 // of the original Unicode char). If that entry is zero, then all characters with
313 // that upper byte are already case folded. If the entry is non-zero, then it is
314 // the _index_ (not byte offset) of the start of the sub-table for the characters
315 // with that upper byte. All ignorable characters are folded to the value zero.
316 //
317 // In pseudocode:
318 //
319 // Let c = source Unicode character
320 // Let table[] = lower case table
321 //
322 // lower = table[highbyte(c)]
323 // if (lower == 0)
324 // lower = c
325 // else
326 // lower = table[lower+lowbyte(c)]
327 //
328 // if (lower == 0)
329 // ignore this character
330 //
331 // To handle ignorable characters, we now need a loop to find the next valid character.
332 // Also, we can't pre-compute the number of characters to compare; the string length might
333 // be larger than the number of non-ignorable characters. Further, we must be able to handle
334 // ignorable characters at any point in the string, including as the first or last characters.
335 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
336 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
337 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
338 // an invalid Unicode character).
339 //
340 // Pseudocode:
341 //
342 // while (1) {
343 // c1 = GetNextValidChar(str1) // returns zero if at end of string
344 // c2 = GetNextValidChar(str2)
345 //
346 // if (c1 != c2) break // found a difference
347 //
348 // if (c1 == 0) // reached end of string on both strings at once?
349 // return 0; // yes, so strings are equal
350 // }
351 //
352 // // When we get here, c1 != c2. So, we just need to determine which one is less.
353 // if (c1 < c2)
354 // return -1;
355 // else
356 // return 1;
357 //
358
359 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
360 register ConstUniCharArrayPtr str2, register ItemCount length2)
361 {
362 register UInt16 c1,c2;
363 register UInt16 temp;
364 register UInt16* lowerCaseTable;
365
366 lowerCaseTable = (UInt16*) gLowerCaseTable;
367
368 while (1) {
369 /* Set default values for c1, c2 in case there are no more valid chars */
370 c1 = 0;
371 c2 = 0;
372
373 /* Find next non-ignorable char from str1, or zero if no more */
374 while (length1 && c1 == 0) {
375 c1 = *(str1++);
376 --length1;
377 /* check for basic latin first */
378 if (c1 < 0x0100) {
379 c1 = gLatinCaseFold[c1];
380 break;
381 }
382 /* case fold if neccessary */
383 if ((temp = lowerCaseTable[c1>>8]) != 0)
384 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
385 }
386
387
388 /* Find next non-ignorable char from str2, or zero if no more */
389 while (length2 && c2 == 0) {
390 c2 = *(str2++);
391 --length2;
392 /* check for basic latin first */
393 if (c2 < 0x0100) {
394 c2 = gLatinCaseFold[c2];
395 break;
396 }
397 /* case fold if neccessary */
398 if ((temp = lowerCaseTable[c2>>8]) != 0)
399 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
400 }
401
402 if (c1 != c2) // found a difference, so stop looping
403 break;
404
405 if (c1 == 0) // did we reach the end of both strings at the same time?
406 return 0; // yes, so strings are equal
407 }
408
409 if (c1 < c2)
410 return -1;
411 else
412 return 1;
413 }
414
415
416 OSErr
417 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
418 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
419 {
420 ByteCount subMaxLen;
421 size_t utf8len;
422 char fileIDStr[15];
423 char extStr[15];
424
425 sprintf(fileIDStr, "#%X", cnid);
426 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
427
428 /* remove extension chars from source */
429 srcLen -= strlen(extStr) * sizeof(UniChar);
430 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
431
432 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
433
434 strcat(dstStr, fileIDStr);
435 strcat(dstStr, extStr);
436 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
437
438 return noErr;
439 }
440