]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
f51970868e1a77171d8fd62e42aefd09913829ac
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 File: UnicodeWrappers.c
25
26 Contains: Wrapper routines for Unicode conversion and comparison.
27
28 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/utfconv.h>

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"
37
/*
 * Limits on the number of characters in a filename extension.
 * Neither limit counts the leading dot.
 */
enum {
	kMinFileExtensionChars = 1,
	kMaxFileExtensionChars = 5
};


/*
 * Non-zero when c is an ASCII digit (0x30-0x39), upper case letter
 * (0x41-0x5A) or lower case letter (0x61-0x7A).
 * The argument is evaluated more than once.
 */
#define EXTENSIONCHAR(c)	(((c) >= 0x30 && (c) <= 0x39) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x61 && (c) <= 0x7A))


/*
 * Non-zero when c is one of '0'-'9' or upper case 'A'-'F'.
 * Lower case 'a'-'f' is NOT accepted.
 * The argument is evaluated more than once.
 */
#define IsHexDigit(c)		(((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F') || \
				 ((c) >= (UInt8) '0' && (c) <= (UInt8) '9'))
51
52
53 static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
54
55
56 static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
57
58
59 /*
60 * Get filename extension (if any) as a C string
61 */
62 static void
63 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
64 {
65 UInt32 i;
66 UniChar c;
67 UInt16 extChars; /* number of extension chars (excluding dot) */
68 UInt16 maxExtChars;
69 Boolean foundExtension;
70
71 extStr[0] = '\0'; /* assume there's no extension */
72
73 if ( length < 3 )
74 return; /* "x.y" is smallest possible extension */
75
76 if ( length < (kMaxFileExtensionChars + 2) )
77 maxExtChars = length - 2; /* save room for prefix + dot */
78 else
79 maxExtChars = kMaxFileExtensionChars;
80
81 i = length;
82 extChars = 0;
83 foundExtension = false;
84
85 while ( extChars <= maxExtChars ) {
86 c = unicodeStr[--i];
87
88 /* look for leading dot */
89 if ( c == (UniChar) '.' ) {
90 if ( extChars > 0 ) /* cannot end with a dot */
91 foundExtension = true;
92 break;
93 }
94
95 if ( EXTENSIONCHAR(c) )
96 ++extChars;
97 else
98 break;
99 }
100
101 /* if we found one then copy it */
102 if ( foundExtension ) {
103 UInt8 *extStrPtr = extStr;
104 const UniChar *unicodeStrPtr = &unicodeStr[i];
105
106 for ( i = 0; i <= extChars; ++i )
107 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
108 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
109 }
110 }
111
112
113
114 /*
115 * Count filename extension characters (if any)
116 */
117 static UInt32
118 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
119 {
120 UInt32 i;
121 UniChar c;
122 UInt32 extChars; /* number of extension chars (excluding dot) */
123 UInt16 maxExtChars;
124 Boolean foundExtension;
125
126 if ( length < 3 )
127 return 0; /* "x.y" is smallest possible extension */
128
129 if ( length < (kMaxFileExtensionChars + 2) )
130 maxExtChars = length - 2; /* save room for prefix + dot */
131 else
132 maxExtChars = kMaxFileExtensionChars;
133
134 extChars = 0; /* assume there's no extension */
135 i = length - 1; /* index to last ascii character */
136 foundExtension = false;
137
138 while ( extChars <= maxExtChars ) {
139 c = filename[i--];
140
141 /* look for leading dot */
142 if ( c == (UInt8) '.' ) {
143 if ( extChars > 0 ) /* cannot end with a dot */
144 return (extChars);
145
146 break;
147 }
148
149 if ( EXTENSIONCHAR(c) )
150 ++extChars;
151 else
152 break;
153 }
154
155 return 0;
156 }
157
158
159 /*
160 * extract the file id from a mangled name
161 */
162 HFSCatalogNodeID
163 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
164 {
165 short extChars;
166 short i;
167 UInt8 c;
168
169 *prefixLength = 0;
170
171 if ( filename == NULL )
172 return 0;
173
174 if ( length < 28 )
175 return 0; /* too small to have been mangled */
176
177 /* big enough for a file ID (#10) and an extension (.x) ? */
178 if ( length > 5 )
179 extChars = CountFilenameExtensionChars(filename, length);
180 else
181 extChars = 0;
182
183 /* skip over dot plus extension characters */
184 if ( extChars > 0 )
185 length -= (extChars + 1);
186
187 /* scan for file id digits */
188 for ( i = length - 1; i >= 0; --i) {
189 c = filename[i];
190
191 /* look for file ID marker */
192 if ( c == '#' ) {
193 if ( (length - i) < 3 )
194 break; /* too small to be a file ID */
195
196 *prefixLength = i;
197 return HexStringToInteger(length - i - 1, &filename[i+1]);
198 }
199
200 if ( !IsHexDigit(c) )
201 break; /* file ID string must have hex digits */
202 }
203
204 return 0;
205 }
206
207
208
209 static UInt32
210 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
211 {
212 UInt32 value;
213 UInt32 i;
214 UInt8 c;
215 const UInt8 *p;
216
217 value = 0;
218 p = hexStr;
219
220 for ( i = 0; i < length; ++i ) {
221 c = *p++;
222
223 if (c >= '0' && c <= '9') {
224 value = value << 4;
225 value += (UInt32) c - (UInt32) '0';
226 } else if (c >= 'A' && c <= 'F') {
227 value = value << 4;
228 value += 10 + ((unsigned int) c - (unsigned int) 'A');
229 } else {
230 return 0; /* bad character */
231 }
232 }
233
234 return value;
235 }
236
237
238 /*
239 * Routine: FastRelString
240 *
241 * Output: returns -1 if str1 < str2
242 * returns 1 if str1 > str2
243 * return 0 if equal
244 *
245 */
246 SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
247 {
248 UInt16* compareTable;
249 SInt32 bestGuess;
250 UInt8 length, length2;
251 UInt8 delta;
252
253 delta = 0;
254 length = *(str1++);
255 length2 = *(str2++);
256
257 if (length == length2)
258 bestGuess = 0;
259 else if (length < length2)
260 {
261 bestGuess = -1;
262 delta = length2 - length;
263 }
264 else
265 {
266 bestGuess = 1;
267 length = length2;
268 }
269
270 compareTable = (UInt16*) gCompareTable;
271
272 while (length--)
273 {
274 UInt8 aChar, bChar;
275
276 aChar = *(str1++);
277 bChar = *(str2++);
278
279 if (aChar != bChar) // If they don't match exacly, do case conversion
280 {
281 UInt16 aSortWord, bSortWord;
282
283 aSortWord = compareTable[aChar];
284 bSortWord = compareTable[bChar];
285
286 if (aSortWord > bSortWord)
287 return 1;
288
289 if (aSortWord < bSortWord)
290 return -1;
291 }
292
293 // If characters match exactly, then go on to next character immediately without
294 // doing any extra work.
295 }
296
297 // if you got to here, then return bestGuess
298 return bestGuess;
299 }
300
301
302
303 //
304 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
305 //
306 // IF RESULT
307 // --------------------------
308 // str1 < str2 => -1
309 // str1 = str2 => 0
310 // str1 > str2 => +1
311 //
312 // The lower case table starts with 256 entries (one for each of the upper bytes
313 // of the original Unicode char). If that entry is zero, then all characters with
314 // that upper byte are already case folded. If the entry is non-zero, then it is
315 // the _index_ (not byte offset) of the start of the sub-table for the characters
316 // with that upper byte. All ignorable characters are folded to the value zero.
317 //
318 // In pseudocode:
319 //
320 // Let c = source Unicode character
321 // Let table[] = lower case table
322 //
323 // lower = table[highbyte(c)]
324 // if (lower == 0)
325 // lower = c
326 // else
327 // lower = table[lower+lowbyte(c)]
328 //
329 // if (lower == 0)
330 // ignore this character
331 //
332 // To handle ignorable characters, we now need a loop to find the next valid character.
333 // Also, we can't pre-compute the number of characters to compare; the string length might
334 // be larger than the number of non-ignorable characters. Further, we must be able to handle
335 // ignorable characters at any point in the string, including as the first or last characters.
336 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
337 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
338 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
339 // an invalid Unicode character).
340 //
341 // Pseudocode:
342 //
343 // while (1) {
344 // c1 = GetNextValidChar(str1) // returns zero if at end of string
345 // c2 = GetNextValidChar(str2)
346 //
347 // if (c1 != c2) break // found a difference
348 //
349 // if (c1 == 0) // reached end of string on both strings at once?
350 // return 0; // yes, so strings are equal
351 // }
352 //
353 // // When we get here, c1 != c2. So, we just need to determine which one is less.
354 // if (c1 < c2)
355 // return -1;
356 // else
357 // return 1;
358 //
359
360 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
361 register ConstUniCharArrayPtr str2, register ItemCount length2)
362 {
363 register UInt16 c1,c2;
364 register UInt16 temp;
365 register UInt16* lowerCaseTable;
366
367 lowerCaseTable = (UInt16*) gLowerCaseTable;
368
369 while (1) {
370 /* Set default values for c1, c2 in case there are no more valid chars */
371 c1 = 0;
372 c2 = 0;
373
374 /* Find next non-ignorable char from str1, or zero if no more */
375 while (length1 && c1 == 0) {
376 c1 = *(str1++);
377 --length1;
378 /* check for basic latin first */
379 if (c1 < 0x0100) {
380 c1 = gLatinCaseFold[c1];
381 break;
382 }
383 /* case fold if neccessary */
384 if ((temp = lowerCaseTable[c1>>8]) != 0)
385 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
386 }
387
388
389 /* Find next non-ignorable char from str2, or zero if no more */
390 while (length2 && c2 == 0) {
391 c2 = *(str2++);
392 --length2;
393 /* check for basic latin first */
394 if (c2 < 0x0100) {
395 c2 = gLatinCaseFold[c2];
396 break;
397 }
398 /* case fold if neccessary */
399 if ((temp = lowerCaseTable[c2>>8]) != 0)
400 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
401 }
402
403 if (c1 != c2) // found a difference, so stop looping
404 break;
405
406 if (c1 == 0) // did we reach the end of both strings at the same time?
407 return 0; // yes, so strings are equal
408 }
409
410 if (c1 < c2)
411 return -1;
412 else
413 return 1;
414 }
415
416
417 OSErr
418 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
419 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
420 {
421 ByteCount subMaxLen;
422 size_t utf8len;
423 char fileIDStr[15];
424 char extStr[15];
425
426 sprintf(fileIDStr, "#%X", cnid);
427 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
428
429 /* remove extension chars from source */
430 srcLen -= strlen(extStr) * sizeof(UniChar);
431 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
432
433 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
434
435 strcat(dstStr, fileIDStr);
436 strcat(dstStr, extStr);
437 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
438
439 return noErr;
440 }
441