]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
1b80569eb20ef532c7120eb2c4fce1ffea6ef61f
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 File: UnicodeWrappers.c
32
33 Contains: Wrapper routines for Unicode conversion and comparison.
34
35 */
36 #include <sys/param.h>
37 #include <sys/utfconv.h>
38
39 #include "../../hfs_macos_defs.h"
40 #include "UCStringCompareData.h"
41
42 #include "../headers/FileMgrInternal.h"
43 #include "../headers/HFSUnicodeWrappers.h"
44
45 enum {
46 kMinFileExtensionChars = 1, /* does not include dot */
47 kMaxFileExtensionChars = 5 /* does not include dot */
48 };
49
50
51 #define EXTENSIONCHAR(c) (((c) >= 0x61 && (c) <= 0x7A) || \
52 ((c) >= 0x41 && (c) <= 0x5A) || \
53 ((c) >= 0x30 && (c) <= 0x39))
54
55
56 #define IsHexDigit(c) (((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
57 ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
58
59
60 static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
61
62
63 static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
64
65
66 /*
67 * Get filename extension (if any) as a C string
68 */
69 static void
70 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
71 {
72 UInt32 i;
73 UniChar c;
74 UInt16 extChars; /* number of extension chars (excluding dot) */
75 UInt16 maxExtChars;
76 Boolean foundExtension;
77
78 extStr[0] = '\0'; /* assume there's no extension */
79
80 if ( length < 3 )
81 return; /* "x.y" is smallest possible extension */
82
83 if ( length < (kMaxFileExtensionChars + 2) )
84 maxExtChars = length - 2; /* save room for prefix + dot */
85 else
86 maxExtChars = kMaxFileExtensionChars;
87
88 i = length;
89 extChars = 0;
90 foundExtension = false;
91
92 while ( extChars <= maxExtChars ) {
93 c = unicodeStr[--i];
94
95 /* look for leading dot */
96 if ( c == (UniChar) '.' ) {
97 if ( extChars > 0 ) /* cannot end with a dot */
98 foundExtension = true;
99 break;
100 }
101
102 if ( EXTENSIONCHAR(c) )
103 ++extChars;
104 else
105 break;
106 }
107
108 /* if we found one then copy it */
109 if ( foundExtension ) {
110 UInt8 *extStrPtr = extStr;
111 const UniChar *unicodeStrPtr = &unicodeStr[i];
112
113 for ( i = 0; i <= extChars; ++i )
114 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
115 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
116 }
117 }
118
119
120
121 /*
122 * Count filename extension characters (if any)
123 */
124 static UInt32
125 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
126 {
127 UInt32 i;
128 UniChar c;
129 UInt32 extChars; /* number of extension chars (excluding dot) */
130 UInt16 maxExtChars;
131 Boolean foundExtension;
132
133 if ( length < 3 )
134 return 0; /* "x.y" is smallest possible extension */
135
136 if ( length < (kMaxFileExtensionChars + 2) )
137 maxExtChars = length - 2; /* save room for prefix + dot */
138 else
139 maxExtChars = kMaxFileExtensionChars;
140
141 extChars = 0; /* assume there's no extension */
142 i = length - 1; /* index to last ascii character */
143 foundExtension = false;
144
145 while ( extChars <= maxExtChars ) {
146 c = filename[i--];
147
148 /* look for leading dot */
149 if ( c == (UInt8) '.' ) {
150 if ( extChars > 0 ) /* cannot end with a dot */
151 return (extChars);
152
153 break;
154 }
155
156 if ( EXTENSIONCHAR(c) )
157 ++extChars;
158 else
159 break;
160 }
161
162 return 0;
163 }
164
165
166 /*
167 * extract the file id from a mangled name
168 */
169 HFSCatalogNodeID
170 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
171 {
172 short extChars;
173 short i;
174 UInt8 c;
175
176 *prefixLength = 0;
177
178 if ( filename == NULL )
179 return 0;
180
181 if ( length < 28 )
182 return 0; /* too small to have been mangled */
183
184 /* big enough for a file ID (#10) and an extension (.x) ? */
185 if ( length > 5 )
186 extChars = CountFilenameExtensionChars(filename, length);
187 else
188 extChars = 0;
189
190 /* skip over dot plus extension characters */
191 if ( extChars > 0 )
192 length -= (extChars + 1);
193
194 /* scan for file id digits */
195 for ( i = length - 1; i >= 0; --i) {
196 c = filename[i];
197
198 /* look for file ID marker */
199 if ( c == '#' ) {
200 if ( (length - i) < 3 )
201 break; /* too small to be a file ID */
202
203 *prefixLength = i;
204 return HexStringToInteger(length - i - 1, &filename[i+1]);
205 }
206
207 if ( !IsHexDigit(c) )
208 break; /* file ID string must have hex digits */
209 }
210
211 return 0;
212 }
213
214
215
216 static UInt32
217 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
218 {
219 UInt32 value;
220 UInt32 i;
221 UInt8 c;
222 const UInt8 *p;
223
224 value = 0;
225 p = hexStr;
226
227 for ( i = 0; i < length; ++i ) {
228 c = *p++;
229
230 if (c >= '0' && c <= '9') {
231 value = value << 4;
232 value += (UInt32) c - (UInt32) '0';
233 } else if (c >= 'A' && c <= 'F') {
234 value = value << 4;
235 value += 10 + ((unsigned int) c - (unsigned int) 'A');
236 } else {
237 return 0; /* bad character */
238 }
239 }
240
241 return value;
242 }
243
244
245 /*
246 * Routine: FastRelString
247 *
248 * Output: returns -1 if str1 < str2
249 * returns 1 if str1 > str2
250 * return 0 if equal
251 *
252 */
253 SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
254 {
255 UInt16* compareTable;
256 SInt32 bestGuess;
257 UInt8 length, length2;
258 UInt8 delta;
259
260 delta = 0;
261 length = *(str1++);
262 length2 = *(str2++);
263
264 if (length == length2)
265 bestGuess = 0;
266 else if (length < length2)
267 {
268 bestGuess = -1;
269 delta = length2 - length;
270 }
271 else
272 {
273 bestGuess = 1;
274 length = length2;
275 }
276
277 compareTable = (UInt16*) gCompareTable;
278
279 while (length--)
280 {
281 UInt8 aChar, bChar;
282
283 aChar = *(str1++);
284 bChar = *(str2++);
285
286 if (aChar != bChar) // If they don't match exacly, do case conversion
287 {
288 UInt16 aSortWord, bSortWord;
289
290 aSortWord = compareTable[aChar];
291 bSortWord = compareTable[bChar];
292
293 if (aSortWord > bSortWord)
294 return 1;
295
296 if (aSortWord < bSortWord)
297 return -1;
298 }
299
300 // If characters match exactly, then go on to next character immediately without
301 // doing any extra work.
302 }
303
304 // if you got to here, then return bestGuess
305 return bestGuess;
306 }
307
308
309
310 //
311 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
312 //
313 // IF RESULT
314 // --------------------------
315 // str1 < str2 => -1
316 // str1 = str2 => 0
317 // str1 > str2 => +1
318 //
319 // The lower case table starts with 256 entries (one for each of the upper bytes
320 // of the original Unicode char). If that entry is zero, then all characters with
321 // that upper byte are already case folded. If the entry is non-zero, then it is
322 // the _index_ (not byte offset) of the start of the sub-table for the characters
323 // with that upper byte. All ignorable characters are folded to the value zero.
324 //
325 // In pseudocode:
326 //
327 // Let c = source Unicode character
328 // Let table[] = lower case table
329 //
330 // lower = table[highbyte(c)]
331 // if (lower == 0)
332 // lower = c
333 // else
334 // lower = table[lower+lowbyte(c)]
335 //
336 // if (lower == 0)
337 // ignore this character
338 //
339 // To handle ignorable characters, we now need a loop to find the next valid character.
340 // Also, we can't pre-compute the number of characters to compare; the string length might
341 // be larger than the number of non-ignorable characters. Further, we must be able to handle
342 // ignorable characters at any point in the string, including as the first or last characters.
343 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
344 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
345 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
346 // an invalid Unicode character).
347 //
348 // Pseudocode:
349 //
350 // while (1) {
351 // c1 = GetNextValidChar(str1) // returns zero if at end of string
352 // c2 = GetNextValidChar(str2)
353 //
354 // if (c1 != c2) break // found a difference
355 //
356 // if (c1 == 0) // reached end of string on both strings at once?
357 // return 0; // yes, so strings are equal
358 // }
359 //
360 // // When we get here, c1 != c2. So, we just need to determine which one is less.
361 // if (c1 < c2)
362 // return -1;
363 // else
364 // return 1;
365 //
366
367 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
368 register ConstUniCharArrayPtr str2, register ItemCount length2)
369 {
370 register UInt16 c1,c2;
371 register UInt16 temp;
372 register UInt16* lowerCaseTable;
373
374 lowerCaseTable = (UInt16*) gLowerCaseTable;
375
376 while (1) {
377 /* Set default values for c1, c2 in case there are no more valid chars */
378 c1 = 0;
379 c2 = 0;
380
381 /* Find next non-ignorable char from str1, or zero if no more */
382 while (length1 && c1 == 0) {
383 c1 = *(str1++);
384 --length1;
385 /* check for basic latin first */
386 if (c1 < 0x0100) {
387 c1 = gLatinCaseFold[c1];
388 break;
389 }
390 /* case fold if neccessary */
391 if ((temp = lowerCaseTable[c1>>8]) != 0)
392 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
393 }
394
395
396 /* Find next non-ignorable char from str2, or zero if no more */
397 while (length2 && c2 == 0) {
398 c2 = *(str2++);
399 --length2;
400 /* check for basic latin first */
401 if (c2 < 0x0100) {
402 c2 = gLatinCaseFold[c2];
403 break;
404 }
405 /* case fold if neccessary */
406 if ((temp = lowerCaseTable[c2>>8]) != 0)
407 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
408 }
409
410 if (c1 != c2) // found a difference, so stop looping
411 break;
412
413 if (c1 == 0) // did we reach the end of both strings at the same time?
414 return 0; // yes, so strings are equal
415 }
416
417 if (c1 < c2)
418 return -1;
419 else
420 return 1;
421 }
422
423
424 OSErr
425 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
426 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
427 {
428 ByteCount subMaxLen;
429 size_t utf8len;
430 char fileIDStr[15];
431 char extStr[15];
432
433 sprintf(fileIDStr, "#%X", cnid);
434 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
435
436 /* remove extension chars from source */
437 srcLen -= strlen(extStr) * sizeof(UniChar);
438 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
439
440 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
441
442 strcat(dstStr, fileIDStr);
443 strcat(dstStr, extStr);
444 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
445
446 return noErr;
447 }
448