]> git.saurik.com Git - apple/xnu.git/blame - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
xnu-792.18.15.tar.gz
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
CommitLineData
1c79356b 1/*
5d5c5d0d
A
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
8f6c56a5 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
8f6c56a5
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
8ad349bb 24 * limitations under the License.
8f6c56a5
A
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 File: UnicodeWrappers.c
30
31 Contains: Wrapper routines for Unicode conversion and comparison.
32
33*/
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/utfconv.h>

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"
42
/*
 * Limits on the number of characters in a filename extension.
 * Neither limit counts the leading dot.
 */
enum {
	kMinFileExtensionChars = 1,
	kMaxFileExtensionChars = 5
};


/* Non-zero when c is an ASCII letter or decimal digit (argument is evaluated more than once). */
#define EXTENSIONCHAR(c)	(((c) >= 'a' && (c) <= 'z') || \
				 ((c) >= 'A' && (c) <= 'Z') || \
				 ((c) >= '0' && (c) <= '9'))


/* Non-zero when c is '0'-'9' or upper-case 'A'-'F'; lower-case hex letters are rejected. */
#define IsHexDigit(c)		(((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
				 ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
56
57
58static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
59
1c79356b
A
60
61static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
62
63
1c79356b
A
64/*
65 * Get filename extension (if any) as a C string
66 */
67static void
68GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
69{
70 UInt32 i;
71 UniChar c;
72 UInt16 extChars; /* number of extension chars (excluding dot) */
73 UInt16 maxExtChars;
74 Boolean foundExtension;
75
76 extStr[0] = '\0'; /* assume there's no extension */
77
78 if ( length < 3 )
79 return; /* "x.y" is smallest possible extension */
80
81 if ( length < (kMaxFileExtensionChars + 2) )
82 maxExtChars = length - 2; /* save room for prefix + dot */
83 else
84 maxExtChars = kMaxFileExtensionChars;
85
86 i = length;
87 extChars = 0;
88 foundExtension = false;
89
90 while ( extChars <= maxExtChars ) {
91 c = unicodeStr[--i];
92
93 /* look for leading dot */
94 if ( c == (UniChar) '.' ) {
95 if ( extChars > 0 ) /* cannot end with a dot */
96 foundExtension = true;
97 break;
98 }
99
100 if ( EXTENSIONCHAR(c) )
101 ++extChars;
102 else
103 break;
104 }
105
106 /* if we found one then copy it */
107 if ( foundExtension ) {
108 UInt8 *extStrPtr = extStr;
109 const UniChar *unicodeStrPtr = &unicodeStr[i];
110
111 for ( i = 0; i <= extChars; ++i )
112 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
113 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
114 }
115}
116
117
118
119/*
120 * Count filename extension characters (if any)
121 */
122static UInt32
123CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
124{
125 UInt32 i;
126 UniChar c;
127 UInt32 extChars; /* number of extension chars (excluding dot) */
128 UInt16 maxExtChars;
129 Boolean foundExtension;
130
1c79356b
A
131 if ( length < 3 )
132 return 0; /* "x.y" is smallest possible extension */
133
134 if ( length < (kMaxFileExtensionChars + 2) )
135 maxExtChars = length - 2; /* save room for prefix + dot */
136 else
137 maxExtChars = kMaxFileExtensionChars;
138
139 extChars = 0; /* assume there's no extension */
140 i = length - 1; /* index to last ascii character */
141 foundExtension = false;
142
143 while ( extChars <= maxExtChars ) {
144 c = filename[i--];
145
146 /* look for leading dot */
147 if ( c == (UInt8) '.' ) {
148 if ( extChars > 0 ) /* cannot end with a dot */
149 return (extChars);
150
151 break;
152 }
153
154 if ( EXTENSIONCHAR(c) )
155 ++extChars;
156 else
157 break;
158 }
159
160 return 0;
161}
162
163
164/*
165 * extract the file id from a mangled name
166 */
167HFSCatalogNodeID
168GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
169{
170 short extChars;
171 short i;
172 UInt8 c;
173
174 *prefixLength = 0;
175
176 if ( filename == NULL )
177 return 0;
178
1c79356b
A
179 if ( length < 28 )
180 return 0; /* too small to have been mangled */
181
182 /* big enough for a file ID (#10) and an extension (.x) ? */
183 if ( length > 5 )
184 extChars = CountFilenameExtensionChars(filename, length);
185 else
186 extChars = 0;
187
188 /* skip over dot plus extension characters */
189 if ( extChars > 0 )
190 length -= (extChars + 1);
191
192 /* scan for file id digits */
193 for ( i = length - 1; i >= 0; --i) {
194 c = filename[i];
195
196 /* look for file ID marker */
197 if ( c == '#' ) {
198 if ( (length - i) < 3 )
199 break; /* too small to be a file ID */
200
201 *prefixLength = i;
202 return HexStringToInteger(length - i - 1, &filename[i+1]);
203 }
204
205 if ( !IsHexDigit(c) )
206 break; /* file ID string must have hex digits */
207 }
208
209 return 0;
210}
211
212
213
214static UInt32
215HexStringToInteger(UInt32 length, const UInt8 *hexStr)
216{
217 UInt32 value;
91447636 218 UInt32 i;
1c79356b
A
219 UInt8 c;
220 const UInt8 *p;
221
222 value = 0;
223 p = hexStr;
224
225 for ( i = 0; i < length; ++i ) {
226 c = *p++;
227
228 if (c >= '0' && c <= '9') {
229 value = value << 4;
230 value += (UInt32) c - (UInt32) '0';
231 } else if (c >= 'A' && c <= 'F') {
232 value = value << 4;
233 value += 10 + ((unsigned int) c - (unsigned int) 'A');
234 } else {
235 return 0; /* bad character */
236 }
237 }
238
239 return value;
240}
241
242
243/*
244 * Routine: FastRelString
245 *
246 * Output: returns -1 if str1 < str2
247 * returns 1 if str1 > str2
248 * return 0 if equal
249 *
250 */
1c79356b
A
251SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
252{
253 UInt16* compareTable;
254 SInt32 bestGuess;
255 UInt8 length, length2;
256 UInt8 delta;
257
258 delta = 0;
259 length = *(str1++);
260 length2 = *(str2++);
261
262 if (length == length2)
263 bestGuess = 0;
264 else if (length < length2)
265 {
266 bestGuess = -1;
267 delta = length2 - length;
268 }
269 else
270 {
271 bestGuess = 1;
272 length = length2;
273 }
274
275 compareTable = (UInt16*) gCompareTable;
276
277 while (length--)
278 {
279 UInt8 aChar, bChar;
280
281 aChar = *(str1++);
282 bChar = *(str2++);
283
284 if (aChar != bChar) // If they don't match exacly, do case conversion
285 {
286 UInt16 aSortWord, bSortWord;
287
288 aSortWord = compareTable[aChar];
289 bSortWord = compareTable[bChar];
290
291 if (aSortWord > bSortWord)
292 return 1;
293
294 if (aSortWord < bSortWord)
295 return -1;
296 }
297
298 // If characters match exactly, then go on to next character immediately without
299 // doing any extra work.
300 }
301
302 // if you got to here, then return bestGuess
303 return bestGuess;
304}
305
306
307
308//
309// FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
310//
311// IF RESULT
312// --------------------------
313// str1 < str2 => -1
314// str1 = str2 => 0
315// str1 > str2 => +1
316//
317// The lower case table starts with 256 entries (one for each of the upper bytes
318// of the original Unicode char). If that entry is zero, then all characters with
319// that upper byte are already case folded. If the entry is non-zero, then it is
320// the _index_ (not byte offset) of the start of the sub-table for the characters
321// with that upper byte. All ignorable characters are folded to the value zero.
322//
323// In pseudocode:
324//
325// Let c = source Unicode character
326// Let table[] = lower case table
327//
328// lower = table[highbyte(c)]
329// if (lower == 0)
330// lower = c
331// else
332// lower = table[lower+lowbyte(c)]
333//
334// if (lower == 0)
335// ignore this character
336//
337// To handle ignorable characters, we now need a loop to find the next valid character.
338// Also, we can't pre-compute the number of characters to compare; the string length might
339// be larger than the number of non-ignorable characters. Further, we must be able to handle
340// ignorable characters at any point in the string, including as the first or last characters.
341// We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
342// Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
343// the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
344// an invalid Unicode character).
345//
346// Pseudocode:
347//
348// while (1) {
349// c1 = GetNextValidChar(str1) // returns zero if at end of string
350// c2 = GetNextValidChar(str2)
351//
352// if (c1 != c2) break // found a difference
353//
354// if (c1 == 0) // reached end of string on both strings at once?
355// return 0; // yes, so strings are equal
356// }
357//
358// // When we get here, c1 != c2. So, we just need to determine which one is less.
359// if (c1 < c2)
360// return -1;
361// else
362// return 1;
363//
364
1c79356b
A
365SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
366 register ConstUniCharArrayPtr str2, register ItemCount length2)
367{
368 register UInt16 c1,c2;
369 register UInt16 temp;
370 register UInt16* lowerCaseTable;
371
372 lowerCaseTable = (UInt16*) gLowerCaseTable;
373
374 while (1) {
375 /* Set default values for c1, c2 in case there are no more valid chars */
376 c1 = 0;
377 c2 = 0;
378
379 /* Find next non-ignorable char from str1, or zero if no more */
380 while (length1 && c1 == 0) {
381 c1 = *(str1++);
382 --length1;
383 /* check for basic latin first */
384 if (c1 < 0x0100) {
385 c1 = gLatinCaseFold[c1];
386 break;
387 }
388 /* case fold if neccessary */
389 if ((temp = lowerCaseTable[c1>>8]) != 0)
390 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
391 }
392
393
394 /* Find next non-ignorable char from str2, or zero if no more */
395 while (length2 && c2 == 0) {
396 c2 = *(str2++);
397 --length2;
398 /* check for basic latin first */
399 if (c2 < 0x0100) {
400 c2 = gLatinCaseFold[c2];
401 break;
402 }
403 /* case fold if neccessary */
404 if ((temp = lowerCaseTable[c2>>8]) != 0)
405 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
406 }
407
408 if (c1 != c2) // found a difference, so stop looping
409 break;
410
411 if (c1 == 0) // did we reach the end of both strings at the same time?
412 return 0; // yes, so strings are equal
413 }
414
415 if (c1 < c2)
416 return -1;
417 else
418 return 1;
419}
420
421
422OSErr
423ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
424 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
425{
426 ByteCount subMaxLen;
427 size_t utf8len;
428 char fileIDStr[15];
429 char extStr[15];
430
91447636 431 sprintf(fileIDStr, "#%X", cnid);
1c79356b
A
432 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
433
434 /* remove extension chars from source */
435 srcLen -= strlen(extStr) * sizeof(UniChar);
436 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
437
438 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
439
440 strcat(dstStr, fileIDStr);
441 strcat(dstStr, extStr);
442 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
443
444 return noErr;
445}
446