]> git.saurik.com Git - apple/xnu.git/blame - bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c
xnu-344.tar.gz
[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 File: UnicodeWrappers.c
24
25 Contains: Wrapper routines for Unicode conversion and comparison.
26
27*/
28#include <sys/param.h>
29#include <sys/utfconv.h>
30
31#include "../../hfs_macos_defs.h"
32#include "UCStringCompareData.h"
33
34#include "../headers/FileMgrInternal.h"
35#include "../headers/HFSUnicodeWrappers.h"
36
/* Bounds on the number of characters in a filename extension (the dot is not counted). */
enum {
	kMinFileExtensionChars = 1,	/* shortest extension, excluding the dot */
	kMaxFileExtensionChars = 5	/* longest extension, excluding the dot */
};
41
42
/*
 * True when c is a character allowed in a filename extension:
 * a digit (0x30-0x39), an uppercase letter (0x41-0x5A) or a
 * lowercase letter (0x61-0x7A).  The argument is evaluated more
 * than once, so it must not have side effects.
 */
#define EXTENSIONCHAR(c)	(((c) >= 0x30 && (c) <= 0x39) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x61 && (c) <= 0x7A))
46
47
/*
 * True when c is an uppercase hexadecimal digit ('0'-'9' or 'A'-'F').
 * Lowercase 'a'-'f' is rejected.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define IsHexDigit(c)	(((c) >= '0' && (c) <= '9') || \
			 ((c) >= 'A' && (c) <= 'F'))
50
51
52static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
53
54static void GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );
55
56static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
57
58
59
60/*
61 * Convert file ID into a hexidecimal string with no leading zeros
62 */
63static void
64GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
65{
66 SInt32 i, b;
67 UInt8 *translate = (UInt8 *) "0123456789ABCDEF";
68 UInt8 c;
69
70 fileIDStr[0] = '#';
71
72 for ( i = 0, b = 28; b >= 0; b -= 4 ) {
73 c = *(translate + ((fileID >> b) & 0x0000000F));
74
75 /* if its not a leading zero add it to our string */
76 if ( (c != (UInt8) '0') || (i > 1) || (b == 0) )
77 fileIDStr[++i] = c;
78 }
79
80 fileIDStr[++i] = '\0';
81}
82
83
84/*
85 * Get filename extension (if any) as a C string
86 */
87static void
88GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
89{
90 UInt32 i;
91 UniChar c;
92 UInt16 extChars; /* number of extension chars (excluding dot) */
93 UInt16 maxExtChars;
94 Boolean foundExtension;
95
96 extStr[0] = '\0'; /* assume there's no extension */
97
98 if ( length < 3 )
99 return; /* "x.y" is smallest possible extension */
100
101 if ( length < (kMaxFileExtensionChars + 2) )
102 maxExtChars = length - 2; /* save room for prefix + dot */
103 else
104 maxExtChars = kMaxFileExtensionChars;
105
106 i = length;
107 extChars = 0;
108 foundExtension = false;
109
110 while ( extChars <= maxExtChars ) {
111 c = unicodeStr[--i];
112
113 /* look for leading dot */
114 if ( c == (UniChar) '.' ) {
115 if ( extChars > 0 ) /* cannot end with a dot */
116 foundExtension = true;
117 break;
118 }
119
120 if ( EXTENSIONCHAR(c) )
121 ++extChars;
122 else
123 break;
124 }
125
126 /* if we found one then copy it */
127 if ( foundExtension ) {
128 UInt8 *extStrPtr = extStr;
129 const UniChar *unicodeStrPtr = &unicodeStr[i];
130
131 for ( i = 0; i <= extChars; ++i )
132 *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
133 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
134 }
135}
136
137
138
139/*
140 * Count filename extension characters (if any)
141 */
142static UInt32
143CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
144{
145 UInt32 i;
146 UniChar c;
147 UInt32 extChars; /* number of extension chars (excluding dot) */
148 UInt16 maxExtChars;
149 Boolean foundExtension;
150
1c79356b
A
151 if ( length < 3 )
152 return 0; /* "x.y" is smallest possible extension */
153
154 if ( length < (kMaxFileExtensionChars + 2) )
155 maxExtChars = length - 2; /* save room for prefix + dot */
156 else
157 maxExtChars = kMaxFileExtensionChars;
158
159 extChars = 0; /* assume there's no extension */
160 i = length - 1; /* index to last ascii character */
161 foundExtension = false;
162
163 while ( extChars <= maxExtChars ) {
164 c = filename[i--];
165
166 /* look for leading dot */
167 if ( c == (UInt8) '.' ) {
168 if ( extChars > 0 ) /* cannot end with a dot */
169 return (extChars);
170
171 break;
172 }
173
174 if ( EXTENSIONCHAR(c) )
175 ++extChars;
176 else
177 break;
178 }
179
180 return 0;
181}
182
183
184/*
185 * extract the file id from a mangled name
186 */
187HFSCatalogNodeID
188GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
189{
190 short extChars;
191 short i;
192 UInt8 c;
193
194 *prefixLength = 0;
195
196 if ( filename == NULL )
197 return 0;
198
1c79356b
A
199 if ( length < 28 )
200 return 0; /* too small to have been mangled */
201
202 /* big enough for a file ID (#10) and an extension (.x) ? */
203 if ( length > 5 )
204 extChars = CountFilenameExtensionChars(filename, length);
205 else
206 extChars = 0;
207
208 /* skip over dot plus extension characters */
209 if ( extChars > 0 )
210 length -= (extChars + 1);
211
212 /* scan for file id digits */
213 for ( i = length - 1; i >= 0; --i) {
214 c = filename[i];
215
216 /* look for file ID marker */
217 if ( c == '#' ) {
218 if ( (length - i) < 3 )
219 break; /* too small to be a file ID */
220
221 *prefixLength = i;
222 return HexStringToInteger(length - i - 1, &filename[i+1]);
223 }
224
225 if ( !IsHexDigit(c) )
226 break; /* file ID string must have hex digits */
227 }
228
229 return 0;
230}
231
232
233
234static UInt32
235HexStringToInteger(UInt32 length, const UInt8 *hexStr)
236{
237 UInt32 value;
238 short i;
239 UInt8 c;
240 const UInt8 *p;
241
242 value = 0;
243 p = hexStr;
244
245 for ( i = 0; i < length; ++i ) {
246 c = *p++;
247
248 if (c >= '0' && c <= '9') {
249 value = value << 4;
250 value += (UInt32) c - (UInt32) '0';
251 } else if (c >= 'A' && c <= 'F') {
252 value = value << 4;
253 value += 10 + ((unsigned int) c - (unsigned int) 'A');
254 } else {
255 return 0; /* bad character */
256 }
257 }
258
259 return value;
260}
261
262
263/*
264 * Routine: FastRelString
265 *
266 * Output: returns -1 if str1 < str2
267 * returns 1 if str1 > str2
268 * return 0 if equal
269 *
270 */
1c79356b
A
271SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
272{
273 UInt16* compareTable;
274 SInt32 bestGuess;
275 UInt8 length, length2;
276 UInt8 delta;
277
278 delta = 0;
279 length = *(str1++);
280 length2 = *(str2++);
281
282 if (length == length2)
283 bestGuess = 0;
284 else if (length < length2)
285 {
286 bestGuess = -1;
287 delta = length2 - length;
288 }
289 else
290 {
291 bestGuess = 1;
292 length = length2;
293 }
294
295 compareTable = (UInt16*) gCompareTable;
296
297 while (length--)
298 {
299 UInt8 aChar, bChar;
300
301 aChar = *(str1++);
302 bChar = *(str2++);
303
304 if (aChar != bChar) // If they don't match exacly, do case conversion
305 {
306 UInt16 aSortWord, bSortWord;
307
308 aSortWord = compareTable[aChar];
309 bSortWord = compareTable[bChar];
310
311 if (aSortWord > bSortWord)
312 return 1;
313
314 if (aSortWord < bSortWord)
315 return -1;
316 }
317
318 // If characters match exactly, then go on to next character immediately without
319 // doing any extra work.
320 }
321
322 // if you got to here, then return bestGuess
323 return bestGuess;
324}
325
326
327
328//
329// FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
330//
331// IF RESULT
332// --------------------------
333// str1 < str2 => -1
334// str1 = str2 => 0
335// str1 > str2 => +1
336//
337// The lower case table starts with 256 entries (one for each of the upper bytes
338// of the original Unicode char). If that entry is zero, then all characters with
339// that upper byte are already case folded. If the entry is non-zero, then it is
340// the _index_ (not byte offset) of the start of the sub-table for the characters
341// with that upper byte. All ignorable characters are folded to the value zero.
342//
343// In pseudocode:
344//
345// Let c = source Unicode character
346// Let table[] = lower case table
347//
348// lower = table[highbyte(c)]
349// if (lower == 0)
350// lower = c
351// else
352// lower = table[lower+lowbyte(c)]
353//
354// if (lower == 0)
355// ignore this character
356//
357// To handle ignorable characters, we now need a loop to find the next valid character.
358// Also, we can't pre-compute the number of characters to compare; the string length might
359// be larger than the number of non-ignorable characters. Further, we must be able to handle
360// ignorable characters at any point in the string, including as the first or last characters.
361// We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
362// Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
363// the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
364// an invalid Unicode character).
365//
366// Pseudocode:
367//
368// while (1) {
369// c1 = GetNextValidChar(str1) // returns zero if at end of string
370// c2 = GetNextValidChar(str2)
371//
372// if (c1 != c2) break // found a difference
373//
374// if (c1 == 0) // reached end of string on both strings at once?
375// return 0; // yes, so strings are equal
376// }
377//
378// // When we get here, c1 != c2. So, we just need to determine which one is less.
379// if (c1 < c2)
380// return -1;
381// else
382// return 1;
383//
384
1c79356b
A
385SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
386 register ConstUniCharArrayPtr str2, register ItemCount length2)
387{
388 register UInt16 c1,c2;
389 register UInt16 temp;
390 register UInt16* lowerCaseTable;
391
392 lowerCaseTable = (UInt16*) gLowerCaseTable;
393
394 while (1) {
395 /* Set default values for c1, c2 in case there are no more valid chars */
396 c1 = 0;
397 c2 = 0;
398
399 /* Find next non-ignorable char from str1, or zero if no more */
400 while (length1 && c1 == 0) {
401 c1 = *(str1++);
402 --length1;
403 /* check for basic latin first */
404 if (c1 < 0x0100) {
405 c1 = gLatinCaseFold[c1];
406 break;
407 }
408 /* case fold if neccessary */
409 if ((temp = lowerCaseTable[c1>>8]) != 0)
410 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
411 }
412
413
414 /* Find next non-ignorable char from str2, or zero if no more */
415 while (length2 && c2 == 0) {
416 c2 = *(str2++);
417 --length2;
418 /* check for basic latin first */
419 if (c2 < 0x0100) {
420 c2 = gLatinCaseFold[c2];
421 break;
422 }
423 /* case fold if neccessary */
424 if ((temp = lowerCaseTable[c2>>8]) != 0)
425 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
426 }
427
428 if (c1 != c2) // found a difference, so stop looping
429 break;
430
431 if (c1 == 0) // did we reach the end of both strings at the same time?
432 return 0; // yes, so strings are equal
433 }
434
435 if (c1 < c2)
436 return -1;
437 else
438 return 1;
439}
440
441
442OSErr
443ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
444 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
445{
446 ByteCount subMaxLen;
447 size_t utf8len;
448 char fileIDStr[15];
449 char extStr[15];
450
451 GetFileIDString(cnid, fileIDStr);
452 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
453
454 /* remove extension chars from source */
455 srcLen -= strlen(extStr) * sizeof(UniChar);
456 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
457
458 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
459
460 strcat(dstStr, fileIDStr);
461 strcat(dstStr, extStr);
462 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
463
464 return noErr;
465}
466