2 // lf_hfs_unicode_wrappers.c
5 // Created by Yakov Ben Zaken on 22/03/2018.
8 #include "lf_hfs_unicode_wrappers.h"
9 #include "lf_hfs_ucs_string_cmp_data.h"
10 #include "lf_hfs_sbunicode.h"
/* Smallest and largest number of characters a filename extension may have. */
15 kMinFileExtensionChars
= 1, /* does not include dot */
16 kMaxFileExtensionChars
= 5 /* does not include dot */
/*
 * EXTENSIONCHAR(c): non-zero when c is an ASCII lower-case letter
 * (0x61-0x7A), an ASCII upper-case letter (0x41-0x5A) or a decimal digit
 * (0x30-0x39). The argument is expanded more than once, so it must not
 * have side effects.
 */
20 #define EXTENSIONCHAR(c) (((c) >= 0x61 && (c) <= 0x7A) || \
21 ((c) >= 0x41 && (c) <= 0x5A) || \
22 ((c) >= 0x30 && (c) <= 0x39))
/*
 * IsHexDigit(c): non-zero when c is '0'-'9' or an upper-case 'A'-'F'.
 * Lower-case 'a'-'f' is not matched. The argument is expanded more than
 * once, so it must not have side effects.
 */
25 #define IsHexDigit(c) (((c) >= (u_int8_t) '0' && (c) <= (u_int8_t) '9') || \
26 ((c) >= (u_int8_t) 'A' && (c) <= (u_int8_t) 'F'))
/* Forward declaration of GetFilenameExtension; it is defined later in this file. */
30 GetFilenameExtension( ItemCount length
, ConstUniCharArrayPtr unicodeStr
, char* extStr
);
/* Forward declaration of HexStringToInteger; it is defined later in this file. */
34 HexStringToInteger( u_int32_t length
, const u_int8_t
*hexStr
);
38 * Get filename extension (if any) as a C string
/*
 * GetFilenameExtension
 *
 * Looks for a filename extension in a Unicode name and, when one is found,
 * writes it to extStr as a NUL-terminated C string that includes the dot.
 *
 * length     - length of unicodeStr (presumably a count of UniChar
 *              elements, as the caller in this file divides a byte length
 *              by sizeof(UniChar) - confirm)
 * unicodeStr - the name to examine
 * extStr     - output buffer. It is set to the empty string first; when an
 *              extension is found it receives extChars + 1 bytes (dot plus
 *              extension) followed by a NUL, so it needs room for
 *              extChars + 2 bytes.
 *
 * An extension is a '.' followed by at least one EXTENSIONCHAR character.
 */
41 GetFilenameExtension(ItemCount length
, ConstUniCharArrayPtr unicodeStr
, char * extStr
)
45 u_int16_t extChars
; /* number of extension chars (excluding dot) */
46 u_int16_t maxExtChars
;
47 Boolean foundExtension
;
49 extStr
[0] = '\0'; /* assume there's no extension */
52 return; /* "x.y" is smallest possible extension */
/* Short names get a smaller cap so that a prefix character and the dot remain. */
54 if ( length
< (kMaxFileExtensionChars
+ 2) )
55 maxExtChars
= length
- 2; /* save room for prefix + dot */
57 maxExtChars
= kMaxFileExtensionChars
;
61 foundExtension
= false;
/* Keep examining characters while the count stays within the cap. */
63 while ( extChars
<= maxExtChars
) {
66 /* look for leading dot */
67 if ( c
== (UniChar
) '.' ) {
68 if ( extChars
> 0 ) /* cannot end with a dot */
69 foundExtension
= true;
73 if ( EXTENSIONCHAR(c
) )
79 /* if we found one then copy it */
80 if ( foundExtension
) {
81 u_int8_t
*extStrPtr
= (u_int8_t
*)extStr
;
82 const UniChar
*unicodeStrPtr
= &unicodeStr
[i
];
/*
 * i <= extChars copies extChars + 1 elements (the dot and the extension).
 * Each UniChar is narrowed to one byte by the cast.
 */
84 for ( i
= 0; i
<= extChars
; ++i
)
85 *(extStrPtr
++) = (u_int8_t
) *(unicodeStrPtr
++);
86 extStr
[extChars
+ 1] = '\0'; /* terminate extension + dot */
91 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
94 // --------------------------
99 // The lower case table starts with 256 entries (one for each of the upper bytes
100 // of the original Unicode char). If that entry is zero, then all characters with
101 // that upper byte are already case folded. If the entry is non-zero, then it is
102 // the _index_ (not byte offset) of the start of the sub-table for the characters
103 // with that upper byte. All ignorable characters are folded to the value zero.
107 // Let c = source Unicode character
108 // Let table[] = lower case table
110 // lower = table[highbyte(c)]
114 // lower = table[lower+lowbyte(c)]
117 // ignore this character
119 // To handle ignorable characters, we now need a loop to find the next valid character.
120 // Also, we can't pre-compute the number of characters to compare; the string length might
121 // be larger than the number of non-ignorable characters. Further, we must be able to handle
122 // ignorable characters at any point in the string, including as the first or last characters.
123 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
124 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
125 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
126 // an invalid Unicode character).
131 // c1 = GetNextValidChar(str1) // returns zero if at end of string
132 // c2 = GetNextValidChar(str2)
134 // if (c1 != c2) break // found a difference
136 // if (c1 == 0) // reached end of string on both strings at once?
137 // return 0; // yes, so strings are equal
140 // // When we get here, c1 != c2. So, we just need to determine which one is less.
/*
 * FastUnicodeCompare
 *
 * Compares two Unicode strings after case folding. Basic Latin characters
 * are folded through gLatinCaseFold; other characters are folded through
 * the two-level gLowerCaseTable when the entry for their high byte is
 * non-zero. A character that folds to zero is treated as ignorable and the
 * scan moves on to the next one.
 *
 * str1, length1 - first string and its length (presumably in UniChar
 *                 units - TODO confirm)
 * str2, length2 - second string and its length
 *
 * Returns 0 when both strings run out of non-ignorable characters at the
 * same time. The comparison loop stops at the first pair of folded
 * characters that differ.
 */
147 int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1
, register ItemCount length1
,
148 register ConstUniCharArrayPtr str2
, register ItemCount length2
)
150 register u_int16_t c1
,c2
;
151 register u_int16_t temp
;
152 register u_int16_t
* lowerCaseTable
;
154 lowerCaseTable
= (u_int16_t
*) gLowerCaseTable
;
157 /* Set default values for c1, c2 in case there are no more valid chars */
161 /* Find next non-ignorable char from str1, or zero if no more */
162 while (length1
&& c1
== 0) {
165 /* check for basic latin first */
167 c1
= gLatinCaseFold
[c1
];
170 /* case fold if necessary */
/* temp is the sub-table index for this high byte; zero means no folding is needed */
171 if ((temp
= lowerCaseTable
[c1
>>8]) != 0)
172 c1
= lowerCaseTable
[temp
+ (c1
& 0x00FF)];
176 /* Find next non-ignorable char from str2, or zero if no more */
177 while (length2
&& c2
== 0) {
180 /* check for basic latin first */
182 c2
= gLatinCaseFold
[c2
];
185 /* case fold if necessary */
186 if ((temp
= lowerCaseTable
[c2
>>8]) != 0)
187 c2
= lowerCaseTable
[temp
+ (c2
& 0x00FF)];
190 if (c1
!= c2
) // found a difference, so stop looping
193 if (c1
== 0) // did we reach the end of both strings at the same time?
194 return 0; // yes, so strings are equal
205 * UnicodeBinaryCompare
206 * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them.
208 * Results are emitted like FastUnicodeCompare:
212 * --------------------------
217 * The case matching source code is greatly simplified due to the lack of case-folding
218 * in this comparison routine. We compare, in order: the lengths, then do character-by-
219 * character comparisons.
/*
 * UnicodeBinaryCompare
 *
 * Case-sensitive comparison of two UTF-16 strings with no case folding.
 *
 * str1, len1 - first string and its length
 * str2, len2 - second string and its length
 *
 * string_length is set from len1 or len2 depending on how the two lengths
 * compare, and the loop below walks that many character pairs.
 */
222 int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1
, register ItemCount len1
,
223 register ConstUniCharArrayPtr str2
, register ItemCount len2
) {
226 ItemCount string_length
;
229 /* First generate the string length (for comparison purposes) */
231 string_length
= len1
;
234 else if (len1
> len2
) {
/* str2 is the shorter string, so only len2 characters can be compared */
235 string_length
= len2
;
239 string_length
= len1
;
242 /* now compare the two string pointers */
243 while (string_length
--) {
256 /* If equal, iterate to the next two respective chars */
263 * extract the file id from a mangled name
/*
 * GetEmbeddedFileID
 *
 * Extracts the file ID embedded in a mangled filename. The name is scanned
 * backwards, after skipping any extension, for the file ID marker (the
 * mangling code in this file writes the ID with the format "#%X").
 *
 * filename     - name bytes to inspect; compared against NULL before use
 * length       - number of bytes in filename
 * prefixLength - out parameter (presumably the length of the name in front
 *                of the file ID marker - confirm against callers)
 *
 * Returns 0 when the name is too small to have been mangled. When a marker
 * with enough characters after it is found, returns the result of
 * HexStringToInteger on the characters following the marker.
 */
266 GetEmbeddedFileID(const unsigned char * filename
, u_int32_t length
, u_int32_t
*prefixLength
)
274 if ( filename
== NULL
)
278 return 0; /* too small to have been mangled */
280 /* big enough for a file ID (#10) and an extension (.x) ? */
282 extChars
= CountFilenameExtensionChars(filename
, length
);
286 /* skip over dot plus extension characters */
288 length
-= (extChars
+ 1);
290 /* scan for file id digits */
/*
 * NOTE(review): `i >= 0` only ends this loop if i has a signed type, and
 * that type must be wide enough to hold `length - 1` - confirm against the
 * declaration of i.
 */
291 for ( i
= length
- 1; i
>= 0; --i
) {
294 /* look for file ID marker */
296 if ( (length
- i
) < 3 )
297 break; /* too small to be a file ID */
/* pass the characters that follow the marker; length - i - 1 of them remain */
300 return HexStringToInteger(length
- i
- 1, &filename
[i
+1]);
303 if ( !IsHexDigit(c
) )
304 break; /* file ID string must have hex digits */
311 * Count filename extension characters (if any)
/*
 * CountFilenameExtensionChars
 *
 * Counts the characters of a filename extension, not counting the dot.
 * The scan starts at the last byte of the name.
 *
 * filename - name bytes to inspect
 * length   - number of bytes in filename
 *
 * Returns 0 when the name is too short to hold an extension ("x.y" is the
 * smallest name that has one).
 */
314 CountFilenameExtensionChars( const unsigned char * filename
, u_int32_t length
)
317 u_int16_t maxExtChars
;
320 return 0; /* "x.y" is smallest possible extension */
/* Short names get a smaller cap so that a prefix character and the dot remain. */
322 if ( length
< (kMaxFileExtensionChars
+ 2) )
323 maxExtChars
= length
- 2; /* save room for prefix + dot */
325 maxExtChars
= kMaxFileExtensionChars
;
327 u_int32_t extChars
= 0; /* number of extension chars (excluding dot) - assume there's no extension */
328 u_int32_t i
= length
- 1; /* index to last ascii character */
/* Keep examining characters while the count stays within the cap. */
330 while ( extChars
<= maxExtChars
) {
333 /* look for leading dot */
334 if ( c
== (u_int8_t
) '.' ) {
335 if ( extChars
> 0 ) /* cannot end with a dot */
341 if ( EXTENSIONCHAR(c
) )
/*
 * HexStringToInteger
 *
 * Converts `length` upper-case hexadecimal digits starting at hexStr into
 * an unsigned 32-bit value. The digits need not be NUL-terminated.
 *
 * length - number of bytes of hexStr to consume
 * hexStr - the digits; only '0'-'9' and 'A'-'F' are accepted
 *
 * Returns the parsed value. Returns 0 when a byte is not an accepted digit,
 * when the digits do not fit in 32 bits (previously the value wrapped
 * silently), or when length is 0. Leading zeros do not count against the
 * 32-bit limit.
 */
static u_int32_t
HexStringToInteger(u_int32_t length, const u_int8_t *hexStr)
{
    u_int32_t value = 0;
    u_int32_t i;

    for ( i = 0; i < length; ++i ) {
        const u_int8_t c = hexStr[i];
        u_int32_t digit;

        if (c >= '0' && c <= '9') {
            digit = (u_int32_t) c - (u_int32_t) '0';
        } else if (c >= 'A' && c <= 'F') {
            digit = 10 + ((u_int32_t) c - (u_int32_t) 'A');
        } else {
            return 0;   /* bad character */
        }

        /* A non-zero top nibble would be shifted out: the value cannot fit. */
        if ( (value >> 28) != 0 )
            return 0;

        value = (value << 4) + digit;
    }

    return value;
}
/*
 * ConvertUnicodeToUTF8Mangled
 *
 * Builds a mangled UTF-8 name in dstStr: the UTF-8 encoding of the source
 * name without its extension, then the catalog node ID formatted with
 * "#%X", then the extension found by GetFilenameExtension.
 *
 * srcLen       - size of srcStr in bytes (it is divided by sizeof(UniChar)
 *                to obtain the character count)
 * srcStr       - source Unicode name
 * maxDstLen    - size of dstStr, passed to strlcat as the buffer size
 * actualDstLen - receives utf8len plus the lengths of the extension and
 *                file ID strings
 * dstStr       - destination buffer
 * cnid         - catalog node ID embedded in the name
 */
379 ConvertUnicodeToUTF8Mangled(ByteCount srcLen
, ConstUniCharArrayPtr srcStr
, ByteCount maxDstLen
,
380 ByteCount
*actualDstLen
, unsigned char* dstStr
, HFSCatalogNodeID cnid
)
387 snprintf(fileIDStr
, sizeof(fileIDStr
), "#%X", cnid
);
388 GetFilenameExtension(srcLen
/sizeof(UniChar
), srcStr
, extStr
);
390 /* remove extension chars from source */
391 srcLen
-= strlen(extStr
) * sizeof(UniChar
);
/*
 * Reserve room in the destination for the file ID and extension suffixes.
 * NOTE(review): if ByteCount is unsigned and maxDstLen is smaller than the
 * two suffixes together, this subtraction wraps - confirm callers always
 * pass a large enough buffer.
 */
392 subMaxLen
= maxDstLen
- (strlen(extStr
) + strlen(fileIDStr
));
/*
 * NOTE(review): the result of utf8_encodestr is discarded, yet utf8len is
 * used below - confirm utf8len is always written, even on failure.
 */
394 (void) utf8_encodestr(srcStr
, srcLen
, dstStr
, &utf8len
, subMaxLen
, ':', UTF_ADD_NULL_TERM
);
396 strlcat((char *)dstStr
, fileIDStr
, maxDstLen
);
397 strlcat((char *)dstStr
, extStr
, maxDstLen
);
/*
 * NOTE(review): this is the intended length; it is not reduced if strlcat
 * truncated either suffix.
 */
398 *actualDstLen
= utf8len
+ (strlen(extStr
) + strlen(fileIDStr
));