]> git.saurik.com Git - apple/hfs.git/blob - livefiles_hfs_plugin/lf_hfs_unicode_wrappers.c
hfs-522.100.5.tar.gz
[apple/hfs.git] / livefiles_hfs_plugin / lf_hfs_unicode_wrappers.c
1 //
2 // lf_hfs_unicode_wrappers.c
3 // livefiles_hfs
4 //
5 // Created by Yakov Ben Zaken on 22/03/2018.
6 //
7
#include "lf_hfs_unicode_wrappers.h"

#include <errno.h>

#include "lf_hfs_ucs_string_cmp_data.h"
#include "lf_hfs_sbunicode.h"
11
12
13
/*
 * Bounds on the number of characters in a filename extension.
 * Neither value counts the dot that introduces the extension.
 */
enum {
    kMinFileExtensionChars = 1,
    kMaxFileExtensionChars = 5
};
18
19
/*
 * Non-zero when c is a code point in one of the ranges 0x61-0x7A, 0x41-0x5A
 * or 0x30-0x39 (ASCII letters and digits).
 * The argument is expanded several times, so it must have no side effects.
 */
#define EXTENSIONCHAR(c) \
    ( ((c) >= 0x61 && (c) <= 0x7A) /* a-z */ || \
      ((c) >= 0x41 && (c) <= 0x5A) /* A-Z */ || \
      ((c) >= 0x30 && (c) <= 0x39) /* 0-9 */ )
23
24
/*
 * Non-zero when c is a decimal digit or an upper-case hex letter 'A'..'F'.
 * Lower-case 'a'..'f' are not accepted.
 * The argument is expanded several times, so it must have no side effects.
 */
#define IsHexDigit(c) \
    ( ((c) >= '0' && (c) <= '9') || \
      ((c) >= 'A' && (c) <= 'F') )
27
28
29 static void
30 GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
31
32
33 static u_int32_t
34 HexStringToInteger( u_int32_t length, const u_int8_t *hexStr );
35
36
37 /*
38 * Get filename extension (if any) as a C string
39 */
40 static void
41 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
42 {
43 u_int64_t i;
44 UniChar c;
45 u_int16_t extChars; /* number of extension chars (excluding dot) */
46 u_int16_t maxExtChars;
47 Boolean foundExtension;
48
49 extStr[0] = '\0'; /* assume there's no extension */
50
51 if ( length < 3 )
52 return; /* "x.y" is smallest possible extension */
53
54 if ( length < (kMaxFileExtensionChars + 2) )
55 maxExtChars = length - 2; /* save room for prefix + dot */
56 else
57 maxExtChars = kMaxFileExtensionChars;
58
59 i = length;
60 extChars = 0;
61 foundExtension = false;
62
63 while ( extChars <= maxExtChars ) {
64 c = unicodeStr[--i];
65
66 /* look for leading dot */
67 if ( c == (UniChar) '.' ) {
68 if ( extChars > 0 ) /* cannot end with a dot */
69 foundExtension = true;
70 break;
71 }
72
73 if ( EXTENSIONCHAR(c) )
74 ++extChars;
75 else
76 break;
77 }
78
79 /* if we found one then copy it */
80 if ( foundExtension ) {
81 u_int8_t *extStrPtr = (u_int8_t *)extStr;
82 const UniChar *unicodeStrPtr = &unicodeStr[i];
83
84 for ( i = 0; i <= extChars; ++i )
85 *(extStrPtr++) = (u_int8_t) *(unicodeStrPtr++);
86 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
87 }
88 }
89
90 //
91 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
92 //
93 // IF RESULT
94 // --------------------------
95 // str1 < str2 => -1
96 // str1 = str2 => 0
97 // str1 > str2 => +1
98 //
99 // The lower case table starts with 256 entries (one for each of the upper bytes
100 // of the original Unicode char). If that entry is zero, then all characters with
101 // that upper byte are already case folded. If the entry is non-zero, then it is
102 // the _index_ (not byte offset) of the start of the sub-table for the characters
103 // with that upper byte. All ignorable characters are folded to the value zero.
104 //
105 // In pseudocode:
106 //
107 // Let c = source Unicode character
108 // Let table[] = lower case table
109 //
110 // lower = table[highbyte(c)]
111 // if (lower == 0)
112 // lower = c
113 // else
114 // lower = table[lower+lowbyte(c)]
115 //
116 // if (lower == 0)
117 // ignore this character
118 //
119 // To handle ignorable characters, we now need a loop to find the next valid character.
120 // Also, we can't pre-compute the number of characters to compare; the string length might
121 // be larger than the number of non-ignorable characters. Further, we must be able to handle
122 // ignorable characters at any point in the string, including as the first or last characters.
123 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
124 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
125 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
126 // an invalid Unicode character).
127 //
128 // Pseudocode:
129 //
130 // while (1) {
131 // c1 = GetNextValidChar(str1) // returns zero if at end of string
132 // c2 = GetNextValidChar(str2)
133 //
134 // if (c1 != c2) break // found a difference
135 //
136 // if (c1 == 0) // reached end of string on both strings at once?
137 // return 0; // yes, so strings are equal
138 // }
139 //
140 // // When we get here, c1 != c2. So, we just need to determine which one is less.
141 // if (c1 < c2)
142 // return -1;
143 // else
144 // return 1;
145 //
146
147 int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
148 register ConstUniCharArrayPtr str2, register ItemCount length2)
149 {
150 register u_int16_t c1,c2;
151 register u_int16_t temp;
152 register u_int16_t* lowerCaseTable;
153
154 lowerCaseTable = (u_int16_t*) gLowerCaseTable;
155
156 while (1) {
157 /* Set default values for c1, c2 in case there are no more valid chars */
158 c1 = 0;
159 c2 = 0;
160
161 /* Find next non-ignorable char from str1, or zero if no more */
162 while (length1 && c1 == 0) {
163 c1 = *(str1++);
164 --length1;
165 /* check for basic latin first */
166 if (c1 < 0x0100) {
167 c1 = gLatinCaseFold[c1];
168 break;
169 }
170 /* case fold if neccessary */
171 if ((temp = lowerCaseTable[c1>>8]) != 0)
172 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
173 }
174
175
176 /* Find next non-ignorable char from str2, or zero if no more */
177 while (length2 && c2 == 0) {
178 c2 = *(str2++);
179 --length2;
180 /* check for basic latin first */
181 if (c2 < 0x0100) {
182 c2 = gLatinCaseFold[c2];
183 break;
184 }
185 /* case fold if neccessary */
186 if ((temp = lowerCaseTable[c2>>8]) != 0)
187 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
188 }
189
190 if (c1 != c2) // found a difference, so stop looping
191 break;
192
193 if (c1 == 0) // did we reach the end of both strings at the same time?
194 return 0; // yes, so strings are equal
195 }
196
197 if (c1 < c2)
198 return -1;
199 else
200 return 1;
201 }
202
203
204 /*
205 * UnicodeBinaryCompare
206 * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them.
207 *
208 * Results are emitted like FastUnicodeCompare:
209 *
210 *
211 * IF RESULT
212 * --------------------------
213 * str1 < str2 => -1
214 * str1 = str2 => 0
215 * str1 > str2 => +1
216 *
217 * The case matching source code is greatly simplified due to the lack of case-folding
218 * in this comparison routine. We compare, in order: the lengths, then do character-by-
219 * character comparisons.
220 *
221 */
222 int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount len1,
223 register ConstUniCharArrayPtr str2, register ItemCount len2) {
224 uint16_t c1 =0;
225 uint16_t c2 =0;
226 ItemCount string_length;
227 int32_t result = 0;
228
229 /* First generate the string length (for comparison purposes) */
230 if (len1 < len2) {
231 string_length = len1;
232 --result;
233 }
234 else if (len1 > len2) {
235 string_length = len2;
236 ++result;
237 }
238 else {
239 string_length = len1;
240 }
241
242 /* now compare the two string pointers */
243 while (string_length--) {
244 c1 = *(str1++);
245 c2 = *(str2++);
246
247 if (c1 > c2) {
248 result = 1;
249 break;
250 }
251
252 if (c1 < c2) {
253 result = -1;
254 break;
255 }
256 /* If equal, iterate to the next two respective chars */
257 }
258
259 return result;
260 }
261
262 /*
263 * extract the file id from a mangled name
264 */
265 HFSCatalogNodeID
266 GetEmbeddedFileID(const unsigned char * filename, u_int32_t length, u_int32_t *prefixLength)
267 {
268 short extChars;
269 short i;
270 u_int8_t c;
271
272 *prefixLength = 0;
273
274 if ( filename == NULL )
275 return 0;
276
277 if ( length < 28 )
278 return 0; /* too small to have been mangled */
279
280 /* big enough for a file ID (#10) and an extension (.x) ? */
281 if ( length > 5 )
282 extChars = CountFilenameExtensionChars(filename, length);
283 else
284 extChars = 0;
285
286 /* skip over dot plus extension characters */
287 if ( extChars > 0 )
288 length -= (extChars + 1);
289
290 /* scan for file id digits */
291 for ( i = length - 1; i >= 0; --i) {
292 c = filename[i];
293
294 /* look for file ID marker */
295 if ( c == '#' ) {
296 if ( (length - i) < 3 )
297 break; /* too small to be a file ID */
298
299 *prefixLength = i;
300 return HexStringToInteger(length - i - 1, &filename[i+1]);
301 }
302
303 if ( !IsHexDigit(c) )
304 break; /* file ID string must have hex digits */
305 }
306
307 return 0;
308 }
309
310 /*
311 * Count filename extension characters (if any)
312 */
313 u_int32_t
314 CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length )
315 {
316 UniChar c;
317 u_int16_t maxExtChars;
318
319 if ( length < 3 )
320 return 0; /* "x.y" is smallest possible extension */
321
322 if ( length < (kMaxFileExtensionChars + 2) )
323 maxExtChars = length - 2; /* save room for prefix + dot */
324 else
325 maxExtChars = kMaxFileExtensionChars;
326
327 u_int32_t extChars = 0; /* number of extension chars (excluding dot) - assume there's no extension */
328 u_int32_t i = length - 1; /* index to last ascii character */
329
330 while ( extChars <= maxExtChars ) {
331 c = filename[i--];
332
333 /* look for leading dot */
334 if ( c == (u_int8_t) '.' ) {
335 if ( extChars > 0 ) /* cannot end with a dot */
336 return (extChars);
337
338 break;
339 }
340
341 if ( EXTENSIONCHAR(c) )
342 ++extChars;
343 else
344 break;
345 }
346
347 return 0;
348 }
349
/*
 * Convert length bytes of upper-case hexadecimal text into an integer.
 *
 *   length  number of bytes to read from hexStr
 *   hexStr  the digits; no terminator is required
 *
 * Returns the parsed value, or 0 as soon as a byte outside '0'..'9' and
 * 'A'..'F' is met (lower-case letters included).  Only the low 32 bits are
 * kept when more than eight digits are supplied.
 */
static u_int32_t
HexStringToInteger(u_int32_t length, const u_int8_t *hexStr)
{
    u_int32_t result = 0;
    u_int32_t nibble;
    u_int32_t idx;

    for ( idx = 0; idx < length; ++idx ) {
        const u_int8_t digit = hexStr[idx];

        if ( digit >= '0' && digit <= '9' )
            nibble = (u_int32_t) digit - (u_int32_t) '0';
        else if ( digit >= 'A' && digit <= 'F' )
            nibble = 10 + ((u_int32_t) digit - (u_int32_t) 'A');
        else
            return 0;   /* bad character */

        result = (result << 4) + nibble;
    }

    return result;
}
377
378 OSErr
379 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
380 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
381 {
382 ByteCount subMaxLen;
383 size_t utf8len;
384 char fileIDStr[15];
385 char extStr[15];
386
387 snprintf(fileIDStr, sizeof(fileIDStr), "#%X", cnid);
388 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
389
390 /* remove extension chars from source */
391 srcLen -= strlen(extStr) * sizeof(UniChar);
392 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
393
394 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', UTF_ADD_NULL_TERM);
395
396 strlcat((char *)dstStr, fileIDStr, maxDstLen);
397 strlcat((char *)dstStr, extStr, maxDstLen);
398 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
399
400 return noErr;
401 }