]> git.saurik.com Git - apple/hfs.git/blame - livefiles_hfs_plugin/lf_hfs_unicode_wrappers.c
hfs-522.100.5.tar.gz
[apple/hfs.git] / livefiles_hfs_plugin / lf_hfs_unicode_wrappers.c
CommitLineData
de8ee011
A
1//
2// lf_hfs_unicode_wrappers.c
3// livefiles_hfs
4//
5// Created by Yakov Ben Zaken on 22/03/2018.
6//
7
#include "lf_hfs_unicode_wrappers.h"
#include "lf_hfs_ucs_string_cmp_data.h"
#include "lf_hfs_sbunicode.h"

#include <errno.h>
11
12
13
/*
 * Bounds on the number of characters in a filename extension.
 * Neither value counts the leading dot.
 */
enum {
    kMinFileExtensionChars = 1,
    kMaxFileExtensionChars = 5
};
18
19
/*
 * True when c is a character allowed inside a filename extension:
 * an ASCII digit (0x30-0x39), an ASCII uppercase letter (0x41-0x5A)
 * or an ASCII lowercase letter (0x61-0x7A).
 *
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define EXTENSIONCHAR(c)    (((c) >= 0x30 && (c) <= 0x39) || \
                             ((c) >= 0x41 && (c) <= 0x5A) || \
                             ((c) >= 0x61 && (c) <= 0x7A))
23
24
/*
 * True when c is an uppercase hexadecimal digit: '0'-'9' or 'A'-'F'.
 * Lowercase 'a'-'f' is deliberately NOT accepted.
 *
 * The argument is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define IsHexDigit(c)       (((c) >= (u_int8_t) 'A' && (c) <= (u_int8_t) 'F') || \
                             ((c) >= (u_int8_t) '0' && (c) <= (u_int8_t) '9'))
27
28
29static void
30GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
31
32
33static u_int32_t
34HexStringToInteger( u_int32_t length, const u_int8_t *hexStr );
35
36
37/*
38 * Get filename extension (if any) as a C string
39 */
40static void
41GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
42{
43 u_int64_t i;
44 UniChar c;
45 u_int16_t extChars; /* number of extension chars (excluding dot) */
46 u_int16_t maxExtChars;
47 Boolean foundExtension;
48
49 extStr[0] = '\0'; /* assume there's no extension */
50
51 if ( length < 3 )
52 return; /* "x.y" is smallest possible extension */
53
54 if ( length < (kMaxFileExtensionChars + 2) )
55 maxExtChars = length - 2; /* save room for prefix + dot */
56 else
57 maxExtChars = kMaxFileExtensionChars;
58
59 i = length;
60 extChars = 0;
61 foundExtension = false;
62
63 while ( extChars <= maxExtChars ) {
64 c = unicodeStr[--i];
65
66 /* look for leading dot */
67 if ( c == (UniChar) '.' ) {
68 if ( extChars > 0 ) /* cannot end with a dot */
69 foundExtension = true;
70 break;
71 }
72
73 if ( EXTENSIONCHAR(c) )
74 ++extChars;
75 else
76 break;
77 }
78
79 /* if we found one then copy it */
80 if ( foundExtension ) {
81 u_int8_t *extStrPtr = (u_int8_t *)extStr;
82 const UniChar *unicodeStrPtr = &unicodeStr[i];
83
84 for ( i = 0; i <= extChars; ++i )
85 *(extStrPtr++) = (u_int8_t) *(unicodeStrPtr++);
86 extStr[extChars + 1] = '\0'; /* terminate extension + dot */
87 }
88}
89
90//
91// FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
92//
93// IF RESULT
94// --------------------------
95// str1 < str2 => -1
96// str1 = str2 => 0
97// str1 > str2 => +1
98//
99// The lower case table starts with 256 entries (one for each of the upper bytes
100// of the original Unicode char). If that entry is zero, then all characters with
101// that upper byte are already case folded. If the entry is non-zero, then it is
102// the _index_ (not byte offset) of the start of the sub-table for the characters
103// with that upper byte. All ignorable characters are folded to the value zero.
104//
105// In pseudocode:
106//
107// Let c = source Unicode character
108// Let table[] = lower case table
109//
110// lower = table[highbyte(c)]
111// if (lower == 0)
112// lower = c
113// else
114// lower = table[lower+lowbyte(c)]
115//
116// if (lower == 0)
117// ignore this character
118//
119// To handle ignorable characters, we now need a loop to find the next valid character.
120// Also, we can't pre-compute the number of characters to compare; the string length might
121// be larger than the number of non-ignorable characters. Further, we must be able to handle
122// ignorable characters at any point in the string, including as the first or last characters.
123// We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
124// Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
125// the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
126// an invalid Unicode character).
127//
128// Pseudocode:
129//
130// while (1) {
131// c1 = GetNextValidChar(str1) // returns zero if at end of string
132// c2 = GetNextValidChar(str2)
133//
134// if (c1 != c2) break // found a difference
135//
136// if (c1 == 0) // reached end of string on both strings at once?
137// return 0; // yes, so strings are equal
138// }
139//
140// // When we get here, c1 != c2. So, we just need to determine which one is less.
141// if (c1 < c2)
142// return -1;
143// else
144// return 1;
145//
146
147int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
148 register ConstUniCharArrayPtr str2, register ItemCount length2)
149{
150 register u_int16_t c1,c2;
151 register u_int16_t temp;
152 register u_int16_t* lowerCaseTable;
153
154 lowerCaseTable = (u_int16_t*) gLowerCaseTable;
155
156 while (1) {
157 /* Set default values for c1, c2 in case there are no more valid chars */
158 c1 = 0;
159 c2 = 0;
160
161 /* Find next non-ignorable char from str1, or zero if no more */
162 while (length1 && c1 == 0) {
163 c1 = *(str1++);
164 --length1;
165 /* check for basic latin first */
166 if (c1 < 0x0100) {
167 c1 = gLatinCaseFold[c1];
168 break;
169 }
170 /* case fold if neccessary */
171 if ((temp = lowerCaseTable[c1>>8]) != 0)
172 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
173 }
174
175
176 /* Find next non-ignorable char from str2, or zero if no more */
177 while (length2 && c2 == 0) {
178 c2 = *(str2++);
179 --length2;
180 /* check for basic latin first */
181 if (c2 < 0x0100) {
182 c2 = gLatinCaseFold[c2];
183 break;
184 }
185 /* case fold if neccessary */
186 if ((temp = lowerCaseTable[c2>>8]) != 0)
187 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
188 }
189
190 if (c1 != c2) // found a difference, so stop looping
191 break;
192
193 if (c1 == 0) // did we reach the end of both strings at the same time?
194 return 0; // yes, so strings are equal
195 }
196
197 if (c1 < c2)
198 return -1;
199 else
200 return 1;
201}
202
203
204/*
205 * UnicodeBinaryCompare
206 * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them.
207 *
208 * Results are emitted like FastUnicodeCompare:
209 *
210 *
211 * IF RESULT
212 * --------------------------
213 * str1 < str2 => -1
214 * str1 = str2 => 0
215 * str1 > str2 => +1
216 *
217 * The case matching source code is greatly simplified due to the lack of case-folding
218 * in this comparison routine. We compare, in order: the lengths, then do character-by-
219 * character comparisons.
220 *
221 */
222int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount len1,
223 register ConstUniCharArrayPtr str2, register ItemCount len2) {
224 uint16_t c1 =0;
225 uint16_t c2 =0;
226 ItemCount string_length;
227 int32_t result = 0;
228
229 /* First generate the string length (for comparison purposes) */
230 if (len1 < len2) {
231 string_length = len1;
232 --result;
233 }
234 else if (len1 > len2) {
235 string_length = len2;
236 ++result;
237 }
238 else {
239 string_length = len1;
240 }
241
242 /* now compare the two string pointers */
243 while (string_length--) {
244 c1 = *(str1++);
245 c2 = *(str2++);
246
247 if (c1 > c2) {
248 result = 1;
249 break;
250 }
251
252 if (c1 < c2) {
253 result = -1;
254 break;
255 }
256 /* If equal, iterate to the next two respective chars */
257 }
258
259 return result;
260}
261
262/*
263 * extract the file id from a mangled name
264 */
265HFSCatalogNodeID
266GetEmbeddedFileID(const unsigned char * filename, u_int32_t length, u_int32_t *prefixLength)
267{
268 short extChars;
269 short i;
270 u_int8_t c;
271
272 *prefixLength = 0;
273
274 if ( filename == NULL )
275 return 0;
276
277 if ( length < 28 )
278 return 0; /* too small to have been mangled */
279
280 /* big enough for a file ID (#10) and an extension (.x) ? */
281 if ( length > 5 )
282 extChars = CountFilenameExtensionChars(filename, length);
283 else
284 extChars = 0;
285
286 /* skip over dot plus extension characters */
287 if ( extChars > 0 )
288 length -= (extChars + 1);
289
290 /* scan for file id digits */
291 for ( i = length - 1; i >= 0; --i) {
292 c = filename[i];
293
294 /* look for file ID marker */
295 if ( c == '#' ) {
296 if ( (length - i) < 3 )
297 break; /* too small to be a file ID */
298
299 *prefixLength = i;
300 return HexStringToInteger(length - i - 1, &filename[i+1]);
301 }
302
303 if ( !IsHexDigit(c) )
304 break; /* file ID string must have hex digits */
305 }
306
307 return 0;
308}
309
310/*
311 * Count filename extension characters (if any)
312 */
313u_int32_t
314CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length )
315{
316 UniChar c;
317 u_int16_t maxExtChars;
318
319 if ( length < 3 )
320 return 0; /* "x.y" is smallest possible extension */
321
322 if ( length < (kMaxFileExtensionChars + 2) )
323 maxExtChars = length - 2; /* save room for prefix + dot */
324 else
325 maxExtChars = kMaxFileExtensionChars;
326
327 u_int32_t extChars = 0; /* number of extension chars (excluding dot) - assume there's no extension */
328 u_int32_t i = length - 1; /* index to last ascii character */
329
330 while ( extChars <= maxExtChars ) {
331 c = filename[i--];
332
333 /* look for leading dot */
334 if ( c == (u_int8_t) '.' ) {
335 if ( extChars > 0 ) /* cannot end with a dot */
336 return (extChars);
337
338 break;
339 }
340
341 if ( EXTENSIONCHAR(c) )
342 ++extChars;
343 else
344 break;
345 }
346
347 return 0;
348}
349
/*
 * HexStringToInteger
 *
 * Convert `length` uppercase hexadecimal digits starting at hexStr into an
 * unsigned 32-bit value. The input need not be NUL-terminated.
 *
 * Returns 0 as soon as a character other than '0'-'9' or 'A'-'F' is met
 * (lowercase digits are rejected), and also for an empty input. With more
 * than eight digits the most significant ones are shifted out.
 */
static u_int32_t
HexStringToInteger(u_int32_t length, const u_int8_t *hexStr)
{
    const u_int8_t *end = hexStr + length;
    u_int32_t       result = 0;

    while ( hexStr != end ) {
        u_int8_t    ch = *hexStr++;
        u_int32_t   digit;

        if ( ch >= '0' && ch <= '9' )
            digit = (u_int32_t) ch - (u_int32_t) '0';
        else if ( ch >= 'A' && ch <= 'F' )
            digit = 10 + ((u_int32_t) ch - (u_int32_t) 'A');
        else
            return 0;   /* bad character */

        result = (result << 4) + digit;
    }

    return result;
}
377
378OSErr
379ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
380 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
381{
382 ByteCount subMaxLen;
383 size_t utf8len;
384 char fileIDStr[15];
385 char extStr[15];
386
387 snprintf(fileIDStr, sizeof(fileIDStr), "#%X", cnid);
388 GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
389
390 /* remove extension chars from source */
391 srcLen -= strlen(extStr) * sizeof(UniChar);
392 subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
393
394 (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', UTF_ADD_NULL_TERM);
395
396 strlcat((char *)dstStr, fileIDStr, maxDstLen);
397 strlcat((char *)dstStr, extStr, maxDstLen);
398 *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
399
400 return noErr;
401}