X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/378393581903b274cb7a4d18e0d978071a6b592d..13fec9890cf095cc781fdf7b8917cb03bf32dd4c:/bsd/vfs/vfs_utfconv.c diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c index 01a49889e..11af9238a 100644 --- a/bsd/vfs/vfs_utfconv.c +++ b/bsd/vfs/vfs_utfconv.c @@ -117,10 +117,31 @@ unicode_decomposeable(u_int16_t character) { return (0); } + +/* + * Get the combing class. + * + * Similar to CFUniCharGetCombiningPropertyForCharacter. + */ +static inline u_int8_t +get_combining_class(u_int16_t character) { + const u_int8_t *bitmap = __CFUniCharCombiningPropertyBitmap; + + u_int8_t value = bitmap[(character >> 8)]; + + if (value) { + bitmap = bitmap + (value * 256); + return bitmap[character % 256]; + } + return (0); +} + + static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars); static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining); +static void priortysort(u_int16_t* characters, int count); char utf_extrabytes[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -316,6 +337,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, u_int16_t* bufend; unsigned int ucs_ch; unsigned int byte; + int combcharcnt = 0; int result = 0; int decompose, precompose, swapbytes; @@ -416,6 +438,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, if (ucsp >= bufend) goto toolong; } + combcharcnt += count - 1; continue; } } else if (precompose && (ucsp != bufstart)) { @@ -436,8 +459,25 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, if (ucs_ch == altslash) ucs_ch = '/'; + /* + * Make multiple combining character sequences canonical + */ + if (unicode_combinable(ucs_ch)) { + ++combcharcnt; /* start tracking a run */ + } else if (combcharcnt) { + if (combcharcnt > 1) { + priortysort(ucsp - combcharcnt, combcharcnt); + } + combcharcnt = 0; /* start over */ + } *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch; } + /* + * Make a previous combining sequence canonical + */ + if (combcharcnt > 1) { + priortysort(ucsp - combcharcnt, combcharcnt); + } exit: *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart; @@ -729,3 +769,39 @@ unicode_combine(u_int16_t base, u_int16_t combining) return (value); } + +/* + * priortysort - order combining chars into canonical order + * + * Similar to CFUniCharPrioritySort + */ +static void +priortysort(u_int16_t* characters, int count) +{ + u_int32_t p1, p2; + u_int16_t *ch1, *ch2; + u_int16_t *end; + int changes = 1; + + end = characters + count; + do { + changes = 0; + ch1 = characters; + ch2 = characters + 1; + p2 = get_combining_class(*ch1); + while (ch2 < end) { + p1 = p2; + p2 = get_combining_class(*ch2); + if (p1 > p2) { + u_int32_t tmp; + + tmp = *ch1; + *ch1 = *ch2; + *ch2 = tmp; + changes = 1; + } + ++ch1; + ++ch2; + } + } while (changes); +}