]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/vfs/vfs_utfconv.c
xnu-792.10.96.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_utfconv.c
index 01a49889e3b5aab73923cac07eee2c2c836daaac..97b08226f0212d7564446fd9610b7d3d7258c9aa 100644 (file)
@@ -27,7 +27,7 @@
 #include <sys/param.h>
 #include <sys/utfconv.h>
 #include <sys/errno.h>
-#include <architecture/byte_order.h>
+#include <libkern/OSByteOrder.h>
 
 /*
  * UTF-8 (Unicode Transformation Format)
@@ -117,10 +117,31 @@ unicode_decomposeable(u_int16_t character) {
        return (0);
 }
 
+
+/*
+ * Get the combing class.
+ *
+ * Similar to CFUniCharGetCombiningPropertyForCharacter.
+ */
+static inline u_int8_t
+get_combining_class(u_int16_t character) {
+       const u_int8_t *bitmap = __CFUniCharCombiningPropertyBitmap;
+
+       u_int8_t value = bitmap[(character >> 8)];
+
+       if (value) {
+               bitmap = bitmap + (value * 256);
+               return bitmap[character % 256];
+       }
+       return (0);
+}
+
+
 static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
 
 static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
 
+static void priortysort(u_int16_t* characters, int count);
 
 char utf_extrabytes[32] = {
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -154,7 +175,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
                ucs_ch = *ucsp++;
 
                if (swapbytes)
-                       ucs_ch = NXSwapShort(ucs_ch);
+                       ucs_ch = OSSwapInt16(ucs_ch);
                if (ucs_ch == '/')
                        ucs_ch = altslash ? altslash : '_';
                else if (ucs_ch == '\0')
@@ -211,7 +232,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
                        --extra;
                        ucs_ch = *chp++;
                } else {
-                       ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
+                       ucs_ch = swapbytes ? OSSwapInt16(*ucsp++) : *ucsp++;
 
                        if (decompose && unicode_decomposeable(ucs_ch)) {
                                extra = unicode_decompose(ucs_ch, sequence) - 1;
@@ -255,7 +276,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
                                u_int16_t ch2;
                                u_int32_t pair;
 
-                               ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
+                               ch2 = swapbytes ? OSSwapInt16(*ucsp) : *ucsp;
                                if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
                                        pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
                                                + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
@@ -316,6 +337,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
        u_int16_t* bufend;
        unsigned int ucs_ch;
        unsigned int byte;
+       int combcharcnt = 0;
        int result = 0;
        int decompose, precompose, swapbytes;
 
@@ -392,13 +414,13 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
                                ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
                                if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
                                        goto invalid;
-                               *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+                               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
                                if (ucsp >= bufend)
                                        goto toolong;
                                ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
                                if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
                                        goto invalid;
-                               *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+                               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
                                continue;
                        default:
                                goto invalid;
@@ -412,17 +434,18 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
 
                                        for (i = 0; i < count; ++i) {
                                                ucs_ch = sequence[i];
-                                               *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+                                               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
                                                if (ucsp >= bufend)
                                                        goto toolong;
                                        }
+                                       combcharcnt += count - 1;
                                        continue;                       
                                }
                        } else if (precompose && (ucsp != bufstart)) {
                                u_int16_t composite, base;
 
                                if (unicode_combinable(ucs_ch)) {
-                                       base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
+                                       base = swapbytes ? OSSwapInt16(*(ucsp - 1)) : *(ucsp - 1);
                                        composite = unicode_combine(base, ucs_ch);
                                        if (composite) {
                                                --ucsp;
@@ -436,7 +459,24 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
                if (ucs_ch == altslash)
                        ucs_ch = '/';
 
-               *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+               /*
+                * Make multiple combining character sequences canonical
+                */
+               if (unicode_combinable(ucs_ch)) {
+                       ++combcharcnt;   /* start tracking a run */
+               } else if (combcharcnt) {
+                       if (combcharcnt > 1) {
+                               priortysort(ucsp - combcharcnt, combcharcnt);
+                       }
+                       combcharcnt = 0;  /* start over */
+               }
+               *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
+       }
+       /*
+        * Make a previous combining sequence canonical
+        */
+       if (combcharcnt > 1) {
+               priortysort(ucsp - combcharcnt, combcharcnt);
        }
 
 exit:
@@ -729,3 +769,39 @@ unicode_combine(u_int16_t base, u_int16_t combining)
        return (value);
 }
 
+
+/*
+ * priortysort - order combining chars into canonical order
+ *
+ * Similar to CFUniCharPrioritySort
+ */
+static void
+priortysort(u_int16_t* characters, int count)
+{
+       u_int32_t p1, p2;
+       u_int16_t *ch1, *ch2;
+       u_int16_t *end;
+       int changes = 1;
+
+       end = characters + count;
+       do {
+               changes = 0;
+               ch1 = characters;
+               ch2 = characters + 1;
+               p2 = get_combining_class(*ch1);
+               while (ch2 < end) {
+                       p1 = p2;
+                       p2 = get_combining_class(*ch2);
+                       if (p1 > p2) {
+                               u_int32_t tmp;
+
+                               tmp = *ch1;
+                               *ch1 = *ch2;
+                               *ch2 = tmp;
+                               changes = 1;
+                       }
+                       ++ch1;
+                       ++ch2;
+               }
+       } while (changes);
+}