]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/hfs/hfs_encodinghint.c
xnu-792.25.20.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_encodinghint.c
index 776f2b361d9c52d0811697dfe0561a86b208461d..02a1fce32385ced5efd3c45963cd7f479dabc658 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2001-2003 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -22,6 +22,7 @@
 
 #include <sys/param.h>
 #include <hfs/hfs_macos_defs.h>
+#include <hfs/hfs.h>
 
 
 /* CJK Mac Encoding Bits */
 #define CJK_CHINESE_SIMP       0x8
 #define CJK_ALL                    0xF
 
-#define CJK_CHINESE        (CJK_CHINESE_TRAD | CJK_CHINESE_SIMP)
-#define CJK_KATAKANA   (CJK_JAPAN | CJK_CHINESE_SIMP | CJK_KOREAN)
+#define CJK_CHINESE    (CJK_CHINESE_TRAD | CJK_CHINESE_SIMP)
+#define CJK_KATAKANA   (CJK_JAPAN)
 
 
-/* Remeber the las unique CJK bit */
+/* Remember the last unique CJK bit */
 u_int8_t cjk_lastunique = 0;
 
+/* Encoding bias: hfs_encodingbias is the value stored by hfs_setencodingbias(); hfs_islatinbias is the 0/1 flag that function derives from it */
+u_int32_t hfs_encodingbias = 0;
+int hfs_islatinbias = 0;
+/* Declared extern here (definition is not part of this diff); hfs_setencodingbias() holds it while writing the two globals above */
+extern lck_mtx_t  encodinglst_mutex;
+
 
 /* Map CJK bits to Mac encoding */
 u_int8_t cjk_encoding[] = {
@@ -787,6 +794,14 @@ hfs_pickencoding(const u_int16_t *src, int len)
                                cjkstate = CJK_ALL;
                        continue;
                }
+               if (hfs_islatinbias && ch >= 0x0300 && ch <= 0x0329) {
+                       guess = hfs_encodingbias;
+                       continue;
+               }
+               if (ch <= 0x03CE && ch >= 0x0384) {
+                       guess = kTextEncodingMacGreek;
+                       continue;
+               }
                if (ch <= 0x0491 && ch >= 0x0401) {
                        guess = kTextEncodingMacCyrillic;
                        continue;
@@ -800,6 +815,35 @@ hfs_pickencoding(const u_int16_t *src, int len)
                if (ch >= 0x0E00 && ch <= 0x0E5B) {
                        return kTextEncodingMacThai;
                }
+               /* Catch a few Shift-JIS strays */
+               if (guess == 0 || guess == kTextEncodingMacUnicode) {
+                       if (ch == 0x2010 || ch == 0x2014 || ch == 0x2015 || ch == 0x2016) {
+                               guess = kTextEncodingMacJapanese;
+                               if ((cjkstate == 0) || (cjkstate & CJK_JAPAN))
+                                       cjkstate = CJK_JAPAN;
+                               else
+                                       cjkstate |= CJK_JAPAN;
+                               continue;
+                       }
+                       if ((hfs_encodingbias == kTextEncodingMacJapanese) &&
+                           (ch == 0x00A2 || ch == 0x00A3 || ch == 0x00AC)) {
+                               guess = kTextEncodingMacJapanese;
+                               continue;
+                       }
+                       /* TM char depends on the Mac encoding used. */
+                       if (ch == 0x2122) {
+                               switch(hfs_encodingbias) {
+                               case kTextEncodingMacJapanese:
+                               case kTextEncodingMacChineseTrad:
+                               case kTextEncodingMacKorean:
+                               case kTextEncodingMacGreek:
+                               case kTextEncodingMacThai:
+                               case kTextEncodingMacChineseSimp:
+                                       guess = hfs_encodingbias;
+                                       break;
+                               }
+                       }
+               }
                if (guess == 0 && ch > 0x2122) {
                        guess = kTextEncodingMacUnicode;
                }
@@ -808,7 +852,33 @@ hfs_pickencoding(const u_int16_t *src, int len)
        if (cjkstate) {
                if (powerof2(cjkstate)) {
                        cjk_lastunique = cjkstate;
-               } else if (cjk_lastunique) {
+                       return ((u_int32_t)cjk_encoding[cjkstate]);
+               } 
+               if (hfs_encodingbias != 0) {
+                       switch(hfs_encodingbias) {
+                       case kTextEncodingMacJapanese:
+                               if (cjkstate & CJK_JAPAN)
+                                       return (kTextEncodingMacJapanese);
+                               break;
+                       case kTextEncodingMacKorean:
+                               if (cjkstate & CJK_KOREAN)
+                                       return (kTextEncodingMacKorean);
+                               break;
+                       case kTextEncodingMacChineseTrad:
+                               if (cjkstate & CJK_CHINESE_TRAD)
+                                       return (kTextEncodingMacChineseTrad);
+                               if (cjkstate & CJK_CHINESE_SIMP)
+                                       return (kTextEncodingMacChineseSimp);
+                               break;
+                       case kTextEncodingMacChineseSimp:
+                               if (cjkstate & CJK_CHINESE_SIMP)
+                                       return (kTextEncodingMacChineseSimp);
+                               if (cjkstate & CJK_CHINESE_TRAD)
+                                       return (kTextEncodingMacChineseTrad);
+                               break;
+                       }
+               }
+               if (cjk_lastunique) {
                        if (cjkstate & cjk_lastunique)
                                cjkstate = cjk_lastunique;
                        else
@@ -821,3 +891,36 @@ hfs_pickencoding(const u_int16_t *src, int len)
 }
 
 
+__private_extern__
+u_int32_t
+hfs_getencodingbias(void)  /* Accessor: returns the current value of the global hfs_encodingbias. */
+{
+       return (hfs_encodingbias);  /* plain read; encodinglst_mutex is not taken in this function */
+}
+
+
+__private_extern__
+void
+hfs_setencodingbias(u_int32_t bias)  /* Store `bias` in hfs_encodingbias and recompute hfs_islatinbias from it. */
+{
+       lck_mtx_lock(&encodinglst_mutex);
+       /* Both globals below are written while encodinglst_mutex is held. */
+       hfs_encodingbias = bias;
+       /* hfs_islatinbias becomes 1 only for the six encodings listed; any other bias clears it. */
+       switch (bias) {
+       case kTextEncodingMacRoman:
+       case kTextEncodingMacCentralEurRoman:
+       case kTextEncodingMacTurkish:
+       case kTextEncodingMacCroatian:
+       case kTextEncodingMacIcelandic:
+       case kTextEncodingMacRomanian:
+               hfs_islatinbias = 1;
+               break;
+       default:
+               hfs_islatinbias = 0;
+               break;                                  
+       }
+
+       lck_mtx_unlock(&encodinglst_mutex);
+}
+