]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/uscript_props.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / common / uscript_props.cpp
diff --git a/icuSources/common/uscript_props.cpp b/icuSources/common/uscript_props.cpp
new file mode 100644 (file)
index 0000000..e0d1ecc
--- /dev/null
@@ -0,0 +1,267 @@
+/*
+*******************************************************************************
+*   Copyright (C) 2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  uscript_props.cpp
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2013feb16
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/uscript.h"
+#include "unicode/utf16.h"
+#include "ustr_imp.h"
+
+#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
+
+namespace {
+
+// Script metadata (script properties), packed into one int32_t word per script code.
+// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
+
+// 0 = NOT_ENCODED: no sample character, usage 0, and every single-bit flag false.
+// Bits 20.. 0: sample character (a code point; read back with the mask 0x1fffff)
+
+// Bits 23..21: usage (read back as (props >> 21) & 7, then cast to UScriptUsage)
+const int32_t UNKNOWN = 1 << 21;
+const int32_t EXCLUSION = 2 << 21;
+const int32_t LIMITED_USE = 3 << 21;
+const int32_t ASPIRATIONAL = 4 << 21;
+const int32_t RECOMMENDED = 5 << 21;
+
+// Bits 31..24: Single-bit flags (only bits 24, 25 and 26 are assigned below)
+const int32_t RTL = 1 << 24;  // tested by uscript_isRightToLeft()
+const int32_t LB_LETTERS = 1 << 25;  // tested by uscript_breaksBetweenLetters()
+const int32_t CASED = 1 << 26;  // tested by uscript_isCased()
+
+const int32_t SCRIPT_PROPS[] = {  // one packed property word per script; getScriptProps() indexes it by UScriptCode value
+    // Begin copy-paste output from
+    // tools/trunk/unicode/py/parsescriptmetadata.py
+    0x0040 | UNKNOWN,  // Zyyy
+    0x0308 | UNKNOWN,  // Zinh
+    0x0628 | RECOMMENDED | RTL,  // Arab
+    0x0531 | RECOMMENDED | CASED,  // Armn
+    0x0995 | RECOMMENDED,  // Beng
+    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
+    0x13C4 | LIMITED_USE,  // Cher
+    0x03E2 | EXCLUSION | CASED,  // Copt
+    0x042F | RECOMMENDED | CASED,  // Cyrl
+    0x10414 | EXCLUSION | CASED,  // Dsrt
+    0x0905 | RECOMMENDED,  // Deva
+    0x12A0 | RECOMMENDED,  // Ethi
+    0x10D3 | RECOMMENDED,  // Geor
+    0x10330 | EXCLUSION,  // Goth
+    0x03A9 | RECOMMENDED | CASED,  // Grek
+    0x0A95 | RECOMMENDED,  // Gujr
+    0x0A15 | RECOMMENDED,  // Guru
+    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
+    0xAC00 | RECOMMENDED,  // Hang
+    0x05D0 | RECOMMENDED | RTL,  // Hebr
+    0x304B | RECOMMENDED | LB_LETTERS,  // Hira
+    0x0C95 | RECOMMENDED,  // Knda
+    0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
+    0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
+    0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
+    0x004C | RECOMMENDED | CASED,  // Latn
+    0x0D15 | RECOMMENDED,  // Mlym
+    0x1826 | ASPIRATIONAL,  // Mong
+    0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
+    0x168F | EXCLUSION,  // Ogam
+    0x10300 | EXCLUSION,  // Ital
+    0x0B15 | RECOMMENDED,  // Orya
+    0x16A0 | EXCLUSION,  // Runr
+    0x0D85 | RECOMMENDED,  // Sinh
+    0x0710 | LIMITED_USE | RTL,  // Syrc
+    0x0B95 | RECOMMENDED,  // Taml
+    0x0C15 | RECOMMENDED,  // Telu
+    0x078C | RECOMMENDED | RTL,  // Thaa
+    0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
+    0x0F40 | RECOMMENDED,  // Tibt
+    0x14C0 | ASPIRATIONAL,  // Cans
+    0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
+    0x1703 | EXCLUSION,  // Tglg
+    0x1723 | EXCLUSION,  // Hano
+    0x1743 | EXCLUSION,  // Buhd
+    0x1763 | EXCLUSION,  // Tagb
+    0x2800 | UNKNOWN,  // Brai
+    0x10800 | EXCLUSION | RTL,  // Cprt
+    0x1900 | LIMITED_USE,  // Limb
+    0x10000 | EXCLUSION,  // Linb
+    0x10480 | EXCLUSION,  // Osma
+    0x10450 | EXCLUSION,  // Shaw
+    0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
+    0x10380 | EXCLUSION,  // Ugar
+    0,  // bare 0 entries keep later scripts at their index; 0 is the NOT_ENCODED word (no sample, no flags)
+    0x1A00 | EXCLUSION,  // Bugi
+    0x2C00 | EXCLUSION | CASED,  // Glag
+    0x10A00 | EXCLUSION | RTL,  // Khar
+    0xA800 | LIMITED_USE,  // Sylo
+    0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
+    0x2D30 | ASPIRATIONAL,  // Tfng
+    0x103A0 | EXCLUSION,  // Xpeo
+    0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
+    0x1BC0 | LIMITED_USE,  // Batk
+    0,
+    0x11005 | EXCLUSION,  // Brah
+    0xAA00 | LIMITED_USE,  // Cham
+    0,
+    0,
+    0,
+    0,
+    0x13153 | EXCLUSION,  // Egyp
+    0,
+    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
+    0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
+    0,
+    0,
+    0,
+    0xA984 | LIMITED_USE | LB_LETTERS,  // Java
+    0xA90A | LIMITED_USE,  // Kali
+    0,
+    0,
+    0x1C00 | LIMITED_USE,  // Lepc
+    0,
+    0x0840 | LIMITED_USE | RTL,  // Mand
+    0,
+    0x10980 | EXCLUSION | RTL,  // Mero
+    0x07CA | LIMITED_USE | RTL,  // Nkoo
+    0x10C00 | EXCLUSION | RTL,  // Orkh
+    0,
+    0xA840 | EXCLUSION,  // Phag
+    0x10900 | EXCLUSION | RTL,  // Phnx
+    0x16F00 | ASPIRATIONAL,  // Plrd
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0xA549 | LIMITED_USE,  // Vaii
+    0,
+    0x12000 | EXCLUSION,  // Xsux
+    0,
+    0xFDD0 | UNKNOWN,  // Zzzz
+    0x102A0 | EXCLUSION,  // Cari
+    0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
+    0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
+    0x10280 | EXCLUSION,  // Lyci
+    0x10920 | EXCLUSION | RTL,  // Lydi
+    0x1C5A | LIMITED_USE,  // Olck
+    0xA930 | EXCLUSION,  // Rjng
+    0xA882 | LIMITED_USE,  // Saur
+    0,
+    0x1B83 | LIMITED_USE,  // Sund
+    0,
+    0xABC0 | LIMITED_USE,  // Mtei
+    0x10840 | EXCLUSION | RTL,  // Armi
+    0x10B00 | EXCLUSION | RTL,  // Avst
+    0x11103 | LIMITED_USE,  // Cakm
+    0xAC00 | RECOMMENDED,  // Kore
+    0x11083 | EXCLUSION,  // Kthi
+    0,
+    0x10B60 | EXCLUSION | RTL,  // Phli
+    0,
+    0,
+    0x10B40 | EXCLUSION | RTL,  // Prti
+    0x0800 | EXCLUSION | RTL,  // Samr
+    0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
+    0,
+    0,
+    0xA6A0 | LIMITED_USE,  // Bamu
+    0xA4D0 | LIMITED_USE,  // Lisu
+    0,
+    0x10A60 | EXCLUSION | RTL,  // Sarb
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0x109A0 | EXCLUSION | RTL,  // Merc
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0x11183 | EXCLUSION,  // Shrd
+    0x110D0 | EXCLUSION,  // Sora
+    0x11680 | EXCLUSION,  // Takr
+    0,
+    0,
+    0,
+    0,
+    0,
+    // End copy-paste from parsescriptmetadata.py
+};  // script codes at or beyond the end of this table are answered with 0 by getScriptProps()
+
+// Fetches the packed property word for a script code.
+// A code that does not index into SCRIPT_PROPS (negative, or at/after the
+// end of the table) yields 0, the same word the table uses for NOT_ENCODED.
+int32_t getScriptProps(UScriptCode script) {
+    if (script < 0 || script >= LENGTHOF(SCRIPT_PROPS)) {
+        return 0;
+    }
+    return SCRIPT_PROPS[script];
+}
+
+}  // namespace
+
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) { return 0; }
+    if(capacity < 0 || (capacity > 0 && dest == NULL)) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+    int32_t sampleChar = getScriptProps(script) & 0x1fffff;
+    int32_t length;
+    if(sampleChar == 0) {
+        length = 0;
+    } else {
+        length = U16_LENGTH(sampleChar);
+        if(length <= capacity) {
+            int32_t i = 0;
+            U16_APPEND_UNSAFE(dest, i, sampleChar);
+        }
+    }
+    return u_terminateUChars(dest, capacity, length, pErrorCode);
+}
+
+// Returns the sample character of a script as a UnicodeString.
+// The string is left empty when the property word carries no sample
+// character (sample bits all zero).
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script) {
+    // Bits 20..0 of the property word hold the sample code point.
+    const int32_t codePoint = getScriptProps(script) & 0x1fffff;
+    icu::UnicodeString result;
+    if(codePoint != 0) {
+        result.append(codePoint);
+    }
+    return result;
+}
+
+// Returns the usage value stored in bits 23..21 of the script's property
+// word, converted to UScriptUsage. Scripts without table data yield 0.
+U_CAPI UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script) {
+    const int32_t usageBits = (getScriptProps(script) >> 21) & 7;
+    return static_cast<UScriptUsage>(usageBits);
+}
+
+// Reports whether the RTL flag is set in the script's property word.
+// Scripts without table data have no flags set, so the result is false.
+U_CAPI UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script) {
+    const int32_t props = getScriptProps(script);
+    return (props & RTL) != 0;
+}
+
+// Reports whether the LB_LETTERS flag is set in the script's property word.
+// Scripts without table data have no flags set, so the result is false.
+U_CAPI UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script) {
+    const int32_t props = getScriptProps(script);
+    return (props & LB_LETTERS) != 0;
+}
+
+// Reports whether the CASED flag is set in the script's property word.
+// Scripts without table data have no flags set, so the result is false.
+U_CAPI UBool U_EXPORT2
+uscript_isCased(UScriptCode script) {
+    const int32_t props = getScriptProps(script);
+    return (props & CASED) != 0;
+}