]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/uscript.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / common / uscript.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1997-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File USCRIPT.C
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 07/06/2001 Ram Creation.
15 ******************************************************************************
16 */
17
18 #include "unicode/uchar.h"
19 #include "unicode/uscript.h"
20 #include "unicode/uloc.h"
21 #include "bytesinkutil.h"
22 #include "charstr.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "ulocimp.h"
26
27 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
28 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
29 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
30
31 static int32_t
32 setCodes(const UScriptCode *src, int32_t length,
33 UScriptCode *dest, int32_t capacity, UErrorCode *err) {
34 int32_t i;
35 if(U_FAILURE(*err)) { return 0; }
36 if(length > capacity) {
37 *err = U_BUFFER_OVERFLOW_ERROR;
38 return length;
39 }
40 for(i = 0; i < length; ++i) {
41 dest[i] = src[i];
42 }
43 return length;
44 }
45
46 static int32_t
47 setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
48 if(U_FAILURE(*err)) { return 0; }
49 if(1 > capacity) {
50 *err = U_BUFFER_OVERFLOW_ERROR;
51 return 1;
52 }
53 scripts[0] = script;
54 return 1;
55 }
56
57 static int32_t
58 getCodesFromLocale(const char *locale,
59 UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
60 UErrorCode internalErrorCode = U_ZERO_ERROR;
61 char lang[8];
62 char script[8];
63 int32_t scriptLength;
64 if(U_FAILURE(*err)) { return 0; }
65 // Multi-script languages, equivalent to the LocaleScript data
66 // that we used to load from locale resource bundles.
67 /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
68 if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
69 return 0;
70 }
71 if(0 == uprv_strcmp(lang, "ja")) {
72 return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
73 }
74 if(0 == uprv_strcmp(lang, "ko")) {
75 return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
76 }
77 scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
78 if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
79 return 0;
80 }
81 if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
82 return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
83 }
84 // Explicit script code.
85 if(scriptLength != 0) {
86 if (0 == uprv_strcmp(script, "Aran")) { // Apple <rdar://problem/47494884>
87 uprv_strcpy(script, "Arab");
88 }
89 UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
90 if(scriptCode != USCRIPT_INVALID_CODE) {
91 if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
92 scriptCode = USCRIPT_HAN;
93 }
94 return setOneCode(scriptCode, scripts, capacity, err);
95 }
96 }
97 return 0;
98 }
99
100 /* TODO: this is a bad API and should be deprecated, ticket #11141 */
101 U_CAPI int32_t U_EXPORT2
102 uscript_getCode(const char* nameOrAbbrOrLocale,
103 UScriptCode* fillIn,
104 int32_t capacity,
105 UErrorCode* err){
106 UBool triedCode;
107 UErrorCode internalErrorCode;
108 int32_t length;
109
110 if(U_FAILURE(*err)) {
111 return 0;
112 }
113 if(nameOrAbbrOrLocale==NULL ||
114 (fillIn == NULL ? capacity != 0 : capacity < 0)) {
115 *err = U_ILLEGAL_ARGUMENT_ERROR;
116 return 0;
117 }
118 if (0 == uprv_strcmp(nameOrAbbrOrLocale, "Aran")) { // Apple <rdar://problem/47494884>
119 nameOrAbbrOrLocale = "Arab";
120 }
121
122 triedCode = FALSE;
123 if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){
124 /* try long and abbreviated script names first */
125 UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
126 if(code!=USCRIPT_INVALID_CODE) {
127 return setOneCode(code, fillIn, capacity, err);
128 }
129 triedCode = TRUE;
130 }
131 internalErrorCode = U_ZERO_ERROR;
132 length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
133 if(U_FAILURE(*err) || length != 0) {
134 return length;
135 }
136 icu::CharString likely;
137 {
138 icu::CharStringByteSink sink(&likely);
139 ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
140 }
141 if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
142 length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
143 if(U_FAILURE(*err) || length != 0) {
144 return length;
145 }
146 }
147 if(!triedCode) {
148 /* still not found .. try long and abbreviated script names again */
149 UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
150 if(code!=USCRIPT_INVALID_CODE) {
151 return setOneCode(code, fillIn, capacity, err);
152 }
153 }
154 return 0;
155 }