]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/scriptset.cpp
ICU-511.27.tar.gz
[apple/icu.git] / icuSources / i18n / scriptset.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2013, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * scriptset.cpp
8 *
9 * created on: 2013 Jan 7
10 * created by: Andy Heninger
11 */
12
13 #include "unicode/utypes.h"
14
15 #include "unicode/uchar.h"
16 #include "unicode/unistr.h"
17
18 #include "scriptset.h"
19 #include "uassert.h"
20
21 U_NAMESPACE_BEGIN
22
// Number of elements in a true array (not a pointer), as an int32_t.
// The whole expansion is parenthesized so the macro behaves as a single
// operand next to any neighbouring operator (e.g. `sizeof LENGTHOF(a)`).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
24
25 //----------------------------------------------------------------------------
26 //
27 // ScriptSet implementation
28 //
29 //----------------------------------------------------------------------------
30 ScriptSet::ScriptSet() {
31 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
32 bits[i] = 0;
33 }
34 }
35
36 ScriptSet::~ScriptSet() {
37 }
38
39 ScriptSet::ScriptSet(const ScriptSet &other) {
40 *this = other;
41 }
42
43
44 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
45 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
46 bits[i] = other.bits[i];
47 }
48 return *this;
49 }
50
51
52 UBool ScriptSet::operator == (const ScriptSet &other) const {
53 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
54 if (bits[i] != other.bits[i]) {
55 return FALSE;
56 }
57 }
58 return TRUE;
59 }
60
61 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const {
62 if (U_FAILURE(status)) {
63 return FALSE;
64 }
65 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
66 status = U_ILLEGAL_ARGUMENT_ERROR;
67 return FALSE;
68 }
69 uint32_t index = script / 32;
70 uint32_t bit = 1 << (script & 31);
71 return ((bits[index] & bit) != 0);
72 }
73
74
75 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) {
76 if (U_FAILURE(status)) {
77 return *this;
78 }
79 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
80 status = U_ILLEGAL_ARGUMENT_ERROR;
81 return *this;
82 }
83 uint32_t index = script / 32;
84 uint32_t bit = 1 << (script & 31);
85 bits[index] |= bit;
86 return *this;
87 }
88
89 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) {
90 if (U_FAILURE(status)) {
91 return *this;
92 }
93 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) {
94 status = U_ILLEGAL_ARGUMENT_ERROR;
95 return *this;
96 }
97 uint32_t index = script / 32;
98 uint32_t bit = 1 << (script & 31);
99 bits[index] &= ~bit;
100 return *this;
101 }
102
103
104
105 ScriptSet &ScriptSet::Union(const ScriptSet &other) {
106 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
107 bits[i] |= other.bits[i];
108 }
109 return *this;
110 }
111
112 ScriptSet &ScriptSet::intersect(const ScriptSet &other) {
113 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
114 bits[i] &= other.bits[i];
115 }
116 return *this;
117 }
118
119 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) {
120 ScriptSet t;
121 t.set(script, status);
122 if (U_SUCCESS(status)) {
123 this->intersect(t);
124 }
125 return *this;
126 }
127
128 UBool ScriptSet::intersects(const ScriptSet &other) const {
129 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
130 if ((bits[i] & other.bits[i]) != 0) {
131 return true;
132 }
133 }
134 return false;
135 }
136
137 UBool ScriptSet::contains(const ScriptSet &other) const {
138 ScriptSet t(*this);
139 t.intersect(other);
140 return (t == other);
141 }
142
143
144 ScriptSet &ScriptSet::setAll() {
145 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
146 bits[i] = 0xffffffffu;
147 }
148 return *this;
149 }
150
151
152 ScriptSet &ScriptSet::resetAll() {
153 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
154 bits[i] = 0;
155 }
156 return *this;
157 }
158
159 int32_t ScriptSet::countMembers() const {
160 // This bit counter is good for sparse numbers of '1's, which is
161 // very much the case that we will usually have.
162 int32_t count = 0;
163 for (uint32_t i=0; i<LENGTHOF(bits); i++) {
164 uint32_t x = bits[i];
165 while (x > 0) {
166 count++;
167 x &= (x - 1); // and off the least significant one bit.
168 }
169 }
170 return count;
171 }
172
173 int32_t ScriptSet::hashCode() const {
174 int32_t hash = 0;
175 for (int32_t i=0; i<LENGTHOF(bits); i++) {
176 hash ^= bits[i];
177 }
178 return hash;
179 }
180
181 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const {
182 // TODO: Wants a better implementation.
183 if (fromIndex < 0) {
184 return -1;
185 }
186 UErrorCode status = U_ZERO_ERROR;
187 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) {
188 if (test((UScriptCode)scriptIndex, status)) {
189 return scriptIndex;
190 }
191 }
192 return -1;
193 }
194
195 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const {
196 UBool firstTime = TRUE;
197 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) {
198 if (!firstTime) {
199 dest.append(0x20);
200 }
201 firstTime = FALSE;
202 const char *scriptName = uscript_getShortName((UScriptCode(i)));
203 dest.append(UnicodeString(scriptName, -1, US_INV));
204 }
205 return dest;
206 }
207
208 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) {
209 resetAll();
210 if (U_FAILURE(status)) {
211 return *this;
212 }
213 UnicodeString oneScriptName;
214 for (int32_t i=0; i<scriptString.length();) {
215 UChar32 c = scriptString.char32At(i);
216 i = scriptString.moveIndex32(i, 1);
217 if (!u_isUWhiteSpace(c)) {
218 oneScriptName.append(c);
219 if (i < scriptString.length()) {
220 continue;
221 }
222 }
223 if (oneScriptName.length() > 0) {
224 char buf[40];
225 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV);
226 buf[sizeof(buf)-1] = 0;
227 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf);
228 if (sc == UCHAR_INVALID_CODE) {
229 status = U_ILLEGAL_ARGUMENT_ERROR;
230 } else {
231 this->set((UScriptCode)sc, status);
232 }
233 if (U_FAILURE(status)) {
234 return *this;
235 }
236 oneScriptName.remove();
237 }
238 }
239 return *this;
240 }
241
242 U_NAMESPACE_END
243
244 U_CAPI UBool U_EXPORT2
245 uhash_equalsScriptSet(const UElement key1, const UElement key2) {
246 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
247 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer);
248 return (*s1 == *s2);
249 }
250
251 U_CAPI int8_t U_EXPORT2
252 uhash_compareScriptSet(UElement key0, UElement key1) {
253 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer);
254 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer);
255 int32_t diff = s0->countMembers() - s1->countMembers();
256 if (diff != 0) return diff;
257 int32_t i0 = s0->nextSetBit(0);
258 int32_t i1 = s1->nextSetBit(0);
259 while ((diff = i0-i1) == 0 && i0 > 0) {
260 i0 = s0->nextSetBit(i0+1);
261 i1 = s1->nextSetBit(i1+1);
262 }
263 return (int8_t)diff;
264 }
265
266 U_CAPI int32_t U_EXPORT2
267 uhash_hashScriptSet(const UElement key) {
268 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer);
269 return s->hashCode();
270 }
271
272 U_CAPI void U_EXPORT2
273 uhash_deleteScriptSet(void *obj) {
274 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj);
275 delete s;
276 }