2 **********************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * created on: 2013 Jan 7
10 * created by: Andy Heninger
13 #include "unicode/utypes.h"
15 #include "unicode/uchar.h"
16 #include "unicode/unistr.h"
18 #include "scriptset.h"
24 //----------------------------------------------------------------------------
26 // ScriptSet implementation
28 //----------------------------------------------------------------------------
// Default constructor.
// Walks every word of the `bits` array (UPRV_LENGTHOF(bits) iterations).
// NOTE(review): the statement inside the loop is not visible in this listing;
// presumably it clears each word so that a new set starts out empty -- confirm.
29 ScriptSet::ScriptSet() {
30 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
// Destructor.
// NOTE(review): no body statements are visible in this listing; presumably the
// body is empty -- confirm against the full source.
35 ScriptSet::~ScriptSet() {
// Copy constructor: builds this set from `other`.
// NOTE(review): the body is not visible in this listing; presumably it copies
// other.bits (possibly by delegating to operator=) -- confirm.
38 ScriptSet::ScriptSet(const ScriptSet
&other
) {
43 ScriptSet
& ScriptSet::operator =(const ScriptSet
&other
) {
44 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
45 bits
[i
] = other
.bits
[i
];
51 UBool
ScriptSet::operator == (const ScriptSet
&other
) const {
52 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
53 if (bits
[i
] != other
.bits
[i
]) {
60 UBool
ScriptSet::test(UScriptCode script
, UErrorCode
&status
) const {
61 if (U_FAILURE(status
)) {
64 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
65 status
= U_ILLEGAL_ARGUMENT_ERROR
;
68 uint32_t index
= script
/ 32;
69 uint32_t bit
= 1 << (script
& 31);
70 return ((bits
[index
] & bit
) != 0);
// Operates on the bit that corresponds to `script` and returns a ScriptSet
// reference.
//   - Guarded by U_FAILURE(status) on entry.
//   - If `script` is negative or not smaller than the number of bits in
//     `bits` (sizeof(bits) * 8), status is set to U_ILLEGAL_ARGUMENT_ERROR.
//   - Otherwise the word index (script / 32) and the bit mask within that
//     word (script & 31) are computed.
// NOTE(review): the early-return statements and the statement that applies
// `bit` to bits[index] are not visible in this listing; presumably the bit is
// OR-ed in and *this is returned -- confirm.
// NOTE(review): `1 << (script & 31)` shifts a signed int; for bit 31 this
// overflows int. Prefer an unsigned literal for the mask.
74 ScriptSet
&ScriptSet::set(UScriptCode script
, UErrorCode
&status
) {
75 if (U_FAILURE(status
)) {
78 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
79 status
= U_ILLEGAL_ARGUMENT_ERROR
;
82 uint32_t index
= script
/ 32;
83 uint32_t bit
= 1 << (script
& 31);
// Operates on the bit that corresponds to `script` and returns a ScriptSet
// reference.
//   - Guarded by U_FAILURE(status) on entry.
//   - If `script` is negative or not smaller than the number of bits in
//     `bits` (sizeof(bits) * 8), status is set to U_ILLEGAL_ARGUMENT_ERROR.
//   - Otherwise the word index (script / 32) and the bit mask within that
//     word (script & 31) are computed.
// NOTE(review): the early-return statements and the statement that applies
// `bit` to bits[index] are not visible in this listing; presumably the bit is
// cleared and *this is returned -- confirm.
// NOTE(review): `1 << (script & 31)` shifts a signed int; for bit 31 this
// overflows int. Prefer an unsigned literal for the mask.
88 ScriptSet
&ScriptSet::reset(UScriptCode script
, UErrorCode
&status
) {
89 if (U_FAILURE(status
)) {
92 if (script
< 0 || script
>= (int32_t)sizeof(bits
) * 8) {
93 status
= U_ILLEGAL_ARGUMENT_ERROR
;
96 uint32_t index
= script
/ 32;
97 uint32_t bit
= 1 << (script
& 31);
104 ScriptSet
&ScriptSet::Union(const ScriptSet
&other
) {
105 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
106 bits
[i
] |= other
.bits
[i
];
111 ScriptSet
&ScriptSet::intersect(const ScriptSet
&other
) {
112 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
113 bits
[i
] &= other
.bits
[i
];
// Single-script overload of intersect().
// Calls t.set(script, status) on a local `t`, then checks U_SUCCESS(status).
// NOTE(review): the declaration of `t` and the body of the success branch are
// not visible in this listing; presumably `t` is a temporary ScriptSet and
// this set is intersected with it on success -- confirm.
118 ScriptSet
&ScriptSet::intersect(UScriptCode script
, UErrorCode
&status
) {
120 t
.set(script
, status
);
121 if (U_SUCCESS(status
)) {
127 UBool
ScriptSet::intersects(const ScriptSet
&other
) const {
128 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
129 if ((bits
[i
] & other
.bits
[i
]) != 0) {
// Const predicate taking another ScriptSet.
// NOTE(review): only the signature is visible in this listing; presumably it
// returns whether every member of `other` is also in this set -- confirm
// against the full source.
136 UBool
ScriptSet::contains(const ScriptSet
&other
) const {
143 ScriptSet
&ScriptSet::setAll() {
144 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
145 bits
[i
] = 0xffffffffu
;
// Walks every word of the `bits` array and returns a ScriptSet reference.
// NOTE(review): the statement inside the loop is not visible in this listing;
// presumably it zeroes each word, emptying the set -- confirm.
151 ScriptSet
&ScriptSet::resetAll() {
152 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
// Counting routine over the words of `bits`.
// Each word is copied into `x`, and `x &= (x - 1)` strips its lowest set bit.
// NOTE(review): the counter declaration, the inner loop header and the return
// statement are not visible in this listing; presumably the strip step is
// repeated until x is zero, counting one member per step -- confirm.
158 int32_t ScriptSet::countMembers() const {
159 // This bit counter is good for sparse numbers of '1's, which is
160 // very much the case that we will usually have.
162 for (uint32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
163 uint32_t x
= bits
[i
];
166 x
&= (x
- 1); // and off the least significant one bit.
// Computes an int32_t hash code by visiting every word of `bits`.
// NOTE(review): the accumulator and the combining step are not visible in
// this listing, so the exact hash function cannot be stated here.
// NOTE(review): this loop index is int32_t while the other loops in this file
// use uint32_t; harmless if UPRV_LENGTHOF yields a signed value, otherwise a
// signed/unsigned comparison -- confirm and make consistent.
172 int32_t ScriptSet::hashCode() const {
174 for (int32_t i
=0; i
<UPRV_LENGTHOF(bits
); i
++) {
// Searches for a set bit starting at `fromIndex`.
// Probes script indexes one at a time with test(), from fromIndex up to (but
// not including) sizeof(bits) * 8. A local UErrorCode is passed to test() and
// is not reported to the caller.
// NOTE(review): the return statements are not visible in this listing.
// displayScripts() in this file iterates while the result is >= 0, so a
// negative value presumably signals "no further member" -- confirm.
180 int32_t ScriptSet::nextSetBit(int32_t fromIndex
) const {
181 // TODO: Wants a better implementation.
185 UErrorCode status
= U_ZERO_ERROR
;
186 for (int32_t scriptIndex
= fromIndex
; scriptIndex
< (int32_t)sizeof(bits
)*8; scriptIndex
++) {
187 if (test((UScriptCode
)scriptIndex
, status
)) {
// Appends the short names of the scripts in this set to `dest`.
// Iterates the members with nextSetBit() (starting at 0, continuing while the
// result is >= 0). For each member, uscript_getShortName() supplies the name,
// which is appended to `dest` as an invariant-character (US_INV) string.
// A space (U+0020) is also appended to `dest`.
// NOTE(review): the condition guarding the space and the return statement are
// not visible in this listing; presumably `firstTime` suppresses the space
// before the first name and `dest` is returned -- confirm.
194 UnicodeString
&ScriptSet::displayScripts(UnicodeString
&dest
) const {
195 UBool firstTime
= TRUE
;
196 for (int32_t i
= nextSetBit(0); i
>= 0; i
= nextSetBit(i
+ 1)) {
198 dest
.append((UChar
)0x20);
201 const char *scriptName
= uscript_getShortName((UScriptCode(i
)));
202 dest
.append(UnicodeString(scriptName
, -1, US_INV
));
// Parses script names out of `scriptString` and adds the scripts to this set.
//
// Visible behavior:
//   - Guarded by U_FAILURE(status) on entry.
//   - Walks `scriptString` one code point at a time (char32At / moveIndex32).
//   - Code points that are not Unicode white space are accumulated in
//     `oneScriptName`.
//   - A non-empty accumulated name is extracted into `buf` as invariant
//     characters, explicitly NUL-terminated, and looked up with
//     u_getPropertyValueEnum(UCHAR_SCRIPT, buf).
//   - UCHAR_INVALID_CODE sets status to U_ILLEGAL_ARGUMENT_ERROR; a found
//     value is added with set().
//   - `oneScriptName` is cleared with remove() afterwards.
//
// NOTE(review): several control-flow lines (early returns, the branch taken
// while more input remains, the else linking lookup failure and set()) and the
// declaration of `buf` are not visible in this listing. Presumably names are
// white-space separated and a name is processed when a separator or the end of
// input is reached -- confirm against the full source.
// NOTE(review): extract() is limited to sizeof(buf)-1 bytes, so a longer name
// would presumably be truncated before lookup -- confirm the buffer size.
207 ScriptSet
&ScriptSet::parseScripts(const UnicodeString
&scriptString
, UErrorCode
&status
) {
209 if (U_FAILURE(status
)) {
212 UnicodeString oneScriptName
;
213 for (int32_t i
=0; i
<scriptString
.length();) {
214 UChar32 c
= scriptString
.char32At(i
);
215 i
= scriptString
.moveIndex32(i
, 1);
216 if (!u_isUWhiteSpace(c
)) {
217 oneScriptName
.append(c
);
218 if (i
< scriptString
.length()) {
222 if (oneScriptName
.length() > 0) {
224 oneScriptName
.extract(0, oneScriptName
.length(), buf
, sizeof(buf
)-1, US_INV
);
225 buf
[sizeof(buf
)-1] = 0;
226 int32_t sc
= u_getPropertyValueEnum(UCHAR_SCRIPT
, buf
);
227 if (sc
== UCHAR_INVALID_CODE
) {
228 status
= U_ILLEGAL_ARGUMENT_ERROR
;
230 this->set((UScriptCode
)sc
, status
);
232 if (U_FAILURE(status
)) {
235 oneScriptName
.remove();
// Equality callback with a UElement-based signature, for keys that point to
// icu::ScriptSet objects.
// Casts the `pointer` member of both keys to icu::ScriptSet*.
// NOTE(review): the return statement is not visible in this listing;
// presumably the two sets are compared with ScriptSet::operator== -- confirm.
243 U_CAPI UBool U_EXPORT2
244 uhash_equalsScriptSet(const UElement key1
, const UElement key2
) {
245 icu::ScriptSet
*s1
= static_cast<icu::ScriptSet
*>(key1
.pointer
);
246 icu::ScriptSet
*s2
= static_cast<icu::ScriptSet
*>(key2
.pointer
);
250 U_CAPI
int8_t U_EXPORT2
251 uhash_compareScriptSet(UElement key0
, UElement key1
) {
252 icu::ScriptSet
*s0
= static_cast<icu::ScriptSet
*>(key0
.pointer
);
253 icu::ScriptSet
*s1
= static_cast<icu::ScriptSet
*>(key1
.pointer
);
254 int32_t diff
= s0
->countMembers() - s1
->countMembers();
255 if (diff
!= 0) return diff
;
256 int32_t i0
= s0
->nextSetBit(0);
257 int32_t i1
= s1
->nextSetBit(0);
258 while ((diff
= i0
-i1
) == 0 && i0
> 0) {
259 i0
= s0
->nextSetBit(i0
+1);
260 i1
= s1
->nextSetBit(i1
+1);
265 U_CAPI
int32_t U_EXPORT2
266 uhash_hashScriptSet(const UElement key
) {
267 icu::ScriptSet
*s
= static_cast<icu::ScriptSet
*>(key
.pointer
);
268 return s
->hashCode();
// Callback taking an untyped object pointer.
// Casts `obj` to icu::ScriptSet*.
// NOTE(review): the statement that uses `s` is not visible in this listing;
// judging by the function name it presumably deletes the object -- confirm.
271 U_CAPI
void U_EXPORT2
272 uhash_deleteScriptSet(void *obj
) {
273 icu::ScriptSet
*s
= static_cast<icu::ScriptSet
*>(obj
);