2 **********************************************************************
3 * Copyright (C) 1999-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/20/99 alan Creation.
8 **********************************************************************
11 #include "unicode/utypes.h"
12 #include "unicode/uniset.h"
13 #include "unicode/parsepos.h"
14 #include "unicode/symtable.h"
26 // Define UChar constants using hex for EBCDIC compatibility
27 // Used #define to reduce private static exports and memory access time.
28 #define SET_OPEN ((UChar)0x005B) /*[*/
29 #define SET_CLOSE ((UChar)0x005D) /*]*/
30 #define HYPHEN ((UChar)0x002D) /*-*/
31 #define COMPLEMENT ((UChar)0x005E) /*^*/
32 #define COLON ((UChar)0x003A) /*:*/
33 #define BACKSLASH ((UChar)0x005C) /*\*/
34 #define INTERSECTION ((UChar)0x0026) /*&*/
35 #define UPPER_U ((UChar)0x0055) /*U*/
36 #define LOWER_U ((UChar)0x0075) /*u*/
37 #define OPEN_BRACE ((UChar)123) /*{*/
38 #define CLOSE_BRACE ((UChar)125) /*}*/
39 #define UPPER_P ((UChar)0x0050) /*P*/
40 #define LOWER_P ((UChar)0x0070) /*p*/
41 #define UPPER_N ((UChar)78) /*N*/
42 #define EQUALS ((UChar)0x003D) /*=*/
44 // HIGH_VALUE > all valid values. 110000 for codepoints
45 #define UNICODESET_HIGH 0x0110000
47 // LOW <= all valid values. ZERO for codepoints
48 #define UNICODESET_LOW 0x000000
50 // initial storage. Must be >= 0
51 #define START_EXTRA 16
53 // extra amount for growth. Must be >= 0
54 #define GROW_EXTRA START_EXTRA
58 SymbolTable::~SymbolTable() {}
61 * Minimum value that can be stored in a UnicodeSet.
63 const UChar32
UnicodeSet::MIN_VALUE
= UNICODESET_LOW
;
66 * Maximum value that can be stored in a UnicodeSet.
68 const UChar32
UnicodeSet::MAX_VALUE
= UNICODESET_HIGH
- 1;
70 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet
)
73 * Modify the given UChar32 variable so that it is in range, by
74 * pinning values < UNICODESET_LOW to UNICODESET_LOW, and
75 * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1.
76 * It modifies its argument in-place and also returns it.
78 static inline UChar32
pinCodePoint(UChar32
& c
) {
79 if (c
< UNICODESET_LOW
) {
81 } else if (c
> (UNICODESET_HIGH
-1)) {
82 c
= (UNICODESET_HIGH
-1);
87 //----------------------------------------------------------------
89 //----------------------------------------------------------------
91 // DO NOT DELETE THIS CODE. This code is used to debug memory leaks.
92 // To enable the debugging, define the symbol DEBUG_MEM in the line
93 // below. This will result in text being sent to stdout that looks
95 // DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85-
96 // DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85-
97 // Each line lists a construction (ct) or destruction (dt) event, the
98 // object address, the number of outstanding objects after the event,
99 // and the pattern of the object in question.
105 static int32_t _dbgCount
= 0;
107 static inline void _dbgct(UnicodeSet
* set
) {
109 set
->toPattern(str
, TRUE
);
111 str
.extract(0, 39, buf
, "");
112 printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set
, ++_dbgCount
, buf
);
115 static inline void _dbgdt(UnicodeSet
* set
) {
117 set
->toPattern(str
, TRUE
);
119 str
.extract(0, 39, buf
, "");
120 printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set
, --_dbgCount
, buf
);
130 //----------------------------------------------------------------
131 // UnicodeString in UVector support
132 //----------------------------------------------------------------
134 static void U_CALLCONV
cloneUnicodeString(UHashTok
*dst
, UHashTok
*src
) {
135 dst
->pointer
= new UnicodeString(*(UnicodeString
*)src
->pointer
);
138 static int8_t U_CALLCONV
compareUnicodeString(UHashTok t1
, UHashTok t2
) {
139 const UnicodeString
&a
= *(const UnicodeString
*)t1
.pointer
;
140 const UnicodeString
&b
= *(const UnicodeString
*)t2
.pointer
;
144 //----------------------------------------------------------------
146 //----------------------------------------------------------------
149 * Constructs an empty set.
151 UnicodeSet::UnicodeSet() :
152 len(1), capacity(1 + START_EXTRA
), bufferCapacity(0),
153 list(0), buffer(0), strings(0)
155 list
= (UChar32
*) uprv_malloc(sizeof(UChar32
) * capacity
);
157 list
[0] = UNICODESET_HIGH
;
164 * Constructs a set containing the given range. If <code>start >
165 * end</code> then an empty set is created.
167 * @param start first character, inclusive, of range
168 * @param end last character, inclusive, of range
170 UnicodeSet::UnicodeSet(UChar32 start
, UChar32 end
) :
171 len(1), capacity(1 + START_EXTRA
), bufferCapacity(0),
172 list(0), buffer(0), strings(0)
174 list
= (UChar32
*) uprv_malloc(sizeof(UChar32
) * capacity
);
176 list
[0] = UNICODESET_HIGH
;
179 complement(start
, end
);
184 * Constructs a set that is identical to the given UnicodeSet.
186 UnicodeSet::UnicodeSet(const UnicodeSet
& o
) :
188 len(0), capacity(o
.len
+ GROW_EXTRA
), bufferCapacity(0),
189 list(0), buffer(0), strings(0)
191 list
= (UChar32
*) uprv_malloc(sizeof(UChar32
) * capacity
);
202 UnicodeSet::~UnicodeSet() {
203 _dbgdt(this); // first!
212 * Assigns this object to be a copy of another.
214 UnicodeSet
& UnicodeSet::operator=(const UnicodeSet
& o
) {
215 ensureCapacity(o
.len
);
217 uprv_memcpy(list
, o
.list
, len
*sizeof(UChar32
));
218 UErrorCode ec
= U_ZERO_ERROR
;
219 strings
->assign(*o
.strings
, cloneUnicodeString
, ec
);
225 * Compares the specified object with this set for equality. Returns
226 * <tt>true</tt> if the two sets
227 * have the same size, and every member of the specified set is
228 * contained in this set (or equivalently, every member of this set is
229 * contained in the specified set).
231 * @param o set to be compared for equality with this set.
232 * @return <tt>true</tt> if the specified set is equal to this set.
234 UBool
UnicodeSet::operator==(const UnicodeSet
& o
) const {
235 if (len
!= o
.len
) return FALSE
;
236 for (int32_t i
= 0; i
< len
; ++i
) {
237 if (list
[i
] != o
.list
[i
]) return FALSE
;
239 if (*strings
!= *o
.strings
) return FALSE
;
244 * Returns a copy of this object. All UnicodeMatcher objects have
245 * to support cloning in order to allow classes using
246 * UnicodeMatchers, such as Transliterator, to implement cloning.
248 UnicodeFunctor
* UnicodeSet::clone() const {
249 return new UnicodeSet(*this);
253 * Returns the hash code value for this set.
255 * @return the hash code value for this set.
256 * @see Object#hashCode()
258 int32_t UnicodeSet::hashCode(void) const {
259 int32_t result
= len
;
260 for (int32_t i
= 0; i
< len
; ++i
) {
267 //----------------------------------------------------------------
269 //----------------------------------------------------------------
272 * Make this object represent the range <code>start - end</code>.
273 * If <code>start > end</code> then this object is set to an
276 * @param start first character in the set, inclusive
277 * @param end last character in the set, inclusive
279 UnicodeSet
& UnicodeSet::set(UChar32 start
, UChar32 end
) {
281 complement(start
, end
);
286 * Returns the number of elements in this set (its cardinality),
287 * Note than the elements of a set may include both individual
288 * codepoints and strings.
290 * @return the number of elements in this set (its cardinality).
292 int32_t UnicodeSet::size(void) const {
294 int32_t count
= getRangeCount();
295 for (int32_t i
= 0; i
< count
; ++i
) {
296 n
+= getRangeEnd(i
) - getRangeStart(i
) + 1;
298 return n
+ strings
->size();
302 * Returns <tt>true</tt> if this set contains no elements.
304 * @return <tt>true</tt> if this set contains no elements.
306 UBool
UnicodeSet::isEmpty(void) const {
307 return len
== 1 && strings
->size() == 0;
311 * Returns true if this set contains the given character.
312 * @param c character to be checked for containment
313 * @return true if the test condition is met
315 UBool
UnicodeSet::contains(UChar32 c
) const {
316 // Set i to the index of the start item greater than ch
317 // We know we will terminate without length test!
318 // LATER: for large sets, add binary search
321 // if (c < list[++i]) break;
323 if (c
>= UNICODESET_HIGH
) { // Don't need to check LOW bound
326 int32_t i
= findCodePoint(c
);
327 return ((i
& 1) != 0); // return true if odd
331 * Returns the smallest value i such that c < list[i]. Caller
332 * must ensure that c is a legal value or this method will enter
333 * an infinite loop. This method performs a binary search.
334 * @param c a character in the range MIN_VALUE..MAX_VALUE
336 * @return the smallest integer i in the range 0..len-1,
337 * inclusive, such that c < list[i]
339 int32_t UnicodeSet::findCodePoint(UChar32 c
) const {
342 set list[] c=0 1 3 4 7 8
343 === ============== ===========
344 [] [110000] 0 0 0 0 0 0
345 [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2
346 [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2
347 [:Any:] [0, 110000] 1 1 1 1 1 1
350 // Return the smallest i such that c < list[i]. Assume
351 // list[len - 1] == HIGH and that c is legal (0..HIGH-1).
352 if (c
< list
[0]) return 0;
353 // High runner test. c is often after the last range, so an
354 // initial check for this condition pays off.
355 if (len
>= 2 && c
>= list
[len
-2]) return len
-1;
357 int32_t hi
= len
- 1;
358 // invariant: c >= list[lo]
359 // invariant: c < list[hi]
361 int32_t i
= (lo
+ hi
) >> 1;
362 if (i
== lo
) return hi
;
369 return 0; // To make compiler happy; never reached
373 * Returns true if this set contains every character
374 * of the given range.
375 * @param start first character, inclusive, of the range
376 * @param end last character, inclusive, of the range
377 * @return true if the test condition is met
379 UBool
UnicodeSet::contains(UChar32 start
, UChar32 end
) const {
382 // if (start < list[++i]) break;
384 int32_t i
= findCodePoint(start
);
385 return ((i
& 1) != 0 && end
< list
[i
]);
389 * Returns <tt>true</tt> if this set contains the given
390 * multicharacter string.
391 * @param s string to be checked for containment
392 * @return <tt>true</tt> if this set contains the specified string
394 UBool
UnicodeSet::contains(const UnicodeString
& s
) const {
395 if (s
.length() == 0) return FALSE
;
396 int32_t cp
= getSingleCP(s
);
398 return strings
->contains((void*) &s
);
400 return contains((UChar32
) cp
);
405 * Returns true if this set contains all the characters and strings
407 * @param c set to be checked for containment
408 * @return true if the test condition is met
410 UBool
UnicodeSet::containsAll(const UnicodeSet
& c
) const {
411 // The specified set is a subset if all of its pairs are contained in
412 // this set. It's possible to code this more efficiently in terms of
413 // direct manipulation of the inversion lists if the need arises.
414 int32_t n
= c
.getRangeCount();
415 for (int i
=0; i
<n
; ++i
) {
416 if (!contains(c
.getRangeStart(i
), c
.getRangeEnd(i
))) {
420 if (!strings
->containsAll(*c
.strings
)) return FALSE
;
425 * Returns true if this set contains all the characters
426 * of the given string.
427 * @param s string containing characters to be checked for containment
428 * @return true if the test condition is met
430 UBool
UnicodeSet::containsAll(const UnicodeString
& s
) const {
432 for (int32_t i
= 0; i
< s
.length(); i
+= UTF_CHAR_LENGTH(cp
)) {
434 if (!contains(cp
)) return FALSE
;
440 * Returns true if this set contains none of the characters
441 * of the given range.
442 * @param start first character, inclusive, of the range
443 * @param end last character, inclusive, of the range
444 * @return true if the test condition is met
446 UBool
UnicodeSet::containsNone(UChar32 start
, UChar32 end
) const {
449 // if (start < list[++i]) break;
451 int32_t i
= findCodePoint(start
);
452 return ((i
& 1) == 0 && end
< list
[i
]);
456 * Returns true if this set contains none of the characters and strings
458 * @param c set to be checked for containment
459 * @return true if the test condition is met
461 UBool
UnicodeSet::containsNone(const UnicodeSet
& c
) const {
462 // The specified set is a subset if all of its pairs are contained in
463 // this set. It's possible to code this more efficiently in terms of
464 // direct manipulation of the inversion lists if the need arises.
465 int32_t n
= c
.getRangeCount();
466 for (int32_t i
=0; i
<n
; ++i
) {
467 if (!containsNone(c
.getRangeStart(i
), c
.getRangeEnd(i
))) {
471 if (!strings
->containsNone(*c
.strings
)) return FALSE
;
476 * Returns true if this set contains none of the characters
477 * of the given string.
478 * @param s string containing characters to be checked for containment
479 * @return true if the test condition is met
481 UBool
UnicodeSet::containsNone(const UnicodeString
& s
) const {
483 for (int32_t i
= 0; i
< s
.length(); i
+= UTF_CHAR_LENGTH(cp
)) {
485 if (contains(cp
)) return FALSE
;
491 * Returns <tt>true</tt> if this set contains any character whose low byte
492 * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
495 UBool
UnicodeSet::matchesIndexValue(uint8_t v
) const {
496 /* The index value v, in the range [0,255], is contained in this set if
497 * it is contained in any pair of this set. Pairs either have the high
498 * bytes equal, or unequal. If the high bytes are equal, then we have
499 * aaxx..aayy, where aa is the high byte. Then v is contained if xx <=
500 * v <= yy. If the high bytes are unequal we have aaxx..bbyy, bb>aa.
501 * Then v is contained if xx <= v || v <= yy. (This is identical to the
502 * time zone month containment logic.)
505 for (i
=0; i
<getRangeCount(); ++i
) {
506 UChar32 low
= getRangeStart(i
);
507 UChar32 high
= getRangeEnd(i
);
508 if ((low
& ~0xFF) == (high
& ~0xFF)) {
509 if ((low
& 0xFF) <= v
&& v
<= (high
& 0xFF)) {
512 } else if ((low
& 0xFF) <= v
|| v
<= (high
& 0xFF)) {
516 if (strings
->size() != 0) {
517 for (i
=0; i
<strings
->size(); ++i
) {
518 const UnicodeString
& s
= *(const UnicodeString
*)strings
->elementAt(i
);
519 //if (s.length() == 0) {
520 // // Empty strings match everything
523 // assert(s.length() != 0); // We enforce this elsewhere
524 UChar32 c
= s
.char32At(0);
525 if ((c
& 0xFF) == v
) {
534 * Implementation of UnicodeMatcher::matches(). Always matches the
535 * longest possible multichar string.
537 UMatchDegree
UnicodeSet::matches(const Replaceable
& text
,
541 if (offset
== limit
) {
542 // Strings, if any, have length != 0, so we don't worry
543 // about them here. If we ever allow zero-length strings
544 // we must check for them here.
545 if (contains(U_ETHER
)) {
546 return incremental
? U_PARTIAL_MATCH
: U_MATCH
;
551 if (strings
->size() != 0) { // try strings first
553 // might separate forward and backward loops later
554 // for now they are combined
556 // TODO Improve efficiency of this, at least in the forward
557 // direction, if not in both. In the forward direction we
558 // can assume the strings are sorted.
561 UBool forward
= offset
< limit
;
563 // firstChar is the leftmost char to match in the
564 // forward direction or the rightmost char to match in
565 // the reverse direction.
566 UChar firstChar
= text
.charAt(offset
);
568 // If there are multiple strings that can match we
569 // return the longest match.
570 int32_t highWaterLength
= 0;
572 for (i
=0; i
<strings
->size(); ++i
) {
573 const UnicodeString
& trial
= *(const UnicodeString
*)strings
->elementAt(i
);
575 //if (trial.length() == 0) {
576 // return U_MATCH; // null-string always matches
578 // assert(trial.length() != 0); // We ensure this elsewhere
580 UChar c
= trial
.charAt(forward
? 0 : trial
.length() - 1);
582 // Strings are sorted, so we can optimize in the
583 // forward direction.
584 if (forward
&& c
> firstChar
) break;
585 if (c
!= firstChar
) continue;
587 int32_t matchLen
= matchRest(text
, offset
, limit
, trial
);
590 int32_t maxLen
= forward
? limit
-offset
: offset
-limit
;
591 if (matchLen
== maxLen
) {
592 // We have successfully matched but only up to limit.
593 return U_PARTIAL_MATCH
;
597 if (matchLen
== trial
.length()) {
598 // We have successfully matched the whole string.
599 if (matchLen
> highWaterLength
) {
600 highWaterLength
= matchLen
;
602 // In the forward direction we know strings
603 // are sorted so we can bail early.
604 if (forward
&& matchLen
< highWaterLength
) {
611 // We've checked all strings without a partial match.
612 // If we have full matches, return the longest one.
613 if (highWaterLength
!= 0) {
614 offset
+= forward
? highWaterLength
: -highWaterLength
;
618 return UnicodeFilter::matches(text
, offset
, limit
, incremental
);
623 * Returns the longest match for s in text at the given position.
624 * If limit > start then match forward from start+1 to limit
625 * matching all characters except s.charAt(0). If limit < start,
626 * go backward starting from start-1 matching all characters
627 * except s.charAt(s.length()-1). This method assumes that the
628 * first character, text.charAt(start), matches s, so it does not
630 * @param text the text to match
631 * @param start the first character to match. In the forward
632 * direction, text.charAt(start) is matched against s.charAt(0).
633 * In the reverse direction, it is matched against
634 * s.charAt(s.length()-1).
635 * @param limit the limit offset for matching, either last+1 in
636 * the forward direction, or last-1 in the reverse direction,
637 * where last is the index of the last character to match.
638 * @return If part of s matches up to the limit, return |limit -
639 * start|. If all of s matches before reaching the limit, return
640 * s.length(). If there is a mismatch between s and text, return
643 int32_t UnicodeSet::matchRest(const Replaceable
& text
,
644 int32_t start
, int32_t limit
,
645 const UnicodeString
& s
) {
648 int32_t slen
= s
.length();
650 maxLen
= limit
- start
;
651 if (maxLen
> slen
) maxLen
= slen
;
652 for (i
= 1; i
< maxLen
; ++i
) {
653 if (text
.charAt(start
+ i
) != s
.charAt(i
)) return 0;
656 maxLen
= start
- limit
;
657 if (maxLen
> slen
) maxLen
= slen
;
658 --slen
; // <=> slen = s.length() - 1;
659 for (i
= 1; i
< maxLen
; ++i
) {
660 if (text
.charAt(start
- i
) != s
.charAt(slen
- i
)) return 0;
667 * Implement of UnicodeMatcher
669 void UnicodeSet::addMatchSetTo(UnicodeSet
& toUnionTo
) const {
670 toUnionTo
.addAll(*this);
674 * Returns the index of the given character within this set, where
675 * the set is ordered by ascending code point. If the character
676 * is not in this set, return -1. The inverse of this method is
677 * <code>charAt()</code>.
678 * @return an index from 0..size()-1, or -1
680 int32_t UnicodeSet::indexOf(UChar32 c
) const {
681 if (c
< MIN_VALUE
|| c
> MAX_VALUE
) {
687 UChar32 start
= list
[i
++];
691 UChar32 limit
= list
[i
++];
693 return n
+ c
- start
;
700 * Returns the character at the given index within this set, where
701 * the set is ordered by ascending code point. If the index is
702 * out of range, return (UChar32)-1. The inverse of this method is
703 * <code>indexOf()</code>.
704 * @param index an index from 0..size()-1
705 * @return the character at the given index, or (UChar32)-1.
707 UChar32
UnicodeSet::charAt(int32_t index
) const {
709 // len2 is the largest even integer <= len, that is, it is len
710 // for even values and len-1 for odd values. With odd values
711 // the last entry is UNICODESET_HIGH.
712 int32_t len2
= len
& ~1;
713 for (int32_t i
=0; i
< len2
;) {
714 UChar32 start
= list
[i
++];
715 int32_t count
= list
[i
++] - start
;
717 return (UChar32
)(start
+ index
);
726 * Adds the specified range to this set if it is not already
727 * present. If this set already contains the specified range,
728 * the call leaves this set unchanged. If <code>start > end</code>
729 * then an empty range is added, leaving the set unchanged.
731 * @param start first character, inclusive, of range to be added
733 * @param end last character, inclusive, of range to be added
736 UnicodeSet
& UnicodeSet::add(UChar32 start
, UChar32 end
) {
737 if (pinCodePoint(start
) < pinCodePoint(end
)) {
738 UChar32 range
[3] = { start
, end
+1, UNICODESET_HIGH
};
740 } else if (start
== end
) {
746 // #define DEBUG_US_ADD
750 void dump(UChar32 c
) {
752 printf("%c", (char)c
);
757 void dump(const UChar32
* list
, int32_t len
) {
759 for (int32_t i
=0; i
<len
; ++i
) {
760 if (i
!= 0) printf(", ");
768 * Adds the specified character to this set if it is not already
769 * present. If this set already contains the specified character,
770 * the call leaves this set unchanged.
772 UnicodeSet
& UnicodeSet::add(UChar32 c
) {
773 // find smallest i such that c < list[i]
774 // if odd, then it is IN the set
775 // if even, then it is OUT of the set
776 int32_t i
= findCodePoint(pinCodePoint(c
));
779 if ((i
& 1) != 0) return *this;
782 // assert(list[len-1] == HIGH);
785 // [start_0, limit_0, start_1, limit_1, HIGH]
787 // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
791 // i == 0 means c is before the first range
796 printf(" found at %d", i
);
802 if (c
== list
[i
]-1) {
803 // c is before start of next range
805 // if we touched the HIGH mark, then add a new one
806 if (c
== (UNICODESET_HIGH
- 1)) {
807 ensureCapacity(len
+1);
808 list
[len
++] = UNICODESET_HIGH
;
810 if (i
> 0 && c
== list
[i
-1]) {
811 // collapse adjacent ranges
813 // [..., start_k-1, c, c, limit_k, ..., HIGH]
817 //for (int32_t k=i-1; k<len-2; ++k) {
818 // list[k] = list[k+2];
820 UChar32
* dst
= list
+ i
- 1;
821 UChar32
* src
= dst
+ 2;
822 UChar32
* srclimit
= list
+ len
;
823 while (src
< srclimit
) *(dst
++) = *(src
++);
829 else if (i
> 0 && c
== list
[i
-1]) {
830 // c is after end of prior range
832 // no need to check for collapse here
836 // At this point we know the new char is not adjacent to
837 // any existing ranges, and it is not 10FFFF.
840 // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH]
844 // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH]
848 ensureCapacity(len
+2);
850 //for (int32_t k=len-1; k>=i; --k) {
851 // list[k+2] = list[k];
853 UChar32
* src
= list
+ len
;
854 UChar32
* dst
= src
+ 2;
855 UChar32
* srclimit
= list
+ i
;
856 while (src
> srclimit
) *(--dst
) = *(--src
);
867 for (i
=1; i
<len
; ++i
) {
868 if (list
[i
] <= list
[i
-1]) {
870 printf("ERROR: list has been corrupted\n");
881 * Adds the specified multicharacter to this set if it is not already
882 * present. If this set already contains the multicharacter,
883 * the call leaves this set unchanged.
884 * Thus "ch" => {"ch"}
885 * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
886 * @param s the source string
887 * @return the modified set, for chaining
889 UnicodeSet
& UnicodeSet::add(const UnicodeString
& s
) {
890 if (s
.length() == 0) return *this;
891 int32_t cp
= getSingleCP(s
);
893 if (!strings
->contains((void*) &s
)) {
898 add((UChar32
)cp
, (UChar32
)cp
);
904 * Adds the given string, in order, to 'strings'. The given string
905 * must have been checked by the caller to not be empty and to not
906 * already be in 'strings'.
908 void UnicodeSet::_add(const UnicodeString
& s
) {
909 UnicodeString
* t
= new UnicodeString(s
);
910 UErrorCode ec
= U_ZERO_ERROR
;
911 strings
->sortedInsert(t
, compareUnicodeString
, ec
);
915 * @return a code point IF the string consists of a single one.
916 * otherwise returns -1.
917 * @param string to test
919 int32_t UnicodeSet::getSingleCP(const UnicodeString
& s
) {
920 //if (s.length() < 1) {
921 // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet");
923 if (s
.length() > 2) return -1;
924 if (s
.length() == 1) return s
.charAt(0);
926 // at this point, len = 2
927 UChar32 cp
= s
.char32At(0);
928 if (cp
> 0xFFFF) { // is surrogate pair
935 * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}
936 * If this set already contains any particular character, it has no effect on that character.
937 * @param the source string
938 * @return the modified set, for chaining
940 UnicodeSet
& UnicodeSet::addAll(const UnicodeString
& s
) {
942 for (int32_t i
= 0; i
< s
.length(); i
+= UTF_CHAR_LENGTH(cp
)) {
950 * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
951 * If this set already contains any particular character, it has no effect on that character.
952 * @param the source string
953 * @return the modified set, for chaining
955 UnicodeSet
& UnicodeSet::retainAll(const UnicodeString
& s
) {
963 * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
964 * If this set already contains any particular character, it has no effect on that character.
965 * @param the source string
966 * @return the modified set, for chaining
968 UnicodeSet
& UnicodeSet::complementAll(const UnicodeString
& s
) {
976 * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
977 * If this set already contains any particular character, it has no effect on that character.
978 * @param the source string
979 * @return the modified set, for chaining
981 UnicodeSet
& UnicodeSet::removeAll(const UnicodeString
& s
) {
989 * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
990 * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
991 * @param the source string
992 * @return a newly created set containing the given string
994 UnicodeSet
* U_EXPORT2
UnicodeSet::createFrom(const UnicodeString
& s
) {
995 UnicodeSet
*set
= new UnicodeSet();
1002 * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
1003 * @param the source string
1004 * @return a newly created set containing the given characters
1006 UnicodeSet
* U_EXPORT2
UnicodeSet::createFromAll(const UnicodeString
& s
) {
1007 UnicodeSet
*set
= new UnicodeSet();
1013 * Retain only the elements in this set that are contained in the
1014 * specified range. If <code>start > end</code> then an empty range is
1015 * retained, leaving the set empty.
1017 * @param start first character, inclusive, of range to be retained
1019 * @param end last character, inclusive, of range to be retained
1022 UnicodeSet
& UnicodeSet::retain(UChar32 start
, UChar32 end
) {
1023 if (pinCodePoint(start
) <= pinCodePoint(end
)) {
1024 UChar32 range
[3] = { start
, end
+1, UNICODESET_HIGH
};
1025 retain(range
, 2, 0);
1032 UnicodeSet
& UnicodeSet::retain(UChar32 c
) {
1033 return retain(c
, c
);
1037 * Removes the specified range from this set if it is present.
1038 * The set will not contain the specified range once the call
1039 * returns. If <code>end > start</code> then an empty range is
1040 * removed, leaving the set unchanged.
1042 * @param start first character, inclusive, of range to be removed
1044 * @param end last character, inclusive, of range to be removed
1047 UnicodeSet
& UnicodeSet::remove(UChar32 start
, UChar32 end
) {
1048 if (pinCodePoint(start
) <= pinCodePoint(end
)) {
1049 UChar32 range
[3] = { start
, end
+1, UNICODESET_HIGH
};
1050 retain(range
, 2, 2);
1056 * Removes the specified character from this set if it is present.
1057 * The set will not contain the specified range once the call
1060 UnicodeSet
& UnicodeSet::remove(UChar32 c
) {
1061 return remove(c
, c
);
1065 * Removes the specified string from this set if it is present.
1066 * The set will not contain the specified character once the call
1068 * @param the source string
1069 * @return the modified set, for chaining
1071 UnicodeSet
& UnicodeSet::remove(const UnicodeString
& s
) {
1072 if (s
.length() == 0) return *this;
1073 int32_t cp
= getSingleCP(s
);
1075 strings
->removeElement((void*) &s
);
1078 remove((UChar32
)cp
, (UChar32
)cp
);
1084 * Complements the specified range in this set. Any character in
1085 * the range will be removed if it is in this set, or will be
1086 * added if it is not in this set. If <code>start > end</code>
1087 * then an empty range is xor'ed, leaving the set unchanged.
1089 * @param start first character, inclusive, of range to be removed
1091 * @param end last character, inclusive, of range to be removed
1094 UnicodeSet
& UnicodeSet::complement(UChar32 start
, UChar32 end
) {
1095 if (pinCodePoint(start
) <= pinCodePoint(end
)) {
1096 UChar32 range
[3] = { start
, end
+1, UNICODESET_HIGH
};
1097 exclusiveOr(range
, 2, 0);
1103 UnicodeSet
& UnicodeSet::complement(UChar32 c
) {
1104 return complement(c
, c
);
1108 * This is equivalent to
1109 * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
1111 UnicodeSet
& UnicodeSet::complement(void) {
1112 if (list
[0] == UNICODESET_LOW
) {
1113 ensureBufferCapacity(len
-1);
1114 uprv_memcpy(buffer
, list
+ 1, (len
-1)*sizeof(UChar32
));
1117 ensureBufferCapacity(len
+1);
1118 uprv_memcpy(buffer
+ 1, list
, len
*sizeof(UChar32
));
1119 buffer
[0] = UNICODESET_LOW
;
1128 * Complement the specified string in this set.
1129 * The set will not contain the specified string once the call
1131 * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
1132 * @param s the string to complement
1133 * @return this object, for chaining
1135 UnicodeSet
& UnicodeSet::complement(const UnicodeString
& s
) {
1136 if (s
.length() == 0) return *this;
1137 int32_t cp
= getSingleCP(s
);
1139 if (strings
->contains((void*) &s
)) {
1140 strings
->removeElement((void*) &s
);
1146 complement((UChar32
)cp
, (UChar32
)cp
);
1152 * Adds all of the elements in the specified set to this set if
1153 * they're not already present. This operation effectively
1154 * modifies this set so that its value is the <i>union</i> of the two
1155 * sets. The behavior of this operation is unspecified if the specified
1156 * collection is modified while the operation is in progress.
1158 * @param c set whose elements are to be added to this set.
1159 * @see #add(char, char)
1161 UnicodeSet
& UnicodeSet::addAll(const UnicodeSet
& c
) {
1162 add(c
.list
, c
.len
, 0);
1164 // Add strings in order
1165 for (int32_t i
=0; i
<c
.strings
->size(); ++i
) {
1166 const UnicodeString
* s
= (const UnicodeString
*)c
.strings
->elementAt(i
);
1167 if (!strings
->contains((void*) s
)) {
1175 * Retains only the elements in this set that are contained in the
1176 * specified set. In other words, removes from this set all of
1177 * its elements that are not contained in the specified set. This
1178 * operation effectively modifies this set so that its value is
1179 * the <i>intersection</i> of the two sets.
1181 * @param c set that defines which elements this set will retain.
1183 UnicodeSet
& UnicodeSet::retainAll(const UnicodeSet
& c
) {
1184 retain(c
.list
, c
.len
, 0);
1185 strings
->retainAll(*c
.strings
);
1190 * Removes from this set all of its elements that are contained in the
1191 * specified set. This operation effectively modifies this
1192 * set so that its value is the <i>asymmetric set difference</i> of
1195 * @param c set that defines which elements will be removed from
1198 UnicodeSet
& UnicodeSet::removeAll(const UnicodeSet
& c
) {
1199 retain(c
.list
, c
.len
, 2);
1200 strings
->removeAll(*c
.strings
);
1205 * Complements in this set all elements contained in the specified
1206 * set. Any character in the other set will be removed if it is
1207 * in this set, or will be added if it is not in this set.
1209 * @param c set that defines which elements will be xor'ed from
1212 UnicodeSet
& UnicodeSet::complementAll(const UnicodeSet
& c
) {
1213 exclusiveOr(c
.list
, c
.len
, 0);
1215 for (int32_t i
=0; i
<c
.strings
->size(); ++i
) {
1216 void* e
= c
.strings
->elementAt(i
);
1217 if (!strings
->removeElement(e
)) {
1218 _add(*(const UnicodeString
*)e
);
1225 * Removes all of the elements from this set. This set will be
1226 * empty after this call returns.
1228 UnicodeSet
& UnicodeSet::clear(void) {
1229 list
[0] = UNICODESET_HIGH
;
1232 strings
->removeAllElements();
1237 * Iteration method that returns the number of ranges contained in
1239 * @see #getRangeStart
1242 int32_t UnicodeSet::getRangeCount() const {
1247 * Iteration method that returns the first character in the
1248 * specified range of this set.
1249 * @see #getRangeCount
1252 UChar32
UnicodeSet::getRangeStart(int32_t index
) const {
1253 return list
[index
*2];
1257 * Iteration method that returns the last character in the
1258 * specified range of this set.
1259 * @see #getRangeStart
1262 UChar32
UnicodeSet::getRangeEnd(int32_t index
) const {
1263 return list
[index
*2 + 1] - 1;
1266 int32_t UnicodeSet::getStringCount() const {
1267 return strings
->size();
1270 const UnicodeString
* UnicodeSet::getString(int32_t index
) const {
1271 return (const UnicodeString
*) strings
->elementAt(index
);
1275 * Reallocate this object's internal structures to take up the least
1276 * possible space, without changing this object's value.
1278 UnicodeSet
& UnicodeSet::compact() {
1279 if (len
!= capacity
) {
1281 UChar32
* temp
= (UChar32
*) uprv_malloc(sizeof(UChar32
) * capacity
);
1282 uprv_memcpy(temp
, list
, len
*sizeof(UChar32
));
1291 int32_t UnicodeSet::serialize(uint16_t *dest
, int32_t destCapacity
, UErrorCode
& ec
) const {
1292 int32_t bmpLength
, length
, destLength
;
1294 if (U_FAILURE(ec
)) {
1298 if (destCapacity
<0 || (destCapacity
>0 && dest
==NULL
)) {
1299 ec
=U_ILLEGAL_ARGUMENT_ERROR
;
1303 /* count necessary 16-bit units */
1304 length
=this->len
-1; // Subtract 1 to ignore final UNICODESET_HIGH
1305 // assert(length>=0);
1308 if (destCapacity
>0) {
1311 ec
=U_BUFFER_OVERFLOW_ERROR
;
1317 if (this->list
[length
-1]<=0xffff) {
1320 } else if (this->list
[0]>=0x10000) {
1321 /* all supplementary */
1325 /* some BMP, some supplementary */
1326 for (bmpLength
=0; bmpLength
<length
&& this->list
[bmpLength
]<=0xffff; ++bmpLength
) {}
1327 length
=bmpLength
+2*(length
-bmpLength
);
1330 /* length: number of 16-bit array units */
1331 if (length
>0x7fff) {
1332 /* there are only 15 bits for the length in the first serialized word */
1333 ec
=U_INDEX_OUTOFBOUNDS_ERROR
;
1338 * total serialized length:
1339 * number of 16-bit array units (length) +
1340 * 1 length unit (always) +
1341 * 1 bmpLength unit (if there are supplementary values)
1343 destLength
=length
+((length
>bmpLength
)?2:1);
1344 if (destLength
<=destCapacity
) {
1348 *dest
=(uint16_t)length
;
1349 if (length
>bmpLength
) {
1351 *++dest
=(uint16_t)bmpLength
;
1355 /* write the BMP part of the array */
1357 for (i
=0; i
<bmpLength
; ++i
) {
1358 *dest
++=(uint16_t)*p
++;
1361 /* write the supplementary part of the array */
1362 for (; i
<length
; i
+=2) {
1363 *dest
++=(uint16_t)(*p
>>16);
1364 *dest
++=(uint16_t)*p
++;
1367 ec
=U_BUFFER_OVERFLOW_ERROR
;
1372 //----------------------------------------------------------------
1373 // Implementation: Utility methods
1374 //----------------------------------------------------------------
1377 * Allocate our strings vector and return TRUE if successful.
1379 UBool
UnicodeSet::allocateStrings() {
1380 UErrorCode ec
= U_ZERO_ERROR
;
1381 strings
= new UVector(uhash_deleteUnicodeString
,
1382 uhash_compareUnicodeString
, ec
);
1383 if (U_FAILURE(ec
)) {
1391 void UnicodeSet::ensureCapacity(int32_t newLen
) {
1392 if (newLen
<= capacity
)
1394 capacity
= newLen
+ GROW_EXTRA
;
1395 UChar32
* temp
= (UChar32
*) uprv_malloc(sizeof(UChar32
) * capacity
);
1396 uprv_memcpy(temp
, list
, len
*sizeof(UChar32
));
1401 void UnicodeSet::ensureBufferCapacity(int32_t newLen
) {
1402 if (buffer
!= NULL
&& newLen
<= bufferCapacity
)
1407 bufferCapacity
= newLen
+ GROW_EXTRA
;
1408 buffer
= (UChar32
*) uprv_malloc(sizeof(UChar32
) * bufferCapacity
);
1412 * Swap list and buffer.
1414 void UnicodeSet::swapBuffers(void) {
1415 // swap list and buffer
1416 UChar32
* temp
= list
;
1420 int32_t c
= capacity
;
1421 capacity
= bufferCapacity
;
1425 //----------------------------------------------------------------
1426 // Implementation: Fundamental operators
1427 //----------------------------------------------------------------
1429 static inline UChar32
max(UChar32 a
, UChar32 b
) {
1430 return (a
> b
) ? a
: b
;
1433 // polarity = 0, 3 is normal: x xor y
1434 // polarity = 1, 2: x xor ~y == x === y
1436 void UnicodeSet::exclusiveOr(const UChar32
* other
, int32_t otherLen
, int8_t polarity
) {
1437 ensureBufferCapacity(len
+ otherLen
);
1438 int32_t i
= 0, j
= 0, k
= 0;
1439 UChar32 a
= list
[i
++];
1441 if (polarity
== 1 || polarity
== 2) {
1443 if (other
[j
] == UNICODESET_LOW
) { // skip base if already LOW
1450 // simplest of all the routines
1451 // sort the values, discarding identicals!
1459 } else if (a
!= UNICODESET_HIGH
) { // at this point, a == b
1460 // discard both values!
1464 buffer
[k
++] = UNICODESET_HIGH
;
1473 // polarity = 0 is normal: x union y
1474 // polarity = 2: x union ~y
1475 // polarity = 1: ~x union y
1476 // polarity = 3: ~x union ~y
1478 void UnicodeSet::add(const UChar32
* other
, int32_t otherLen
, int8_t polarity
) {
1479 ensureBufferCapacity(len
+ otherLen
);
1480 int32_t i
= 0, j
= 0, k
= 0;
1481 UChar32 a
= list
[i
++];
1482 UChar32 b
= other
[j
++];
1483 // change from xor is that we have to check overlapping pairs
1484 // polarity bit 1 means a is second, bit 2 means b is.
1487 case 0: // both first; take lower if unequal
1488 if (a
< b
) { // take a
1489 // Back up over overlapping ranges in buffer[]
1490 if (k
> 0 && a
<= buffer
[k
-1]) {
1491 // Pick latter end value in buffer[] vs. list[]
1492 a
= max(list
[i
], buffer
[--k
]);
1498 i
++; // Common if/else code factored out
1500 } else if (b
< a
) { // take b
1501 if (k
> 0 && b
<= buffer
[k
-1]) {
1502 b
= max(other
[j
], buffer
[--k
]);
1509 } else { // a == b, take a, drop b
1510 if (a
== UNICODESET_HIGH
) goto loop_end
;
1511 // This is symmetrical; it doesn't matter if
1512 // we backtrack with a or b. - liu
1513 if (k
> 0 && a
<= buffer
[k
-1]) {
1514 a
= max(list
[i
], buffer
[--k
]);
1526 case 3: // both second; take higher if unequal, and drop other
1527 if (b
<= a
) { // take a
1528 if (a
== UNICODESET_HIGH
) goto loop_end
;
1531 if (b
== UNICODESET_HIGH
) goto loop_end
;
1535 polarity
^= 1; // factored common code
1539 case 1: // a second, b first; if b < a, overlap
1540 if (a
< b
) { // no overlap, take a
1541 buffer
[k
++] = a
; a
= list
[i
++]; polarity
^= 1;
1542 } else if (b
< a
) { // OVERLAP, drop b
1545 } else { // a == b, drop both!
1546 if (a
== UNICODESET_HIGH
) goto loop_end
;
1553 case 2: // a first, b second; if a < b, overlap
1554 if (b
< a
) { // no overlap, take b
1558 } else if (a
< b
) { // OVERLAP, drop a
1561 } else { // a == b, drop both!
1562 if (a
== UNICODESET_HIGH
) goto loop_end
;
1572 buffer
[k
++] = UNICODESET_HIGH
; // terminate
1578 // polarity = 0 is normal: x intersect y
1579 // polarity = 2: x intersect ~y == set-minus
1580 // polarity = 1: ~x intersect y
1581 // polarity = 3: ~x intersect ~y
1583 void UnicodeSet::retain(const UChar32
* other
, int32_t otherLen
, int8_t polarity
) {
1584 ensureBufferCapacity(len
+ otherLen
);
1585 int32_t i
= 0, j
= 0, k
= 0;
1586 UChar32 a
= list
[i
++];
1587 UChar32 b
= other
[j
++];
1588 // change from xor is that we have to check overlapping pairs
1589 // polarity bit 1 means a is second, bit 2 means b is.
1592 case 0: // both first; drop the smaller
1593 if (a
< b
) { // drop a
1596 } else if (b
< a
) { // drop b
1599 } else { // a == b, take one, drop other
1600 if (a
== UNICODESET_HIGH
) goto loop_end
;
1608 case 3: // both second; take lower if unequal
1609 if (a
< b
) { // take a
1613 } else if (b
< a
) { // take b
1617 } else { // a == b, take one, drop other
1618 if (a
== UNICODESET_HIGH
) goto loop_end
;
1626 case 1: // a second, b first;
1627 if (a
< b
) { // NO OVERLAP, drop a
1630 } else if (b
< a
) { // OVERLAP, take b
1634 } else { // a == b, drop both!
1635 if (a
== UNICODESET_HIGH
) goto loop_end
;
1642 case 2: // a first, b second; if a < b, overlap
1643 if (b
< a
) { // no overlap, drop b
1646 } else if (a
< b
) { // OVERLAP, take a
1650 } else { // a == b, drop both!
1651 if (a
== UNICODESET_HIGH
) goto loop_end
;
1661 buffer
[k
++] = UNICODESET_HIGH
; // terminate
1668 * Append the <code>toPattern()</code> representation of a
1669 * string to the given <code>UnicodeString</code>.
1671 void UnicodeSet::_appendToPat(UnicodeString
& buf
, const UnicodeString
& s
, UBool
1672 escapeUnprintable
) {
1674 for (int32_t i
= 0; i
< s
.length(); i
+= UTF_CHAR_LENGTH(cp
)) {
1675 _appendToPat(buf
, cp
= s
.char32At(i
), escapeUnprintable
);
1680 * Append the <code>toPattern()</code> representation of a
1681 * character to the given <code>UnicodeString</code>.
1683 void UnicodeSet::_appendToPat(UnicodeString
& buf
, UChar32 c
, UBool
1684 escapeUnprintable
) {
1685 if (escapeUnprintable
&& ICU_Utility::isUnprintable(c
)) {
1686 // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything
1688 if (ICU_Utility::escapeUnprintable(buf
, c
)) {
1692 // Okay to let ':' pass through
1703 case SymbolTable::SYMBOL_REF
:
1704 buf
.append(BACKSLASH
);
1707 // Escape whitespace
1708 if (uprv_isRuleWhiteSpace(c
)) {
1709 buf
.append(BACKSLASH
);
1717 * Append a string representation of this set to result. This will be
1718 * a cleaned version of the string passed to applyPattern(), if there
1719 * is one. Otherwise it will be generated.
1721 UnicodeString
& UnicodeSet::_toPattern(UnicodeString
& result
,
1722 UBool escapeUnprintable
) const {
1723 if (pat
.length() > 0) {
1725 int32_t backslashCount
= 0;
1726 for (i
=0; i
<pat
.length(); ) {
1727 UChar32 c
= pat
.char32At(i
);
1728 i
+= UTF_CHAR_LENGTH(c
);
1729 if (escapeUnprintable
&& ICU_Utility::isUnprintable(c
)) {
1730 // If the unprintable character is preceded by an odd
1731 // number of backslashes, then it has been escaped.
1732 // Before unescaping it, we delete the final
1734 if ((backslashCount
% 2) == 1) {
1735 result
.truncate(result
.length() - 1);
1737 ICU_Utility::escapeUnprintable(result
, c
);
1741 if (c
== BACKSLASH
) {
1751 return _generatePattern(result
, escapeUnprintable
);
1755 * Returns a string representation of this set. If the result of
1756 * calling this function is passed to a UnicodeSet constructor, it
1757 * will produce another set that is equal to this one.
1759 UnicodeString
& UnicodeSet::toPattern(UnicodeString
& result
,
1760 UBool escapeUnprintable
) const {
1762 return _toPattern(result
, escapeUnprintable
);
1766 * Generate and append a string representation of this set to result.
1767 * This does not use this.pat, the cleaned up copy of the string
1768 * passed to applyPattern().
1770 UnicodeString
& UnicodeSet::_generatePattern(UnicodeString
& result
,
1771 UBool escapeUnprintable
) const {
1772 result
.append(SET_OPEN
);
1774 // // Check against the predefined categories. We implicitly build
1775 // // up ALL category sets the first time toPattern() is called.
1776 // for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
1777 // if (*this == getCategorySet(cat)) {
1778 // result.append(COLON);
1779 // result.append(CATEGORY_NAMES, cat*2, 2);
1780 // return result.append(CATEGORY_CLOSE);
1784 int32_t count
= getRangeCount();
1786 // If the set contains at least 2 intervals and includes both
1787 // MIN_VALUE and MAX_VALUE, then the inverse representation will
1788 // be more economical.
1790 getRangeStart(0) == MIN_VALUE
&&
1791 getRangeEnd(count
-1) == MAX_VALUE
) {
1794 result
.append(COMPLEMENT
);
1796 for (int32_t i
= 1; i
< count
; ++i
) {
1797 UChar32 start
= getRangeEnd(i
-1)+1;
1798 UChar32 end
= getRangeStart(i
)-1;
1799 _appendToPat(result
, start
, escapeUnprintable
);
1801 if ((start
+1) != end
) {
1802 result
.append(HYPHEN
);
1804 _appendToPat(result
, end
, escapeUnprintable
);
1809 // Default; emit the ranges as pairs
1811 for (int32_t i
= 0; i
< count
; ++i
) {
1812 UChar32 start
= getRangeStart(i
);
1813 UChar32 end
= getRangeEnd(i
);
1814 _appendToPat(result
, start
, escapeUnprintable
);
1816 if ((start
+1) != end
) {
1817 result
.append(HYPHEN
);
1819 _appendToPat(result
, end
, escapeUnprintable
);
1824 for (int32_t i
= 0; i
<strings
->size(); ++i
) {
1825 result
.append(OPEN_BRACE
);
1826 _appendToPat(result
,
1827 *(const UnicodeString
*) strings
->elementAt(i
),
1829 result
.append(CLOSE_BRACE
);
1831 return result
.append(SET_CLOSE
);