+/**
+ * Drop the cached pattern string, if one is currently held.
+ * Frees the buffer and resets pat/patLen to the "no pattern" state;
+ * does nothing when no pattern is cached.
+ */
+void UnicodeSet::releasePattern() {
+    if (pat == nullptr) {
+        return;  // Nothing cached, nothing to free.
+    }
+    uprv_free(pat);
+    pat = nullptr;
+    patLen = 0;
+}
+
+/**
+ * Set the new pattern to cache.
+ *
+ * Copies newPatLen code units from newPat into a freshly allocated,
+ * NUL-terminated buffer and makes that the cached pattern. Any previously
+ * cached pattern is released in all cases.
+ *
+ * @param newPat    Source code units to copy; may alias the currently
+ *                  cached pattern.
+ * @param newPatLen Number of code units to copy from newPat.
+ */
+void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) {
+    // Build the replacement before releasing the old buffer: if newPat points
+    // into the currently cached pattern, freeing first would make the copy
+    // below read freed memory.
+    UChar *newBuffer = static_cast<UChar *>(uprv_malloc((newPatLen + 1) * sizeof(UChar)));
+    if (newBuffer != nullptr) {
+        u_memcpy(newBuffer, newPat, newPatLen);
+        newBuffer[newPatLen] = 0;
+    }
+
+    // The old pattern is dropped even when the allocation failed, so a stale
+    // pattern is never left behind.
+    releasePattern();
+
+    if (newBuffer != nullptr) {
+        pat = newBuffer;
+        patLen = newPatLen;
+    }
+    // else we don't care if malloc failed. This was just a nice cache.
+    // We can regenerate an equivalent pattern later when requested.
+}
+
+/**
+ * Freeze this set and build the helper structures used by contains(),
+ * span() and similar functions.
+ *
+ * Does nothing if the set is already frozen or bogus. Otherwise the set is
+ * compacted, and then either a UnicodeSetStringSpan (when the set has strings
+ * that matter for spanning) or a BMPSet (otherwise) is created. If either
+ * allocation fails the set is marked bogus.
+ *
+ * @return this
+ */
+UnicodeFunctor *UnicodeSet::freeze() {
+    if (isFrozen() || isBogus()) {
+        return this;
+    }
+
+    compact();
+
+    // Optimize contains() and span() and similar functions.
+    if (hasStrings()) {
+        stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL);
+        if (stringSpan == nullptr) {
+            // Allocation failed.
+            setToBogus();
+            return this;
+        }
+        if (!stringSpan->needsStringSpanUTF16()) {
+            // All strings are irrelevant for span() etc. because
+            // all of each string's code points are contained in this set.
+            // Do not check needsStringSpanUTF8() because UTF-8 has at most as
+            // many relevant strings as UTF-16.
+            // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().)
+            delete stringSpan;
+            stringSpan = nullptr;
+        }
+    }
+
+    if (stringSpan == nullptr) {
+        // No span-relevant strings: Optimize for code point spans.
+        bmpSet = new BMPSet(list, len);
+        if (bmpSet == nullptr) {
+            // Memory allocation error.
+            setToBogus();
+        }
+    }
+    return this;
+}
+
+/**
+ * Forward span over a UTF-16 string.
+ *
+ * Dispatches, in order, to the BMPSet helper (only when an explicit positive
+ * length is given), to the cached string-span helper, to a temporary
+ * string-span helper when the set has span-relevant strings, and finally to a
+ * code-point-by-code-point loop.
+ *
+ * @param s             Start of the UTF-16 text.
+ * @param length        Number of code units, or negative if s is
+ *                      NUL-terminated (the length is then taken via u_strlen).
+ * @param spanCondition Span condition; in the fallback loop any value other
+ *                      than USET_SPAN_NOT_CONTAINED is treated as
+ *                      USET_SPAN_CONTAINED.
+ * @return The code unit index at which the span ends; 0 for empty input.
+ */
+int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+    if (bmpSet != nullptr && length > 0) {
+        const UChar *limit = s + length;
+        return static_cast<int32_t>(bmpSet->span(s, limit, spanCondition) - s);
+    }
+    if (length < 0) {
+        length = u_strlen(s);
+    }
+    if (length == 0) {
+        return 0;
+    }
+    if (stringSpan != nullptr) {
+        return stringSpan->span(s, length, spanCondition);
+    }
+    if (hasStrings()) {
+        // Not frozen: build a one-off helper for just this direction/condition.
+        uint32_t which;
+        if (spanCondition == USET_SPAN_NOT_CONTAINED) {
+            which = UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED;
+        } else {
+            which = UnicodeSetStringSpan::FWD_UTF16_CONTAINED;
+        }
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if (strSpan.needsStringSpanUTF16()) {
+            return strSpan.span(s, length, spanCondition);
+        }
+    }
+
+    // Pin to 0/1 values so the condition can be compared with contains().
+    if (spanCondition != USET_SPAN_NOT_CONTAINED) {
+        spanCondition = USET_SPAN_CONTAINED;
+    }
+
+    // length > 0 here, so at least one code point is examined.
+    UChar32 c;
+    int32_t next = 0;
+    int32_t spanEnd = 0;
+    while (next < length) {
+        U16_NEXT(s, next, length, c);
+        if (spanCondition != contains(c)) {
+            break;
+        }
+        spanEnd = next;
+    }
+    return spanEnd;
+}
+
+/**
+ * Backward span over a UTF-16 string, starting from its end.
+ *
+ * Dispatches, in order, to the BMPSet helper (only when an explicit positive
+ * length is given), to the cached string-span helper, to a temporary
+ * string-span helper when the set has span-relevant strings, and finally to a
+ * code-point-by-code-point loop walking backwards.
+ *
+ * @param s             Start of the UTF-16 text.
+ * @param length        Number of code units, or negative if s is
+ *                      NUL-terminated (the length is then taken via u_strlen).
+ * @param spanCondition Span condition; in the fallback loop any value other
+ *                      than USET_SPAN_NOT_CONTAINED is treated as
+ *                      USET_SPAN_CONTAINED.
+ * @return The code unit index at which the backward span stops; 0 for empty
+ *         input.
+ */
+int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const {
+    if (bmpSet != nullptr && length > 0) {
+        const UChar *limit = s + length;
+        return static_cast<int32_t>(bmpSet->spanBack(s, limit, spanCondition) - s);
+    }
+    if (length < 0) {
+        length = u_strlen(s);
+    }
+    if (length == 0) {
+        return 0;
+    }
+    if (stringSpan != nullptr) {
+        return stringSpan->spanBack(s, length, spanCondition);
+    }
+    if (hasStrings()) {
+        // Not frozen: build a one-off helper for just this direction/condition.
+        uint32_t which;
+        if (spanCondition == USET_SPAN_NOT_CONTAINED) {
+            which = UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED;
+        } else {
+            which = UnicodeSetStringSpan::BACK_UTF16_CONTAINED;
+        }
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if (strSpan.needsStringSpanUTF16()) {
+            return strSpan.spanBack(s, length, spanCondition);
+        }
+    }
+
+    // Pin to 0/1 values so the condition can be compared with contains().
+    if (spanCondition != USET_SPAN_NOT_CONTAINED) {
+        spanCondition = USET_SPAN_CONTAINED;
+    }
+
+    // length > 0 here, so at least one code point is examined.
+    // U16_PREV moves length back to the start of the code point it reads.
+    UChar32 c;
+    int32_t spanStart = length;
+    while (length > 0) {
+        U16_PREV(s, 0, length, c);
+        if (spanCondition != contains(c)) {
+            break;
+        }
+        spanStart = length;
+    }
+    return spanStart;
+}
+
+/**
+ * Forward span over a UTF-8 string.
+ *
+ * Dispatches, in order, to the BMPSet helper (only when an explicit positive
+ * length is given), to the cached string-span helper, to a temporary
+ * string-span helper when the set has span-relevant strings, and finally to a
+ * code-point-by-code-point loop that decodes with U8_NEXT_OR_FFFD.
+ *
+ * @param s             Start of the UTF-8 text.
+ * @param length        Number of bytes, or negative if s is NUL-terminated
+ *                      (the length is then taken via uprv_strlen).
+ * @param spanCondition Span condition; in the fallback loop any value other
+ *                      than USET_SPAN_NOT_CONTAINED is treated as
+ *                      USET_SPAN_CONTAINED.
+ * @return The byte index at which the span ends; 0 for empty input.
+ */
+int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
+    if (bmpSet != nullptr && length > 0) {
+        const uint8_t *bytes = reinterpret_cast<const uint8_t *>(s);
+        return static_cast<int32_t>(bmpSet->spanUTF8(bytes, length, spanCondition) - bytes);
+    }
+    if (length < 0) {
+        length = static_cast<int32_t>(uprv_strlen(s));
+    }
+    if (length == 0) {
+        return 0;
+    }
+    if (stringSpan != nullptr) {
+        return stringSpan->spanUTF8(reinterpret_cast<const uint8_t *>(s), length, spanCondition);
+    }
+    if (hasStrings()) {
+        // Not frozen: build a one-off helper for just this direction/condition.
+        uint32_t which;
+        if (spanCondition == USET_SPAN_NOT_CONTAINED) {
+            which = UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED;
+        } else {
+            which = UnicodeSetStringSpan::FWD_UTF8_CONTAINED;
+        }
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if (strSpan.needsStringSpanUTF8()) {
+            return strSpan.spanUTF8(reinterpret_cast<const uint8_t *>(s), length, spanCondition);
+        }
+    }
+
+    // Pin to 0/1 values so the condition can be compared with contains().
+    if (spanCondition != USET_SPAN_NOT_CONTAINED) {
+        spanCondition = USET_SPAN_CONTAINED;
+    }
+
+    // length > 0 here, so at least one code point is examined.
+    UChar32 c;
+    int32_t next = 0;
+    int32_t spanEnd = 0;
+    while (next < length) {
+        U8_NEXT_OR_FFFD(s, next, length, c);
+        if (spanCondition != contains(c)) {
+            break;
+        }
+        spanEnd = next;
+    }
+    return spanEnd;
+}
+
+/**
+ * Backward span over a UTF-8 string, starting from its end.
+ *
+ * Dispatches, in order, to the BMPSet helper (only when an explicit positive
+ * length is given), to the cached string-span helper, to a temporary
+ * string-span helper when the set has span-relevant strings, and finally to a
+ * code-point-by-code-point loop that decodes backwards with U8_PREV_OR_FFFD.
+ *
+ * @param s             Start of the UTF-8 text.
+ * @param length        Number of bytes, or negative if s is NUL-terminated
+ *                      (the length is then taken via uprv_strlen).
+ * @param spanCondition Span condition; in the fallback loop any value other
+ *                      than USET_SPAN_NOT_CONTAINED is treated as
+ *                      USET_SPAN_CONTAINED.
+ * @return The byte index at which the backward span stops; 0 for empty input.
+ */
+int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const {
+    if (bmpSet != nullptr && length > 0) {
+        // The BMPSet helper already returns an index, not a pointer.
+        return bmpSet->spanBackUTF8(reinterpret_cast<const uint8_t *>(s), length, spanCondition);
+    }
+    if (length < 0) {
+        length = static_cast<int32_t>(uprv_strlen(s));
+    }
+    if (length == 0) {
+        return 0;
+    }
+    if (stringSpan != nullptr) {
+        return stringSpan->spanBackUTF8(reinterpret_cast<const uint8_t *>(s), length, spanCondition);
+    }
+    if (hasStrings()) {
+        // Not frozen: build a one-off helper for just this direction/condition.
+        uint32_t which;
+        if (spanCondition == USET_SPAN_NOT_CONTAINED) {
+            which = UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED;
+        } else {
+            which = UnicodeSetStringSpan::BACK_UTF8_CONTAINED;
+        }
+        UnicodeSetStringSpan strSpan(*this, *strings, which);
+        if (strSpan.needsStringSpanUTF8()) {
+            return strSpan.spanBackUTF8(reinterpret_cast<const uint8_t *>(s), length, spanCondition);
+        }
+    }
+
+    // Pin to 0/1 values so the condition can be compared with contains().
+    if (spanCondition != USET_SPAN_NOT_CONTAINED) {
+        spanCondition = USET_SPAN_CONTAINED;
+    }
+
+    // length > 0 here, so at least one code point is examined.
+    // U8_PREV_OR_FFFD moves length back to the start of the sequence it reads.
+    UChar32 c;
+    int32_t spanStart = length;
+    while (length > 0) {
+        U8_PREV_OR_FFFD(s, 0, length, c);
+        if (spanCondition != contains(c)) {
+            break;
+        }
+        spanStart = length;
+    }
+    return spanStart;
+}