/*
*******************************************************************************
* Copyright (C) 2014-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
8 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
13 #include "unicode/filteredbrk.h"
14 #include "unicode/ucharstriebuilder.h"
15 #include "unicode/ures.h"
17 #include "uresimp.h" // ures_getByKeyWithFallback
18 #include "ubrkimpl.h" // U_ICUDATA_BRKITR
30 static void _fb_trace(const char *m
, const UnicodeString
*s
, UBool b
, int32_t d
, const char *f
, int l
) {
33 s
->extract(0,s
->length(),buf
,2048);
37 fprintf(stderr
,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
38 f
, l
, m
, buf
, (const void*)s
, b
?'T':'F',(int)d
);
41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
43 #define FB_TRACE(m,s,b,d)
47 * Used with sortedInsert()
49 static int8_t U_CALLCONV
compareUnicodeString(UElement t1
, UElement t2
) {
50 const UnicodeString
&a
= *(const UnicodeString
*)t1
.pointer
;
51 const UnicodeString
&b
= *(const UnicodeString
*)t2
.pointer
;
56 * A UVector which implements a set of strings.
58 class U_COMMON_API UStringSet
: public UVector
{
60 UStringSet(UErrorCode
&status
) : UVector(uprv_deleteUObject
,
61 uhash_compareUnicodeString
,
64 virtual ~UStringSet();
   * Is this UnicodeString contained?
68 inline UBool
contains(const UnicodeString
& s
) {
69 return contains((void*) &s
);
71 using UVector::contains
;
73 * Return the ith UnicodeString alias
75 inline const UnicodeString
* getStringAt(int32_t i
) const {
76 return (const UnicodeString
*)elementAt(i
);
79 * Adopt the UnicodeString if not already contained.
80 * Caller no longer owns the pointer in any case.
81 * @return true if adopted successfully, false otherwise (error, or else duplicate)
83 inline UBool
adopt(UnicodeString
*str
, UErrorCode
&status
) {
84 if(U_FAILURE(status
) || contains(*str
)) {
88 sortedInsert(str
, compareUnicodeString
, status
);
89 if(U_FAILURE(status
)) {
98 * @return true if successfully adopted.
100 inline UBool
add(const UnicodeString
& str
, UErrorCode
&status
) {
101 if(U_FAILURE(status
)) return false;
102 UnicodeString
*t
= new UnicodeString(str
);
104 status
= U_MEMORY_ALLOCATION_ERROR
; return false;
106 return adopt(t
, status
);
109 * Remove this string.
110 * @return true if successfully removed, false otherwise (error, or else it wasn't there)
112 inline UBool
remove(const UnicodeString
&s
, UErrorCode
&status
) {
113 if(U_FAILURE(status
)) return false;
114 return removeElement((void*) &s
);
119 * Virtual, won't be inlined
121 UStringSet::~UStringSet() {}
123 /* ----------------------------------------------------------- */
126 /* Filtered Break constants */
127 static const int32_t kPARTIAL
= (1<<0); //< partial - need to run through forward trie
128 static const int32_t kMATCH
= (1<<1); //< exact match - skip this one.
129 static const int32_t kSuppressInReverse
= (1<<0);
130 static const int32_t kAddToForward
= (1<<1);
131 static const UChar kFULLSTOP
= 0x002E; // '.'
134 * Shared data for SimpleFilteredSentenceBreakIterator
136 class SimpleFilteredSentenceBreakData
: public UMemory
{
138 SimpleFilteredSentenceBreakData(UCharsTrie
*forwards
, UCharsTrie
*backwards
)
139 : fForwardsPartialTrie(forwards
), fBackwardsTrie(backwards
), refcount(1) { }
140 SimpleFilteredSentenceBreakData
*incr() { refcount
++; return this; }
141 SimpleFilteredSentenceBreakData
*decr() { if((--refcount
) <= 0) delete this; return 0; }
142 virtual ~SimpleFilteredSentenceBreakData();
144 LocalPointer
<UCharsTrie
> fForwardsPartialTrie
; // Has ".a" for "a.M."
145 LocalPointer
<UCharsTrie
> fBackwardsTrie
; // i.e. ".srM" for Mrs.
149 SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
152 * Concrete implementation
154 class SimpleFilteredSentenceBreakIterator
: public BreakIterator
{
156 SimpleFilteredSentenceBreakIterator(BreakIterator
*adopt
, UCharsTrie
*forwards
, UCharsTrie
*backwards
, UErrorCode
&status
);
157 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator
& other
);
158 virtual ~SimpleFilteredSentenceBreakIterator();
160 SimpleFilteredSentenceBreakData
*fData
;
161 LocalPointer
<BreakIterator
> fDelegate
;
162 LocalUTextPointer fText
;
164 /* -- subclass interface -- */
166 /* -- cloning and other subclass stuff -- */
167 virtual BreakIterator
* createBufferClone(void * /*stackBuffer*/,
168 int32_t &/*BufferSize*/,
169 UErrorCode
&status
) {
170 // for now - always deep clone
171 status
= U_SAFECLONE_ALLOCATED_WARNING
;
174 virtual BreakIterator
* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
175 virtual UClassID
getDynamicClassID(void) const { return NULL
; }
176 virtual UBool
operator==(const BreakIterator
& o
) const { if(this==&o
) return true; return false; }
178 /* -- text modifying -- */
179 virtual void setText(UText
*text
, UErrorCode
&status
) { fDelegate
->setText(text
,status
); }
180 virtual BreakIterator
&refreshInputText(UText
*input
, UErrorCode
&status
) { fDelegate
->refreshInputText(input
,status
); return *this; }
181 virtual void adoptText(CharacterIterator
* it
) { fDelegate
->adoptText(it
); }
182 virtual void setText(const UnicodeString
&text
) { fDelegate
->setText(text
); }
184 /* -- other functions that are just delegated -- */
185 virtual UText
*getUText(UText
*fillIn
, UErrorCode
&status
) const { return fDelegate
->getUText(fillIn
,status
); }
186 virtual CharacterIterator
& getText(void) const { return fDelegate
->getText(); }
188 /* -- ITERATION -- */
189 virtual int32_t first(void);
190 virtual int32_t preceding(int32_t offset
);
191 virtual int32_t previous(void);
192 virtual UBool
isBoundary(int32_t offset
);
193 virtual int32_t current(void) const { return fDelegate
->current(); } // we keep the delegate current, so this should be correct.
195 virtual int32_t next(void);
197 virtual int32_t next(int32_t n
);
198 virtual int32_t following(int32_t offset
);
199 virtual int32_t last(void);
203 * Given that the fDelegate has already given its "initial" answer,
204 * find the NEXT actual (non-excepted) break.
205 * @param n initial position from delegate
206 * @return new break position or UBRK_DONE
208 int32_t internalNext(int32_t n
);
210 * Given that the fDelegate has already given its "initial" answer,
211 * find the PREV actual (non-excepted) break.
212 * @param n initial position from delegate
213 * @return new break position or UBRK_DONE
215 int32_t internalPrev(int32_t n
);
217 * set up the UText with the value of the fDelegate.
218 * Call this before calling breakExceptionAt.
219 * May be able to avoid excess calls
221 void resetState(UErrorCode
&status
);
223 * Is there a match (exception) at this spot?
225 enum EFBMatchResult
{ kNoExceptionHere
, kExceptionHere
};
227 * Determine if there is an exception at this spot
228 * @param n spot to check
229 * @return kNoExceptionHere or kExceptionHere
231 enum EFBMatchResult
breakExceptionAt(int32_t n
);
234 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator
& other
)
235 : BreakIterator(other
), fData(other
.fData
->incr()), fDelegate(other
.fDelegate
->clone())
240 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator
*adopt
, UCharsTrie
*forwards
, UCharsTrie
*backwards
, UErrorCode
&status
) :
241 BreakIterator(adopt
->getLocale(ULOC_VALID_LOCALE
,status
),adopt
->getLocale(ULOC_ACTUAL_LOCALE
,status
)),
242 fData(new SimpleFilteredSentenceBreakData(forwards
, backwards
)),
248 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
249 fData
= fData
->decr();
252 void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode
&status
) {
253 fText
.adoptInstead(fDelegate
->getUText(fText
.orphan(), status
));
256 SimpleFilteredSentenceBreakIterator::EFBMatchResult
257 SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n
) {
258 int64_t bestPosn
= -1;
259 int32_t bestValue
= -1;
260 // loops while 'n' points to an exception.
261 utext_setNativeIndex(fText
.getAlias(), n
); // from n..
262 fData
->fBackwardsTrie
->reset();
265 //if(debug2) u_printf(" n@ %d\n", n);
266 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
267 if((uch
=utext_previous32(fText
.getAlias()))==(UChar32
)0x0020) { // TODO: skip a class of chars here??
268 // TODO only do this the 1st time?
269 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
271 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
272 uch
= utext_next32(fText
.getAlias());
273 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
276 UStringTrieResult r
= USTRINGTRIE_INTERMEDIATE_VALUE
;
278 while((uch
=utext_previous32(fText
.getAlias()))!=U_SENTINEL
&& // more to consume backwards and..
279 USTRINGTRIE_HAS_NEXT(r
=fData
->fBackwardsTrie
->nextForCodePoint(uch
))) {// more in the trie
280 if(USTRINGTRIE_HAS_VALUE(r
)) { // remember the best match so far
281 bestPosn
= utext_getNativeIndex(fText
.getAlias());
282 bestValue
= fData
->fBackwardsTrie
->getValue();
284 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
287 if(USTRINGTRIE_MATCHES(r
)) { // exact match?
288 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
289 bestValue
= fData
->fBackwardsTrie
->getValue();
290 bestPosn
= utext_getNativeIndex(fText
.getAlias());
291 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
295 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
297 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
298 //int32_t bestValue = fBackwardsTrie->getValue();
299 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
302 UChar32 prevch
= utext_char32At(fText
.getAlias(), bestPosn
-1); // char before the best match
303 if (prevch
!= U_SENTINEL
&& u_isUAlphabetic(prevch
)) {
304 // The match is preceded by other alphabetic characters, => invalid
305 return kNoExceptionHere
;
309 if(bestValue
== kMATCH
) { // exact match!
310 //if(debug2) u_printf(" exact backward match\n");
311 return kExceptionHere
; // See if the next is another exception.
312 } else if(bestValue
== kPARTIAL
313 && fData
->fForwardsPartialTrie
.isValid()) { // make sure there's a forward trie
314 //if(debug2) u_printf(" partial backward match\n");
315 // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
316 // to see if it matches something going forward.
317 fData
->fForwardsPartialTrie
->reset();
318 UStringTrieResult rfwd
= USTRINGTRIE_INTERMEDIATE_VALUE
;
319 utext_setNativeIndex(fText
.getAlias(), bestPosn
); // hope that's close ..
320 //if(debug2) u_printf("Retrying at %d\n", bestPosn);
321 while((uch
=utext_next32(fText
.getAlias()))!=U_SENTINEL
&&
322 USTRINGTRIE_HAS_NEXT(rfwd
=fData
->fForwardsPartialTrie
->nextForCodePoint(uch
))) {
323 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
325 if(USTRINGTRIE_MATCHES(rfwd
)) {
326 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
327 // only full matches here, nothing to check
329 return kExceptionHere
;
331 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
332 // no match (no exception) -return the 'underlying' break
333 return kNoExceptionHere
;
336 return kNoExceptionHere
; // internal error and/or no forwards trie
339 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match
340 return kNoExceptionHere
; // No match - so exit. Not an exception.
344 // the workhorse single next.
346 SimpleFilteredSentenceBreakIterator::internalNext(int32_t n
) {
347 if(n
== UBRK_DONE
|| // at end or
348 fData
->fBackwardsTrie
.isNull()) { // .. no backwards table loaded == no exceptions
351 // OK, do we need to break here?
352 UErrorCode status
= U_ZERO_ERROR
;
355 if(U_FAILURE(status
)) return UBRK_DONE
; // bail out
356 int64_t utextLen
= utext_nativeLength(fText
.getAlias());
358 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
359 while (n
!= UBRK_DONE
&& n
!= utextLen
) { // outer loop runs once per underlying break (from fDelegate).
360 SimpleFilteredSentenceBreakIterator::EFBMatchResult m
= breakExceptionAt(n
);
364 n
= fDelegate
->next(); // skip this one. Find the next lowerlevel break.
368 case kNoExceptionHere
:
376 SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n
) {
377 if(n
== 0 || n
== UBRK_DONE
|| // at end or
378 fData
->fBackwardsTrie
.isNull()) { // .. no backwards table loaded == no exceptions
381 // OK, do we need to break here?
382 UErrorCode status
= U_ZERO_ERROR
;
385 if(U_FAILURE(status
)) return UBRK_DONE
; // bail out
387 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
388 while (n
!= UBRK_DONE
&& n
!= 0) { // outer loop runs once per underlying break (from fDelegate).
389 SimpleFilteredSentenceBreakIterator::EFBMatchResult m
= breakExceptionAt(n
);
393 n
= fDelegate
->previous(); // skip this one. Find the next lowerlevel break.
397 case kNoExceptionHere
:
406 SimpleFilteredSentenceBreakIterator::next() {
407 return internalNext(fDelegate
->next());
411 SimpleFilteredSentenceBreakIterator::first(void) {
412 return internalNext(fDelegate
->first());
416 SimpleFilteredSentenceBreakIterator::preceding(int32_t offset
) {
417 return internalPrev(fDelegate
->preceding(offset
));
421 SimpleFilteredSentenceBreakIterator::previous(void) {
422 return internalPrev(fDelegate
->previous());
425 UBool
SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset
) {
426 if(!fDelegate
->isBoundary(offset
)) return false; // no break to suppress
428 UErrorCode status
= U_ZERO_ERROR
;
431 SimpleFilteredSentenceBreakIterator::EFBMatchResult m
= breakExceptionAt(offset
);
437 case kNoExceptionHere
:
443 SimpleFilteredSentenceBreakIterator::next(int32_t offset
) {
444 return internalNext(fDelegate
->next(offset
));
448 SimpleFilteredSentenceBreakIterator::following(int32_t offset
) {
449 return internalNext(fDelegate
->following(offset
));
453 SimpleFilteredSentenceBreakIterator::last(void) {
454 // Don't suppress a break opportunity at the end of text.
455 return fDelegate
->last();
460 * Concrete implementation of builder class.
462 class U_COMMON_API SimpleFilteredBreakIteratorBuilder
: public FilteredBreakIteratorBuilder
{
464 virtual ~SimpleFilteredBreakIteratorBuilder();
465 SimpleFilteredBreakIteratorBuilder(const Locale
&fromLocale
, UErrorCode
&status
);
466 SimpleFilteredBreakIteratorBuilder(UErrorCode
&status
);
467 virtual UBool
suppressBreakAfter(const UnicodeString
& exception
, UErrorCode
& status
);
468 virtual UBool
unsuppressBreakAfter(const UnicodeString
& exception
, UErrorCode
& status
);
469 virtual BreakIterator
*build(BreakIterator
* adoptBreakIterator
, UErrorCode
& status
);
474 SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
478 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode
&status
)
483 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale
&fromLocale
, UErrorCode
&status
)
486 if(U_SUCCESS(status
)) {
487 LocalUResourceBundlePointer
b(ures_open(U_ICUDATA_BRKITR
, fromLocale
.getBaseName(), &status
));
488 LocalUResourceBundlePointer
exceptions(ures_getByKeyWithFallback(b
.getAlias(), "exceptions", NULL
, &status
));
489 LocalUResourceBundlePointer
breaks(ures_getByKeyWithFallback(exceptions
.getAlias(), "SentenceBreak", NULL
, &status
));
490 if(U_FAILURE(status
)) return; // leaves the builder empty, if you try to use it.
492 LocalUResourceBundlePointer strs
;
493 UErrorCode subStatus
= status
;
495 strs
.adoptInstead(ures_getNextResource(breaks
.getAlias(), strs
.orphan(), &subStatus
));
496 if(strs
.isValid() && U_SUCCESS(subStatus
)) {
497 UnicodeString
str(ures_getUnicodeString(strs
.getAlias(), &status
));
498 suppressBreakAfter(str
, status
); // load the string
500 } while (strs
.isValid() && U_SUCCESS(subStatus
));
501 if(U_FAILURE(subStatus
)&&subStatus
!=U_INDEX_OUTOFBOUNDS_ERROR
&&U_SUCCESS(status
)) {
508 SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString
& exception
, UErrorCode
& status
)
510 UBool r
= fSet
.add(exception
, status
);
511 FB_TRACE("suppressBreakAfter",&exception
,r
,0);
516 SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString
& exception
, UErrorCode
& status
)
518 UBool r
= fSet
.remove(exception
, status
);
519 FB_TRACE("unsuppressBreakAfter",&exception
,r
,0);
524 * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
527 * Note: "new UnicodeString[subCount]" ends up calling global operator new
528 * on MSVC2012 for some reason.
530 static inline UnicodeString
* newUnicodeStringArray(size_t count
) {
531 return new UnicodeString
[count
? count
: 1];
535 SimpleFilteredBreakIteratorBuilder::build(BreakIterator
* adoptBreakIterator
, UErrorCode
& status
) {
536 LocalPointer
<BreakIterator
> adopt(adoptBreakIterator
);
538 LocalPointer
<UCharsTrieBuilder
> builder(new UCharsTrieBuilder(status
), status
);
539 LocalPointer
<UCharsTrieBuilder
> builder2(new UCharsTrieBuilder(status
), status
);
540 if(U_FAILURE(status
)) {
544 int32_t revCount
= 0;
545 int32_t fwdCount
= 0;
547 int32_t subCount
= fSet
.size();
549 UnicodeString
*ustrs_ptr
= newUnicodeStringArray(subCount
);
551 LocalArray
<UnicodeString
> ustrs(ustrs_ptr
);
553 LocalMemory
<int> partials
;
554 partials
.allocateInsteadAndReset(subCount
);
556 LocalPointer
<UCharsTrie
> backwardsTrie
; // i.e. ".srM" for Mrs.
557 LocalPointer
<UCharsTrie
> forwardsPartialTrie
; // Has ".a" for "a.M."
563 const UnicodeString
*abbr
= fSet
.getStringAt(i
);
565 FB_TRACE("build",abbr
,TRUE
,i
);
566 ustrs
[n
] = *abbr
; // copy by value
567 FB_TRACE("ustrs[n]",&ustrs
[n
],TRUE
,i
);
569 FB_TRACE("build",abbr
,FALSE
,i
);
570 status
= U_MEMORY_ALLOCATION_ERROR
;
573 partials
[n
] = 0; // default: not partial
576 // first pass - find partials.
577 for(int i
=0;i
<subCount
;i
++) {
578 int nn
= ustrs
[i
].indexOf(kFULLSTOP
); // TODO: non-'.' abbreviations
579 if(nn
>-1 && (nn
+1)!=ustrs
[i
].length()) {
580 FB_TRACE("partial",&ustrs
[i
],FALSE
,i
);
584 for(int j
=0;j
<subCount
;j
++) {
586 if(ustrs
[i
].compare(0,nn
+1,ustrs
[j
],0,nn
+1)==0) {
587 FB_TRACE("prefix",&ustrs
[j
],FALSE
,nn
+1);
588 //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
589 if(partials
[j
]==0) { // hasn't been processed yet
590 partials
[j
] = (ustrs
[j
].length() == nn
+1)? (kSuppressInReverse
| kAddToForward
): kAddToForward
;
591 FB_TRACE("suppressing",&ustrs
[j
],FALSE
,j
);
592 } else if(partials
[j
] & kSuppressInReverse
) {
593 sameAs
= j
; // the other entry is already in the reverse table.
597 FB_TRACE("for partial same-",&ustrs
[i
],FALSE
,sameAs
);
598 FB_TRACE(" == partial #",&ustrs
[i
],FALSE
,partials
[i
]);
599 UnicodeString
prefix(ustrs
[i
], 0, nn
+1);
600 if(sameAs
== -1 && partials
[i
] == 0) {
601 // first one - add the prefix to the reverse table.
603 builder
->add(prefix
, kPARTIAL
, status
);
605 FB_TRACE("Added partial",&prefix
,FALSE
, i
);
606 FB_TRACE(u_errorName(status
),&ustrs
[i
],FALSE
,i
);
607 partials
[i
] = kAddToForward
;
609 FB_TRACE("NOT adding partial",&prefix
,FALSE
, i
);
610 FB_TRACE(u_errorName(status
),&ustrs
[i
],FALSE
,i
);
614 for(int i
=0;i
<subCount
;i
++) {
615 if((partials
[i
] & kSuppressInReverse
) == 0) {
617 builder
->add(ustrs
[i
], kMATCH
, status
);
619 FB_TRACE(u_errorName(status
), &ustrs
[i
], FALSE
, i
);
621 if((partials
[i
] & kAddToForward
) != 0) {
622 FB_TRACE("Adding fwd",&ustrs
[i
], FALSE
, i
);
624 // an optimization would be to only add the portion after the '.'
625 // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
626 // instead of "Ph.D." since we already know the "Ph." part is a match.
627 // would need the trie to be able to hold 0-length strings, though.
628 builder2
->add(ustrs
[i
], kMATCH
, status
); // forward
630 //ustrs[i].reverse();
631 ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
634 FB_TRACE("AbbrCount",NULL
,FALSE
, subCount
);
637 backwardsTrie
.adoptInstead(builder
->build(USTRINGTRIE_BUILD_FAST
, status
));
638 if(U_FAILURE(status
)) {
639 FB_TRACE(u_errorName(status
),NULL
,FALSE
, -1);
645 forwardsPartialTrie
.adoptInstead(builder2
->build(USTRINGTRIE_BUILD_FAST
, status
));
646 if(U_FAILURE(status
)) {
647 FB_TRACE(u_errorName(status
),NULL
,FALSE
, -1);
652 return new SimpleFilteredSentenceBreakIterator(adopt
.orphan(), forwardsPartialTrie
.orphan(), backwardsTrie
.orphan(), status
);
656 // ----------- Base class implementation
658 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
661 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
664 FilteredBreakIteratorBuilder
*
665 FilteredBreakIteratorBuilder::createInstance(const Locale
& where
, UErrorCode
& status
) {
666 if(U_FAILURE(status
)) return NULL
;
667 LocalPointer
<FilteredBreakIteratorBuilder
> ret(new SimpleFilteredBreakIteratorBuilder(where
, status
), status
);
668 return (U_SUCCESS(status
))? ret
.orphan(): NULL
;
671 FilteredBreakIteratorBuilder
*
672 FilteredBreakIteratorBuilder::createInstance(UErrorCode
& status
) {
673 if(U_FAILURE(status
)) return NULL
;
674 LocalPointer
<FilteredBreakIteratorBuilder
> ret(new SimpleFilteredBreakIteratorBuilder(status
), status
);
675 return (U_SUCCESS(status
))? ret
.orphan(): NULL
;
#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION