]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/filteredbrk.cpp
ICU-551.51.3.tar.gz
[apple/icu.git] / icuSources / common / filteredbrk.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 2014-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
10
11 #include "cmemory.h"
12
13 #include "unicode/filteredbrk.h"
14 #include "unicode/ucharstriebuilder.h"
15 #include "unicode/ures.h"
16
17 #include "uresimp.h" // ures_getByKeyWithFallback
18 #include "ubrkimpl.h" // U_ICUDATA_BRKITR
19 #include "uvector.h"
20 #include "cmemory.h"
21
22 U_NAMESPACE_BEGIN
23
24 #ifndef FB_DEBUG
25 #define FB_DEBUG 0
26 #endif
27
28 #if FB_DEBUG
29 #include <stdio.h>
30 static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) {
31 char buf[2048];
32 if(s) {
33 s->extract(0,s->length(),buf,2048);
34 } else {
35 strcpy(buf,"NULL");
36 }
37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
39 }
40
41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
42 #else
43 #define FB_TRACE(m,s,b,d)
44 #endif
45
46 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
47 const UnicodeString &a = *(const UnicodeString*)t1.pointer;
48 const UnicodeString &b = *(const UnicodeString*)t2.pointer;
49 return a.compare(b);
50 }
51
52 /**
53 * A UVector which implements a set of strings.
54 */
55 class U_COMMON_API UStringSet : public UVector {
56 public:
57 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
58 uhash_compareUnicodeString,
59 1,
60 status) {}
61 virtual ~UStringSet();
62 /**
63 * Is this UnicodeSet contained?
64 */
65 inline UBool contains(const UnicodeString& s) {
66 return contains((void*) &s);
67 }
68 using UVector::contains;
69 /**
70 * Return the ith UnicodeString alias
71 */
72 inline const UnicodeString* getStringAt(int32_t i) const {
73 return (const UnicodeString*)elementAt(i);
74 }
75 /**
76 * Adopt the UnicodeString if not already contained.
77 * Caller no longer owns the pointer in any case.
78 * @return true if adopted successfully, false otherwise (error, or else duplicate)
79 */
80 inline UBool adopt(UnicodeString *str, UErrorCode &status) {
81 if(U_FAILURE(status) || contains(*str)) {
82 delete str;
83 return false;
84 } else {
85 sortedInsert(str, compareUnicodeString, status);
86 if(U_FAILURE(status)) {
87 delete str;
88 return false;
89 }
90 return true;
91 }
92 }
93 /**
94 * Add by value.
95 * @return true if successfully adopted.
96 */
97 inline UBool add(const UnicodeString& str, UErrorCode &status) {
98 if(U_FAILURE(status)) return false;
99 UnicodeString *t = new UnicodeString(str);
100 if(t==NULL) {
101 status = U_MEMORY_ALLOCATION_ERROR; return false;
102 }
103 return adopt(t, status);
104 }
105 /**
106 * Remove this string.
107 * @return true if successfully removed, false otherwise (error, or else it wasn't there)
108 */
109 inline UBool remove(const UnicodeString &s, UErrorCode &status) {
110 if(U_FAILURE(status)) return false;
111 return removeElement((void*) &s);
112 }
113 };
114
115 /**
116 * Virtual, won't be inlined
117 */
118 UStringSet::~UStringSet() {}
119
120
121 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
122 static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
123 static const int32_t kSuppressInReverse = (1<<0);
124 static const int32_t kAddToForward = (1<<1);
125 static const UChar kFULLSTOP = 0x002E; // '.'
126
127 class SimpleFilteredSentenceBreakIterator : public BreakIterator {
128 public:
129 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status);
130 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
131 virtual ~SimpleFilteredSentenceBreakIterator();
132 private:
133 LocalPointer<BreakIterator> fDelegate;
134 LocalUTextPointer fText;
135 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
136 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
137
138 /* -- subclass interface -- */
139 public:
140 /* -- cloning and other subclass stuff -- */
141 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
142 int32_t &/*BufferSize*/,
143 UErrorCode &status) {
144 // for now - always deep clone
145 status = U_SAFECLONE_ALLOCATED_WARNING;
146 return clone();
147 }
148 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBreakIterator(*this); }
149 virtual UClassID getDynamicClassID(void) const { return NULL; }
150 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
151
152 /* -- text modifying -- */
153 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
154 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
155 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
156 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
157
158 /* -- other functions that are just delegated -- */
159 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
160 virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
161
162 /* -- ITERATION -- */
163 virtual int32_t first(void) { return fDelegate->first(); }
164 virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset); }
165 virtual int32_t current(void) const { return fDelegate->current(); }
166 virtual int32_t next(int32_t n) { return fDelegate->next(n); } // fallback implementation, undoing r36410
167 virtual int32_t last(void) { return fDelegate->last(); }
168
169 virtual int32_t next(void);
170 virtual int32_t following(int32_t offset);
171 virtual int32_t previous(void);
172 virtual int32_t preceding(int32_t offset);
173
174 private:
175 virtual int32_t nextCore(int32_t n);
176 virtual int32_t previousCore(int32_t n);
177
178 };
179
180 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other)
181 : BreakIterator(other), fDelegate(other.fDelegate->clone())
182 {
183 /*
184 TODO: not able to clone Tries. Should be a refcounted hidden master instead.
185 if(other.fBackwardsTrie.isValid()) {
186 fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone());
187 }
188 if(other.fForwardsPartialTrie.isValid()) {
189 fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone());
190 }
191 */
192 }
193
194
195 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
196 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)),
197 fDelegate(adopt),
198 fBackwardsTrie(backwards),
199 fForwardsPartialTrie(forwards)
200 {
201 // all set..
202 }
203
204 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {}
205
206 int32_t SimpleFilteredSentenceBreakIterator::next() {
207 int32_t n = fDelegate->next();
208 return nextCore(n);
209 }
210
211 int32_t SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
212 int32_t n = fDelegate->following(offset);
213 return nextCore(n);
214 }
215
216 int32_t SimpleFilteredSentenceBreakIterator::nextCore(int32_t n) {
217 if(n == UBRK_DONE || // at end or
218 fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
219 return n;
220 }
221 // OK, do we need to break here?
222 UErrorCode status = U_ZERO_ERROR;
223 // refresh text
224 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
225 int64_t utextLen = utext_nativeLength(fText.getAlias());
226 if(n == utextLen) {
227 return n;
228 }
229 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias()));
230 do { // outer loop runs once per underlying break (from fDelegate).
231 // loops while 'n' points to an exception.
232 utext_setNativeIndex(fText.getAlias(), n); // from n..
233 fBackwardsTrie->reset();
234 UChar32 uch;
235 //if(debug2) u_printf(" n@ %d\n", n);
236 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown")
237 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
238 // TODO only do this the 1st time?
239 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
240 } else {
241 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
242 uch = utext_next32(fText.getAlias());
243 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
244 }
245 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
246
247 int32_t bestPosn = -1;
248 int32_t bestValue = -1;
249
250 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
251 USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
252 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
253 bestPosn = utext_getNativeIndex(fText.getAlias());
254 bestValue = fBackwardsTrie->getValue();
255 }
256 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias()));
257 }
258
259 if(USTRINGTRIE_MATCHES(r)) { // exact match?
260 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
261 bestValue = fBackwardsTrie->getValue();
262 bestPosn = utext_getNativeIndex(fText.getAlias());
263 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
264 }
265
266 if(bestPosn>=0) {
267 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue);
268
269 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
270 //int32_t bestValue = fBackwardsTrie->getValue();
271 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue);
272
273 if(bestValue == kMATCH) { // exact match!
274 //if(debug2) u_printf(" exact backward match\n");
275 n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
276 if(n==UBRK_DONE || n==utextLen) return n;
277 continue; // See if the next is another exception.
278 } else if(bestValue == kPARTIAL
279 && fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
280 //if(debug2) u_printf(" partial backward match\n");
281 // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie
282 // to see if it matches something going forward.
283 fForwardsPartialTrie->reset();
284 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
285 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
286 //if(debug2) u_printf("Retrying at %d\n", bestPosn);
287 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
288 USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(uch))) {
289 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias()));
290 }
291 if(USTRINGTRIE_MATCHES(rfwd)) {
292 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
293 // only full matches here, nothing to check
294 // skip the next:
295 n = fDelegate->next();
296 if(n==UBRK_DONE || n==utextLen) return n;
297 continue;
298 } else {
299 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
300 // no match (no exception) -return the 'underlying' break
301 return n;
302 }
303 } else {
304 return n; // internal error and/or no forwards trie
305 }
306 } else {
307 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match
308 return n; // No match - so exit. Not an exception.
309 }
310 } while(n != UBRK_DONE);
311 return n;
312 }
313
314 int32_t SimpleFilteredSentenceBreakIterator::previous() {
315 int32_t n = fDelegate->previous();
316 return previousCore(n);
317 }
318
319 int32_t SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
320 int32_t n = fDelegate->preceding(offset);
321 return previousCore(n);
322 }
323
324 int32_t SimpleFilteredSentenceBreakIterator::previousCore(int32_t n) {
325 if(n == UBRK_DONE || n == 0 || // at end or
326 fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
327 return n;
328 }
329 // OK, do we need to break here?
330 UErrorCode status = U_ZERO_ERROR;
331 // refresh text
332 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
333 do { // outer loop runs once per underlying break (from fDelegate).
334 // loops while 'n' points to an exception.
335 utext_setNativeIndex(fText.getAlias(), n); // from n..
336 fBackwardsTrie->reset();
337 UChar32 uch;
338 // Skip over any space preceding the break
339 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here??
340 // TODO only do this the 1st time?
341 } else {
342 //restore what we skipped
343 uch = utext_next32(fText.getAlias());
344 }
345 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
346
347 int32_t bestPosn = -1;
348 int32_t bestValue = -1;
349
350 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
351 USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
352 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
353 bestPosn = utext_getNativeIndex(fText.getAlias());
354 bestValue = fBackwardsTrie->getValue();
355 }
356 }
357
358 if(USTRINGTRIE_MATCHES(r)) { // exact match?
359 bestValue = fBackwardsTrie->getValue();
360 bestPosn = utext_getNativeIndex(fText.getAlias());
361 }
362
363 if(bestPosn>=0) {
364
365 if(bestValue == kMATCH) { // exact match!
366 n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
367 if(n==UBRK_DONE || n==0) return n;
368 continue; // See if the next is another exception.
369 } else if(bestValue == kPARTIAL
370 && fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
371 fForwardsPartialTrie->reset();
372 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
373 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close ..
374 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
375 USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(uch))) {
376 }
377 if(USTRINGTRIE_MATCHES(rfwd)) {
378 n = fDelegate->previous();
379 if(n==UBRK_DONE || n==0) return n;
380 continue;
381 } else {
382 // no match (no exception) -return the 'underlying' break
383 return n;
384 }
385 } else {
386 return n; // internal error and/or no forwards trie
387 }
388 } else {
389 return n; // No match - so exit. Not an exception.
390 }
391 } while(n != UBRK_DONE);
392 return n;
393 }
394
395 /**
396 * Concrete implementation of builder class.
397 */
398 class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder {
399 public:
400 virtual ~SimpleFilteredBreakIteratorBuilder();
401 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
402 SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
403 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
404 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
405 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
406 private:
407 UStringSet fSet;
408 };
409
410 SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder()
411 {
412 }
413
414 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status)
415 : fSet(status)
416 {
417 }
418
419 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status)
420 : fSet(status)
421 {
422 if(U_SUCCESS(status)) {
423 LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &status));
424 LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &status));
425 LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &status));
426 if(U_FAILURE(status)) return; // leaves the builder empty, if you try to use it.
427
428 LocalUResourceBundlePointer strs;
429 UErrorCode subStatus = status;
430 do {
431 strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus));
432 if(strs.isValid() && U_SUCCESS(subStatus)) {
433 UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status));
434 suppressBreakAfter(str, status); // load the string
435 }
436 } while (strs.isValid() && U_SUCCESS(subStatus));
437 if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) {
438 status = subStatus;
439 }
440 }
441 }
442
443 UBool
444 SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
445 {
446 UBool r = fSet.add(exception, status);
447 FB_TRACE("suppressBreakAfter",&exception,r,0);
448 return r;
449 }
450
451 UBool
452 SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status)
453 {
454 UBool r = fSet.remove(exception, status);
455 FB_TRACE("unsuppressBreakAfter",&exception,r,0);
456 return r;
457 }
458
459 /**
460 * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly.
461 * Work around this.
462 *
463 * Note: "new UnicodeString[subCount]" ends up calling global operator new
464 * on MSVC2012 for some reason.
465 */
466 static inline UnicodeString* newUnicodeStringArray(size_t count) {
467 return new UnicodeString[count ? count : 1];
468 }
469
470 BreakIterator *
471 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) {
472 LocalPointer<BreakIterator> adopt(adoptBreakIterator);
473
474 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status);
475 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status);
476 if(U_FAILURE(status)) {
477 return NULL;
478 }
479
480 int32_t revCount = 0;
481 int32_t fwdCount = 0;
482
483 int32_t subCount = fSet.size();
484
485 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
486
487 LocalArray<UnicodeString> ustrs(ustrs_ptr);
488
489 LocalMemory<int> partials;
490 partials.allocateInsteadAndReset(subCount);
491
492 LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs.
493 LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M."
494
495 int n=0;
496 for ( int32_t i = 0;
497 i<fSet.size();
498 i++) {
499 const UnicodeString *abbr = fSet.getStringAt(i);
500 if(abbr) {
501 FB_TRACE("build",abbr,TRUE,i);
502 ustrs[n] = *abbr; // copy by value
503 FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i);
504 } else {
505 FB_TRACE("build",abbr,FALSE,i);
506 status = U_MEMORY_ALLOCATION_ERROR;
507 return NULL;
508 }
509 partials[n] = 0; // default: not partial
510 n++;
511 }
512 // first pass - find partials.
513 for(int i=0;i<subCount;i++) {
514 int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations
515 if(nn>-1 && (nn+1)!=ustrs[i].length()) {
516 FB_TRACE("partial",&ustrs[i],FALSE,i);
517 // is partial.
518 // is it unique?
519 int sameAs = -1;
520 for(int j=0;j<subCount;j++) {
521 if(j==i) continue;
522 if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) {
523 FB_TRACE("prefix",&ustrs[j],FALSE,nn+1);
524 //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn
525 if(partials[j]==0) { // hasn't been processed yet
526 partials[j] = kSuppressInReverse | kAddToForward;
527 FB_TRACE("suppressing",&ustrs[j],FALSE,j);
528 } else if(partials[j] & kSuppressInReverse) {
529 sameAs = j; // the other entry is already in the reverse table.
530 }
531 }
532 }
533 FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs);
534 FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]);
535 UnicodeString prefix(ustrs[i], 0, nn+1);
536 if(sameAs == -1 && partials[i] == 0) {
537 // first one - add the prefix to the reverse table.
538 prefix.reverse();
539 builder->add(prefix, kPARTIAL, status);
540 revCount++;
541 FB_TRACE("Added partial",&prefix,FALSE, i);
542 FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
543 partials[i] = kSuppressInReverse | kAddToForward;
544 } else {
545 FB_TRACE("NOT adding partial",&prefix,FALSE, i);
546 FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i);
547 }
548 }
549 }
550 for(int i=0;i<subCount;i++) {
551 if(partials[i]==0) {
552 ustrs[i].reverse();
553 builder->add(ustrs[i], kMATCH, status);
554 revCount++;
555 FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i);
556 } else {
557 FB_TRACE("Adding fwd",&ustrs[i], FALSE, i);
558
559 // an optimization would be to only add the portion after the '.'
560 // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward,
561 // instead of "Ph.D." since we already know the "Ph." part is a match.
562 // would need the trie to be able to hold 0-length strings, though.
563 builder2->add(ustrs[i], kMATCH, status); // forward
564 fwdCount++;
565 //ustrs[i].reverse();
566 ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status));
567 }
568 }
569 FB_TRACE("AbbrCount",NULL,FALSE, subCount);
570
571 if(revCount>0) {
572 backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status));
573 if(U_FAILURE(status)) {
574 FB_TRACE(u_errorName(status),NULL,FALSE, -1);
575 return NULL;
576 }
577 }
578
579 if(fwdCount>0) {
580 forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status));
581 if(U_FAILURE(status)) {
582 FB_TRACE(u_errorName(status),NULL,FALSE, -1);
583 return NULL;
584 }
585 }
586
587 return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status);
588 }
589
590
591 // ----------- Base class implementation
592
593 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
594 }
595
596 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
597 }
598
599 FilteredBreakIteratorBuilder *
600 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) {
601 if(U_FAILURE(status)) return NULL;
602 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status);
603 return (U_SUCCESS(status))? ret.orphan(): NULL;
604 }
605
606 FilteredBreakIteratorBuilder *
607 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
608 if(U_FAILURE(status)) return NULL;
609 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status);
610 return (U_SUCCESS(status))? ret.orphan(): NULL;
611 }
612
613 U_NAMESPACE_END
614
615 #endif // !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION