icuSources/common/rbbi_cache.cpp

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3
   4 // file: rbbi_cache.cpp
   5
   6 #include "unicode/utypes.h"
   7
   8 #if !UCONFIG_NO_BREAK_ITERATION
   9
  10 #include "unicode/ubrk.h"
  11 #include "unicode/rbbi.h"
  12
  13 #include "rbbi_cache.h"
  14
  15 #include "brkeng.h"
  16 #include "cmemory.h"
  17 #include "rbbidata.h"
  18 #include "rbbirb.h"
  19 #include "uassert.h"
  20 #include "uvectr32.h"
  21
  22 U_NAMESPACE_BEGIN
  23
  24 /*
  25  * DictionaryCache implementation
  26  */
  27
  28 RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
  29         fBI(bi), fBreaks(status), fPositionInCache(-1),
  30         fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
  31 }
  32
  33 RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
  34 }
  35
  36 void RuleBasedBreakIterator::DictionaryCache::reset() {
  37     fPositionInCache = -1;
  38     fStart = 0;
  39     fLimit = 0;
  40     fFirstRuleStatusIndex = 0;
  41     fOtherRuleStatusIndex = 0;
  42     fBreaks.removeAllElements();
  43 }
  44
  45 UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
  46     if (fromPos >= fLimit || fromPos < fStart) {
  47         fPositionInCache = -1;
  48         return FALSE;
  49     }
  50
  51     // Sequential iteration, move from previous boundary to the following
  52
  53     int32_t r = 0;
  54     if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
  55         ++fPositionInCache;
  56         if (fPositionInCache >= fBreaks.size()) {
  57             fPositionInCache = -1;
  58             return FALSE;
  59         }
  60         r = fBreaks.elementAti(fPositionInCache);
  61         U_ASSERT(r > fromPos);
  62         *result = r;
  63         *statusIndex = fOtherRuleStatusIndex;
  64         return TRUE;
  65     }
  66
  67     // Random indexing. Linear search for the boundary following the given position.
  68
  69     for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
  70         r= fBreaks.elementAti(fPositionInCache);
  71         if (r > fromPos) {
  72             *result = r;
  73             *statusIndex = fOtherRuleStatusIndex;
  74             return TRUE;
  75         }
  76     }
  77     U_ASSERT(FALSE);
  78     fPositionInCache = -1;
  79     return FALSE;
  80 }
  81
  82
  83 UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
  84     if (fromPos <= fStart || fromPos > fLimit) {
  85         fPositionInCache = -1;
  86         return FALSE;
  87     }
  88
  89     if (fromPos == fLimit) {
  90         fPositionInCache = fBreaks.size() - 1;
  91         if (fPositionInCache >= 0) {
  92             U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
  93         }
  94     }
  95
  96     int32_t r;
  97     if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
  98         --fPositionInCache;
  99         r = fBreaks.elementAti(fPositionInCache);
 100         U_ASSERT(r < fromPos);
 101         *result = r;
 102         *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
 103         return TRUE;
 104     }
 105
 106     if (fPositionInCache == 0) {
 107         fPositionInCache = -1;
 108         return FALSE;
 109     }
 110
 111     for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
 112         r = fBreaks.elementAti(fPositionInCache);
 113         if (r < fromPos) {
 114             *result = r;
 115             *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
 116             return TRUE;
 117         }
 118     }
 119     U_ASSERT(FALSE);
 120     fPositionInCache = -1;
 121     return FALSE;
 122 }
 123
 124 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
 125                                        int32_t firstRuleStatus, int32_t otherRuleStatus) {
 126     if ((endPos - startPos) <= 1) {
 127         return;
 128     }
 129
 130     reset();
 131     fFirstRuleStatusIndex = firstRuleStatus;
 132     fOtherRuleStatusIndex = otherRuleStatus;
 133
 134     int32_t rangeStart = startPos;
 135     int32_t rangeEnd = endPos;
 136
 137     uint16_t    category;
 138     int32_t     current;
 139     UErrorCode  status = U_ZERO_ERROR;
 140     int32_t     foundBreakCount = 0;
 141     UText      *text = &fBI->fText;
 142
 143     // Loop through the text, looking for ranges of dictionary characters.
 144     // For each span, find the appropriate break engine, and ask it to find
 145     // any breaks within the span.
 146
 147     utext_setNativeIndex(text, rangeStart);
 148     UChar32     c = utext_current32(text);
 149     category = UTRIE2_GET16(fBI->fData->fTrie, c);
 150
 151     while(U_SUCCESS(status)) {
 152         while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
 153             utext_next32(text);           // TODO: cleaner loop structure.
 154             c = utext_current32(text);
 155             category = UTRIE2_GET16(fBI->fData->fTrie, c);
 156         }
 157         if (current >= rangeEnd) {
 158             break;
 159         }
 160
 161         // We now have a dictionary character. Get the appropriate language object
 162         // to deal with it.
 163         const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
 164
 165         // Ask the language object if there are any breaks. It will add them to the cache and
 166         // leave the text pointer on the other side of its range, ready to search for the next one.
 167         if (lbe != NULL) {
 168             foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
 169         }
 170
 171         // Reload the loop variables for the next go-round
 172         c = utext_current32(text);
 173         category = UTRIE2_GET16(fBI->fData->fTrie, c);
 174     }
 175
 176     // If we found breaks, ensure that the first and last entries are
 177     // the original starting and ending position. And initialize the
 178     // cache iteration position to the first entry.
 179
 180     // printf("foundBreakCount = %d\n", foundBreakCount);
 181     if (foundBreakCount > 0) {
 182         U_ASSERT(foundBreakCount == fBreaks.size());
 183         if (startPos < fBreaks.elementAti(0)) {
 184             // The dictionary did not place a boundary at the start of the segment of text.
 185             // Add one now. This should not commonly happen, but it would be easy for interactions
 186             // of the rules for dictionary segments and the break engine implementations to
 187             // inadvertently cause it. Cover it here, just in case.
 188             fBreaks.insertElementAt(startPos, 0, status);
 189         }
 190         if (endPos > fBreaks.peeki()) {
 191             fBreaks.push(endPos, status);
 192         }
 193         fPositionInCache = 0;
 194         // Note: Dictionary matching may extend beyond the original limit.
 195         fStart = fBreaks.elementAti(0);
 196         fLimit = fBreaks.peeki();
 197     } else {
 198         // there were no language-based breaks, even though the segment contained
 199         // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
 200         // for this range will fail, and the calling code will fall back to the rule based boundaries.
 201     }
 202 }
 203
 204
 205 /*
 206  *   BreakCache implemetation
 207  */
 208
 209 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
 210         fBI(bi), fSideBuffer(status) {
 211     reset();
 212 }
 213
 214
 215 RuleBasedBreakIterator::BreakCache::~BreakCache() {
 216 }
 217
 218
 219 void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
 220     fStartBufIdx = 0;
 221     fEndBufIdx = 0;
 222     fTextIdx = pos;
 223     fBufIdx = 0;
 224     fBoundaries[0] = pos;
 225     fStatuses[0] = (uint16_t)ruleStatus;
 226 }
 227
 228
 229 int32_t  RuleBasedBreakIterator::BreakCache::current() {
 230     fBI->fPosition = fTextIdx;
 231     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 232     fBI->fDone = FALSE;
 233     return fTextIdx;
 234 }
 235
 236
 237 void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
 238     if (U_FAILURE(status)) {
 239         return;
 240     }
 241     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
 242         // startPos is in the cache. Do a next() from that position.
 243         // TODO: an awkward set of interactions with bi->fDone
 244         //       seek() does not clear it; it can't because of interactions with populateNear().
 245         //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
 246         //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
 247         fBI->fDone = false;
 248         next();
 249     }
 250     return;
 251 }
 252
 253
 254 void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
 255     if (U_FAILURE(status)) {
 256         return;
 257     }
 258     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
 259         if (startPos == fTextIdx) {
 260             previous(status);
 261         } else {
 262             // seek() leaves the BreakCache positioned at the preceding boundary
 263             //        if the requested position is between two bounaries.
 264             // current() pushes the BreakCache position out to the BreakIterator itself.
 265             U_ASSERT(startPos > fTextIdx);
 266             current();
 267         }
 268     }
 269     return;
 270 }
 271
 272
 273 /*
 274  * Out-of-line code for BreakCache::next().
 275  * Cache does not already contain the boundary
 276  */
 277 void RuleBasedBreakIterator::BreakCache::nextOL() {
 278     fBI->fDone = !populateFollowing();
 279     fBI->fPosition = fTextIdx;
 280     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 281     return;
 282 }
 283
 284
 285 void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
 286     if (U_FAILURE(status)) {
 287         return;
 288     }
 289     int32_t initialBufIdx = fBufIdx;
 290     if (fBufIdx == fStartBufIdx) {
 291         // At start of cache. Prepend to it.
 292         populatePreceding(status);
 293     } else {
 294         // Cache already holds the next boundary
 295         fBufIdx = modChunkSize(fBufIdx - 1);
 296         fTextIdx = fBoundaries[fBufIdx];
 297     }
 298     fBI->fDone = (fBufIdx == initialBufIdx);
 299     fBI->fPosition = fTextIdx;
 300     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 301     return;
 302 }
 303
 304
 305 UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
 306     if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
 307         return FALSE;
 308     }
 309     if (pos == fBoundaries[fStartBufIdx]) {
 310         // Common case: seek(0), from BreakIterator::first()
 311         fBufIdx = fStartBufIdx;
 312         fTextIdx = fBoundaries[fBufIdx];
 313         return TRUE;
 314     }
 315     if (pos == fBoundaries[fEndBufIdx]) {
 316         fBufIdx = fEndBufIdx;
 317         fTextIdx = fBoundaries[fBufIdx];
 318         return TRUE;
 319     }
 320
 321     int32_t min = fStartBufIdx;
 322     int32_t max = fEndBufIdx;
 323     while (min != max) {
 324         int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
 325         probe = modChunkSize(probe);
 326         if (fBoundaries[probe] > pos) {
 327             max = probe;
 328         } else {
 329             min = modChunkSize(probe + 1);
 330         }
 331     }
 332     U_ASSERT(fBoundaries[max] > pos);
 333     fBufIdx = modChunkSize(max - 1);
 334     fTextIdx = fBoundaries[fBufIdx];
 335     U_ASSERT(fTextIdx <= pos);
 336     return TRUE;
 337 }
 338
 339
 340 UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
 341     if (U_FAILURE(status)) {
 342         return FALSE;
 343     }
 344     U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
 345
 346     // Find a boundary somewhere in the vicinity of the requested position.
 347     // Depending on the safe rules and the text data, it could be either before, at, or after
 348     // the requested position.
 349
 350
 351     // If the requested position is not near already cached positions, clear the existing cache,
 352     // find a near-by boundary and begin new cache contents there.
 353
 354     if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
 355         int32_t aBoundary = 0;
 356         int32_t ruleStatusIndex = 0;
 357         if (position > 20) {
 358             int32_t backupPos = fBI->handleSafePrevious(position);
 359
 360             if (backupPos > 0) {
 361                 // Advance to the boundary following the backup position.
 362                 // There is a complication: the safe reverse rules identify pairs of code points
 363                 // that are safe. If advancing from the safe point moves forwards by less than
 364                 // two code points, we need to advance one more time to ensure that the boundary
 365                 // is good, including a correct rules status value.
 366                 //
 367                 fBI->fPosition = backupPos;
 368                 aBoundary = fBI->handleNext();
 369                 if (aBoundary <= backupPos + 4) {
 370                     // +4 is a quick test for possibly having advanced only one codepoint.
 371                     // Four being the length of the longest potential code point, a supplementary in UTF-8
 372                     utext_setNativeIndex(&fBI->fText, aBoundary);
 373                     if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
 374                         // The initial handleNext() only advanced by a single code point. Go again.
 375                         aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
 376                     }
 377                 }
 378                 ruleStatusIndex = fBI->fRuleStatusIndex;
 379             }
 380         }
 381         reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
 382     }
 383
 384     // Fill in boundaries between existing cache content and the new requested position.
 385
 386     if (fBoundaries[fEndBufIdx] < position) {
 387         // The last position in the cache precedes the requested position.
 388         // Add following position(s) to the cache.
 389         while (fBoundaries[fEndBufIdx] < position) {
 390             if (!populateFollowing()) {
 391                 U_ASSERT(false);
 392                 return false;
 393             }
 394         }
 395         fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
 396         fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
 397         while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
 398             previous(status);
 399         }
 400         return true;
 401     }
 402
 403     if (fBoundaries[fStartBufIdx] > position) {
 404         // The first position in the cache is beyond the requested position.
 405         // back up more until we get a boundary <= the requested position.
 406         while (fBoundaries[fStartBufIdx] > position) {
 407             populatePreceding(status);
 408         }
 409         fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
 410         fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
 411         while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
 412             next();
 413         }
 414         if (fTextIdx > position) {
 415             // If position is not itself a boundary, the next() loop above will overshoot.
 416             // Back up one, leaving cache position at the boundary preceding the requested position.
 417             previous(status);
 418         }
 419         return true;
 420     }
 421
 422     U_ASSERT(fTextIdx == position);
 423     return true;
 424 }
 425
 426
 427
 428 UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
 429     int32_t fromPosition = fBoundaries[fEndBufIdx];
 430     int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
 431     int32_t pos = 0;
 432     int32_t ruleStatusIdx = 0;
 433
 434     if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
 435         addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
 436         return TRUE;
 437     }
 438
 439     fBI->fPosition = fromPosition;
 440     pos = fBI->handleNext();
 441     if (pos == UBRK_DONE) {
 442         return FALSE;
 443     }
 444
 445     ruleStatusIdx = fBI->fRuleStatusIndex;
 446     if (fBI->fDictionaryCharCount > 0) {
 447         // The text segment obtained from the rules includes dictionary characters.
 448         // Subdivide it, with subdivided results going into the dictionary cache.
 449         fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
 450         if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
 451             addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
 452             return TRUE;
 453             // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
 454             //       But be careful with interactions with populateNear().
 455         }
 456     }
 457
 458     // Rule based segment did not include dictionary characters.
 459     // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
 460     //    meaning that we didn't take the return, above.
 461     // Add its end point to the cache.
 462     addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
 463
 464     // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
 465     //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
 466     //
 467     for (int count=0; count<6; ++count) {
 468         pos = fBI->handleNext();
 469         if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
 470             break;
 471         }
 472         addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
 473     }
 474
 475     return TRUE;
 476 }
 477
 478
 479 UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
 480     if (U_FAILURE(status)) {
 481         return FALSE;
 482     }
 483
 484     int32_t fromPosition = fBoundaries[fStartBufIdx];
 485     if (fromPosition == 0) {
 486         return FALSE;
 487     }
 488
 489     int32_t position = 0;
 490     int32_t positionStatusIdx = 0;
 491
 492     if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
 493         addPreceding(position, positionStatusIdx, UpdateCachePosition);
 494         return TRUE;
 495     }
 496
 497     int32_t backupPosition = fromPosition;
 498
 499     // Find a boundary somewhere preceding the first already-cached boundary
 500     do {
 501         backupPosition = backupPosition - 30;
 502         if (backupPosition <= 0) {
 503             backupPosition = 0;
 504         } else {
 505             backupPosition = fBI->handleSafePrevious(backupPosition);
 506         }
 507         if (backupPosition == UBRK_DONE || backupPosition == 0) {
 508             position = 0;
 509             positionStatusIdx = 0;
 510         } else {
 511             // Advance to the boundary following the backup position.
 512             // There is a complication: the safe reverse rules identify pairs of code points
 513             // that are safe. If advancing from the safe point moves forwards by less than
 514             // two code points, we need to advance one more time to ensure that the boundary
 515             // is good, including a correct rules status value.
 516             //
 517             fBI->fPosition = backupPosition;
 518             position = fBI->handleNext();
 519             if (position <= backupPosition + 4) {
 520                 // +4 is a quick test for possibly having advanced only one codepoint.
 521                 // Four being the length of the longest potential code point, a supplementary in UTF-8
 522                 utext_setNativeIndex(&fBI->fText, position);
 523                 if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
 524                     // The initial handleNext() only advanced by a single code point. Go again.
 525                     position = fBI->handleNext();   // Safe rules identify safe pairs.
 526                 }
 527             };
 528             positionStatusIdx = fBI->fRuleStatusIndex;
 529         }
 530     } while (position >= fromPosition);
 531
 532     // Find boundaries between the one we just located and the first already-cached boundary
 533     // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
 534
 535     fSideBuffer.removeAllElements();
 536     fSideBuffer.addElement(position, status);
 537     fSideBuffer.addElement(positionStatusIdx, status);
 538
 539     do {
 540         int32_t prevPosition = fBI->fPosition = position;
 541         int32_t prevStatusIdx = positionStatusIdx;
 542         position = fBI->handleNext();
 543         positionStatusIdx = fBI->fRuleStatusIndex;
 544         if (position == UBRK_DONE) {
 545             break;
 546         }
 547
 548         UBool segmentHandledByDictionary = FALSE;
 549         if (fBI->fDictionaryCharCount != 0) {
 550             // Segment from the rules includes dictionary characters.
 551             // Subdivide it, with subdivided results going into the dictionary cache.
 552             int32_t dictSegEndPosition = position;
 553             fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
 554             while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
 555                 segmentHandledByDictionary = true;
 556                 U_ASSERT(position > prevPosition);
 557                 if (position >= fromPosition) {
 558                     break;
 559                 }
 560                 U_ASSERT(position <= dictSegEndPosition);
 561                 fSideBuffer.addElement(position, status);
 562                 fSideBuffer.addElement(positionStatusIdx, status);
 563                 prevPosition = position;
 564             }
 565             U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
 566         }
 567
 568         if (!segmentHandledByDictionary && position < fromPosition) {
 569             fSideBuffer.addElement(position, status);
 570             fSideBuffer.addElement(positionStatusIdx, status);
 571         }
 572     } while (position < fromPosition);
 573
 574     // Move boundaries from the side buffer to the main circular buffer.
 575     UBool success = FALSE;
 576     if (!fSideBuffer.isEmpty()) {
 577         positionStatusIdx = fSideBuffer.popi();
 578         position = fSideBuffer.popi();
 579         addPreceding(position, positionStatusIdx, UpdateCachePosition);
 580         success = TRUE;
 581     }
 582
 583     while (!fSideBuffer.isEmpty()) {
 584         positionStatusIdx = fSideBuffer.popi();
 585         position = fSideBuffer.popi();
 586         if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
 587             // No space in circular buffer to hold a new preceding result while
 588             // also retaining the current cache (iteration) position.
 589             // Bailing out is safe; the cache will refill again if needed.
 590             break;
 591         }
 592     }
 593
 594     return success;
 595 }
 596
 597
 598 void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
 599     U_ASSERT(position > fBoundaries[fEndBufIdx]);
 600     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
 601     int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
 602     if (nextIdx == fStartBufIdx) {
 603         fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
 604     }
 605     fBoundaries[nextIdx] = position;
 606     fStatuses[nextIdx] = ruleStatusIdx;
 607     fEndBufIdx = nextIdx;
 608     if (update == UpdateCachePosition) {
 609         // Set current position to the newly added boundary.
 610         fBufIdx = nextIdx;
 611         fTextIdx = position;
 612     } else {
 613         // Retaining the original cache position.
 614         // Check if the added boundary wraps around the buffer, and would over-write the original position.
 615         // It's the responsibility of callers of this function to not add too many.
 616         U_ASSERT(nextIdx != fBufIdx);
 617     }
 618 }
 619
 620 bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
 621     U_ASSERT(position < fBoundaries[fStartBufIdx]);
 622     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
 623     int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
 624     if (nextIdx == fEndBufIdx) {
 625         if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
 626             // Failure. The insertion of the new boundary would claim the buffer position that is the
 627             // current iteration position. And we also want to retain the current iteration position.
 628             // (The buffer is already completely full of entries that precede the iteration position.)
 629             return false;
 630         }
 631         fEndBufIdx = modChunkSize(fEndBufIdx - 1);
 632     }
 633     fBoundaries[nextIdx] = position;
 634     fStatuses[nextIdx] = ruleStatusIdx;
 635     fStartBufIdx = nextIdx;
 636     if (update == UpdateCachePosition) {
 637         fBufIdx = nextIdx;
 638         fTextIdx = position;
 639     }
 640     return true;
 641 }
 642
 643
 644 void RuleBasedBreakIterator::BreakCache::dumpCache() {
 645 #ifdef RBBI_DEBUG
 646     RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
 647     for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
 648         RBBIDebugPrintf("%d  %d\n", i, fBoundaries[i]);
 649         if (i == fEndBufIdx) {
 650             break;
 651         }
 652     }
 653 #endif
 654 }
 655
 656 U_NAMESPACE_END
 657
 658 #endif // #if !UCONFIG_NO_BREAK_ITERATION