icuSources/common/rbbi_cache.cpp

   1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3
   4 // file: rbbi_cache.cpp
   5
   6 #include "unicode/utypes.h"
   7
   8 #if !UCONFIG_NO_BREAK_ITERATION
   9
  10 #include "unicode/ubrk.h"
  11 #include "unicode/rbbi.h"
  12
  13 #include "rbbi_cache.h"
  14
  15 #include "brkeng.h"
  16 #include "cmemory.h"
  17 #include "rbbidata.h"
  18 #include "rbbirb.h"
  19 #include "uassert.h"
  20 #include "uvectr32.h"
  21
  22 U_NAMESPACE_BEGIN
  23
  24 /*
  25  * DictionaryCache implementation
  26  */
  27
  28 RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
  29         fBI(bi), fBreaks(status), fPositionInCache(-1),
  30         fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
  31 }
  32
  33 RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
  34 }
  35
  36 void RuleBasedBreakIterator::DictionaryCache::reset() {
  37     fPositionInCache = -1;
  38     fStart = 0;
  39     fLimit = 0;
  40     fFirstRuleStatusIndex = 0;
  41     fOtherRuleStatusIndex = 0;
  42     fBreaks.removeAllElements();
  43 }
  44
  45 UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
  46     if (fromPos >= fLimit || fromPos < fStart) {
  47         fPositionInCache = -1;
  48         return FALSE;
  49     }
  50
  51     // Sequential iteration, move from previous boundary to the following
  52
  53     int32_t r = 0;
  54     if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
  55         ++fPositionInCache;
  56         if (fPositionInCache >= fBreaks.size()) {
  57             fPositionInCache = -1;
  58             return FALSE;
  59         }
  60         r = fBreaks.elementAti(fPositionInCache);
  61         U_ASSERT(r > fromPos);
  62         *result = r;
  63         *statusIndex = fOtherRuleStatusIndex;
  64         return TRUE;
  65     }
  66
  67     // Random indexing. Linear search for the boundary following the given position.
  68
  69     for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
  70         r= fBreaks.elementAti(fPositionInCache);
  71         if (r > fromPos) {
  72             *result = r;
  73             *statusIndex = fOtherRuleStatusIndex;
  74             return TRUE;
  75         }
  76     }
  77     UPRV_UNREACHABLE;
  78 }
  79
  80
  81 UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
  82     if (fromPos <= fStart || fromPos > fLimit) {
  83         fPositionInCache = -1;
  84         return FALSE;
  85     }
  86
  87     if (fromPos == fLimit) {
  88         fPositionInCache = fBreaks.size() - 1;
  89         if (fPositionInCache >= 0) {
  90             U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
  91         }
  92     }
  93
  94     int32_t r;
  95     if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
  96         --fPositionInCache;
  97         r = fBreaks.elementAti(fPositionInCache);
  98         U_ASSERT(r < fromPos);
  99         *result = r;
 100         *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
 101         return TRUE;
 102     }
 103
 104     if (fPositionInCache == 0) {
 105         fPositionInCache = -1;
 106         return FALSE;
 107     }
 108
 109     for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
 110         r = fBreaks.elementAti(fPositionInCache);
 111         if (r < fromPos) {
 112             *result = r;
 113             *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
 114             return TRUE;
 115         }
 116     }
 117     UPRV_UNREACHABLE;
 118 }
 119
 120 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
 121                                        int32_t firstRuleStatus, int32_t otherRuleStatus) {
 122     if ((endPos - startPos) <= 1) {
 123         return;
 124     }
 125
 126     reset();
 127     fFirstRuleStatusIndex = firstRuleStatus;
 128     fOtherRuleStatusIndex = otherRuleStatus;
 129
 130     int32_t rangeStart = startPos;
 131     int32_t rangeEnd = endPos;
 132
 133     uint16_t    category;
 134     int32_t     current;
 135     UErrorCode  status = U_ZERO_ERROR;
 136     int32_t     foundBreakCount = 0;
 137     UText      *text = &fBI->fText;
 138
 139     // Loop through the text, looking for ranges of dictionary characters.
 140     // For each span, find the appropriate break engine, and ask it to find
 141     // any breaks within the span.
 142
 143     utext_setNativeIndex(text, rangeStart);
 144     UChar32     c = utext_current32(text);
 145     category = UTRIE2_GET16(fBI->fData->fTrie, c);
 146
 147     while(U_SUCCESS(status)) {
 148         while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
 149             utext_next32(text);           // TODO: cleaner loop structure.
 150             c = utext_current32(text);
 151             category = UTRIE2_GET16(fBI->fData->fTrie, c);
 152         }
 153         if (current >= rangeEnd) {
 154             break;
 155         }
 156
 157         // We now have a dictionary character. Get the appropriate language object
 158         // to deal with it.
 159         const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
 160
 161         // Ask the language object if there are any breaks. It will add them to the cache and
 162         // leave the text pointer on the other side of its range, ready to search for the next one.
 163         if (lbe != NULL) {
 164             foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
 165         }
 166
 167         // Reload the loop variables for the next go-round
 168         c = utext_current32(text);
 169         category = UTRIE2_GET16(fBI->fData->fTrie, c);
 170     }
 171
 172     // If we found breaks, ensure that the first and last entries are
 173     // the original starting and ending position. And initialize the
 174     // cache iteration position to the first entry.
 175
 176     // printf("foundBreakCount = %d\n", foundBreakCount);
 177     if (foundBreakCount > 0) {
 178         U_ASSERT(foundBreakCount == fBreaks.size());
 179         if (startPos < fBreaks.elementAti(0)) {
 180             // The dictionary did not place a boundary at the start of the segment of text.
 181             // Add one now. This should not commonly happen, but it would be easy for interactions
 182             // of the rules for dictionary segments and the break engine implementations to
 183             // inadvertently cause it. Cover it here, just in case.
 184             fBreaks.insertElementAt(startPos, 0, status);
 185         }
 186         if (endPos > fBreaks.peeki()) {
 187             fBreaks.push(endPos, status);
 188         }
 189         fPositionInCache = 0;
 190         // Note: Dictionary matching may extend beyond the original limit.
 191         fStart = fBreaks.elementAti(0);
 192         fLimit = fBreaks.peeki();
 193     } else {
 194         // there were no language-based breaks, even though the segment contained
 195         // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
 196         // for this range will fail, and the calling code will fall back to the rule based boundaries.
 197     }
 198 }
 199
 200
 201 /*
 202  *   BreakCache implemetation
 203  */
 204
 205 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
 206         fBI(bi), fSideBuffer(status) {
 207     reset();
 208 }
 209
 210
 211 RuleBasedBreakIterator::BreakCache::~BreakCache() {
 212 }
 213
 214
 215 void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
 216     fStartBufIdx = 0;
 217     fEndBufIdx = 0;
 218     fTextIdx = pos;
 219     fBufIdx = 0;
 220     fBoundaries[0] = pos;
 221     fStatuses[0] = (uint16_t)ruleStatus;
 222 }
 223
 224
 225 int32_t  RuleBasedBreakIterator::BreakCache::current() {
 226     fBI->fPosition = fTextIdx;
 227     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 228     fBI->fDone = FALSE;
 229     return fTextIdx;
 230 }
 231
 232
 233 void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
 234     if (U_FAILURE(status)) {
 235         return;
 236     }
 237     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
 238         // startPos is in the cache. Do a next() from that position.
 239         // TODO: an awkward set of interactions with bi->fDone
 240         //       seek() does not clear it; it can't because of interactions with populateNear().
 241         //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
 242         //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
 243         fBI->fDone = false;
 244         next();
 245     }
 246     return;
 247 }
 248
 249
 250 void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
 251     if (U_FAILURE(status)) {
 252         return;
 253     }
 254     if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
 255         if (startPos == fTextIdx) {
 256             previous(status);
 257         } else {
 258             // seek() leaves the BreakCache positioned at the preceding boundary
 259             //        if the requested position is between two bounaries.
 260             // current() pushes the BreakCache position out to the BreakIterator itself.
 261             U_ASSERT(startPos > fTextIdx);
 262             current();
 263         }
 264     }
 265     return;
 266 }
 267
 268
 269 /*
 270  * Out-of-line code for BreakCache::next().
 271  * Cache does not already contain the boundary
 272  */
 273 void RuleBasedBreakIterator::BreakCache::nextOL() {
 274     fBI->fDone = !populateFollowing();
 275     fBI->fPosition = fTextIdx;
 276     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 277     return;
 278 }
 279
 280
 281 void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
 282     if (U_FAILURE(status)) {
 283         return;
 284     }
 285     int32_t initialBufIdx = fBufIdx;
 286     if (fBufIdx == fStartBufIdx) {
 287         // At start of cache. Prepend to it.
 288         populatePreceding(status);
 289     } else {
 290         // Cache already holds the next boundary
 291         fBufIdx = modChunkSize(fBufIdx - 1);
 292         fTextIdx = fBoundaries[fBufIdx];
 293     }
 294     fBI->fDone = (fBufIdx == initialBufIdx);
 295     fBI->fPosition = fTextIdx;
 296     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
 297     return;
 298 }
 299
 300
 301 UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
 302     if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
 303         return FALSE;
 304     }
 305     if (pos == fBoundaries[fStartBufIdx]) {
 306         // Common case: seek(0), from BreakIterator::first()
 307         fBufIdx = fStartBufIdx;
 308         fTextIdx = fBoundaries[fBufIdx];
 309         return TRUE;
 310     }
 311     if (pos == fBoundaries[fEndBufIdx]) {
 312         fBufIdx = fEndBufIdx;
 313         fTextIdx = fBoundaries[fBufIdx];
 314         return TRUE;
 315     }
 316
 317     int32_t min = fStartBufIdx;
 318     int32_t max = fEndBufIdx;
 319     while (min != max) {
 320         int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
 321         probe = modChunkSize(probe);
 322         if (fBoundaries[probe] > pos) {
 323             max = probe;
 324         } else {
 325             min = modChunkSize(probe + 1);
 326         }
 327     }
 328     U_ASSERT(fBoundaries[max] > pos);
 329     fBufIdx = modChunkSize(max - 1);
 330     fTextIdx = fBoundaries[fBufIdx];
 331     U_ASSERT(fTextIdx <= pos);
 332     return TRUE;
 333 }
 334
 335
 336 UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
 337     if (U_FAILURE(status)) {
 338         return FALSE;
 339     }
 340     U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
 341
 342     // Find a boundary somewhere in the vicinity of the requested position.
 343     // Depending on the safe rules and the text data, it could be either before, at, or after
 344     // the requested position.
 345
 346
 347     // If the requested position is not near already cached positions, clear the existing cache,
 348     // find a near-by boundary and begin new cache contents there.
 349
 350     if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
 351         int32_t aBoundary = 0;
 352         int32_t ruleStatusIndex = 0;
 353         if (position > 20) {
 354             int32_t backupPos = fBI->handleSafePrevious(position);
 355
 356             if (backupPos > 0) {
 357                 // Advance to the boundary following the backup position.
 358                 // There is a complication: the safe reverse rules identify pairs of code points
 359                 // that are safe. If advancing from the safe point moves forwards by less than
 360                 // two code points, we need to advance one more time to ensure that the boundary
 361                 // is good, including a correct rules status value.
 362                 //
 363                 fBI->fPosition = backupPos;
 364                 aBoundary = fBI->handleNext();
 365                 if (aBoundary <= backupPos + 4) {
 366                     // +4 is a quick test for possibly having advanced only one codepoint.
 367                     // Four being the length of the longest potential code point, a supplementary in UTF-8
 368                     utext_setNativeIndex(&fBI->fText, aBoundary);
 369                     if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
 370                         // The initial handleNext() only advanced by a single code point. Go again.
 371                         aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
 372                     }
 373                 }
 374                 ruleStatusIndex = fBI->fRuleStatusIndex;
 375             }
 376         }
 377         reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
 378     }
 379
 380     // Fill in boundaries between existing cache content and the new requested position.
 381
 382     if (fBoundaries[fEndBufIdx] < position) {
 383         // The last position in the cache precedes the requested position.
 384         // Add following position(s) to the cache.
 385         while (fBoundaries[fEndBufIdx] < position) {
 386             if (!populateFollowing()) {
 387                 UPRV_UNREACHABLE;
 388             }
 389         }
 390         fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
 391         fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
 392         while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
 393             previous(status);
 394         }
 395         return true;
 396     }
 397
 398     if (fBoundaries[fStartBufIdx] > position) {
 399         // The first position in the cache is beyond the requested position.
 400         // back up more until we get a boundary <= the requested position.
 401         while (fBoundaries[fStartBufIdx] > position) {
 402             populatePreceding(status);
 403         }
 404         fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
 405         fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
 406         while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
 407             next();
 408         }
 409         if (fTextIdx > position) {
 410             // If position is not itself a boundary, the next() loop above will overshoot.
 411             // Back up one, leaving cache position at the boundary preceding the requested position.
 412             previous(status);
 413         }
 414         return true;
 415     }
 416
 417     U_ASSERT(fTextIdx == position);
 418     return true;
 419 }
 420
 421
 422
 423 UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
 424     int32_t fromPosition = fBoundaries[fEndBufIdx];
 425     int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
 426     int32_t pos = 0;
 427     int32_t ruleStatusIdx = 0;
 428
 429     if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
 430         addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
 431         return TRUE;
 432     }
 433
 434     fBI->fPosition = fromPosition;
 435     pos = fBI->handleNext();
 436     if (pos == UBRK_DONE) {
 437         return FALSE;
 438     }
 439
 440     ruleStatusIdx = fBI->fRuleStatusIndex;
 441     if (fBI->fDictionaryCharCount > 0) {
 442         // The text segment obtained from the rules includes dictionary characters.
 443         // Subdivide it, with subdivided results going into the dictionary cache.
 444         fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
 445         if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
 446             addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
 447             return TRUE;
 448             // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
 449             //       But be careful with interactions with populateNear().
 450         }
 451     }
 452
 453     // Rule based segment did not include dictionary characters.
 454     // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
 455     //    meaning that we didn't take the return, above.
 456     // Add its end point to the cache.
 457     addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
 458
 459     // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
 460     //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
 461     //
 462     for (int count=0; count<6; ++count) {
 463         pos = fBI->handleNext();
 464         if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
 465             break;
 466         }
 467         addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
 468     }
 469
 470     return TRUE;
 471 }
 472
 473
 474 UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
 475     if (U_FAILURE(status)) {
 476         return FALSE;
 477     }
 478
 479     int32_t fromPosition = fBoundaries[fStartBufIdx];
 480     if (fromPosition == 0) {
 481         return FALSE;
 482     }
 483
 484     int32_t position = 0;
 485     int32_t positionStatusIdx = 0;
 486
 487     if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
 488         addPreceding(position, positionStatusIdx, UpdateCachePosition);
 489         return TRUE;
 490     }
 491
 492     int32_t backupPosition = fromPosition;
 493
 494     // Find a boundary somewhere preceding the first already-cached boundary
 495     do {
 496         backupPosition = backupPosition - 30;
 497         if (backupPosition <= 0) {
 498             backupPosition = 0;
 499         } else {
 500             backupPosition = fBI->handleSafePrevious(backupPosition);
 501         }
 502         if (backupPosition == UBRK_DONE || backupPosition == 0) {
 503             position = 0;
 504             positionStatusIdx = 0;
 505         } else {
 506             // Advance to the boundary following the backup position.
 507             // There is a complication: the safe reverse rules identify pairs of code points
 508             // that are safe. If advancing from the safe point moves forwards by less than
 509             // two code points, we need to advance one more time to ensure that the boundary
 510             // is good, including a correct rules status value.
 511             //
 512             fBI->fPosition = backupPosition;
 513             position = fBI->handleNext();
 514             if (position <= backupPosition + 4) {
 515                 // +4 is a quick test for possibly having advanced only one codepoint.
 516                 // Four being the length of the longest potential code point, a supplementary in UTF-8
 517                 utext_setNativeIndex(&fBI->fText, position);
 518                 if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
 519                     // The initial handleNext() only advanced by a single code point. Go again.
 520                     position = fBI->handleNext();   // Safe rules identify safe pairs.
 521                 }
 522             }
 523             positionStatusIdx = fBI->fRuleStatusIndex;
 524         }
 525     } while (position >= fromPosition);
 526
 527     // Find boundaries between the one we just located and the first already-cached boundary
 528     // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
 529
 530     fSideBuffer.removeAllElements();
 531     fSideBuffer.addElement(position, status);
 532     fSideBuffer.addElement(positionStatusIdx, status);
 533
 534     do {
 535         int32_t prevPosition = fBI->fPosition = position;
 536         int32_t prevStatusIdx = positionStatusIdx;
 537         position = fBI->handleNext();
 538         positionStatusIdx = fBI->fRuleStatusIndex;
 539         if (position == UBRK_DONE) {
 540             break;
 541         }
 542
 543         UBool segmentHandledByDictionary = FALSE;
 544         if (fBI->fDictionaryCharCount != 0) {
 545             // Segment from the rules includes dictionary characters.
 546             // Subdivide it, with subdivided results going into the dictionary cache.
 547             int32_t dictSegEndPosition = position;
 548             fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
 549             while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
 550                 segmentHandledByDictionary = true;
 551                 U_ASSERT(position > prevPosition);
 552                 if (position >= fromPosition) {
 553                     break;
 554                 }
 555                 U_ASSERT(position <= dictSegEndPosition);
 556                 fSideBuffer.addElement(position, status);
 557                 fSideBuffer.addElement(positionStatusIdx, status);
 558                 prevPosition = position;
 559             }
 560             U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
 561         }
 562
 563         if (!segmentHandledByDictionary && position < fromPosition) {
 564             fSideBuffer.addElement(position, status);
 565             fSideBuffer.addElement(positionStatusIdx, status);
 566         }
 567     } while (position < fromPosition);
 568
 569     // Move boundaries from the side buffer to the main circular buffer.
 570     UBool success = FALSE;
 571     if (!fSideBuffer.isEmpty()) {
 572         positionStatusIdx = fSideBuffer.popi();
 573         position = fSideBuffer.popi();
 574         addPreceding(position, positionStatusIdx, UpdateCachePosition);
 575         success = TRUE;
 576     }
 577
 578     while (!fSideBuffer.isEmpty()) {
 579         positionStatusIdx = fSideBuffer.popi();
 580         position = fSideBuffer.popi();
 581         if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
 582             // No space in circular buffer to hold a new preceding result while
 583             // also retaining the current cache (iteration) position.
 584             // Bailing out is safe; the cache will refill again if needed.
 585             break;
 586         }
 587     }
 588
 589     return success;
 590 }
 591
 592
 593 void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
 594     U_ASSERT(position > fBoundaries[fEndBufIdx]);
 595     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
 596     int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
 597     if (nextIdx == fStartBufIdx) {
 598         fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
 599     }
 600     fBoundaries[nextIdx] = position;
 601     fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
 602     fEndBufIdx = nextIdx;
 603     if (update == UpdateCachePosition) {
 604         // Set current position to the newly added boundary.
 605         fBufIdx = nextIdx;
 606         fTextIdx = position;
 607     } else {
 608         // Retaining the original cache position.
 609         // Check if the added boundary wraps around the buffer, and would over-write the original position.
 610         // It's the responsibility of callers of this function to not add too many.
 611         U_ASSERT(nextIdx != fBufIdx);
 612     }
 613 }
 614
 615 bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
 616     U_ASSERT(position < fBoundaries[fStartBufIdx]);
 617     U_ASSERT(ruleStatusIdx <= UINT16_MAX);
 618     int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
 619     if (nextIdx == fEndBufIdx) {
 620         if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
 621             // Failure. The insertion of the new boundary would claim the buffer position that is the
 622             // current iteration position. And we also want to retain the current iteration position.
 623             // (The buffer is already completely full of entries that precede the iteration position.)
 624             return false;
 625         }
 626         fEndBufIdx = modChunkSize(fEndBufIdx - 1);
 627     }
 628     fBoundaries[nextIdx] = position;
 629     fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
 630     fStartBufIdx = nextIdx;
 631     if (update == UpdateCachePosition) {
 632         fBufIdx = nextIdx;
 633         fTextIdx = position;
 634     }
 635     return true;
 636 }
 637
 638
 639 void RuleBasedBreakIterator::BreakCache::dumpCache() {
 640 #ifdef RBBI_DEBUG
 641     RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
 642     for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
 643         RBBIDebugPrintf("%d  %d\n", i, fBoundaries[i]);
 644         if (i == fEndBufIdx) {
 645             break;
 646         }
 647     }
 648 #endif
 649 }
 650
 651 U_NAMESPACE_END
 652
 653 #endif // #if !UCONFIG_NO_BREAK_ITERATION