- //
- // Check the alignment and size of the user supplied buffer.
- // Allocate heap memory if the user supplied memory is insufficient.
- //
- char *buf = (char *)stackBuffer;
- uint32_t s = bufferSize;
- if (stackBuffer == NULL) {
- s = 0; // Ignore size, force allocation if user didn't give us a buffer.
- }
- if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
- uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
- s -= offsetUp;
- buf += offsetUp;
- }
- if (s < sizeof(RuleBasedBreakIterator)) {
- // Not enough room in the caller-supplied buffer.
- // Do a plain-vanilla heap based clone and return that, along with
- // a warning that the clone was allocated.
- RuleBasedBreakIterator *clonedBI = new RuleBasedBreakIterator(*this);
- if (clonedBI == 0) {
- } else {
- }
- return clonedBI;
- }
- //
- // Clone the source BI into the caller-supplied buffer.
- // TODO: using an overloaded operator new to directly initialize the
- // copy in the user's buffer would be better, but it doesn't seem
- // to get along with namespaces. Investigate why.
- //
- // The memcpy is only safe when the source is an empty (default constructed)
- // break iterator. Using it on any other iterator can corrupt the reference
- // counts of the data it refers to; memcpy-ing objects is generally a bad idea.
- //
- RuleBasedBreakIterator localIter; // Empty break iterator, source for memcpy
- RuleBasedBreakIterator *clone = (RuleBasedBreakIterator *)buf;
- uprv_memcpy(clone, &localIter, sizeof(RuleBasedBreakIterator)); // init C++ gorp, BreakIterator base class part
- clone->init(); // Init RuleBasedBreakIterator part, (user default constructor)
- *clone = *this; // clone = the real BI we want.
- clone->fBufferClone = TRUE; // Flag to prevent deleting storage on close (From C code)
- return clone;
-// isDictionaryChar Return true if the category lookup for this char
-// indicates that it is in the set of dictionary lookup
-// chars.
-// This function is intended for use by dictionary based
-// break iterators.
-/*UBool RuleBasedBreakIterator::isDictionaryChar(UChar32 c) {
- if (fData == NULL) {
- return FALSE;
- }
- uint16_t category;
- UTRIE_GET16(&fData->fTrie, c, category);
- return (category & 0x4000) != 0;
-// checkDictionary This function handles all processing of characters in
-// the "dictionary" set. It will determine the appropriate
-// course of action, and possibly set up a cache in the
-// process.
-int32_t RuleBasedBreakIterator::checkDictionary(int32_t startPos,
- int32_t endPos,
- UBool reverse) {
- // Reset the old break cache first.
- uint32_t dictionaryCount = fDictionaryCharCount;
- reset();
- if (dictionaryCount <= 1 || (endPos - startPos) <= 1) {
- return (reverse ? startPos : endPos);
- }
- // Starting from the starting point, scan towards the proposed result,
- // looking for the first dictionary character (which may be the one
- // we're on, if we're starting in the middle of a range).
- utext_setNativeIndex(fText, reverse ? endPos : startPos);
- if (reverse) {
- }
- int32_t rangeStart = startPos;
- int32_t rangeEnd = endPos;
- uint16_t category;
- int32_t current;
- UErrorCode status = U_ZERO_ERROR;
- UStack breaks(status);
- int32_t foundBreakCount = 0;
- UChar32 c = utext_current32(fText);
- UTRIE_GET16(&fData->fTrie, c, category);
- // Is the character we're starting on a dictionary character? If so, we
- // need to back up to include the entire run; otherwise the results of
- // the break algorithm will differ depending on where we start. Since
- // the result is cached and there is typically a non-dictionary break
- // within a small number of words, there should be little performance impact.
- if (category & 0x4000) {
- if (reverse) {
- do {
- utext_next32(fText); // TODO: recast to work directly with postincrement.
- c = utext_current32(fText);
- UTRIE_GET16(&fData->fTrie, c, category);
- } while (c != U_SENTINEL && (category & 0x4000));
- // Back up to the last dictionary character
- rangeEnd = (int32_t)UTEXT_GETNATIVEINDEX(fText);
- if (c == U_SENTINEL) {
- // c = fText->last32();
- // TODO: why was this if needed?
- c = UTEXT_PREVIOUS32(fText);
- }
- else {
- c = UTEXT_PREVIOUS32(fText);
- }
- }
- else {
- do {
- c = UTEXT_PREVIOUS32(fText);
- UTRIE_GET16(&fData->fTrie, c, category);
- }
- while (c != U_SENTINEL && (category & 0x4000));
- // Re-position on the first dictionary character of the run
- // (the current position if the backward scan returned U_SENTINEL).
- if (c == U_SENTINEL) {
- // c = fText->first32();
- c = utext_current32(fText);
- }
- else {
- utext_next32(fText);
- c = utext_current32(fText);
- }
- rangeStart = (int32_t)UTEXT_GETNATIVEINDEX(fText);;
- }
- UTRIE_GET16(&fData->fTrie, c, category);
- }
- // Loop through the text, looking for ranges of dictionary characters.
- // For each span, find the appropriate break engine, and ask it to find
- // any breaks within the span.
- // Note: we always do this in the forward direction, so that the break
- // cache is built in the right order.
- if (reverse) {
- utext_setNativeIndex(fText, rangeStart);
- c = utext_current32(fText);
- UTRIE_GET16(&fData->fTrie, c, category);
- }
- while(U_SUCCESS(status)) {
- while((current = (int32_t)UTEXT_GETNATIVEINDEX(fText)) < rangeEnd && (category & 0x4000) == 0) {
- utext_next32(fText); // TODO: tweak for post-increment operation
- c = utext_current32(fText);
- UTRIE_GET16(&fData->fTrie, c, category);
- }
- if (current >= rangeEnd) {
- break;
- }
- // We now have a dictionary character. Get the appropriate language object
- // to deal with it.
- const LanguageBreakEngine *lbe = getLanguageBreakEngine(c);
- // Ask the language object if there are any breaks. It will leave the text
- // pointer on the other side of its range, ready to search for the next one.
- if (lbe != NULL) {
- foundBreakCount += lbe->findBreaks(fText, rangeStart, rangeEnd, FALSE, fBreakType, breaks);
- }
- // Reload the loop variables for the next go-round
- c = utext_current32(fText);
- UTRIE_GET16(&fData->fTrie, c, category);
- }
- // If we found breaks, build a new break cache. The first and last entries must
- // be the original starting and ending position.
- if (foundBreakCount > 0) {
- int32_t totalBreaks = foundBreakCount;
- if (startPos < breaks.elementAti(0)) {
- totalBreaks += 1;
- }
- if (endPos > breaks.peeki()) {
- totalBreaks += 1;
- }
- fCachedBreakPositions = (int32_t *)uprv_malloc(totalBreaks * sizeof(int32_t));
- if (fCachedBreakPositions != NULL) {
- int32_t out = 0;
- fNumCachedBreakPositions = totalBreaks;
- if (startPos < breaks.elementAti(0)) {
- fCachedBreakPositions[out++] = startPos;
- }
- for (int32_t i = 0; i < foundBreakCount; ++i) {
- fCachedBreakPositions[out++] = breaks.elementAti(i);
- }
- if (endPos > fCachedBreakPositions[out-1]) {
- fCachedBreakPositions[out] = endPos;
- }
- // If there are breaks, then by definition, we are replacing the original
- // proposed break by one of the breaks we found. Use following() and
- // preceding() to do the work. They should never recurse in this case.
- if (reverse) {
- return preceding(endPos - 1);
- }
- else {
- return following(startPos);
- }
- }
- // If the allocation failed, just fall through to the "no breaks found" case.