+UBool
+Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous,
+ const uint8_t *src, const uint8_t *limit,
+ ByteSink *sink, Edits *edits, UErrorCode &errorCode) const {
+ U_ASSERT(limit != nullptr);
+ UnicodeString s16;
+ uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP);
+ const uint8_t *prevBoundary = src;
+
+ for (;;) {
+ // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
+ // or with (compYes && ccc==0) properties.
+ const uint8_t *prevSrc;
+ uint16_t norm16 = 0;
+ for (;;) {
+ if (src == limit) {
+ if (prevBoundary != limit && sink != nullptr) {
+ ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+ *sink, options, edits, errorCode);
+ }
+ return TRUE;
+ }
+ if (*src < minNoMaybeLead) {
+ ++src;
+ } else {
+ prevSrc = src;
+ UTRIE2_U8_NEXT16(normTrie, src, limit, norm16);
+ if (!isCompYesAndZeroCC(norm16)) {
+ break;
+ }
+ }
+ }
+ // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
+ // The current character is either a "noNo" (has a mapping)
+ // or a "maybeYes" (combines backward)
+ // or a "yesYes" with ccc!=0.
+ // It is not a Hangul syllable or Jamo L because those have "yes" properties.
+
+ // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
+ if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ // Fast path for mapping a character that is immediately surrounded by boundaries.
+ // In this case, we need not decompose around the current character.
+ if (isDecompNoAlgorithmic(norm16)) {
+ // Maps to a single isCompYesAndZeroCC character
+ // which also implies hasCompBoundaryBefore.
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+ hasCompBoundaryBefore(src, limit)) {
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits);
+ prevBoundary = src;
+ continue;
+ }
+ } else if (norm16 < minNoNoCompBoundaryBefore) {
+ // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
+ if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
+ hasCompBoundaryBefore(src, limit)) {
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ const uint16_t *mapping = getMapping(norm16);
+ int32_t length = *mapping++ & MAPPING_LENGTH_MASK;
+ if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length,
+ *sink, edits, errorCode)) {
+ break;
+ }
+ prevBoundary = src;
+ continue;
+ }
+ } else if (norm16 >= minNoNoEmpty) {
+ // The current character maps to nothing.
+ // Simply omit it from the output if there is a boundary before _or_ after it.
+ // The character itself implies no boundaries.
+ if (hasCompBoundaryBefore(src, limit) ||
+ hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) {
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ if (edits != nullptr) {
+ edits->addReplace((int32_t)(src - prevSrc), 0);
+ }
+ prevBoundary = src;
+ continue;
+ }
+ }
+ // Other "noNo" type, or need to examine more text around this character:
+ // Fall through to the slow path.
+ } else if (isJamoVT(norm16)) {
+ // Jamo L: E1 84 80..92
+ // Jamo V: E1 85 A1..B5
+ // Jamo T: E1 86 A8..E1 87 82
+ U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1);
+ UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc);
+ if (prevSrc[1] == 0x85) {
+ // The current character is a Jamo Vowel,
+ // compose with previous Jamo L and following Jamo T.
+ UChar32 l = prev - Hangul::JAMO_L_BASE;
+ if ((uint32_t)l < Hangul::JAMO_L_COUNT) {
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ int32_t t = getJamoTMinusBase(src, limit);
+ if (t >= 0) {
+ // The next character is a Jamo T.
+ src += 3;
+ } else if (hasCompBoundaryBefore(src, limit)) {
+ // No Jamo T follows, not even via decomposition.
+ t = 0;
+ }
+ if (t >= 0) {
+ UChar32 syllable = Hangul::HANGUL_BASE +
+ (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) *
+ Hangul::JAMO_T_COUNT + t;
+ prevSrc -= 3; // Replace the Jamo L as well.
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
+ prevBoundary = src;
+ continue;
+ }
+ // If we see L+V+x where x!=T then we drop to the slow path,
+ // decompose and recompose.
+ // This is to deal with NFKC finding normal L and V but a
+ // compatibility variant of a T.
+ // We need to either fully compose that combination here
+ // (which would complicate the code and may not work with strange custom data)
+ // or use the slow path.
+ }
+ } else if (Hangul::isHangulLV(prev)) {
+ // The current character is a Jamo Trailing consonant,
+ // compose with previous Hangul LV that does not contain a Jamo T.
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src);
+ prevSrc -= 3; // Replace the Hangul LV as well.
+ if (prevBoundary != prevSrc &&
+ !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc,
+ *sink, options, edits, errorCode)) {
+ break;
+ }
+ ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits);
+ prevBoundary = src;
+ continue;
+ }
+ // No matching context, or may need to decompose surrounding text first:
+ // Fall through to the slow path.
+ } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC
+ // One or more combining marks that do not combine-back:
+ // Check for canonical order, copy unchanged if ok and
+ // if followed by a character with a boundary-before.
+ uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0
+ if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) {
+ // Fails FCD test, need to decompose and contiguously recompose.
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ } else {
+ // If !onlyContiguous (not FCC), then we ignore the tccc of
+ // the previous character which passed the quick check "yes && ccc==0" test.
+ const uint8_t *nextSrc;
+ uint16_t n16;
+ for (;;) {
+ if (src == limit) {
+ if (sink != nullptr) {
+ ByteSinkUtil::appendUnchanged(prevBoundary, limit,
+ *sink, options, edits, errorCode);
+ }
+ return TRUE;
+ }
+ uint8_t prevCC = cc;
+ nextSrc = src;
+ UTRIE2_U8_NEXT16(normTrie, nextSrc, limit, n16);
+ if (n16 >= MIN_YES_YES_WITH_CC) {
+ cc = getCCFromNormalYesOrMaybe(n16);
+ if (prevCC > cc) {
+ if (sink == nullptr) {
+ return FALSE;
+ }
+ break;
+ }
+ } else {
+ break;
+ }
+ src = nextSrc;
+ }
+ // src is after the last in-order combining mark.
+ // If there is a boundary here, then we continue with no change.
+ if (norm16HasCompBoundaryBefore(n16)) {
+ if (isCompYesAndZeroCC(n16)) {
+ src = nextSrc;
+ }
+ continue;
+ }
+ // Use the slow path. There is no boundary in [prevSrc, src[.
+ }
+ }
+
+ // Slow path: Find the nearest boundaries around the current character,
+ // decompose and recompose.
+ if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
+ const uint8_t *p = prevSrc;
+ UTRIE2_U8_PREV16(normTrie, prevBoundary, p, norm16);
+ if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
+ prevSrc = p;
+ }
+ }
+ ReorderingBuffer buffer(*this, s16, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ // We know there is not a boundary here.
+ decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous,
+ buffer, errorCode);
+ // Decompose until the next boundary.
+ src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous,
+ buffer, errorCode);
+ if (U_FAILURE(errorCode)) {
+ break;
+ }
+ if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals()
+ errorCode = U_INDEX_OUTOFBOUNDS_ERROR;