- for(UChar lead=0xd800; lead<0xdc00; ++lead) {
- uint32_t surrogateCPNorm16=utrie2_get32(norm16Trie, lead);
- Norm16Summary summary={ surrogateCPNorm16, surrogateCPNorm16 };
- utrie2_enumForLeadSurrogate(norm16Trie, lead, NULL, enumRangeMaxValue, &summary);
- uint32_t norm16=summary.maxNorm16;
- if(norm16>=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO] &&
- norm16>(uint32_t)indexes[Normalizer2Impl::IX_MIN_NO_NO]) {
- // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0.
- // Otherwise it might end up at something like JAMO_VT which stays in
- // the inner decomposition quick check loop.
- norm16=(uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]-1;
+ //
+ // First check that surrogate code *points* are inert.
+ // The parser should have rejected values/mappings for them.
+ uint32_t value;
+ UChar32 end = umutablecptrie_getRange(norm16Trie, 0xd800, UCPMAP_RANGE_NORMAL, 0,
+ nullptr, nullptr, &value);
+ if (value != Normalizer2Impl::INERT || end < 0xdfff) {
+ fprintf(stderr,
+ "gennorm2 error: not all surrogate code points are inert: U+d800..U+%04x=%lx\n",
+ (int)end, (long)value);
+ exit(U_INTERNAL_PROGRAM_ERROR);
+ }
+ uint32_t maxNorm16 = 0;
+ // ANDing values yields 0 bits where any value has a 0.
+ // Used for worst-case HAS_COMP_BOUNDARY_AFTER.
+ uint32_t andedNorm16 = 0;
+ end = 0;
+ for (UChar32 start = 0x10000;;) {
+ if (start > end) {
+ end = umutablecptrie_getRange(norm16Trie, start, UCPMAP_RANGE_NORMAL, 0,
+ nullptr, nullptr, &value);
+ if (end < 0) { break; }
+ }
+ if ((start & 0x3ff) == 0) {
+ // Data for a new lead surrogate.
+ maxNorm16 = andedNorm16 = value;
+ } else {
+ if (value > maxNorm16) {
+ maxNorm16 = value;
+ }
+ andedNorm16 &= value;
+ }
+ // Intersect each range with the code points for one lead surrogate.
+ UChar32 leadEnd = start | 0x3ff;
+ if (leadEnd <= end) {
+ // End of the supplementary block for a lead surrogate.
+ if (maxNorm16 >= (uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO]) {
+ // Set noNo ("worst" value) if it got into "less-bad" maybeYes or ccc!=0.
+ // Otherwise it might end up at something like JAMO_VT which stays in
+ // the inner decomposition quick check loop.
+ maxNorm16 = (uint32_t)indexes[Normalizer2Impl::IX_LIMIT_NO_NO];
+ }
+ maxNorm16 =
+ (maxNorm16 & ~Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER)|
+ (andedNorm16 & Normalizer2Impl::HAS_COMP_BOUNDARY_AFTER);
+ if (maxNorm16 != Normalizer2Impl::INERT) {
+ umutablecptrie_set(norm16Trie, U16_LEAD(start), maxNorm16, errorCode);
+ }
+ if (value == Normalizer2Impl::INERT) {
+ // Potentially skip inert supplementary blocks for several lead surrogates.
+ start = (end + 1) & ~0x3ff;
+ } else {
+ start = leadEnd + 1;
+ }
+ } else {
+ start = end + 1;