-class Decomposer : public Normalizer2DBEnumerator {
-public:
- Decomposer(Normalizer2DataBuilder &b) : Normalizer2DBEnumerator(b), didDecompose(FALSE) {}
- virtual UBool rangeHandler(UChar32 start, UChar32 end, uint32_t value) {
- didDecompose|=builder.decompose(start, end, value);
- return TRUE;
- }
- UBool didDecompose;
-};
-
-UBool
-Normalizer2DataBuilder::decompose(UChar32 start, UChar32 end, uint32_t value) {
- if(norms[value].hasMapping()) {
- Norm &norm=norms[value];
- const UnicodeString &m=*norm.mapping;
- UnicodeString *decomposed=NULL;
- const UChar *s=toUCharPtr(m.getBuffer());
- int32_t length=m.length();
- int32_t prev, i=0;
- UChar32 c;
- while(i<length) {
- prev=i;
- U16_NEXT(s, i, length, c);
- if(start<=c && c<=end) {
- fprintf(stderr,
- "gennorm2 error: U+%04lX maps to itself directly or indirectly\n",
- (long)c);
- exit(U_INVALID_FORMAT_ERROR);
- }
- const Norm &cNorm=getNormRef(c);
- if(cNorm.hasMapping()) {
- if(norm.mappingType==Norm::ROUND_TRIP) {
- if(prev==0) {
- if(cNorm.mappingType!=Norm::ROUND_TRIP) {
- fprintf(stderr,
- "gennorm2 error: "
- "U+%04lX's round-trip mapping's starter "
- "U+%04lX one-way-decomposes, "
- "not possible in Unicode normalization\n",
- (long)start, (long)c);
- exit(U_INVALID_FORMAT_ERROR);
- }
- uint8_t myTrailCC=getCC(m.char32At(i));
- UChar32 cTrailChar=cNorm.mapping->char32At(cNorm.mapping->length()-1);
- uint8_t cTrailCC=getCC(cTrailChar);
- if(cTrailCC>myTrailCC) {
- fprintf(stderr,
- "gennorm2 error: "
- "U+%04lX's round-trip mapping's starter "
- "U+%04lX decomposes and the "
- "inner/earlier tccc=%hu > outer/following tccc=%hu, "
- "not possible in Unicode normalization\n",
- (long)start, (long)c,
- (short)cTrailCC, (short)myTrailCC);
- exit(U_INVALID_FORMAT_ERROR);
- }
- } else {
- fprintf(stderr,
- "gennorm2 error: "
- "U+%04lX's round-trip mapping's non-starter "
- "U+%04lX decomposes, "
- "not possible in Unicode normalization\n",
- (long)start, (long)c);
- exit(U_INVALID_FORMAT_ERROR);
- }
- }
- if(decomposed==NULL) {
- decomposed=new UnicodeString(m, 0, prev);
- }
- decomposed->append(*cNorm.mapping);
- } else if(Hangul::isHangul(c)) {
- UChar buffer[3];
- int32_t hangulLength=Hangul::decompose(c, buffer);
- if(norm.mappingType==Norm::ROUND_TRIP && prev!=0) {
- fprintf(stderr,
- "gennorm2 error: "
- "U+%04lX's round-trip mapping's non-starter "
- "U+%04lX decomposes, "
- "not possible in Unicode normalization\n",
- (long)start, (long)c);
- exit(U_INVALID_FORMAT_ERROR);
- }
- if(decomposed==NULL) {
- decomposed=new UnicodeString(m, 0, prev);
- }
- decomposed->append(buffer, hangulLength);
- } else if(decomposed!=NULL) {
- decomposed->append(m, prev, i-prev);
- }
- }
- if(decomposed!=NULL) {
- if(norm.rawMapping==NULL) {
- // Remember the original mapping when decomposing recursively.
- norm.rawMapping=norm.mapping;
- } else {
- delete norm.mapping;
- }
- norm.mapping=decomposed;
- // Not norm.setMappingCP(); because the original mapping
- // is most likely to be encodable as a delta.
- return TRUE;
- }
- }
- return FALSE;
-}
-
-class BuilderReorderingBuffer {
-public:
- BuilderReorderingBuffer() : fLength(0), fLastStarterIndex(-1), fDidReorder(FALSE) {}
- void reset() {
- fLength=0;
- fLastStarterIndex=-1;
- fDidReorder=FALSE;
- }
- int32_t length() const { return fLength; }
- UBool isEmpty() const { return fLength==0; }
- int32_t lastStarterIndex() const { return fLastStarterIndex; }
- UChar32 charAt(int32_t i) const { return fArray[i]>>8; }
- uint8_t ccAt(int32_t i) const { return (uint8_t)fArray[i]; }
- UBool didReorder() const { return fDidReorder; }
- void append(UChar32 c, uint8_t cc) {
- if(cc==0 || fLength==0 || ccAt(fLength-1)<=cc) {
- if(cc==0) {
- fLastStarterIndex=fLength;
- }
- fArray[fLength++]=(c<<8)|cc;
- return;
- }
- // Let this character bubble back to its canonical order.
- int32_t i=fLength-1;
- while(i>fLastStarterIndex && ccAt(i)>cc) {
- --i;
- }
- ++i; // after the last starter or prevCC<=cc
- // Move this and the following characters forward one to make space.
- for(int32_t j=fLength; i<j; --j) {
- fArray[j]=fArray[j-1];
- }
- fArray[i]=(c<<8)|cc;
- ++fLength;
- fDidReorder=TRUE;
- }
- void toString(UnicodeString &dest) {
- dest.remove();
- for(int32_t i=0; i<fLength; ++i) {
- dest.append(charAt(i));
- }
- }
- void setComposite(UChar32 composite, int32_t combMarkIndex) {
- fArray[fLastStarterIndex]=composite<<8;
- // Remove the combining mark that contributed to the composite.
- --fLength;
- while(combMarkIndex<fLength) {
- fArray[combMarkIndex]=fArray[combMarkIndex+1];
- ++combMarkIndex;
- }
- }
-private:
- int32_t fArray[Normalizer2Impl::MAPPING_LENGTH_MASK];
- int32_t fLength;
- int32_t fLastStarterIndex;
- UBool fDidReorder;
-};
-
-void
-Normalizer2DataBuilder::reorder(Norm *p, BuilderReorderingBuffer &buffer) {
- UnicodeString &m=*p->mapping;
- int32_t length=m.length();
- if(length>Normalizer2Impl::MAPPING_LENGTH_MASK) {
- return; // writeMapping() will complain about it and print the code point.
- }
- const UChar *s=toUCharPtr(m.getBuffer());
- int32_t i=0;
- UChar32 c;
- while(i<length) {
- U16_NEXT(s, i, length, c);
- buffer.append(c, getCC(c));
- }
- if(buffer.didReorder()) {
- buffer.toString(m);
- }
-}
-
-/*
- * Computes the flag for the last code branch in Normalizer2Impl::hasCompBoundaryAfter().
- * A starter character with a mapping does not have a composition boundary after it
- * if the character itself combines-forward (which is tested by the caller of this function),
- * or it is deleted (mapped to the empty string),
- * or its mapping contains no starter,
- * or the last starter combines-forward.
- */
-UBool Normalizer2DataBuilder::hasNoCompBoundaryAfter(BuilderReorderingBuffer &buffer) {