3 * (C) Copyright IBM Corp. 1998-2006 - All Rights Reserved
8 #include "OpenTypeTables.h"
9 #include "OpenTypeUtilities.h"
10 #include "IndicReordering.h"
11 #include "LEGlyphStorage.h"
12 #include "MPreFixups.h"
// Short local aliases for the OpenType feature tags used by the Indic
// feature map in this file. Each alias expands to the corresponding
// LE_*_FEATURE_TAG constant, so the table entries can be written as
// {xxxxFeatureTag, xxxxFeatureMask} pairs.
#define loclFeatureTag LE_LOCL_FEATURE_TAG
#define initFeatureTag LE_INIT_FEATURE_TAG
#define nuktFeatureTag LE_NUKT_FEATURE_TAG
#define akhnFeatureTag LE_AKHN_FEATURE_TAG
#define rphfFeatureTag LE_RPHF_FEATURE_TAG
#define blwfFeatureTag LE_BLWF_FEATURE_TAG
#define halfFeatureTag LE_HALF_FEATURE_TAG
#define pstfFeatureTag LE_PSTF_FEATURE_TAG
#define vatuFeatureTag LE_VATU_FEATURE_TAG
#define presFeatureTag LE_PRES_FEATURE_TAG
#define blwsFeatureTag LE_BLWS_FEATURE_TAG
#define abvsFeatureTag LE_ABVS_FEATURE_TAG
#define pstsFeatureTag LE_PSTS_FEATURE_TAG
#define halnFeatureTag LE_HALN_FEATURE_TAG

#define blwmFeatureTag LE_BLWM_FEATURE_TAG
#define abvmFeatureTag LE_ABVM_FEATURE_TAG
#define distFeatureTag LE_DIST_FEATURE_TAG
// Per-feature bits of the FeatureMask attached to each output character.
// Every feature owns exactly one bit; the bits are assigned from the most
// significant bit (0x80000000) downwards and never overlap, so masks can be
// combined with bitwise OR.
#define loclFeatureMask 0x80000000UL
#define rphfFeatureMask 0x40000000UL
#define blwfFeatureMask 0x20000000UL
#define halfFeatureMask 0x10000000UL
#define pstfFeatureMask 0x08000000UL
#define nuktFeatureMask 0x04000000UL
#define akhnFeatureMask 0x02000000UL
#define vatuFeatureMask 0x01000000UL
#define presFeatureMask 0x00800000UL
#define blwsFeatureMask 0x00400000UL
#define abvsFeatureMask 0x00200000UL
#define pstsFeatureMask 0x00100000UL
#define halnFeatureMask 0x00080000UL
#define blwmFeatureMask 0x00040000UL
#define abvmFeatureMask 0x00020000UL
#define distFeatureMask 0x00010000UL
#define initFeatureMask 0x00008000UL
// Output helper used while reordering a syllable. Characters are stored into
// fOutChars, and for each stored character the source index and feature mask
// are recorded in fGlyphStorage (see writeChar). Matras, vowel modifiers and
// stress marks are first "noted" (saved in members) and later emitted by the
// write* methods, each of which forwards to writeChar.
53 class IndicReorderingOutput
: public UMemory
{
// Glyph storage that receives the char index and aux data for each output slot.
58 LEGlyphStorage
&fGlyphStorage
;
// Source index saved for the below-base matra (set in saveMatra).
64 le_int32 fMbelowIndex
;
// Source index saved for the above-base matra (set in saveMatra).
67 le_int32 fMaboveIndex
;
// Saved length mark character; 0 means "none" (see writeLengthMark).
72 LEUnicode fLengthMark
;
73 le_int32 fLengthMarkIndex
;
// Source index saved for a virama found among the matra pieces.
76 le_int32 fViramaIndex
;
// Feature mask used for every matra piece that is written.
78 FeatureMask fMatraFeatures
;
// Output index at which the pre-base matra was written; initialised to -1.
80 le_int32 fMPreOutIndex
;
// Optional fixup list (may be NULL); noteBaseConsonant adds entries to it.
81 MPreFixups
*fMPreFixups
;
// Feature mask used when the saved vowel modifier is written.
86 FeatureMask fVMFeatures
;
// Feature mask used when the saved stress mark is written.
91 FeatureMask fSMFeatures
;
// Records where a matra (or one piece of a split matra) came from. The slot
// is chosen from matraClass: length mark, virama, or otherwise by the
// position bits (matraClass & CF_POS_MASK): pre, below, above or post.
93 void saveMatra(LEUnicode matra
, le_int32 matraIndex
, IndicClassTable::CharClass matraClass
)
95 // FIXME: check if already set, or if not a matra...
96 if (IndicClassTable::isLengthMark(matraClass
)) {
98 fLengthMarkIndex
= matraIndex
;
99 } else if (IndicClassTable::isVirama(matraClass
)) {
101 fViramaIndex
= matraIndex
;
// Neither length mark nor virama: dispatch on the position bits.
103 switch (matraClass
& CF_POS_MASK
) {
106 fMpreIndex
= matraIndex
;
111 fMbelowIndex
= matraIndex
;
116 fMaboveIndex
= matraIndex
;
121 fMpostIndex
= matraIndex
;
// Binds the output buffer, glyph storage and optional fixup list, and clears
// every saved mark and index to 0. fMPreOutIndex starts at -1, the value for
// which noteBaseConsonant adds no fixup (it requires fMPreOutIndex >= 0).
132 IndicReorderingOutput(LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
, MPreFixups
*mpreFixups
)
133 : fOutIndex(0), fOutChars(outChars
), fGlyphStorage(glyphStorage
),
134 fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
135 fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fVirama(0), fViramaIndex(0),
136 fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups
),
137 fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
138 fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0)
140 // nothing else to do...
143 ~IndicReorderingOutput()
145 // nothing to do here...
// Clear all saved matra pieces, the length mark and the virama...
150 fMpre
= fMbelow
= fMabove
= fMpost
= fLengthMark
= fVirama
= 0;
// ...and the saved vowel modifiers and stress marks.
153 fVMabove
= fVMpost
= 0;
154 fSMabove
= fSMbelow
= 0;
// Stores ch at the current output position and records, for that same
// position, the source character index and the feature mask in the glyph
// storage. The LEErrorCode is local; the visible code does not inspect it.
157 void writeChar(LEUnicode ch
, le_uint32 charIndex
, FeatureMask charFeatures
)
159 LEErrorCode success
= LE_NO_ERROR
;
161 fOutChars
[fOutIndex
] = ch
;
163 fGlyphStorage
.setCharIndex(fOutIndex
, charIndex
, success
);
164 fGlyphStorage
.setAuxData(fOutIndex
, charFeatures
, success
);
// Looks up the class of `matra`, remembers matraFeatures for later writes,
// and, if the character is a matra, saves it via saveMatra. A split matra is
// saved piece by piece (up to 3 non-zero pieces), all with the same
// matraIndex.
// NOTE(review): initFeatureMask is OR-ed into fMatraFeatures; presumably this
// is conditional on wordStart -- confirm.
169 le_bool
noteMatra(const IndicClassTable
*classTable
, LEUnicode matra
, le_uint32 matraIndex
, FeatureMask matraFeatures
, le_bool wordStart
)
171 IndicClassTable::CharClass matraClass
= classTable
->getCharClass(matra
);
173 fMatraFeatures
= matraFeatures
;
176 fMatraFeatures
|= initFeatureMask
;
179 if (IndicClassTable::isMatra(matraClass
)) {
180 if (IndicClassTable::isSplitMatra(matraClass
)) {
181 const SplitMatra
*splitMatra
= classTable
->getSplitMatra(matraClass
);
// Walk the pieces; a 0 entry terminates the list early.
184 for (i
= 0; i
< 3 && (*splitMatra
)[i
] != 0; i
+= 1) {
185 LEUnicode piece
= (*splitMatra
)[i
];
186 IndicClassTable::CharClass pieceClass
= classTable
->getCharClass(piece
);
188 saveMatra(piece
, matraIndex
, pieceClass
);
// Not a split matra: save the character itself.
191 saveMatra(matra
, matraIndex
, matraClass
);
// Remembers the index and features of a vowel modifier and, if the character
// really is a vowel modifier, stores it in the above or post slot selected by
// its position bits (vmClass & CF_POS_MASK).
200 void noteVowelModifier(const IndicClassTable
*classTable
, LEUnicode vowelModifier
, le_uint32 vowelModifierIndex
, FeatureMask vowelModifierFeatures
)
202 IndicClassTable::CharClass vmClass
= classTable
->getCharClass(vowelModifier
);
204 fVMIndex
= vowelModifierIndex
;
205 fVMFeatures
= vowelModifierFeatures
;
207 if (IndicClassTable::isVowelModifier(vmClass
)) {
208 switch (vmClass
& CF_POS_MASK
) {
210 fVMabove
= vowelModifier
;
214 fVMpost
= vowelModifier
;
218 // FIXME: this is an error...
// Remembers the index and features of a stress mark and, if the character
// really is a stress mark, stores it in the above or below slot selected by
// its position bits (smClass & CF_POS_MASK).
224 void noteStressMark(const IndicClassTable
*classTable
, LEUnicode stressMark
, le_uint32 stressMarkIndex
, FeatureMask stressMarkFeatures
)
226 IndicClassTable::CharClass smClass
= classTable
->getCharClass(stressMark
);
228 fSMIndex
= stressMarkIndex
;
229 fSMFeatures
= stressMarkFeatures
;
231 if (IndicClassTable::isStressMark(smClass
)) {
232 switch (smClass
& CF_POS_MASK
) {
234 fSMabove
= stressMark
;
238 fSMbelow
= stressMark
;
242 // FIXME: this is an error...
// Called just before the base consonant is written: if a fixup list exists
// and a pre-base matra has been written (fMPreOutIndex >= 0), records the
// pair (current output index, pre-base matra output index) in the list.
248 void noteBaseConsonant()
250 if (fMPreFixups
!= NULL
&& fMPreOutIndex
>= 0) {
251 fMPreFixups
->add(fOutIndex
, fMPreOutIndex
);
255 // Handles virama in Sinhala split vowels.
259 writeChar(fVirama
, fViramaIndex
, fMatraFeatures
);
// Remember where the pre-base matra lands in the output, then write it.
266 fMPreOutIndex
= fOutIndex
;
267 writeChar(fMpre
, fMpreIndex
, fMatraFeatures
);
// Write the saved below-base matra.
274 writeChar(fMbelow
, fMbelowIndex
, fMatraFeatures
);
// Write the saved above-base matra.
281 writeChar(fMabove
, fMaboveIndex
, fMatraFeatures
);
// Write the saved post-base matra.
288 writeChar(fMpost
, fMpostIndex
, fMatraFeatures
);
// Writes the saved length mark, if one was noted (non-zero).
292 void writeLengthMark()
294 if (fLengthMark
!= 0) {
295 writeChar(fLengthMark
, fLengthMarkIndex
, fMatraFeatures
);
// Vowel modifiers: both slots share fVMIndex and fVMFeatures.
302 writeChar(fVMabove
, fVMIndex
, fVMFeatures
);
309 writeChar(fVMpost
, fVMIndex
, fVMFeatures
);
// Stress marks: both slots share fSMIndex and fSMFeatures.
316 writeChar(fSMabove
, fSMIndex
, fSMFeatures
);
323 writeChar(fSMbelow
, fSMIndex
, fSMFeatures
);
// NOTE(review): presumably returns fOutIndex, i.e. the number of characters
// written so far -- confirm.
327 le_int32
getOutputIndex()
// U+25CC DOTTED CIRCLE. reorder() writes it in front of a syllable that
// starts with a dependent vowel, split vowel piece or vowel modifier.
335 C_DOTTED_CIRCLE
= 0x25CC
// TODO: Find better names for these!
//
// Nested feature-mask sets passed to writeChar() by reorder(). tagArray4 is
// the common set; each lower-numbered set adds exactly one more feature:
//   tagArray3 = tagArray4 + pstf
//   tagArray2 = tagArray3 + half
//   tagArray1 = tagArray2 + blwf
//   tagArray0 = tagArray1 + rphf
// initFeatureMask is deliberately absent from all of them; it is OR-ed in
// separately by IndicReorderingOutput::noteMatra.
#define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask)
#define tagArray3 (pstfFeatureMask | tagArray4)
#define tagArray2 (halfFeatureMask | tagArray3)
#define tagArray1 (blwfFeatureMask | tagArray2)
#define tagArray0 (rphfFeatureMask | tagArray1)
// Table pairing each OpenType feature tag with the FeatureMask bit that
// selects it. getFeatureMap() reports the number of entries (featureCount).
345 static const FeatureMap featureMap
[] =
347 {loclFeatureTag
, loclFeatureMask
},
348 {initFeatureTag
, initFeatureMask
},
349 {nuktFeatureTag
, nuktFeatureMask
},
350 {akhnFeatureTag
, akhnFeatureMask
},
351 {rphfFeatureTag
, rphfFeatureMask
},
352 {blwfFeatureTag
, blwfFeatureMask
},
353 {halfFeatureTag
, halfFeatureMask
},
354 {pstfFeatureTag
, pstfFeatureMask
},
355 {vatuFeatureTag
, vatuFeatureMask
},
356 {presFeatureTag
, presFeatureMask
},
357 {blwsFeatureTag
, blwsFeatureMask
},
358 {abvsFeatureTag
, abvsFeatureMask
},
359 {pstsFeatureTag
, pstsFeatureMask
},
360 {halnFeatureTag
, halnFeatureMask
},
361 {blwmFeatureTag
, blwmFeatureMask
},
362 {abvmFeatureTag
, abvmFeatureMask
},
363 {distFeatureTag
, distFeatureMask
}
// Number of entries in featureMap, computed from the array itself so the two
// cannot drift apart.
366 static const le_int32 featureCount
= LE_ARRAY_SIZE(featureMap
);
// Syllable-recognition state machine used by findSyllable(). Each row is a
// state (named in the trailing comment); each column is a character class,
// indexed by (charClass & CF_CLASS_MASK), in the order of the header row
// below. The entry is the next state.
// NOTE(review): -1 entries appear to mean "no valid transition from here" --
// confirm against the loop in findSyllable().
368 static const le_int8 stateTable
[][CC_COUNT
] =
370 // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw
371 { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1}, // 0 - ground state
372 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
373 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12}, // 2 - consonant with nukta
374 {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12}, // 3 - consonant
375 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
376 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
377 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
378 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
379 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
380 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
381 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1}, // 10 - second part of split vowel
382 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 11 - independent vowels that can take an iv
383 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
// Reports the size of the feature map through the `count` out-parameter.
// NOTE(review): presumably the returned pointer is featureMap itself --
// confirm.
387 const FeatureMap
*IndicReordering::getFeatureMap(le_int32
&count
)
389 count
= featureCount
;
// Scans chars[prev .. charCount) to find the extent of one syllable by
// running each character's class through stateTable.
// NOTE(review): presumably returns the index just past the syllable (reorder()
// uses the result as the exclusive end of the syllable) -- confirm.
394 le_int32
IndicReordering::findSyllable(const IndicClassTable
*classTable
, const LEUnicode
*chars
, le_int32 prev
, le_int32 charCount
)
396 le_int32 cursor
= prev
;
399 while (cursor
< charCount
) {
400 IndicClassTable::CharClass charClass
= classTable
->getCharClass(chars
[cursor
]);
// Advance the state machine; the column is the class without its flag bits.
402 state
= stateTable
[state
][charClass
& CF_CLASS_MASK
];
// Reorders the text in `chars` syllable by syllable into `outChars`. For every
// character written, the source index and a feature mask (one of the
// tagArrayN sets) are recorded in `glyphStorage` through
// IndicReorderingOutput::writeChar.
//
// chars / charCount  input text and its length
// scriptCode         selects the IndicClassTable for the script
// outChars           receives the reordered characters
// glyphStorage       receives per-character source index and feature mask
// outMPreFixups      receives the MPreFixups object created here, or NULL if
//                    the script does not set SF_MPRE_FIXUP
//
// Returns the value of output.getOutputIndex() after all syllables are done.
//
// NOTE(review): classTable is dereferenced right after the lookup without a
// visible NULL check -- confirm getScriptClassTable() cannot return NULL for
// the script codes that reach this function.
// NOTE(review): the result of `new MPreFixups(charCount)` is not checked here;
// IndicReorderingOutput tolerates a NULL fixup list, but confirm that this is
// the intended behaviour on allocation failure.
414 le_int32
IndicReordering::reorder(const LEUnicode
*chars
, le_int32 charCount
, le_int32 scriptCode
,
415 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
,
416 MPreFixups
**outMPreFixups
)
418 MPreFixups
*mpreFixups
= NULL
;
419 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
// Only scripts flagged SF_MPRE_FIXUP get a fixup list.
421 if (classTable
->scriptFlags
& SF_MPRE_FIXUP
) {
422 mpreFixups
= new MPreFixups(charCount
);
425 IndicReorderingOutput
output(outChars
, glyphStorage
, mpreFixups
);
426 le_int32 i
, prev
= 0;
427 le_bool lastInWord
= FALSE
;
// Main loop: one iteration per syllable, starting at `prev`.
429 while (prev
< charCount
) {
430 le_int32 syllable
= findSyllable(classTable
, chars
, prev
, charCount
);
431 le_int32 matra
, markStart
= syllable
;
// A stress mark at the end of the syllable is noted on the output object so
// that it can be written at its reordered position later.
435 if (classTable
->isStressMark(chars
[markStart
- 1])) {
437 output
.noteStressMark(classTable
, chars
[markStart
], markStart
, tagArray1
);
// Same for a vowel modifier at the end of what remains.
440 if (markStart
!= prev
&& classTable
->isVowelModifier(chars
[markStart
- 1])) {
442 output
.noteVowelModifier(classTable
, chars
[markStart
], markStart
, tagArray1
);
// Note matras walking backwards from just before the marks; stop at the
// first character noteMatra() rejects, or at the syllable start.
445 matra
= markStart
- 1;
447 while (output
.noteMatra(classTable
, chars
[matra
], matra
, tagArray1
, !lastInWord
) && matra
!= prev
) {
// Dispatch on the class of the first character of the syllable.
453 switch (classTable
->getCharClass(chars
[prev
]) & CF_CLASS_MASK
) {
458 case CC_INDEPENDENT_VOWEL
:
459 case CC_ZERO_WIDTH_MARK
:
// Copy the syllable through unchanged.
460 for (i
= prev
; i
< syllable
; i
+= 1) {
461 output
.writeChar(chars
[i
], i
, tagArray1
);
// Write a dotted circle in front of the first character.
468 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
469 output
.writeChar(chars
[prev
], prev
, tagArray1
);
472 case CC_DEPENDENT_VOWEL
:
473 case CC_SPLIT_VOWEL_PIECE_1
:
474 case CC_SPLIT_VOWEL_PIECE_2
:
475 case CC_SPLIT_VOWEL_PIECE_3
:
476 case CC_VOWEL_MODIFIER
:
// Syllable starts with a mark: write a dotted circle, then the noted marks
// in the order dictated by the script flags.
480 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
482 output
.writeMbelow();
483 output
.writeSMbelow();
484 output
.writeMabove();
486 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) != 0) {
490 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) != 0) {
491 output
.writeVMabove();
492 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
495 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
499 output
.writeLengthMark();
500 output
.writeVirama();
502 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) == 0) {
503 output
.writeVMabove();
504 output
.writeSMabove();
507 output
.writeVMpost();
510 case CC_INDEPENDENT_VOWEL_2
:
511 case CC_INDEPENDENT_VOWEL_3
:
513 case CC_CONSONANT_WITH_NUKTA
:
// Consonant-initial syllable. `length` is the span before the marks,
// `lastConsonant` starts at the last character before the marks, and
// `baseLimit` is the lowest index at which the base consonant may be found.
515 le_uint32 length
= markStart
- prev
;
516 le_int32 lastConsonant
= markStart
- 1;
517 le_int32 baseLimit
= prev
;
519 // Check for REPH at front of syllable
520 if (length
> 2 && classTable
->isReph(chars
[prev
]) && classTable
->isVirama(chars
[prev
+ 1])) {
523 // Check for eyelash RA, if the script supports it
524 if ((classTable
->scriptFlags
& SF_EYELASH_RA
) != 0 &&
525 chars
[baseLimit
] == C_SIGN_ZWJ
) {
// Back up until lastConsonant really is a consonant (or hits baseLimit).
534 while (lastConsonant
> baseLimit
&& !classTable
->isConsonant(chars
[lastConsonant
])) {
// Set up the backwards search for the base consonant.
538 le_int32 baseConsonant
= lastConsonant
;
539 le_int32 postBase
= lastConsonant
+ 1;
540 le_int32 postBaseLimit
= classTable
->scriptFlags
& SF_POST_BASE_LIMIT_MASK
;
541 le_bool seenVattu
= FALSE
;
542 le_bool seenBelowBaseForm
= FALSE
;
544 if (postBase
< markStart
&& classTable
->isNukta(chars
[postBase
])) {
// Walk backwards from the last consonant looking for the base consonant.
548 while (baseConsonant
> baseLimit
) {
549 IndicClassTable::CharClass charClass
= classTable
->getCharClass(chars
[baseConsonant
]);
551 if (IndicClassTable::isConsonant(charClass
)) {
// Conditions under which this consonant is tested as the base: the
// post-base limit is 0, a vattu was seen, it is not preceded by a virama,
// or it has no post-/below-base form.
552 if (postBaseLimit
== 0 || seenVattu
||
553 (baseConsonant
> baseLimit
&& !classTable
->isVirama(chars
[baseConsonant
- 1])) ||
554 !IndicClassTable::hasPostOrBelowBaseForm(charClass
)) {
558 seenVattu
= IndicClassTable::isVattu(charClass
);
560 if (IndicClassTable::hasPostBaseForm(charClass
)) {
561 if (seenBelowBaseForm
) {
565 postBase
= baseConsonant
;
566 } else if (IndicClassTable::hasBelowBaseForm(charClass
)) {
567 seenBelowBaseForm
= TRUE
;
580 // NOTE: baseLimit == prev + 3 iff eyelash RA present...
581 if (baseLimit
== prev
+ 3) {
582 output
.writeChar(chars
[prev
], prev
, tagArray2
);
583 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray2
);
584 output
.writeChar(chars
[prev
+ 2], prev
+ 2, tagArray2
);
587 // write any pre-base consonants
588 le_bool supressVattu
= TRUE
;
590 for (i
= baseLimit
; i
< baseConsonant
; i
+= 1) {
591 LEUnicode ch
= chars
[i
];
592 // Don't put 'blwf' on first consonant.
593 FeatureMask features
= (i
== baseLimit
? tagArray2
: tagArray1
);
594 IndicClassTable::CharClass charClass
= classTable
->getCharClass(ch
);
596 if (IndicClassTable::isConsonant(charClass
)) {
597 if (IndicClassTable::isVattu(charClass
) && supressVattu
) {
598 features
= tagArray4
;
601 supressVattu
= IndicClassTable::isVattu(charClass
);
// A virama followed by ZWNJ gets only the common feature set (tagArray4).
// i + 1 <= baseConsonant here, so the look-ahead stays inside the syllable.
602 } else if (IndicClassTable::isVirama(charClass
) && chars
[i
+ 1] == C_SIGN_ZWNJ
)
604 features
= tagArray4
;
607 output
.writeChar(ch
, i
, features
);
// bcSpan is the exclusive end of the run written together with the base
// consonant; the checks below look at a following nukta, virama and ZWNJ.
610 le_int32 bcSpan
= baseConsonant
+ 1;
612 if (bcSpan
< markStart
&& classTable
->isNukta(chars
[bcSpan
])) {
616 if (baseConsonant
== lastConsonant
&& bcSpan
< markStart
&& classTable
->isVirama(chars
[bcSpan
])) {
619 if (bcSpan
< markStart
&& chars
[bcSpan
] == C_SIGN_ZWNJ
) {
624 // note the base consonant for post-GSUB fixups
625 output
.noteBaseConsonant();
627 // write base consonant
628 for (i
= baseConsonant
; i
< bcSpan
; i
+= 1) {
629 output
.writeChar(chars
[i
], i
, tagArray4
);
// Scripts with SF_MATRAS_AFTER_BASE get their below/above matras right
// after the base consonant.
632 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) != 0) {
633 output
.writeMbelow();
634 output
.writeSMbelow(); // FIXME: there are no SMs in these scripts...
635 output
.writeMabove();
639 // write below-base consonants
640 if (baseConsonant
!= lastConsonant
) {
641 for (i
= bcSpan
+ 1; i
< postBase
; i
+= 1) {
642 output
.writeChar(chars
[i
], i
, tagArray1
);
645 if (postBase
> lastConsonant
) {
646 // write halant that was after base consonant
647 output
.writeChar(chars
[bcSpan
], bcSpan
, tagArray1
);
651 // write Mbelow, SMbelow, Mabove
652 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
653 output
.writeMbelow();
654 output
.writeSMbelow();
655 output
.writeMabove();
// SF_REPH_AFTER_BELOW: the reph (first two characters, present when
// baseLimit == prev + 2) is written here, with the rphf feature enabled
// (tagArray0), followed by the above-base marks.
658 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) != 0) {
659 if (baseLimit
== prev
+ 2) {
660 output
.writeChar(chars
[prev
], prev
, tagArray0
);
661 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray0
);
664 output
.writeVMabove();
665 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
668 // write post-base consonants
669 // FIXME: does this put the right tags on post-base consonants?
670 if (baseConsonant
!= lastConsonant
) {
671 if (postBase
<= lastConsonant
) {
672 for (i
= postBase
; i
<= lastConsonant
; i
+= 1) {
673 output
.writeChar(chars
[i
], i
, tagArray3
);
676 // write halant that was after base consonant
677 output
.writeChar(chars
[bcSpan
], bcSpan
, tagArray1
);
680 // write the trailing halant, if there is one
681 if (lastConsonant
< matra
&& classTable
->isVirama(chars
[matra
])) {
682 output
.writeChar(chars
[matra
], matra
, tagArray4
);
687 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
691 output
.writeLengthMark();
692 output
.writeVirama();
// Without SF_REPH_AFTER_BELOW the reph is written here instead, again with
// tagArray0, followed by the above-base marks.
695 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) == 0) {
696 if (baseLimit
== prev
+ 2) {
697 output
.writeChar(chars
[prev
], prev
, tagArray0
);
698 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray0
);
701 output
.writeVMabove();
702 output
.writeSMabove();
705 output
.writeVMpost();
// Hand the fixup list (possibly NULL) back to the caller and report how many
// characters were written.
717 *outMPreFixups
= mpreFixups
;
719 return output
.getOutputIndex();
// Applies the pre-base matra fixups collected during reorder() to
// glyphStorage. A NULL mpreFixups (script without SF_MPRE_FIXUP) is a no-op.
722 void IndicReordering::adjustMPres(MPreFixups
*mpreFixups
, LEGlyphStorage
&glyphStorage
)
724 if (mpreFixups
!= NULL
) {
725 mpreFixups
->apply(glyphStorage
);