3 * (C) Copyright IBM Corp. 1998-2012 - All Rights Reserved
8 #include "OpenTypeTables.h"
9 #include "OpenTypeUtilities.h"
10 #include "IndicReordering.h"
11 #include "LEGlyphStorage.h"
12 #include "MPreFixups.h"
// Short local aliases for the OpenType feature tags used by the Indic shaper.
#define loclFeatureTag LE_LOCL_FEATURE_TAG
#define initFeatureTag LE_INIT_FEATURE_TAG
#define nuktFeatureTag LE_NUKT_FEATURE_TAG
#define akhnFeatureTag LE_AKHN_FEATURE_TAG
#define rphfFeatureTag LE_RPHF_FEATURE_TAG
#define rkrfFeatureTag LE_RKRF_FEATURE_TAG
#define blwfFeatureTag LE_BLWF_FEATURE_TAG
#define halfFeatureTag LE_HALF_FEATURE_TAG
#define pstfFeatureTag LE_PSTF_FEATURE_TAG
#define vatuFeatureTag LE_VATU_FEATURE_TAG
#define presFeatureTag LE_PRES_FEATURE_TAG
#define blwsFeatureTag LE_BLWS_FEATURE_TAG
#define abvsFeatureTag LE_ABVS_FEATURE_TAG
#define pstsFeatureTag LE_PSTS_FEATURE_TAG
#define halnFeatureTag LE_HALN_FEATURE_TAG
#define cjctFeatureTag LE_CJCT_FEATURE_TAG
#define blwmFeatureTag LE_BLWM_FEATURE_TAG
#define abvmFeatureTag LE_ABVM_FEATURE_TAG
#define distFeatureTag LE_DIST_FEATURE_TAG
#define caltFeatureTag LE_CALT_FEATURE_TAG
#define kernFeatureTag LE_KERN_FEATURE_TAG

// Feature selection bits: one bit per feature, allocated from bit 31 downward.
#define loclFeatureMask (1UL << 31)
#define rphfFeatureMask (1UL << 30)
#define blwfFeatureMask (1UL << 29)
#define halfFeatureMask (1UL << 28)
#define pstfFeatureMask (1UL << 27)
#define nuktFeatureMask (1UL << 26)
#define akhnFeatureMask (1UL << 25)
#define vatuFeatureMask (1UL << 24)
#define presFeatureMask (1UL << 23)
#define blwsFeatureMask (1UL << 22)
#define abvsFeatureMask (1UL << 21)
#define pstsFeatureMask (1UL << 20)
#define halnFeatureMask (1UL << 19)
#define blwmFeatureMask (1UL << 18)
#define abvmFeatureMask (1UL << 17)
#define distFeatureMask (1UL << 16)
#define initFeatureMask (1UL << 15)
#define cjctFeatureMask (1UL << 14)
#define rkrfFeatureMask (1UL << 13)
#define caltFeatureMask (1UL << 12)
#define kernFeatureMask (1UL << 11)

// Syllable structure bits
#define baseConsonantMask (1UL << 10)
#define consonantMask     (1UL << 9)
#define halfConsonantMask (1UL << 8)
#define rephConsonantMask (1UL << 7)
#define matraMask         (1UL << 6)
#define vowelModifierMask (1UL << 5)

// Two-bit mark position field (bits 3-4) and the four values it can hold.
#define markPositionMask  (3UL << 3)

#define postBasePosition  (0UL << 3)
#define preBasePosition   (1UL << 3)
#define aboveBasePosition (2UL << 3)
#define belowBasePosition (3UL << 3)

// Set on a glyph once it has been repositioned.
#define repositionedGlyphMask (1UL << 1)

// Feature groups applied together.
#define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | \
                                blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask )
#define positioningFormsMask  ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask )
#define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | \
                                halnFeatureMask | caltFeatureMask )

#define C_MALAYALAM_VOWEL_SIGN_U 0x0D41
#define C_DOTTED_CIRCLE 0x25CC
#define NO_GLYPH 0xFFFF

// Some level of debate as to the proper value for MAX_CONSONANTS_PER_SYLLABLE.  Ticket 5588 states that 4
// is the magic number according to ISCII, but 5 seems to be the more consistent with XP.
#define MAX_CONSONANTS_PER_SYLLABLE 5

// Number of entries in the per-script dynamic property array.
#define INDIC_BLOCK_SIZE 0x7F
91 class IndicReorderingOutput
: public UMemory
{
93 le_int32 fSyllableCount
;
97 LEGlyphStorage
&fGlyphStorage
;
103 le_int32 fMbelowIndex
;
106 le_int32 fMaboveIndex
;
109 le_int32 fMpostIndex
;
111 LEUnicode fLengthMark
;
112 le_int32 fLengthMarkIndex
;
115 le_int32 fAlLakunaIndex
;
117 FeatureMask fMatraFeatures
;
119 le_int32 fMPreOutIndex
;
120 MPreFixups
*fMPreFixups
;
125 FeatureMask fVMFeatures
;
130 FeatureMask fSMFeatures
;
132 LEUnicode fPreBaseConsonant
;
133 LEUnicode fPreBaseVirama
;
135 FeatureMask fPBCFeatures
;
137 void saveMatra(LEUnicode matra
, le_int32 matraIndex
, IndicClassTable::CharClass matraClass
)
139 // FIXME: check if already set, or if not a matra...
140 if (IndicClassTable::isLengthMark(matraClass
)) {
142 fLengthMarkIndex
= matraIndex
;
143 } else if (IndicClassTable::isAlLakuna(matraClass
)) {
145 fAlLakunaIndex
= matraIndex
;
147 switch (matraClass
& CF_POS_MASK
) {
150 fMpreIndex
= matraIndex
;
155 fMbelowIndex
= matraIndex
;
160 fMaboveIndex
= matraIndex
;
165 fMpostIndex
= matraIndex
;
176 IndicReorderingOutput(LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
, MPreFixups
*mpreFixups
)
177 : fSyllableCount(0), fOutIndex(0), fOutChars(outChars
), fGlyphStorage(glyphStorage
),
178 fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
179 fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
180 fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups
),
181 fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
182 fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0),
183 fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0)
185 // nothing else to do...
188 ~IndicReorderingOutput()
190 // nothing to do here...
197 fMpre
= fMbelow
= fMabove
= fMpost
= fLengthMark
= fAlLakuna
= 0;
200 fVMabove
= fVMpost
= 0;
201 fSMabove
= fSMbelow
= 0;
203 fPreBaseConsonant
= fPreBaseVirama
= 0;
206 void writeChar(LEUnicode ch
, le_uint32 charIndex
, FeatureMask charFeatures
)
208 LEErrorCode success
= LE_NO_ERROR
;
210 fOutChars
[fOutIndex
] = ch
;
212 fGlyphStorage
.setCharIndex(fOutIndex
, charIndex
, success
);
213 fGlyphStorage
.setAuxData(fOutIndex
, charFeatures
| (fSyllableCount
& LE_GLYPH_GROUP_MASK
), success
);
218 void setFeatures ( le_uint32 charIndex
, FeatureMask charFeatures
)
220 LEErrorCode success
= LE_NO_ERROR
;
222 fGlyphStorage
.setAuxData( charIndex
, charFeatures
, success
);
226 FeatureMask
getFeatures ( le_uint32 charIndex
)
228 LEErrorCode success
= LE_NO_ERROR
;
229 return fGlyphStorage
.getAuxData(charIndex
,success
);
232 void decomposeReorderMatras ( const IndicClassTable
*classTable
, le_int32 beginSyllable
, le_int32 nextSyllable
, le_int32 inv_count
) {
234 LEErrorCode success
= LE_NO_ERROR
;
236 for ( i
= beginSyllable
; i
< nextSyllable
; i
++ ) {
237 if ( classTable
->isMatra(fOutChars
[i
+inv_count
])) {
238 IndicClassTable::CharClass matraClass
= classTable
->getCharClass(fOutChars
[i
+inv_count
]);
239 if ( classTable
->isSplitMatra(matraClass
)) {
240 le_int32 saveIndex
= fGlyphStorage
.getCharIndex(i
+inv_count
,success
);
241 le_uint32 saveAuxData
= fGlyphStorage
.getAuxData(i
+inv_count
,success
);
242 const SplitMatra
*splitMatra
= classTable
->getSplitMatra(matraClass
);
244 for (j
= 0 ; j
< SM_MAX_PIECES
&& *(splitMatra
)[j
] != 0 ; j
++) {
245 LEUnicode piece
= (*splitMatra
)[j
];
247 fOutChars
[i
+inv_count
] = piece
;
248 matraClass
= classTable
->getCharClass(piece
);
250 insertCharacter(piece
,i
+1+inv_count
,saveIndex
,saveAuxData
);
256 if ((matraClass
& CF_POS_MASK
) == CF_POS_BEFORE
) {
257 moveCharacter(i
+inv_count
,beginSyllable
+inv_count
);
263 void moveCharacter( le_int32 fromPosition
, le_int32 toPosition
) {
264 le_int32 i
,saveIndex
;
265 le_uint32 saveAuxData
;
266 LEUnicode saveChar
= fOutChars
[fromPosition
];
267 LEErrorCode success
= LE_NO_ERROR
;
268 LEErrorCode success2
= LE_NO_ERROR
;
269 saveIndex
= fGlyphStorage
.getCharIndex(fromPosition
,success
);
270 saveAuxData
= fGlyphStorage
.getAuxData(fromPosition
,success
);
272 if ( fromPosition
> toPosition
) {
273 for ( i
= fromPosition
; i
> toPosition
; i
-- ) {
274 fOutChars
[i
] = fOutChars
[i
-1];
275 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
-1,success2
),success
);
276 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
-1,success2
), success
);
280 for ( i
= fromPosition
; i
< toPosition
; i
++ ) {
281 fOutChars
[i
] = fOutChars
[i
+1];
282 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
+1,success2
),success
);
283 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
+1,success2
), success
);
287 fOutChars
[toPosition
] = saveChar
;
288 fGlyphStorage
.setCharIndex(toPosition
,saveIndex
,success
);
289 fGlyphStorage
.setAuxData(toPosition
,saveAuxData
,success
);
292 void insertCharacter( LEUnicode ch
, le_int32 toPosition
, le_int32 charIndex
, le_uint32 auxData
) {
293 LEErrorCode success
= LE_NO_ERROR
;
297 for ( i
= fOutIndex
; i
> toPosition
; i
--) {
298 fOutChars
[i
] = fOutChars
[i
-1];
299 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
-1,success
),success
);
300 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
-1,success
), success
);
303 fOutChars
[toPosition
] = ch
;
304 fGlyphStorage
.setCharIndex(toPosition
,charIndex
,success
);
305 fGlyphStorage
.setAuxData(toPosition
,auxData
,success
);
308 void removeCharacter( le_int32 fromPosition
) {
309 LEErrorCode success
= LE_NO_ERROR
;
313 for ( i
= fromPosition
; i
< fOutIndex
; i
--) {
314 fOutChars
[i
] = fOutChars
[i
+1];
315 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
+1,success
),success
);
316 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
+1,success
), success
);
320 le_bool
noteMatra(const IndicClassTable
*classTable
, LEUnicode matra
, le_uint32 matraIndex
, FeatureMask matraFeatures
, le_bool wordStart
)
322 IndicClassTable::CharClass matraClass
= classTable
->getCharClass(matra
);
324 fMatraFeatures
= matraFeatures
;
327 fMatraFeatures
|= initFeatureMask
;
330 if (IndicClassTable::isMatra(matraClass
)) {
331 if (IndicClassTable::isSplitMatra(matraClass
)) {
332 const SplitMatra
*splitMatra
= classTable
->getSplitMatra(matraClass
);
335 for (i
= 0; i
< SM_MAX_PIECES
&& (*splitMatra
)[i
] != 0; i
+= 1) {
336 LEUnicode piece
= (*splitMatra
)[i
];
337 IndicClassTable::CharClass pieceClass
= classTable
->getCharClass(piece
);
339 saveMatra(piece
, matraIndex
, pieceClass
);
342 saveMatra(matra
, matraIndex
, matraClass
);
351 void noteVowelModifier(const IndicClassTable
*classTable
, LEUnicode vowelModifier
, le_uint32 vowelModifierIndex
, FeatureMask vowelModifierFeatures
)
353 IndicClassTable::CharClass vmClass
= classTable
->getCharClass(vowelModifier
);
355 fVMIndex
= vowelModifierIndex
;
356 fVMFeatures
= vowelModifierFeatures
;
358 if (IndicClassTable::isVowelModifier(vmClass
)) {
359 switch (vmClass
& CF_POS_MASK
) {
361 fVMabove
= vowelModifier
;
365 fVMpost
= vowelModifier
;
369 // FIXME: this is an error...
375 void noteStressMark(const IndicClassTable
*classTable
, LEUnicode stressMark
, le_uint32 stressMarkIndex
, FeatureMask stressMarkFeatures
)
377 IndicClassTable::CharClass smClass
= classTable
->getCharClass(stressMark
);
379 fSMIndex
= stressMarkIndex
;
380 fSMFeatures
= stressMarkFeatures
;
382 if (IndicClassTable::isStressMark(smClass
)) {
383 switch (smClass
& CF_POS_MASK
) {
385 fSMabove
= stressMark
;
389 fSMbelow
= stressMark
;
393 // FIXME: this is an error...
399 void notePreBaseConsonant(le_uint32 index
,LEUnicode PBConsonant
, LEUnicode PBVirama
, FeatureMask features
)
402 fPreBaseConsonant
= PBConsonant
;
403 fPreBaseVirama
= PBVirama
;
404 fPBCFeatures
= features
;
407 void noteBaseConsonant()
409 if (fMPreFixups
!= NULL
&& fMPreOutIndex
>= 0) {
410 fMPreFixups
->add(fOutIndex
, fMPreOutIndex
);
414 // Handles Al-Lakuna in Sinhala split vowels.
417 if (fAlLakuna
!= 0) {
418 writeChar(fAlLakuna
, fAlLakunaIndex
, fMatraFeatures
);
425 fMPreOutIndex
= fOutIndex
;
426 writeChar(fMpre
, fMpreIndex
, fMatraFeatures
);
433 writeChar(fMbelow
, fMbelowIndex
, fMatraFeatures
);
440 writeChar(fMabove
, fMaboveIndex
, fMatraFeatures
);
447 writeChar(fMpost
, fMpostIndex
, fMatraFeatures
);
451 void writeLengthMark()
453 if (fLengthMark
!= 0) {
454 writeChar(fLengthMark
, fLengthMarkIndex
, fMatraFeatures
);
461 writeChar(fVMabove
, fVMIndex
, fVMFeatures
);
468 writeChar(fVMpost
, fVMIndex
, fVMFeatures
);
475 writeChar(fSMabove
, fSMIndex
, fSMFeatures
);
482 writeChar(fSMbelow
, fSMIndex
, fSMFeatures
);
486 void writePreBaseConsonant()
488 // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However,
489 // it seems that almost none of the fonts for Malayalam are set up to handle this.
490 // So, we're going to force the issue here by using the rakar as defined with RA in most fonts.
492 if (fPreBaseConsonant
== 0x0d31) { // RRA
493 fPreBaseConsonant
= 0x0d30; // RA
496 if (fPreBaseConsonant
!= 0) {
497 writeChar(fPreBaseConsonant
, fPBCIndex
, fPBCFeatures
);
498 writeChar(fPreBaseVirama
,fPBCIndex
-1,fPBCFeatures
);
502 le_int32
getOutputIndex()
// TODO: Find better names for these!
// Nested feature sets: each tagArrayN adds one feature to tagArray(N+1),
// so tagArray0 is the widest set and tagArray4 the narrowest.
#define tagArray4 ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | \
                    presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | \
                    halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask )
#define tagArray3 ( pstfFeatureMask | tagArray4 )
#define tagArray2 ( halfFeatureMask | tagArray3 )
#define tagArray1 ( blwfFeatureMask | tagArray2 )
#define tagArray0 ( rphfFeatureMask | tagArray1 )
517 static const FeatureMap featureMap
[] = {
518 {loclFeatureTag
, loclFeatureMask
},
519 {initFeatureTag
, initFeatureMask
},
520 {nuktFeatureTag
, nuktFeatureMask
},
521 {akhnFeatureTag
, akhnFeatureMask
},
522 {rphfFeatureTag
, rphfFeatureMask
},
523 {blwfFeatureTag
, blwfFeatureMask
},
524 {halfFeatureTag
, halfFeatureMask
},
525 {pstfFeatureTag
, pstfFeatureMask
},
526 {vatuFeatureTag
, vatuFeatureMask
},
527 {presFeatureTag
, presFeatureMask
},
528 {blwsFeatureTag
, blwsFeatureMask
},
529 {abvsFeatureTag
, abvsFeatureMask
},
530 {pstsFeatureTag
, pstsFeatureMask
},
531 {halnFeatureTag
, halnFeatureMask
},
532 {blwmFeatureTag
, blwmFeatureMask
},
533 {abvmFeatureTag
, abvmFeatureMask
},
534 {distFeatureTag
, distFeatureMask
}
537 static const le_int32 featureCount
= LE_ARRAY_SIZE(featureMap
);
539 static const FeatureMap v2FeatureMap
[] = {
540 {loclFeatureTag
, loclFeatureMask
},
541 {nuktFeatureTag
, nuktFeatureMask
},
542 {akhnFeatureTag
, akhnFeatureMask
},
543 {rphfFeatureTag
, rphfFeatureMask
},
544 {rkrfFeatureTag
, rkrfFeatureMask
},
545 {blwfFeatureTag
, blwfFeatureMask
},
546 {halfFeatureTag
, halfFeatureMask
},
547 {vatuFeatureTag
, vatuFeatureMask
},
548 {cjctFeatureTag
, cjctFeatureMask
},
549 {presFeatureTag
, presFeatureMask
},
550 {abvsFeatureTag
, abvsFeatureMask
},
551 {blwsFeatureTag
, blwsFeatureMask
},
552 {pstsFeatureTag
, pstsFeatureMask
},
553 {halnFeatureTag
, halnFeatureMask
},
554 {caltFeatureTag
, caltFeatureMask
},
555 {kernFeatureTag
, kernFeatureMask
},
556 {distFeatureTag
, distFeatureMask
},
557 {abvmFeatureTag
, abvmFeatureMask
},
558 {blwmFeatureTag
, blwmFeatureMask
}
561 static const le_int32 v2FeatureMapCount
= LE_ARRAY_SIZE(v2FeatureMap
);
563 static const le_int8 stateTable
[][CC_COUNT
] =
565 // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al
566 { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state
567 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
568 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta
569 {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant
570 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama
571 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
572 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
573 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
574 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama
575 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel
576 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel
577 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv
578 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
579 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant
583 const FeatureMap
*IndicReordering::getFeatureMap(le_int32
&count
)
585 count
= featureCount
;
590 const FeatureMap
*IndicReordering::getv2FeatureMap(le_int32
&count
)
592 count
= v2FeatureMapCount
;
597 le_int32
IndicReordering::findSyllable(const IndicClassTable
*classTable
, const LEUnicode
*chars
, le_int32 prev
, le_int32 charCount
)
599 le_int32 cursor
= prev
;
601 le_int8 consonant_count
= 0;
603 while (cursor
< charCount
) {
604 IndicClassTable::CharClass charClass
= classTable
->getCharClass(chars
[cursor
]);
606 if ( IndicClassTable::isConsonant(charClass
) ) {
608 if ( consonant_count
> MAX_CONSONANTS_PER_SYLLABLE
) {
613 state
= stateTable
[state
][charClass
& CF_CLASS_MASK
];
625 le_int32
IndicReordering::reorder(const LEUnicode
*chars
, le_int32 charCount
, le_int32 scriptCode
,
626 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
,
627 MPreFixups
**outMPreFixups
, LEErrorCode
& success
)
629 if (LE_FAILURE(success
)) {
633 MPreFixups
*mpreFixups
= NULL
;
634 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
636 if (classTable
->scriptFlags
& SF_MPRE_FIXUP
) {
637 mpreFixups
= new MPreFixups(charCount
);
638 if (mpreFixups
== NULL
) {
639 success
= LE_MEMORY_ALLOCATION_ERROR
;
644 IndicReorderingOutput
output(outChars
, glyphStorage
, mpreFixups
);
645 le_int32 i
, prev
= 0;
646 le_bool lastInWord
= FALSE
;
648 while (prev
< charCount
) {
649 le_int32 syllable
= findSyllable(classTable
, chars
, prev
, charCount
);
650 le_int32 matra
, markStart
= syllable
;
654 if (classTable
->isStressMark(chars
[markStart
- 1])) {
656 output
.noteStressMark(classTable
, chars
[markStart
], markStart
, tagArray1
);
659 if (markStart
!= prev
&& classTable
->isVowelModifier(chars
[markStart
- 1])) {
661 output
.noteVowelModifier(classTable
, chars
[markStart
], markStart
, tagArray1
);
664 matra
= markStart
- 1;
666 while (output
.noteMatra(classTable
, chars
[matra
], matra
, tagArray1
, !lastInWord
) && matra
!= prev
) {
672 switch (classTable
->getCharClass(chars
[prev
]) & CF_CLASS_MASK
) {
677 case CC_INDEPENDENT_VOWEL
:
678 case CC_ZERO_WIDTH_MARK
:
679 for (i
= prev
; i
< syllable
; i
+= 1) {
680 output
.writeChar(chars
[i
], i
, tagArray1
);
687 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
688 output
.writeChar(chars
[prev
], prev
, tagArray1
);
692 // A lone virama is illegal unless it follows a
693 // MALAYALAM_VOWEL_SIGN_U. Such a usage is called
695 if (chars
[prev
- 1] != C_MALAYALAM_VOWEL_SIGN_U
) {
696 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
699 output
.writeChar(chars
[prev
], prev
, tagArray1
);
702 case CC_DEPENDENT_VOWEL
:
703 case CC_SPLIT_VOWEL_PIECE_1
:
704 case CC_SPLIT_VOWEL_PIECE_2
:
705 case CC_SPLIT_VOWEL_PIECE_3
:
706 case CC_VOWEL_MODIFIER
:
710 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
712 output
.writeMbelow();
713 output
.writeSMbelow();
714 output
.writeMabove();
716 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) != 0) {
720 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) != 0) {
721 output
.writeVMabove();
722 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
725 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
729 output
.writeLengthMark();
730 output
.writeAlLakuna();
732 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) == 0) {
733 output
.writeVMabove();
734 output
.writeSMabove();
737 output
.writeVMpost();
740 case CC_INDEPENDENT_VOWEL_2
:
741 case CC_INDEPENDENT_VOWEL_3
:
743 case CC_CONSONANT_WITH_NUKTA
:
745 le_uint32 length
= markStart
- prev
;
746 le_int32 lastConsonant
= markStart
- 1;
747 le_int32 baseLimit
= prev
;
749 // Check for REPH at front of syllable
750 if (length
> 2 && classTable
->isReph(chars
[prev
]) && classTable
->isVirama(chars
[prev
+ 1]) && chars
[prev
+ 2] != C_SIGN_ZWNJ
) {
753 // Check for eyelash RA, if the script supports it
754 if ((classTable
->scriptFlags
& SF_EYELASH_RA
) != 0 &&
755 chars
[baseLimit
] == C_SIGN_ZWJ
) {
764 while (lastConsonant
> baseLimit
&& !classTable
->isConsonant(chars
[lastConsonant
])) {
769 IndicClassTable::CharClass charClass
= CC_RESERVED
;
770 IndicClassTable::CharClass nextClass
= CC_RESERVED
;
771 le_int32 baseConsonant
= lastConsonant
;
772 le_int32 postBase
= lastConsonant
+ 1;
773 le_int32 postBaseLimit
= classTable
->scriptFlags
& SF_POST_BASE_LIMIT_MASK
;
774 le_bool seenVattu
= FALSE
;
775 le_bool seenBelowBaseForm
= FALSE
;
776 le_bool seenPreBaseForm
= FALSE
;
777 le_bool hasNukta
= FALSE
;
778 le_bool hasBelowBaseForm
= FALSE
;
779 le_bool hasPostBaseForm
= FALSE
;
780 le_bool hasPreBaseForm
= FALSE
;
782 if (postBase
< markStart
&& classTable
->isNukta(chars
[postBase
])) {
783 charClass
= CC_NUKTA
;
787 while (baseConsonant
> baseLimit
) {
788 nextClass
= charClass
;
789 hasNukta
= IndicClassTable::isNukta(nextClass
);
790 charClass
= classTable
->getCharClass(chars
[baseConsonant
]);
792 hasBelowBaseForm
= IndicClassTable::hasBelowBaseForm(charClass
) && !hasNukta
;
793 hasPostBaseForm
= IndicClassTable::hasPostBaseForm(charClass
) && !hasNukta
;
794 hasPreBaseForm
= IndicClassTable::hasPreBaseForm(charClass
) && !hasNukta
;
796 if (IndicClassTable::isConsonant(charClass
)) {
797 if (postBaseLimit
== 0 || seenVattu
||
798 (baseConsonant
> baseLimit
&& !classTable
->isVirama(chars
[baseConsonant
- 1])) ||
799 !(hasBelowBaseForm
|| hasPostBaseForm
|| hasPreBaseForm
)) {
803 // Note any pre-base consonants
804 if ( baseConsonant
== lastConsonant
&& lastConsonant
> 0 &&
805 hasPreBaseForm
&& classTable
->isVirama(chars
[baseConsonant
- 1])) {
806 output
.notePreBaseConsonant(lastConsonant
,chars
[lastConsonant
],chars
[lastConsonant
-1],tagArray2
);
807 seenPreBaseForm
= TRUE
;
810 // consonants with nuktas are never vattus
811 seenVattu
= IndicClassTable::isVattu(charClass
) && !hasNukta
;
813 // consonants with nuktas never have below- or post-base forms
814 if (hasPostBaseForm
) {
815 if (seenBelowBaseForm
) {
819 postBase
= baseConsonant
;
820 } else if (hasBelowBaseForm
) {
821 seenBelowBaseForm
= TRUE
;
834 // NOTE: baseLimit == prev + 3 iff eyelash RA present...
835 if (baseLimit
== prev
+ 3) {
836 output
.writeChar(chars
[prev
], prev
, tagArray2
);
837 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray2
);
838 output
.writeChar(chars
[prev
+ 2], prev
+ 2, tagArray2
);
841 // write any pre-base consonants
842 output
.writePreBaseConsonant();
844 le_bool supressVattu
= TRUE
;
846 for (i
= baseLimit
; i
< baseConsonant
; i
+= 1) {
847 LEUnicode ch
= chars
[i
];
848 // Don't put 'pstf' or 'blwf' on anything before the base consonant.
849 FeatureMask features
= tagArray1
& ~( pstfFeatureMask
| blwfFeatureMask
);
851 charClass
= classTable
->getCharClass(ch
);
852 nextClass
= classTable
->getCharClass(chars
[i
+ 1]);
853 hasNukta
= IndicClassTable::isNukta(nextClass
);
855 if (IndicClassTable::isConsonant(charClass
)) {
856 if (IndicClassTable::isVattu(charClass
) && !hasNukta
&& supressVattu
) {
857 features
= tagArray4
;
860 supressVattu
= IndicClassTable::isVattu(charClass
) && !hasNukta
;
861 } else if (IndicClassTable::isVirama(charClass
) && chars
[i
+ 1] == C_SIGN_ZWNJ
)
863 features
= tagArray4
;
866 output
.writeChar(ch
, i
, features
);
869 le_int32 bcSpan
= baseConsonant
+ 1;
871 if (bcSpan
< markStart
&& classTable
->isNukta(chars
[bcSpan
])) {
875 if (baseConsonant
== lastConsonant
&& bcSpan
< markStart
&&
876 (classTable
->isVirama(chars
[bcSpan
]) || classTable
->isAlLakuna(chars
[bcSpan
]))) {
879 if (bcSpan
< markStart
&& chars
[bcSpan
] == C_SIGN_ZWNJ
) {
884 // note the base consonant for post-GSUB fixups
885 output
.noteBaseConsonant();
887 // write base consonant
888 for (i
= baseConsonant
; i
< bcSpan
; i
+= 1) {
889 output
.writeChar(chars
[i
], i
, tagArray4
);
892 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) != 0) {
893 output
.writeMbelow();
894 output
.writeSMbelow(); // FIXME: there are no SMs in these scripts...
895 output
.writeMabove();
899 // write below-base consonants
900 if (baseConsonant
!= lastConsonant
&& !seenPreBaseForm
) {
901 for (i
= bcSpan
+ 1; i
< postBase
; i
+= 1) {
902 output
.writeChar(chars
[i
], i
, tagArray1
);
905 if (postBase
> lastConsonant
) {
906 // write halant that was after base consonant
907 output
.writeChar(chars
[bcSpan
], bcSpan
, tagArray1
);
911 // write Mbelow, SMbelow, Mabove
912 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
913 output
.writeMbelow();
914 output
.writeSMbelow();
915 output
.writeMabove();
918 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) != 0) {
919 if (baseLimit
== prev
+ 2) {
920 output
.writeChar(chars
[prev
], prev
, tagArray0
);
921 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray0
);
924 output
.writeVMabove();
925 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
928 // write post-base consonants
929 // FIXME: does this put the right tags on post-base consonants?
930 if (baseConsonant
!= lastConsonant
&& !seenPreBaseForm
) {
931 if (postBase
<= lastConsonant
) {
932 for (i
= postBase
; i
<= lastConsonant
; i
+= 1) {
933 output
.writeChar(chars
[i
], i
, tagArray3
);
936 // write halant that was after base consonant
937 output
.writeChar(chars
[bcSpan
], bcSpan
, tagArray1
);
940 // write the trailing halant, if there is one
941 if (lastConsonant
< matra
&& classTable
->isVirama(chars
[matra
])) {
942 output
.writeChar(chars
[matra
], matra
, tagArray4
);
947 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
951 output
.writeLengthMark();
952 output
.writeAlLakuna();
955 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) == 0) {
956 if (baseLimit
== prev
+ 2) {
957 output
.writeChar(chars
[prev
], prev
, tagArray0
);
958 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray0
);
961 output
.writeVMabove();
962 output
.writeSMabove();
965 output
.writeVMpost();
977 *outMPreFixups
= mpreFixups
;
979 return output
.getOutputIndex();
982 void IndicReordering::adjustMPres(MPreFixups
*mpreFixups
, LEGlyphStorage
&glyphStorage
, LEErrorCode
& success
)
984 if (mpreFixups
!= NULL
) {
985 mpreFixups
->apply(glyphStorage
, success
);
991 void IndicReordering::applyPresentationForms(LEGlyphStorage
&glyphStorage
, le_int32 count
)
993 LEErrorCode success
= LE_NO_ERROR
;
995 // This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the
996 // GPOS table lookups
998 for ( le_int32 i
= 0 ; i
< count
; i
++ ) {
999 glyphStorage
.setAuxData(i
, ( presentationFormsMask
| positioningFormsMask
), success
);
1003 void IndicReordering::finalReordering(LEGlyphStorage
&glyphStorage
, le_int32 count
)
1005 LEErrorCode success
= LE_NO_ERROR
;
1007 // Reposition REPH as appropriate
1009 for ( le_int32 i
= 0 ; i
< count
; i
++ ) {
1011 le_int32 tmpAuxData
= glyphStorage
.getAuxData(i
,success
);
1012 LEGlyphID tmpGlyph
= glyphStorage
.getGlyphID(i
,success
);
1014 if ( ( tmpGlyph
!= NO_GLYPH
) && (tmpAuxData
& rephConsonantMask
) && !(tmpAuxData
& repositionedGlyphMask
)) {
1016 le_bool targetPositionFound
= false;
1017 le_int32 targetPosition
= i
+1;
1018 le_int32 baseConsonantData
;
1020 while (!targetPositionFound
) {
1021 tmpGlyph
= glyphStorage
.getGlyphID(targetPosition
,success
);
1022 tmpAuxData
= glyphStorage
.getAuxData(targetPosition
,success
);
1024 if ( tmpAuxData
& baseConsonantMask
) {
1025 baseConsonantData
= tmpAuxData
;
1026 targetPositionFound
= true;
1032 // Make sure we are not putting the reph into an empty hole
1034 le_bool targetPositionHasGlyph
= false;
1035 while (!targetPositionHasGlyph
) {
1036 tmpGlyph
= glyphStorage
.getGlyphID(targetPosition
,success
);
1037 if ( tmpGlyph
!= NO_GLYPH
) {
1038 targetPositionHasGlyph
= true;
1044 // Make sure that REPH is positioned after any above base or post base matras
1046 le_bool checkMatraDone
= false;
1047 le_int32 checkMatraPosition
= targetPosition
+1;
1048 while ( !checkMatraDone
) {
1049 tmpAuxData
= glyphStorage
.getAuxData(checkMatraPosition
,success
);
1050 if ( checkMatraPosition
>= count
|| ( (tmpAuxData
^ baseConsonantData
) & LE_GLYPH_GROUP_MASK
)) {
1051 checkMatraDone
= true;
1054 if ( (tmpAuxData
& matraMask
) &&
1055 (((tmpAuxData
& markPositionMask
) == aboveBasePosition
) ||
1056 ((tmpAuxData
& markPositionMask
) == postBasePosition
))) {
1057 targetPosition
= checkMatraPosition
;
1059 checkMatraPosition
++;
1062 glyphStorage
.moveGlyph(i
,targetPosition
,repositionedGlyphMask
);
1068 le_int32
IndicReordering::v2process(const LEUnicode
*chars
, le_int32 charCount
, le_int32 scriptCode
,
1069 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
)
1071 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
1073 DynamicProperties dynProps
[INDIC_BLOCK_SIZE
];
1074 IndicReordering::getDynamicProperties(dynProps
,classTable
);
1076 IndicReorderingOutput
output(outChars
, glyphStorage
, NULL
);
1077 le_int32 i
, firstConsonant
, baseConsonant
, secondConsonant
, inv_count
= 0, beginSyllable
= 0;
1078 //le_bool lastInWord = FALSE;
1080 while (beginSyllable
< charCount
) {
1081 le_int32 nextSyllable
= findSyllable(classTable
, chars
, beginSyllable
, charCount
);
1085 // Find the First Consonant
1086 for ( firstConsonant
= beginSyllable
; firstConsonant
< nextSyllable
; firstConsonant
++ ) {
1087 if ( classTable
->isConsonant(chars
[firstConsonant
]) ) {
1092 // Find the base consonant
1094 baseConsonant
= nextSyllable
- 1;
1095 secondConsonant
= firstConsonant
;
1097 // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm()
1099 while ( baseConsonant
> firstConsonant
) {
1100 if ( classTable
->isConsonant(chars
[baseConsonant
]) &&
1101 !classTable
->hasBelowBaseForm(chars
[baseConsonant
]) &&
1102 !classTable
->hasPostBaseForm(chars
[baseConsonant
]) ) {
1106 if ( classTable
->isConsonant(chars
[baseConsonant
]) ) {
1107 secondConsonant
= baseConsonant
;
1113 // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one
1114 // consonant, Ra is excluded from candidates for base consonants
1116 if ( classTable
->isReph(chars
[beginSyllable
]) &&
1117 beginSyllable
+1 < nextSyllable
&& classTable
->isVirama(chars
[beginSyllable
+1]) &&
1118 secondConsonant
!= firstConsonant
) {
1119 baseConsonant
= secondConsonant
;
1122 // Populate the output
1123 for ( i
= beginSyllable
; i
< nextSyllable
; i
++ ) {
1125 // Handle invalid combinations
1127 if ( classTable
->isVirama(chars
[beginSyllable
]) ||
1128 classTable
->isMatra(chars
[beginSyllable
]) ||
1129 classTable
->isVowelModifier(chars
[beginSyllable
]) ||
1130 classTable
->isNukta(chars
[beginSyllable
]) ) {
1131 output
.writeChar(C_DOTTED_CIRCLE
,beginSyllable
,basicShapingFormsMask
);
1134 output
.writeChar(chars
[i
],i
, basicShapingFormsMask
);
1138 // Adjust features and set syllable structure bits
1140 for ( i
= beginSyllable
; i
< nextSyllable
; i
++ ) {
1142 FeatureMask outMask
= output
.getFeatures(i
+inv_count
);
1143 FeatureMask saveMask
= outMask
;
1145 // Since reph can only validly occur at the beginning of a syllable
1146 // We only apply it to the first 2 characters in the syllable, to keep it from
1147 // conflicting with other features ( i.e. rkrf )
1149 // TODO : Use the dynamic property for determining isREPH
1150 if ( i
== beginSyllable
&& i
< baseConsonant
&& classTable
->isReph(chars
[i
]) &&
1151 i
+1 < nextSyllable
&& classTable
->isVirama(chars
[i
+1])) {
1152 outMask
|= rphfFeatureMask
;
1153 outMask
|= rephConsonantMask
;
1154 output
.setFeatures(i
+1+inv_count
,outMask
);
1158 if ( i
== baseConsonant
) {
1159 outMask
|= baseConsonantMask
;
1162 if ( classTable
->isMatra(chars
[i
])) {
1163 outMask
|= matraMask
;
1164 if ( classTable
->hasAboveBaseForm(chars
[i
])) {
1165 outMask
|= aboveBasePosition
;
1166 } else if ( classTable
->hasBelowBaseForm(chars
[i
])) {
1167 outMask
|= belowBasePosition
;
1171 // Don't apply half form to virama that stands alone at the end of a syllable
1172 // to prevent half forms from forming when syllable ends with virama
1174 if ( classTable
->isVirama(chars
[i
]) && (i
+1 == nextSyllable
) ) {
1175 outMask
^= halfFeatureMask
;
1176 if ( classTable
->isConsonant(chars
[i
-1]) ) {
1177 FeatureMask tmp
= output
.getFeatures(i
-1+inv_count
);
1178 tmp
^= halfFeatureMask
;
1179 output
.setFeatures(i
-1+inv_count
,tmp
);
1183 if ( outMask
!= saveMask
) {
1184 output
.setFeatures(i
+inv_count
,outMask
);
1188 output
.decomposeReorderMatras(classTable
,beginSyllable
,nextSyllable
,inv_count
);
1190 beginSyllable
= nextSyllable
;
1194 return output
.getOutputIndex();
1198 void IndicReordering::getDynamicProperties( DynamicProperties
*, const IndicClassTable
*classTable
) {
1201 LEUnicode currentChar
;
1202 LEUnicode workChars
[2];
1203 LEGlyphStorage workGlyphs
;
1205 IndicReorderingOutput
workOutput(workChars
, workGlyphs
, NULL
);
1207 //le_int32 offset = 0;
1210 // TODO: Should this section of code have actually been doing something?
1211 // First find the relevant virama for the script we are dealing with
1213 for ( currentChar
= classTable
->firstChar
; currentChar
<= classTable
->lastChar
; currentChar
++ ) {
1214 if ( classTable
->isVirama(currentChar
)) {
1215 virama
= currentChar
;
1221 for ( currentChar
= classTable
->firstChar
; currentChar
<= classTable
->lastChar
; currentChar
++ ) {
1222 if ( classTable
->isConsonant(currentChar
)) {