3 * (C) Copyright IBM Corp. 1998-2013 - All Rights Reserved
8 #include "OpenTypeTables.h"
9 #include "OpenTypeUtilities.h"
10 #include "IndicReordering.h"
11 #include "LEGlyphStorage.h"
12 #include "MPreFixups.h"
16 #define loclFeatureTag LE_LOCL_FEATURE_TAG
17 #define initFeatureTag LE_INIT_FEATURE_TAG
18 #define nuktFeatureTag LE_NUKT_FEATURE_TAG
19 #define akhnFeatureTag LE_AKHN_FEATURE_TAG
20 #define rphfFeatureTag LE_RPHF_FEATURE_TAG
21 #define rkrfFeatureTag LE_RKRF_FEATURE_TAG
22 #define blwfFeatureTag LE_BLWF_FEATURE_TAG
23 #define halfFeatureTag LE_HALF_FEATURE_TAG
24 #define pstfFeatureTag LE_PSTF_FEATURE_TAG
25 #define vatuFeatureTag LE_VATU_FEATURE_TAG
26 #define presFeatureTag LE_PRES_FEATURE_TAG
27 #define blwsFeatureTag LE_BLWS_FEATURE_TAG
28 #define abvsFeatureTag LE_ABVS_FEATURE_TAG
29 #define pstsFeatureTag LE_PSTS_FEATURE_TAG
30 #define halnFeatureTag LE_HALN_FEATURE_TAG
31 #define cjctFeatureTag LE_CJCT_FEATURE_TAG
32 #define blwmFeatureTag LE_BLWM_FEATURE_TAG
33 #define abvmFeatureTag LE_ABVM_FEATURE_TAG
34 #define distFeatureTag LE_DIST_FEATURE_TAG
35 #define caltFeatureTag LE_CALT_FEATURE_TAG
36 #define kernFeatureTag LE_KERN_FEATURE_TAG
38 #define loclFeatureMask 0x80000000UL
39 #define rphfFeatureMask 0x40000000UL
40 #define blwfFeatureMask 0x20000000UL
41 #define halfFeatureMask 0x10000000UL
42 #define pstfFeatureMask 0x08000000UL
43 #define nuktFeatureMask 0x04000000UL
44 #define akhnFeatureMask 0x02000000UL
45 #define vatuFeatureMask 0x01000000UL
46 #define presFeatureMask 0x00800000UL
47 #define blwsFeatureMask 0x00400000UL
48 #define abvsFeatureMask 0x00200000UL
49 #define pstsFeatureMask 0x00100000UL
50 #define halnFeatureMask 0x00080000UL
51 #define blwmFeatureMask 0x00040000UL
52 #define abvmFeatureMask 0x00020000UL
53 #define distFeatureMask 0x00010000UL
54 #define initFeatureMask 0x00008000UL
55 #define cjctFeatureMask 0x00004000UL
56 #define rkrfFeatureMask 0x00002000UL
57 #define caltFeatureMask 0x00001000UL
58 #define kernFeatureMask 0x00000800UL
60 // Syllable structure bits
61 #define baseConsonantMask 0x00000400UL
62 #define consonantMask 0x00000200UL
63 #define halfConsonantMask 0x00000100UL
64 #define rephConsonantMask 0x00000080UL
65 #define matraMask 0x00000040UL
66 #define vowelModifierMask 0x00000020UL
67 #define markPositionMask 0x00000018UL
69 #define postBasePosition 0x00000000UL
70 #define preBasePosition 0x00000008UL
71 #define aboveBasePosition 0x00000010UL
72 #define belowBasePosition 0x00000018UL
74 #define repositionedGlyphMask 0x00000002UL
76 #define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask )
77 #define positioningFormsMask ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask )
78 #define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask )
81 #define C_MALAYALAM_VOWEL_SIGN_U 0x0D41
82 #define C_DOTTED_CIRCLE 0x25CC
83 #define NO_GLYPH 0xFFFF
85 // Some level of debate as to the proper value for MAX_CONSONANTS_PER_SYLLABLE. Ticket 5588 states that 4
86 // is the magic number according to ISCII, but 5 seems to be the more consistent with XP.
87 #define MAX_CONSONANTS_PER_SYLLABLE 5
89 #define INDIC_BLOCK_SIZE 0x7F
91 class IndicReorderingOutput
: public UMemory
{
93 le_int32 fSyllableCount
;
97 LEGlyphStorage
&fGlyphStorage
;
103 le_int32 fMbelowIndex
;
106 le_int32 fMaboveIndex
;
109 le_int32 fMpostIndex
;
111 LEUnicode fLengthMark
;
112 le_int32 fLengthMarkIndex
;
115 le_int32 fAlLakunaIndex
;
117 FeatureMask fMatraFeatures
;
119 le_int32 fMPreOutIndex
;
120 MPreFixups
*fMPreFixups
;
125 FeatureMask fVMFeatures
;
130 FeatureMask fSMFeatures
;
132 LEUnicode fPreBaseConsonant
;
133 LEUnicode fPreBaseVirama
;
135 FeatureMask fPBCFeatures
;
137 void saveMatra(LEUnicode matra
, le_int32 matraIndex
, IndicClassTable::CharClass matraClass
)
139 // FIXME: check if already set, or if not a matra...
140 if (IndicClassTable::isLengthMark(matraClass
)) {
142 fLengthMarkIndex
= matraIndex
;
143 } else if (IndicClassTable::isAlLakuna(matraClass
)) {
145 fAlLakunaIndex
= matraIndex
;
147 switch (matraClass
& CF_POS_MASK
) {
150 fMpreIndex
= matraIndex
;
155 fMbelowIndex
= matraIndex
;
160 fMaboveIndex
= matraIndex
;
165 fMpostIndex
= matraIndex
;
176 IndicReorderingOutput(LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
, MPreFixups
*mpreFixups
)
177 : fSyllableCount(0), fOutIndex(0), fOutChars(outChars
), fGlyphStorage(glyphStorage
),
178 fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
179 fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0),
180 fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups
),
181 fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0),
182 fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0),
183 fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0)
185 // nothing else to do...
188 ~IndicReorderingOutput()
190 // nothing to do here...
197 fMpre
= fMbelow
= fMabove
= fMpost
= fLengthMark
= fAlLakuna
= 0;
200 fVMabove
= fVMpost
= 0;
201 fSMabove
= fSMbelow
= 0;
203 fPreBaseConsonant
= fPreBaseVirama
= 0;
206 void writeChar(LEUnicode ch
, le_uint32 charIndex
, FeatureMask charFeatures
)
208 LEErrorCode success
= LE_NO_ERROR
;
210 fOutChars
[fOutIndex
] = ch
;
212 fGlyphStorage
.setCharIndex(fOutIndex
, charIndex
, success
);
213 fGlyphStorage
.setAuxData(fOutIndex
, charFeatures
| (fSyllableCount
& LE_GLYPH_GROUP_MASK
), success
);
218 void setFeatures ( le_uint32 charIndex
, FeatureMask charFeatures
)
220 LEErrorCode success
= LE_NO_ERROR
;
222 fGlyphStorage
.setAuxData( charIndex
, charFeatures
, success
);
226 FeatureMask
getFeatures ( le_uint32 charIndex
)
228 LEErrorCode success
= LE_NO_ERROR
;
229 return fGlyphStorage
.getAuxData(charIndex
,success
);
232 void decomposeReorderMatras ( const IndicClassTable
*classTable
, le_int32 beginSyllable
, le_int32 nextSyllable
, le_int32 inv_count
) {
234 LEErrorCode success
= LE_NO_ERROR
;
236 for ( i
= beginSyllable
; i
< nextSyllable
; i
++ ) {
237 if ( classTable
->isMatra(fOutChars
[i
+inv_count
])) {
238 IndicClassTable::CharClass matraClass
= classTable
->getCharClass(fOutChars
[i
+inv_count
]);
239 if ( classTable
->isSplitMatra(matraClass
)) {
240 le_int32 saveIndex
= fGlyphStorage
.getCharIndex(i
+inv_count
,success
);
241 le_uint32 saveAuxData
= fGlyphStorage
.getAuxData(i
+inv_count
,success
);
242 const SplitMatra
*splitMatra
= classTable
->getSplitMatra(matraClass
);
244 for (j
= 0 ; j
< SM_MAX_PIECES
&& *(splitMatra
)[j
] != 0 ; j
++) {
245 LEUnicode piece
= (*splitMatra
)[j
];
247 fOutChars
[i
+inv_count
] = piece
;
248 matraClass
= classTable
->getCharClass(piece
);
250 insertCharacter(piece
,i
+1+inv_count
,saveIndex
,saveAuxData
);
256 if ((matraClass
& CF_POS_MASK
) == CF_POS_BEFORE
) {
257 moveCharacter(i
+inv_count
,beginSyllable
+inv_count
);
263 void moveCharacter( le_int32 fromPosition
, le_int32 toPosition
) {
264 le_int32 i
,saveIndex
;
265 le_uint32 saveAuxData
;
266 LEUnicode saveChar
= fOutChars
[fromPosition
];
267 LEErrorCode success
= LE_NO_ERROR
;
268 LEErrorCode success2
= LE_NO_ERROR
;
269 saveIndex
= fGlyphStorage
.getCharIndex(fromPosition
,success
);
270 saveAuxData
= fGlyphStorage
.getAuxData(fromPosition
,success
);
272 if ( fromPosition
> toPosition
) {
273 for ( i
= fromPosition
; i
> toPosition
; i
-- ) {
274 fOutChars
[i
] = fOutChars
[i
-1];
275 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
-1,success2
),success
);
276 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
-1,success2
), success
);
280 for ( i
= fromPosition
; i
< toPosition
; i
++ ) {
281 fOutChars
[i
] = fOutChars
[i
+1];
282 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
+1,success2
),success
);
283 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
+1,success2
), success
);
287 fOutChars
[toPosition
] = saveChar
;
288 fGlyphStorage
.setCharIndex(toPosition
,saveIndex
,success
);
289 fGlyphStorage
.setAuxData(toPosition
,saveAuxData
,success
);
292 void insertCharacter( LEUnicode ch
, le_int32 toPosition
, le_int32 charIndex
, le_uint32 auxData
) {
293 LEErrorCode success
= LE_NO_ERROR
;
297 for ( i
= fOutIndex
; i
> toPosition
; i
--) {
298 fOutChars
[i
] = fOutChars
[i
-1];
299 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
-1,success
),success
);
300 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
-1,success
), success
);
303 fOutChars
[toPosition
] = ch
;
304 fGlyphStorage
.setCharIndex(toPosition
,charIndex
,success
);
305 fGlyphStorage
.setAuxData(toPosition
,auxData
,success
);
308 void removeCharacter( le_int32 fromPosition
) {
309 LEErrorCode success
= LE_NO_ERROR
;
313 for ( i
= fromPosition
; i
< fOutIndex
; i
--) {
314 fOutChars
[i
] = fOutChars
[i
+1];
315 fGlyphStorage
.setCharIndex(i
,fGlyphStorage
.getCharIndex(i
+1,success
),success
);
316 fGlyphStorage
.setAuxData(i
,fGlyphStorage
.getAuxData(i
+1,success
), success
);
320 le_bool
noteMatra(const IndicClassTable
*classTable
, LEUnicode matra
, le_uint32 matraIndex
, FeatureMask matraFeatures
, le_bool wordStart
)
322 IndicClassTable::CharClass matraClass
= classTable
->getCharClass(matra
);
324 fMatraFeatures
= matraFeatures
;
327 fMatraFeatures
|= initFeatureMask
;
330 if (IndicClassTable::isMatra(matraClass
)) {
331 if (IndicClassTable::isSplitMatra(matraClass
)) {
332 const SplitMatra
*splitMatra
= classTable
->getSplitMatra(matraClass
);
335 for (i
= 0; i
< SM_MAX_PIECES
&& (*splitMatra
)[i
] != 0; i
+= 1) {
336 LEUnicode piece
= (*splitMatra
)[i
];
337 IndicClassTable::CharClass pieceClass
= classTable
->getCharClass(piece
);
339 saveMatra(piece
, matraIndex
, pieceClass
);
342 saveMatra(matra
, matraIndex
, matraClass
);
351 void noteVowelModifier(const IndicClassTable
*classTable
, LEUnicode vowelModifier
, le_uint32 vowelModifierIndex
, FeatureMask vowelModifierFeatures
)
353 IndicClassTable::CharClass vmClass
= classTable
->getCharClass(vowelModifier
);
355 fVMIndex
= vowelModifierIndex
;
356 fVMFeatures
= vowelModifierFeatures
;
358 if (IndicClassTable::isVowelModifier(vmClass
)) {
359 switch (vmClass
& CF_POS_MASK
) {
361 fVMabove
= vowelModifier
;
365 fVMpost
= vowelModifier
;
369 // FIXME: this is an error...
375 void noteStressMark(const IndicClassTable
*classTable
, LEUnicode stressMark
, le_uint32 stressMarkIndex
, FeatureMask stressMarkFeatures
)
377 IndicClassTable::CharClass smClass
= classTable
->getCharClass(stressMark
);
379 fSMIndex
= stressMarkIndex
;
380 fSMFeatures
= stressMarkFeatures
;
382 if (IndicClassTable::isStressMark(smClass
)) {
383 switch (smClass
& CF_POS_MASK
) {
385 fSMabove
= stressMark
;
389 fSMbelow
= stressMark
;
393 // FIXME: this is an error...
399 void notePreBaseConsonant(le_uint32 index
,LEUnicode PBConsonant
, LEUnicode PBVirama
, FeatureMask features
)
402 fPreBaseConsonant
= PBConsonant
;
403 fPreBaseVirama
= PBVirama
;
404 fPBCFeatures
= features
;
407 void noteBaseConsonant()
409 if (fMPreFixups
!= NULL
&& fMPreOutIndex
>= 0) {
410 fMPreFixups
->add(fOutIndex
, fMPreOutIndex
);
414 // Handles Al-Lakuna in Sinhala split vowels.
417 if (fAlLakuna
!= 0) {
418 writeChar(fAlLakuna
, fAlLakunaIndex
, fMatraFeatures
);
425 fMPreOutIndex
= fOutIndex
;
426 writeChar(fMpre
, fMpreIndex
, fMatraFeatures
);
433 writeChar(fMbelow
, fMbelowIndex
, fMatraFeatures
);
440 writeChar(fMabove
, fMaboveIndex
, fMatraFeatures
);
447 writeChar(fMpost
, fMpostIndex
, fMatraFeatures
);
451 void writeLengthMark()
453 if (fLengthMark
!= 0) {
454 writeChar(fLengthMark
, fLengthMarkIndex
, fMatraFeatures
);
461 writeChar(fVMabove
, fVMIndex
, fVMFeatures
);
468 writeChar(fVMpost
, fVMIndex
, fVMFeatures
);
475 writeChar(fSMabove
, fSMIndex
, fSMFeatures
);
482 writeChar(fSMbelow
, fSMIndex
, fSMFeatures
);
486 void writePreBaseConsonant()
488 // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However,
489 // it seems that almost none of the fonts for Malayalam are set up to handle this.
490 // So, we're going to force the issue here by using the rakar as defined with RA in most fonts.
492 if (fPreBaseConsonant
== 0x0d31) { // RRA
493 fPreBaseConsonant
= 0x0d30; // RA
496 if (fPreBaseConsonant
!= 0) {
497 writeChar(fPreBaseConsonant
, fPBCIndex
, fPBCFeatures
);
498 writeChar(fPreBaseVirama
,fPBCIndex
-1,fPBCFeatures
);
502 le_int32
getOutputIndex()
510 // TODO: Find better names for these!
511 #define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask)
512 #define tagArray3 (pstfFeatureMask | tagArray4)
513 #define tagArray2 (halfFeatureMask | tagArray3)
514 #define tagArray1 (blwfFeatureMask | tagArray2)
515 #define tagArray0 (rphfFeatureMask | tagArray1)
517 static const FeatureMap featureMap
[] = {
518 {loclFeatureTag
, loclFeatureMask
},
519 {initFeatureTag
, initFeatureMask
},
520 {nuktFeatureTag
, nuktFeatureMask
},
521 {akhnFeatureTag
, akhnFeatureMask
},
522 {rphfFeatureTag
, rphfFeatureMask
},
523 {blwfFeatureTag
, blwfFeatureMask
},
524 {halfFeatureTag
, halfFeatureMask
},
525 {pstfFeatureTag
, pstfFeatureMask
},
526 {vatuFeatureTag
, vatuFeatureMask
},
527 {presFeatureTag
, presFeatureMask
},
528 {blwsFeatureTag
, blwsFeatureMask
},
529 {abvsFeatureTag
, abvsFeatureMask
},
530 {pstsFeatureTag
, pstsFeatureMask
},
531 {halnFeatureTag
, halnFeatureMask
},
532 {blwmFeatureTag
, blwmFeatureMask
},
533 {abvmFeatureTag
, abvmFeatureMask
},
534 {distFeatureTag
, distFeatureMask
}
537 static const le_int32 featureCount
= LE_ARRAY_SIZE(featureMap
);
539 static const FeatureMap v2FeatureMap
[] = {
540 {loclFeatureTag
, loclFeatureMask
},
541 {nuktFeatureTag
, nuktFeatureMask
},
542 {akhnFeatureTag
, akhnFeatureMask
},
543 {rphfFeatureTag
, rphfFeatureMask
},
544 {rkrfFeatureTag
, rkrfFeatureMask
},
545 {blwfFeatureTag
, blwfFeatureMask
},
546 {halfFeatureTag
, halfFeatureMask
},
547 {vatuFeatureTag
, vatuFeatureMask
},
548 {cjctFeatureTag
, cjctFeatureMask
},
549 {presFeatureTag
, presFeatureMask
},
550 {abvsFeatureTag
, abvsFeatureMask
},
551 {blwsFeatureTag
, blwsFeatureMask
},
552 {pstsFeatureTag
, pstsFeatureMask
},
553 {halnFeatureTag
, halnFeatureMask
},
554 {caltFeatureTag
, caltFeatureMask
},
555 {kernFeatureTag
, kernFeatureMask
},
556 {distFeatureTag
, distFeatureMask
},
557 {abvmFeatureTag
, abvmFeatureMask
},
558 {blwmFeatureTag
, blwmFeatureMask
}
561 static const le_int32 v2FeatureMapCount
= LE_ARRAY_SIZE(v2FeatureMap
);
563 static const le_int8 stateTable
[][CC_COUNT
] =
565 // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al
566 { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state
567 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
568 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta
569 {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant
570 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama
571 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
572 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
573 {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama
574 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama
575 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel
576 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel
577 {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv
578 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?)
579 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant
583 const FeatureMap
*IndicReordering::getFeatureMap(le_int32
&count
)
585 count
= featureCount
;
590 const FeatureMap
*IndicReordering::getv2FeatureMap(le_int32
&count
)
592 count
= v2FeatureMapCount
;
597 le_int32
IndicReordering::findSyllable(const IndicClassTable
*classTable
, const LEUnicode
*chars
, le_int32 prev
, le_int32 charCount
)
599 le_int32 cursor
= prev
;
601 le_int8 consonant_count
= 0;
603 while (cursor
< charCount
) {
604 IndicClassTable::CharClass charClass
= classTable
->getCharClass(chars
[cursor
]);
606 if ( IndicClassTable::isConsonant(charClass
) ) {
608 if ( consonant_count
> MAX_CONSONANTS_PER_SYLLABLE
) {
613 state
= stateTable
[state
][charClass
& CF_CLASS_MASK
];
625 le_int32
IndicReordering::reorder(const LEUnicode
*chars
, le_int32 charCount
, le_int32 scriptCode
,
626 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
,
627 MPreFixups
**outMPreFixups
, LEErrorCode
& success
)
629 if (LE_FAILURE(success
)) {
633 MPreFixups
*mpreFixups
= NULL
;
634 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
636 if(classTable
==NULL
) {
637 success
= LE_MEMORY_ALLOCATION_ERROR
;
641 if (classTable
->scriptFlags
& SF_MPRE_FIXUP
) {
642 mpreFixups
= new MPreFixups(charCount
);
643 if (mpreFixups
== NULL
) {
644 success
= LE_MEMORY_ALLOCATION_ERROR
;
649 IndicReorderingOutput
output(outChars
, glyphStorage
, mpreFixups
);
650 le_int32 i
, prev
= 0;
651 le_bool lastInWord
= FALSE
;
653 while (prev
< charCount
) {
654 le_int32 syllable
= findSyllable(classTable
, chars
, prev
, charCount
);
655 le_int32 matra
, markStart
= syllable
;
659 if (classTable
->isStressMark(chars
[markStart
- 1])) {
661 output
.noteStressMark(classTable
, chars
[markStart
], markStart
, tagArray1
);
664 if (markStart
!= prev
&& classTable
->isVowelModifier(chars
[markStart
- 1])) {
666 output
.noteVowelModifier(classTable
, chars
[markStart
], markStart
, tagArray1
);
669 matra
= markStart
- 1;
671 while (output
.noteMatra(classTable
, chars
[matra
], matra
, tagArray1
, !lastInWord
) && matra
!= prev
) {
677 switch (classTable
->getCharClass(chars
[prev
]) & CF_CLASS_MASK
) {
682 case CC_INDEPENDENT_VOWEL
:
683 case CC_ZERO_WIDTH_MARK
:
684 for (i
= prev
; i
< syllable
; i
+= 1) {
685 output
.writeChar(chars
[i
], i
, tagArray1
);
692 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
693 output
.writeChar(chars
[prev
], prev
, tagArray1
);
697 // A lone virama is illegal unless it follows a
698 // MALAYALAM_VOWEL_SIGN_U. Such a usage is called
700 if (chars
[prev
- 1] != C_MALAYALAM_VOWEL_SIGN_U
) {
701 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
704 output
.writeChar(chars
[prev
], prev
, tagArray1
);
707 case CC_DEPENDENT_VOWEL
:
708 case CC_SPLIT_VOWEL_PIECE_1
:
709 case CC_SPLIT_VOWEL_PIECE_2
:
710 case CC_SPLIT_VOWEL_PIECE_3
:
711 case CC_VOWEL_MODIFIER
:
715 output
.writeChar(C_DOTTED_CIRCLE
, prev
, tagArray1
);
717 output
.writeMbelow();
718 output
.writeSMbelow();
719 output
.writeMabove();
721 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) != 0) {
725 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) != 0) {
726 output
.writeVMabove();
727 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
730 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
734 output
.writeLengthMark();
735 output
.writeAlLakuna();
737 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) == 0) {
738 output
.writeVMabove();
739 output
.writeSMabove();
742 output
.writeVMpost();
745 case CC_INDEPENDENT_VOWEL_2
:
746 case CC_INDEPENDENT_VOWEL_3
:
748 case CC_CONSONANT_WITH_NUKTA
:
750 le_uint32 length
= markStart
- prev
;
751 le_int32 lastConsonant
= markStart
- 1;
752 le_int32 baseLimit
= prev
;
754 // Check for REPH at front of syllable
755 if (length
> 2 && classTable
->isReph(chars
[prev
]) && classTable
->isVirama(chars
[prev
+ 1]) && chars
[prev
+ 2] != C_SIGN_ZWNJ
) {
758 // Check for eyelash RA, if the script supports it
759 if ((classTable
->scriptFlags
& SF_EYELASH_RA
) != 0 &&
760 chars
[baseLimit
] == C_SIGN_ZWJ
) {
769 while (lastConsonant
> baseLimit
&& !classTable
->isConsonant(chars
[lastConsonant
])) {
774 IndicClassTable::CharClass charClass
= CC_RESERVED
;
775 IndicClassTable::CharClass nextClass
= CC_RESERVED
;
776 le_int32 baseConsonant
= lastConsonant
;
777 le_int32 postBase
= lastConsonant
+ 1;
778 le_int32 postBaseLimit
= classTable
->scriptFlags
& SF_POST_BASE_LIMIT_MASK
;
779 le_bool seenVattu
= FALSE
;
780 le_bool seenBelowBaseForm
= FALSE
;
781 le_bool seenPreBaseForm
= FALSE
;
782 le_bool hasNukta
= FALSE
;
783 le_bool hasBelowBaseForm
= FALSE
;
784 le_bool hasPostBaseForm
= FALSE
;
785 le_bool hasPreBaseForm
= FALSE
;
787 if (postBase
< markStart
&& classTable
->isNukta(chars
[postBase
])) {
788 charClass
= CC_NUKTA
;
792 while (baseConsonant
> baseLimit
) {
793 nextClass
= charClass
;
794 hasNukta
= IndicClassTable::isNukta(nextClass
);
795 charClass
= classTable
->getCharClass(chars
[baseConsonant
]);
797 hasBelowBaseForm
= IndicClassTable::hasBelowBaseForm(charClass
) && !hasNukta
;
798 hasPostBaseForm
= IndicClassTable::hasPostBaseForm(charClass
) && !hasNukta
;
799 hasPreBaseForm
= IndicClassTable::hasPreBaseForm(charClass
) && !hasNukta
;
801 if (IndicClassTable::isConsonant(charClass
)) {
802 if (postBaseLimit
== 0 || seenVattu
||
803 (baseConsonant
> baseLimit
&& !classTable
->isVirama(chars
[baseConsonant
- 1])) ||
804 !(hasBelowBaseForm
|| hasPostBaseForm
|| hasPreBaseForm
)) {
808 // Note any pre-base consonants
809 if ( baseConsonant
== lastConsonant
&& lastConsonant
> 0 &&
810 hasPreBaseForm
&& classTable
->isVirama(chars
[baseConsonant
- 1])) {
811 output
.notePreBaseConsonant(lastConsonant
,chars
[lastConsonant
],chars
[lastConsonant
-1],tagArray2
);
812 seenPreBaseForm
= TRUE
;
815 // consonants with nuktas are never vattus
816 seenVattu
= IndicClassTable::isVattu(charClass
) && !hasNukta
;
818 // consonants with nuktas never have below- or post-base forms
819 if (hasPostBaseForm
) {
820 if (seenBelowBaseForm
) {
824 postBase
= baseConsonant
;
825 } else if (hasBelowBaseForm
) {
826 seenBelowBaseForm
= TRUE
;
839 // NOTE: baseLimit == prev + 3 iff eyelash RA present...
840 if (baseLimit
== prev
+ 3) {
841 output
.writeChar(chars
[prev
], prev
, tagArray2
);
842 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray2
);
843 output
.writeChar(chars
[prev
+ 2], prev
+ 2, tagArray2
);
846 // write any pre-base consonants
847 output
.writePreBaseConsonant();
849 le_bool supressVattu
= TRUE
;
851 for (i
= baseLimit
; i
< baseConsonant
; i
+= 1) {
852 LEUnicode ch
= chars
[i
];
853 // Don't put 'pstf' or 'blwf' on anything before the base consonant.
854 FeatureMask features
= tagArray1
& ~( pstfFeatureMask
| blwfFeatureMask
);
856 charClass
= classTable
->getCharClass(ch
);
857 nextClass
= classTable
->getCharClass(chars
[i
+ 1]);
858 hasNukta
= IndicClassTable::isNukta(nextClass
);
860 if (IndicClassTable::isConsonant(charClass
)) {
861 if (IndicClassTable::isVattu(charClass
) && !hasNukta
&& supressVattu
) {
862 features
= tagArray4
;
865 supressVattu
= IndicClassTable::isVattu(charClass
) && !hasNukta
;
866 } else if (IndicClassTable::isVirama(charClass
) && chars
[i
+ 1] == C_SIGN_ZWNJ
)
868 features
= tagArray4
;
871 output
.writeChar(ch
, i
, features
);
874 le_int32 bcSpan
= baseConsonant
+ 1;
876 if (bcSpan
< markStart
&& classTable
->isNukta(chars
[bcSpan
])) {
880 if (baseConsonant
== lastConsonant
&& bcSpan
< markStart
&&
881 (classTable
->isVirama(chars
[bcSpan
]) || classTable
->isAlLakuna(chars
[bcSpan
]))) {
884 if (bcSpan
< markStart
&& chars
[bcSpan
] == C_SIGN_ZWNJ
) {
889 // note the base consonant for post-GSUB fixups
890 output
.noteBaseConsonant();
892 // write base consonant
893 for (i
= baseConsonant
; i
< bcSpan
; i
+= 1) {
894 output
.writeChar(chars
[i
], i
, tagArray4
);
897 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) != 0) {
898 output
.writeMbelow();
899 output
.writeSMbelow(); // FIXME: there are no SMs in these scripts...
900 output
.writeMabove();
904 // write below-base consonants
905 if (baseConsonant
!= lastConsonant
&& !seenPreBaseForm
) {
906 for (i
= bcSpan
+ 1; i
< postBase
; i
+= 1) {
907 output
.writeChar(chars
[i
], i
, tagArray1
);
910 if (postBase
> lastConsonant
) {
911 // write halant that was after base consonant
912 output
.writeChar(chars
[bcSpan
], bcSpan
, tagArray1
);
916 // write Mbelow, SMbelow, Mabove
917 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
918 output
.writeMbelow();
919 output
.writeSMbelow();
920 output
.writeMabove();
923 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) != 0) {
924 if (baseLimit
== prev
+ 2) {
925 output
.writeChar(chars
[prev
], prev
, tagArray0
);
926 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray0
);
929 output
.writeVMabove();
930 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
933 // write post-base consonants
934 // FIXME: does this put the right tags on post-base consonants?
935 if (baseConsonant
!= lastConsonant
&& !seenPreBaseForm
) {
936 if (postBase
<= lastConsonant
) {
937 for (i
= postBase
; i
<= lastConsonant
; i
+= 1) {
938 output
.writeChar(chars
[i
], i
, tagArray3
);
941 // write halant that was after base consonant
942 output
.writeChar(chars
[bcSpan
], bcSpan
, tagArray1
);
945 // write the trailing halant, if there is one
946 if (lastConsonant
< matra
&& classTable
->isVirama(chars
[matra
])) {
947 output
.writeChar(chars
[matra
], matra
, tagArray4
);
952 if ((classTable
->scriptFlags
& SF_MATRAS_AFTER_BASE
) == 0) {
956 output
.writeLengthMark();
957 output
.writeAlLakuna();
960 if ((classTable
->scriptFlags
& SF_REPH_AFTER_BELOW
) == 0) {
961 if (baseLimit
== prev
+ 2) {
962 output
.writeChar(chars
[prev
], prev
, tagArray0
);
963 output
.writeChar(chars
[prev
+ 1], prev
+ 1, tagArray0
);
966 output
.writeVMabove();
967 output
.writeSMabove();
970 output
.writeVMpost();
982 *outMPreFixups
= mpreFixups
;
984 return output
.getOutputIndex();
987 void IndicReordering::adjustMPres(MPreFixups
*mpreFixups
, LEGlyphStorage
&glyphStorage
, LEErrorCode
& success
)
989 if (mpreFixups
!= NULL
) {
990 mpreFixups
->apply(glyphStorage
, success
);
996 void IndicReordering::applyPresentationForms(LEGlyphStorage
&glyphStorage
, le_int32 count
)
998 LEErrorCode success
= LE_NO_ERROR
;
1000 // This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the
1001 // GPOS table lookups
1003 for ( le_int32 i
= 0 ; i
< count
; i
++ ) {
1004 glyphStorage
.setAuxData(i
, ( presentationFormsMask
| positioningFormsMask
), success
);
1008 void IndicReordering::finalReordering(LEGlyphStorage
&glyphStorage
, le_int32 count
)
1010 LEErrorCode success
= LE_NO_ERROR
;
1012 // Reposition REPH as appropriate
1014 for ( le_int32 i
= 0 ; i
< count
; i
++ ) {
1016 le_int32 tmpAuxData
= glyphStorage
.getAuxData(i
,success
);
1017 LEGlyphID tmpGlyph
= glyphStorage
.getGlyphID(i
,success
);
1019 if ( ( tmpGlyph
!= NO_GLYPH
) && (tmpAuxData
& rephConsonantMask
) && !(tmpAuxData
& repositionedGlyphMask
)) {
1021 le_bool targetPositionFound
= false;
1022 le_int32 targetPosition
= i
+1;
1023 le_int32 baseConsonantData
;
1025 while (!targetPositionFound
) {
1026 tmpGlyph
= glyphStorage
.getGlyphID(targetPosition
,success
);
1027 tmpAuxData
= glyphStorage
.getAuxData(targetPosition
,success
);
1029 if ( tmpAuxData
& baseConsonantMask
) {
1030 baseConsonantData
= tmpAuxData
;
1031 targetPositionFound
= true;
1037 // Make sure we are not putting the reph into an empty hole
1039 le_bool targetPositionHasGlyph
= false;
1040 while (!targetPositionHasGlyph
) {
1041 tmpGlyph
= glyphStorage
.getGlyphID(targetPosition
,success
);
1042 if ( tmpGlyph
!= NO_GLYPH
) {
1043 targetPositionHasGlyph
= true;
1049 // Make sure that REPH is positioned after any above base or post base matras
1051 le_bool checkMatraDone
= false;
1052 le_int32 checkMatraPosition
= targetPosition
+1;
1053 while ( !checkMatraDone
) {
1054 tmpAuxData
= glyphStorage
.getAuxData(checkMatraPosition
,success
);
1055 if ( checkMatraPosition
>= count
|| ( (tmpAuxData
^ baseConsonantData
) & LE_GLYPH_GROUP_MASK
)) {
1056 checkMatraDone
= true;
1059 if ( (tmpAuxData
& matraMask
) &&
1060 (((tmpAuxData
& markPositionMask
) == aboveBasePosition
) ||
1061 ((tmpAuxData
& markPositionMask
) == postBasePosition
))) {
1062 targetPosition
= checkMatraPosition
;
1064 checkMatraPosition
++;
1067 glyphStorage
.moveGlyph(i
,targetPosition
,repositionedGlyphMask
);
1073 le_int32
IndicReordering::v2process(const LEUnicode
*chars
, le_int32 charCount
, le_int32 scriptCode
,
1074 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
)
1076 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
1078 DynamicProperties dynProps
[INDIC_BLOCK_SIZE
];
1079 IndicReordering::getDynamicProperties(dynProps
,classTable
);
1081 IndicReorderingOutput
output(outChars
, glyphStorage
, NULL
);
1082 le_int32 i
, firstConsonant
, baseConsonant
, secondConsonant
, inv_count
= 0, beginSyllable
= 0;
1083 //le_bool lastInWord = FALSE;
1085 while (beginSyllable
< charCount
) {
1086 le_int32 nextSyllable
= findSyllable(classTable
, chars
, beginSyllable
, charCount
);
1090 // Find the First Consonant
1091 for ( firstConsonant
= beginSyllable
; firstConsonant
< nextSyllable
; firstConsonant
++ ) {
1092 if ( classTable
->isConsonant(chars
[firstConsonant
]) ) {
1097 // Find the base consonant
1099 baseConsonant
= nextSyllable
- 1;
1100 secondConsonant
= firstConsonant
;
1102 // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm()
1104 while ( baseConsonant
> firstConsonant
) {
1105 if ( classTable
->isConsonant(chars
[baseConsonant
]) &&
1106 !classTable
->hasBelowBaseForm(chars
[baseConsonant
]) &&
1107 !classTable
->hasPostBaseForm(chars
[baseConsonant
]) ) {
1111 if ( classTable
->isConsonant(chars
[baseConsonant
]) ) {
1112 secondConsonant
= baseConsonant
;
1118 // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one
1119 // consonant, Ra is excluded from candidates for base consonants
1121 if ( classTable
->isReph(chars
[beginSyllable
]) &&
1122 beginSyllable
+1 < nextSyllable
&& classTable
->isVirama(chars
[beginSyllable
+1]) &&
1123 secondConsonant
!= firstConsonant
) {
1124 baseConsonant
= secondConsonant
;
1127 // Populate the output
1128 for ( i
= beginSyllable
; i
< nextSyllable
; i
++ ) {
1130 // Handle invalid combinations
1132 if ( classTable
->isVirama(chars
[beginSyllable
]) ||
1133 classTable
->isMatra(chars
[beginSyllable
]) ||
1134 classTable
->isVowelModifier(chars
[beginSyllable
]) ||
1135 classTable
->isNukta(chars
[beginSyllable
]) ) {
1136 output
.writeChar(C_DOTTED_CIRCLE
,beginSyllable
,basicShapingFormsMask
);
1139 output
.writeChar(chars
[i
],i
, basicShapingFormsMask
);
1143 // Adjust features and set syllable structure bits
1145 for ( i
= beginSyllable
; i
< nextSyllable
; i
++ ) {
1147 FeatureMask outMask
= output
.getFeatures(i
+inv_count
);
1148 FeatureMask saveMask
= outMask
;
1150 // Since reph can only validly occur at the beginning of a syllable
1151 // We only apply it to the first 2 characters in the syllable, to keep it from
1152 // conflicting with other features ( i.e. rkrf )
1154 // TODO : Use the dynamic property for determining isREPH
1155 if ( i
== beginSyllable
&& i
< baseConsonant
&& classTable
->isReph(chars
[i
]) &&
1156 i
+1 < nextSyllable
&& classTable
->isVirama(chars
[i
+1])) {
1157 outMask
|= rphfFeatureMask
;
1158 outMask
|= rephConsonantMask
;
1159 output
.setFeatures(i
+1+inv_count
,outMask
);
1163 if ( i
== baseConsonant
) {
1164 outMask
|= baseConsonantMask
;
1167 if ( classTable
->isMatra(chars
[i
])) {
1168 outMask
|= matraMask
;
1169 if ( classTable
->hasAboveBaseForm(chars
[i
])) {
1170 outMask
|= aboveBasePosition
;
1171 } else if ( classTable
->hasBelowBaseForm(chars
[i
])) {
1172 outMask
|= belowBasePosition
;
1176 // Don't apply half form to virama that stands alone at the end of a syllable
1177 // to prevent half forms from forming when syllable ends with virama
1179 if ( classTable
->isVirama(chars
[i
]) && (i
+1 == nextSyllable
) ) {
1180 outMask
^= halfFeatureMask
;
1181 if ( classTable
->isConsonant(chars
[i
-1]) ) {
1182 FeatureMask tmp
= output
.getFeatures(i
-1+inv_count
);
1183 tmp
^= halfFeatureMask
;
1184 output
.setFeatures(i
-1+inv_count
,tmp
);
1188 if ( outMask
!= saveMask
) {
1189 output
.setFeatures(i
+inv_count
,outMask
);
1193 output
.decomposeReorderMatras(classTable
,beginSyllable
,nextSyllable
,inv_count
);
1195 beginSyllable
= nextSyllable
;
1199 return output
.getOutputIndex();
1203 void IndicReordering::getDynamicProperties( DynamicProperties
*, const IndicClassTable
*classTable
) {
1206 LEUnicode currentChar
;
1207 LEUnicode workChars
[2];
1208 LEGlyphStorage workGlyphs
;
1210 IndicReorderingOutput
workOutput(workChars
, workGlyphs
, NULL
);
1212 //le_int32 offset = 0;
1215 // TODO: Should this section of code have actually been doing something?
1216 // First find the relevant virama for the script we are dealing with
1218 for ( currentChar
= classTable
->firstChar
; currentChar
<= classTable
->lastChar
; currentChar
++ ) {
1219 if ( classTable
->isVirama(currentChar
)) {
1220 virama
= currentChar
;
1226 for ( currentChar
= classTable
->firstChar
; currentChar
<= classTable
->lastChar
; currentChar
++ ) {
1227 if ( classTable
->isConsonant(currentChar
)) {