3 * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved
8 #include "OpenTypeTables.h"
9 #include "OpenTypeUtilities.h"
10 #include "IndicReordering.h"
11 #include "LEGlyphStorage.h"
12 #include "MPreFixups.h"
// Helper class (derived from UMemory) that accumulates the reordered output of
// one syllable at a time: it remembers noted matras, vowel modifiers and stress
// marks and writes characters to the output buffer and glyph storage.
// NOTE(review): only some member declarations are visible in this view; other
// members used below (e.g. fOutIndex, fOutChars, fMpre) are declared on lines
// that are not shown here.
16 class ReorderingOutput
: public UMemory
{
// Glyph storage that writeChar() updates with char indices and aux data.
21 LEGlyphStorage
&fGlyphStorage
;
// Source index recorded by saveMatra() for a CF_POS_BELOW matra.
27 le_int32 fMbelowIndex
;
// Source index recorded by saveMatra() for a CF_POS_ABOVE matra.
30 le_int32 fMaboveIndex
;
// Saved length mark character; 0 means "none" (see writeLengthMark()).
35 LEUnicode fLengthMark
;
// Source index recorded by saveMatra() for a length mark.
36 le_int32 fLengthMarkIndex
;
// Tag array pointer passed to the most recent noteMatra() call.
38 const LETag
*fMatraTags
;
// Initialized to -1 by the constructor; noteBaseConsonant() only records a
// fixup when this is >= 0.
40 le_int32 fMPreOutIndex
;
// Fixup collector supplied to the constructor; may be NULL.
41 MPreFixups
*fMPreFixups
;
// Records where a matra (or one piece of a split matra) came from, according to
// its class: a length mark sets fLengthMarkIndex; otherwise the CF_POS_MASK bits
// of matraClass select the pre-, below-, above- or post-base index slot.
// NOTE(review): the statements that store the matra character itself and the
// ends of the individual cases are not visible in this view.
53 void saveMatra(LEUnicode matra
, le_int32 matraIndex
, IndicClassTable::CharClass matraClass
)
55 // FIXME: check if already set, or if not a matra...
56 if (IndicClassTable::isLengthMark(matraClass
)) {
58 fLengthMarkIndex
= matraIndex
;
// Not a length mark: choose the slot from the position bits.
60 switch (matraClass
& IndicClassTable::CF_POS_MASK
) {
61 case IndicClassTable::CF_POS_BEFORE
:
63 fMpreIndex
= matraIndex
;
66 case IndicClassTable::CF_POS_BELOW
:
68 fMbelowIndex
= matraIndex
;
71 case IndicClassTable::CF_POS_ABOVE
:
73 fMaboveIndex
= matraIndex
;
76 case IndicClassTable::CF_POS_AFTER
:
78 fMpostIndex
= matraIndex
;
// Constructs the output helper over the caller's output character buffer and
// glyph storage. The output index and all saved matra / vowel-modifier /
// stress-mark slots start at 0, the tag pointers start at NULL, and
// fMPreOutIndex starts at -1. mpreFixups is stored as given and may be NULL
// (noteBaseConsonant() checks for that).
89 ReorderingOutput(LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
, MPreFixups
*mpreFixups
)
90 : fOutIndex(0), fOutChars(outChars
), fGlyphStorage(glyphStorage
),
91 fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0),
92 fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fMatraTags(NULL
),
93 fMPreOutIndex(-1), fMPreFixups(mpreFixups
),
94 fVMabove(0), fVMpost(0), fVMIndex(0), fVMTags(NULL
),
95 fSMabove(0), fSMbelow(0), fSMIndex(0), fSMTags(NULL
)
97 // nothing else to do...
// NOTE(review): the headers of the functions that contain the lines below are
// not visible in this view.
102 // nothing to do here...
// Clear every saved matra and the length mark character to 0.
107 fMpre
= fMbelow
= fMabove
= fMpost
= fLengthMark
= 0;
// Clear the saved vowel modifier characters.
110 fVMabove
= fVMpost
= 0;
// Clear the saved stress mark characters.
111 fSMabove
= fSMbelow
= 0;
// Looks up the class of `matra` and, if it is a matra, saves it with
// saveMatra(). A split matra is expanded into its pieces (at most three,
// stopping at the first 0 entry); every piece is saved with the same
// matraIndex. fMatraTags is updated before the class is tested.
// NOTE(review): the return statements are not visible in this view; the caller
// in reorder() uses the le_bool result as a loop condition.
114 le_bool
noteMatra(const IndicClassTable
*classTable
, LEUnicode matra
, le_uint32 matraIndex
, const LETag
*matraTags
)
116 IndicClassTable::CharClass matraClass
= classTable
->getCharClass(matra
);
118 fMatraTags
= matraTags
;
120 if (IndicClassTable::isMatra(matraClass
)) {
121 if (IndicClassTable::isSplitMatra(matraClass
)) {
122 const SplitMatra
*splitMatra
= classTable
->getSplitMatra(matraClass
);
// Save each non-zero piece under its own character class.
125 for (i
= 0; i
< 3 && (*splitMatra
)[i
] != 0; i
+= 1) {
126 LEUnicode piece
= (*splitMatra
)[i
];
127 IndicClassTable::CharClass pieceClass
= classTable
->getCharClass(piece
);
129 saveMatra(piece
, matraIndex
, pieceClass
);
// Save the matra itself under its own class.
132 saveMatra(matra
, matraIndex
, matraClass
);
// Records a vowel modifier. Its index and tag pointer are stored before the
// class is tested; if the class is a vowel modifier, the character is saved in
// fVMabove or fVMpost depending on the CF_POS_MASK bits.
141 void noteVowelModifier(const IndicClassTable
*classTable
, LEUnicode vowelModifier
, le_uint32 vowelModifierIndex
, const LETag
*vowelModifierTags
)
143 IndicClassTable::CharClass vmClass
= classTable
->getCharClass(vowelModifier
);
145 fVMIndex
= vowelModifierIndex
;
146 fVMTags
= vowelModifierTags
;
148 if (IndicClassTable::isVowelModifier(vmClass
)) {
149 switch (vmClass
& IndicClassTable::CF_POS_MASK
) {
150 case IndicClassTable::CF_POS_ABOVE
:
151 fVMabove
= vowelModifier
;
154 case IndicClassTable::CF_POS_AFTER
:
155 fVMpost
= vowelModifier
;
159 // FIXME: this is an error...
// Records a stress mark. Its index and tag pointer are stored before the class
// is tested; if the class is a stress mark, the character is saved in fSMabove
// or fSMbelow depending on the CF_POS_MASK bits.
165 void noteStressMark(const IndicClassTable
*classTable
, LEUnicode stressMark
, le_uint32 stressMarkIndex
, const LETag
*stressMarkTags
)
167 IndicClassTable::CharClass smClass
= classTable
->getCharClass(stressMark
);
169 fSMIndex
= stressMarkIndex
;
170 fSMTags
= stressMarkTags
;
172 if (IndicClassTable::isStressMark(smClass
)) {
173 switch (smClass
& IndicClassTable::CF_POS_MASK
) {
174 case IndicClassTable::CF_POS_ABOVE
:
175 fSMabove
= stressMark
;
178 case IndicClassTable::CF_POS_BELOW
:
179 fSMbelow
= stressMark
;
183 // FIXME: this is an error...
// Called by reorder() just before the base consonant is written. If a fixup
// collector was supplied and fMPreOutIndex is non-negative, passes the current
// output index together with fMPreOutIndex to MPreFixups::add().
189 void noteBaseConsonant()
191 if (fMPreFixups
!= NULL
&& fMPreOutIndex
>= 0) {
192 fMPreFixups
->add(fOutIndex
, fMPreOutIndex
);
// NOTE(review): the headers (and any guards) of the functions that contain the
// statements below are not visible in this view.
// Remember the output position at which the pre-base matra is about to be
// written, then write it with its source index and the matra tags.
199 fMPreOutIndex
= fOutIndex
;
200 writeChar(fMpre
, fMpreIndex
, fMatraTags
);
// Write the saved below-base matra.
207 writeChar(fMbelow
, fMbelowIndex
, fMatraTags
);
// Write the saved above-base matra.
214 writeChar(fMabove
, fMaboveIndex
, fMatraTags
);
// Write the saved post-base matra.
221 writeChar(fMpost
, fMpostIndex
, fMatraTags
);
// Writes the saved length mark, if one is set (non-zero), using its recorded
// source index and the matra tags.
225 void writeLengthMark()
227 if (fLengthMark
!= 0) {
228 writeChar(fLengthMark
, fLengthMarkIndex
, fMatraTags
);
// NOTE(review): the headers (and any guards) of the functions that contain the
// calls below are not visible in this view.
// Write the saved above-base vowel modifier; both vowel-modifier writes share
// fVMIndex and fVMTags.
235 writeChar(fVMabove
, fVMIndex
, fVMTags
);
// Write the saved post-base vowel modifier.
242 writeChar(fVMpost
, fVMIndex
, fVMTags
);
// Write the saved above-base stress mark; both stress-mark writes share
// fSMIndex and fSMTags.
249 writeChar(fSMabove
, fSMIndex
, fSMTags
);
// Write the saved below-base stress mark.
256 writeChar(fSMbelow
, fSMIndex
, fSMTags
);
// Stores ch at the current output position and records, for that position, the
// source character index and the tag array pointer (as aux data) in the glyph
// storage.
// NOTE(review): `success` starts as LE_NO_ERROR and is handed to both
// LEGlyphStorage calls; no check of it, and no advance of fOutIndex, is visible
// in this view.
260 void writeChar(LEUnicode ch
, le_uint32 charIndex
, const LETag
*charTags
)
262 LEErrorCode success
= LE_NO_ERROR
;
264 fOutChars
[fOutIndex
] = ch
;
266 fGlyphStorage
.setCharIndex(fOutIndex
, charIndex
, success
);
// The const tag pointer is cast to void * to be stored as aux data.
267 fGlyphStorage
.setAuxData(fOutIndex
, (void *) charTags
, success
);
// Accessor whose result reorder() returns as its own result.
// NOTE(review): the body is not visible in this view; presumably it returns
// fOutIndex — confirm.
272 le_int32
getOutputIndex()
// Code point 0x25CC; reorder() writes it in front of a syllable that begins
// with a nukta, virama, dependent vowel, split vowel piece, vowel modifier or
// stress mark.
// NOTE(review): the enclosing declaration is not visible in this view.
280 C_DOTTED_CIRCLE
= 0x25CC
// File-local names for the OpenType feature tags used by the two tables that
// follow. emptyTag (0) is the final entry of both tables.
283 static const LETag emptyTag
= 0x00000000; // ''
285 static const LETag nuktFeatureTag
= LE_NUKT_FEATURE_TAG
;
286 static const LETag akhnFeatureTag
= LE_AKHN_FEATURE_TAG
;
287 static const LETag rphfFeatureTag
= LE_RPHF_FEATURE_TAG
;
288 static const LETag blwfFeatureTag
= LE_BLWF_FEATURE_TAG
;
289 static const LETag halfFeatureTag
= LE_HALF_FEATURE_TAG
;
290 static const LETag pstfFeatureTag
= LE_PSTF_FEATURE_TAG
;
291 static const LETag vatuFeatureTag
= LE_VATU_FEATURE_TAG
;
292 static const LETag presFeatureTag
= LE_PRES_FEATURE_TAG
;
293 static const LETag blwsFeatureTag
= LE_BLWS_FEATURE_TAG
;
294 static const LETag abvsFeatureTag
= LE_ABVS_FEATURE_TAG
;
295 static const LETag pstsFeatureTag
= LE_PSTS_FEATURE_TAG
;
296 static const LETag halnFeatureTag
= LE_HALN_FEATURE_TAG
;
298 static const LETag blwmFeatureTag
= LE_BLWM_FEATURE_TAG
;
299 static const LETag abvmFeatureTag
= LE_ABVM_FEATURE_TAG
;
300 static const LETag distFeatureTag
= LE_DIST_FEATURE_TAG
;
302 // These are in the order in which the features need to be applied
303 // for correct processing
// The list ends with emptyTag.
304 static const LETag featureOrder
[] =
306 nuktFeatureTag
, akhnFeatureTag
, rphfFeatureTag
, blwfFeatureTag
, halfFeatureTag
, pstfFeatureTag
,
307 vatuFeatureTag
, presFeatureTag
, blwsFeatureTag
, abvsFeatureTag
, pstsFeatureTag
, halnFeatureTag
,
308 blwmFeatureTag
, abvmFeatureTag
, distFeatureTag
, emptyTag
311 // The order of these is determined so that the tag array of each glyph can start
312 // at an offset into this array
313 // FIXME: do we want a separate tag array for each kind of character??
314 // FIXME: are there cases where this ordering causes glyphs to get tags
315 // that they shouldn't?
// reorder() passes &tagArray[0] through &tagArray[4] as the per-character tag
// pointer, so a character's tags run from that offset to the trailing emptyTag.
316 static const LETag tagArray
[] =
318 rphfFeatureTag
, blwfFeatureTag
, halfFeatureTag
, pstfFeatureTag
, nuktFeatureTag
, akhnFeatureTag
,
319 vatuFeatureTag
, presFeatureTag
, blwsFeatureTag
, abvsFeatureTag
, pstsFeatureTag
, halnFeatureTag
,
320 blwmFeatureTag
, abvmFeatureTag
, distFeatureTag
, emptyTag
// Syllable state machine used by findSyllable(): rows are states, columns are
// character classes (CC_COUNT of them); each entry is the next state.
// NOTE(review): how negative entries are handled is not visible in this view;
// presumably they end the syllable — confirm against findSyllable().
323 static const le_int8 stateTable
[][IndicClassTable::CC_COUNT
] =
325 // xx vm sm iv i2 ct cn nu dv s1 s2 s3 vr zw
326 { 1, 1, 1, 5, 8, 3, 2, 1, 5, 9, 5, 1, 1, 1}, // 0 - ground state
327 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state
328 {-1, 6, 1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1}, // 2 - consonant
329 {-1, 6, 1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, -1}, // 3 - consonant with nukta
330 {-1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7}, // 4 - consonant virama
331 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels
332 {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark
333 {-1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1}, // 7 - ZWJ, ZWNJ
334 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1}, // 8 - independent vowels that can take a virama
335 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1}, // 9 - first part of split vowel
336 {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1} // 10 - second part of split vowel
// Accessor returning a pointer to LETag values.
// NOTE(review): the body is not visible in this view; presumably it returns the
// featureOrder table defined above — confirm.
340 const LETag
*IndicReordering::getFeatureOrder()
// Scans chars starting at index prev, stepping the stateTable state machine
// with the class of each character (masked with CF_CLASS_MASK) while the cursor
// is below charCount. reorder() uses the result as the syllable's end index.
// NOTE(review): the declaration of `state`, the loop exit on a terminal state,
// the cursor advance and the return statement are not visible in this view.
345 le_int32
IndicReordering::findSyllable(const IndicClassTable
*classTable
, const LEUnicode
*chars
, le_int32 prev
, le_int32 charCount
)
347 le_int32 cursor
= prev
;
350 while (cursor
< charCount
) {
351 IndicClassTable::CharClass charClass
= classTable
->getCharClass(chars
[cursor
]);
// Next state is looked up by current state and character class.
353 state
= stateTable
[state
][charClass
& IndicClassTable::CF_CLASS_MASK
];
// Reorders `chars` syllable by syllable into outChars. For every character
// written, the source index and a pointer into tagArray are recorded in
// glyphStorage (see ReorderingOutput::writeChar()).
//
// chars/charCount  input text
// scriptCode       selects the IndicClassTable used for classification
// outChars         receives the reordered characters
// glyphStorage     receives char indices and tag pointers
// outMPreFixups    receives the MPreFixups object allocated here when the
//                  script has SF_MPRE_FIXUP set, otherwise NULL
//
// Returns output.getOutputIndex().
// NOTE(review): many lines of this function are not visible in this view (case
// terminators, some write calls, index adjustments). No NULL check on
// classTable or on the result of `new MPreFixups` is visible before they are
// used — confirm whether one is needed.
365 le_int32
IndicReordering::reorder(const LEUnicode
*chars
, le_int32 charCount
, le_int32 scriptCode
,
366 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
,
367 MPreFixups
**outMPreFixups
)
// Stays NULL unless the script's flags request pre-base matra fixups.
369 MPreFixups
*mpreFixups
= NULL
;
370 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
372 if (classTable
->scriptFlags
& IndicClassTable::SF_MPRE_FIXUP
) {
373 mpreFixups
= new MPreFixups(charCount
);
376 ReorderingOutput
output(outChars
, glyphStorage
, mpreFixups
);
377 le_int32 i
, prev
= 0;
// One syllable per iteration; findSyllable() supplies the syllable's end.
379 while (prev
< charCount
) {
380 le_int32 syllable
= findSyllable(classTable
, chars
, prev
, charCount
);
381 le_int32 matra
, markStart
= syllable
;
// Check the character before markStart for a stress mark, then for a vowel
// modifier.
// NOTE(review): the test reads chars[markStart - 1] but the note call uses
// chars[markStart]; the adjustment of markStart in between is not visible here.
385 if (classTable
->isStressMark(chars
[markStart
- 1])) {
387 output
.noteStressMark(classTable
, chars
[markStart
], markStart
, &tagArray
[1]);
390 if (classTable
->isVowelModifier(chars
[markStart
- 1])) {
392 output
.noteVowelModifier(classTable
, chars
[markStart
], markStart
, &tagArray
[1]);
// Note matras starting at the character before the marks; the loop continues
// while noteMatra() returns true and the syllable start has not been reached.
395 matra
= markStart
- 1;
397 while (output
.noteMatra(classTable
, chars
[matra
], matra
, &tagArray
[1]) && matra
!= prev
) {
// Dispatch on the class of the syllable's first character.
401 switch (classTable
->getCharClass(chars
[prev
]) & IndicClassTable::CF_CLASS_MASK
) {
402 case IndicClassTable::CC_RESERVED
:
403 case IndicClassTable::CC_INDEPENDENT_VOWEL
:
404 case IndicClassTable::CC_ZERO_WIDTH_MARK
:
// These classes are written out in their original order with tagArray[1].
405 for (i
= prev
; i
< syllable
; i
+= 1) {
406 output
.writeChar(chars
[i
], i
, &tagArray
[1]);
411 case IndicClassTable::CC_NUKTA
:
412 case IndicClassTable::CC_VIRAMA
:
// A leading nukta or virama gets a dotted circle written in front of it.
413 output
.writeChar(C_DOTTED_CIRCLE
, prev
, &tagArray
[1]);
414 output
.writeChar(chars
[prev
], prev
, &tagArray
[1]);
417 case IndicClassTable::CC_DEPENDENT_VOWEL
:
418 case IndicClassTable::CC_SPLIT_VOWEL_PIECE_1
:
419 case IndicClassTable::CC_SPLIT_VOWEL_PIECE_2
:
420 case IndicClassTable::CC_SPLIT_VOWEL_PIECE_3
:
421 case IndicClassTable::CC_VOWEL_MODIFIER
:
422 case IndicClassTable::CC_STRESS_MARK
:
// A leading mark: write a dotted circle, then the noted marks. The
// script flags decide where the remaining marks go.
425 output
.writeChar(C_DOTTED_CIRCLE
, prev
, &tagArray
[1]);
427 output
.writeMbelow();
428 output
.writeSMbelow();
429 output
.writeMabove();
431 if ((classTable
->scriptFlags
& IndicClassTable::SF_MATRAS_AFTER_BASE
) != 0) {
435 if ((classTable
->scriptFlags
& IndicClassTable::SF_REPH_AFTER_BELOW
) != 0) {
436 output
.writeVMabove();
437 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
440 if ((classTable
->scriptFlags
& IndicClassTable::SF_MATRAS_AFTER_BASE
) == 0) {
444 output
.writeLengthMark();
446 if ((classTable
->scriptFlags
& IndicClassTable::SF_REPH_AFTER_BELOW
) == 0) {
447 output
.writeVMabove();
448 output
.writeSMabove();
451 output
.writeVMpost();
454 case IndicClassTable::CC_INDEPENDENT_VOWEL_2
:
455 case IndicClassTable::CC_CONSONANT
:
456 case IndicClassTable::CC_CONSONANT_WITH_NUKTA
:
// Syllable starting with a consonant (or CC_INDEPENDENT_VOWEL_2).
// length counts the characters before the marks.
458 le_uint32 length
= markStart
- prev
;
459 le_int32 lastConsonant
= markStart
- 1;
460 le_int32 baseLimit
= prev
;
462 // Check for REPH at front of syllable
463 if (length
> 2 && classTable
->isReph(chars
[prev
]) && classTable
->isVirama(chars
[prev
+ 1])) {
466 // Check for eyelash RA, if the script supports it
467 if ((classTable
->scriptFlags
& IndicClassTable::SF_EYELASH_RA
) != 0 &&
468 chars
[baseLimit
] == C_SIGN_ZWJ
) {
// Back lastConsonant up to an actual consonant, not going below baseLimit.
477 while (lastConsonant
> baseLimit
&& !classTable
->isConsonant(chars
[lastConsonant
])) {
481 le_int32 baseConsonant
= lastConsonant
;
482 le_int32 postBase
= lastConsonant
+ 1;
483 le_int32 postBaseLimit
= classTable
->scriptFlags
& IndicClassTable::SF_POST_BASE_LIMIT_MASK
;
484 le_bool seenVattu
= FALSE
;
485 le_bool seenBelowBaseForm
= FALSE
;
// NOTE(review): unlike the bcSpan tests further down, no bound check on
// postBase is visible before chars[postBase] is read — confirm it cannot
// index past the end of the input.
487 if (classTable
->isNukta(chars
[postBase
])) {
// Scan backwards from the last consonant to choose the base consonant.
491 while (baseConsonant
> baseLimit
) {
492 IndicClassTable::CharClass charClass
= classTable
->getCharClass(chars
[baseConsonant
]);
494 if (IndicClassTable::isConsonant(charClass
)) {
495 if (postBaseLimit
== 0 || seenVattu
||
496 (baseConsonant
> baseLimit
&& !classTable
->isVirama(chars
[baseConsonant
- 1])) ||
497 !IndicClassTable::hasPostOrBelowBaseForm(charClass
)) {
501 seenVattu
= IndicClassTable::isVattu(charClass
);
503 if (IndicClassTable::hasPostBaseForm(charClass
)) {
504 if (seenBelowBaseForm
) {
508 postBase
= baseConsonant
;
509 } else if (IndicClassTable::hasBelowBaseForm(charClass
)) {
510 seenBelowBaseForm
= TRUE
;
523 // NOTE: baseLimit == prev + 3 iff eyelash RA present...
524 if (baseLimit
== prev
+ 3) {
525 output
.writeChar(chars
[prev
], prev
, &tagArray
[2]);
526 output
.writeChar(chars
[prev
+ 1], prev
+ 1, &tagArray
[2]);
527 output
.writeChar(chars
[prev
+ 2], prev
+ 2, &tagArray
[2]);
530 // write any pre-base consonants
531 le_bool supressVattu
= TRUE
;
533 for (i
= baseLimit
; i
< baseConsonant
; i
+= 1) {
534 LEUnicode ch
= chars
[i
];
535 // Don't put 'blwf' on first consonant.
536 const LETag
*tag
= (i
== baseLimit
? &tagArray
[2] : &tagArray
[1]);
537 IndicClassTable::CharClass charClass
= classTable
->getCharClass(ch
);
539 if (IndicClassTable::isConsonant(charClass
)) {
540 if (IndicClassTable::isVattu(charClass
) && supressVattu
) {
544 supressVattu
= IndicClassTable::isVattu(charClass
);
545 } else if (IndicClassTable::isVirama(charClass
) && chars
[i
+ 1] == C_SIGN_ZWNJ
)
550 output
.writeChar(ch
, i
, tag
);
// bcSpan is the index just past the base consonant; the tests below examine
// chars[bcSpan] for a nukta, a virama and a ZWNJ, each only while bcSpan is
// below markStart.
// NOTE(review): the adjustments made inside these tests are not visible here.
553 le_int32 bcSpan
= baseConsonant
+ 1;
555 if (bcSpan
< markStart
&& classTable
->isNukta(chars
[bcSpan
])) {
559 if (baseConsonant
== lastConsonant
&& bcSpan
< markStart
&& classTable
->isVirama(chars
[bcSpan
])) {
562 if (bcSpan
< markStart
&& chars
[bcSpan
] == C_SIGN_ZWNJ
) {
567 // note the base consonant for post-GSUB fixups
568 output
.noteBaseConsonant();
570 // write base consonant
571 for (i
= baseConsonant
; i
< bcSpan
; i
+= 1) {
572 output
.writeChar(chars
[i
], i
, &tagArray
[4]);
575 if ((classTable
->scriptFlags
& IndicClassTable::SF_MATRAS_AFTER_BASE
) != 0) {
576 output
.writeMbelow();
577 output
.writeSMbelow(); // FIXME: there are no SMs in these scripts...
578 output
.writeMabove();
582 // write below-base consonants
583 if (baseConsonant
!= lastConsonant
) {
584 for (i
= bcSpan
+ 1; i
< postBase
; i
+= 1) {
585 output
.writeChar(chars
[i
], i
, &tagArray
[1]);
588 if (postBase
> lastConsonant
) {
589 // write halant that was after base consonant
590 output
.writeChar(chars
[bcSpan
], bcSpan
, &tagArray
[1]);
594 // write Mbelow, SMbelow, Mabove
595 if ((classTable
->scriptFlags
& IndicClassTable::SF_MATRAS_AFTER_BASE
) == 0) {
596 output
.writeMbelow();
597 output
.writeSMbelow();
598 output
.writeMabove();
// For SF_REPH_AFTER_BELOW scripts, the two leading characters are written here
// with tagArray[0] when baseLimit == prev + 2.
601 if ((classTable
->scriptFlags
& IndicClassTable::SF_REPH_AFTER_BELOW
) != 0) {
602 if (baseLimit
== prev
+ 2) {
603 output
.writeChar(chars
[prev
], prev
, &tagArray
[0]);
604 output
.writeChar(chars
[prev
+ 1], prev
+ 1, &tagArray
[0]);
607 output
.writeVMabove();
608 output
.writeSMabove(); // FIXME: there are no SM's in these scripts...
611 // write post-base consonants
612 // FIXME: does this put the right tags on post-base consonants?
613 if (baseConsonant
!= lastConsonant
) {
614 if (postBase
<= lastConsonant
) {
615 for (i
= postBase
; i
<= lastConsonant
; i
+= 1) {
616 output
.writeChar(chars
[i
], i
, &tagArray
[3]);
619 // write halant that was after base consonant
620 output
.writeChar(chars
[bcSpan
], bcSpan
, &tagArray
[1]);
623 // write the trailing halant, if there is one
624 if (lastConsonant
< matra
&& classTable
->isVirama(chars
[matra
])) {
625 output
.writeChar(chars
[matra
], matra
, &tagArray
[4]);
630 if ((classTable
->scriptFlags
& IndicClassTable::SF_MATRAS_AFTER_BASE
) == 0) {
634 output
.writeLengthMark();
// For scripts without SF_REPH_AFTER_BELOW, the two leading characters are
// written at this later point instead, again with tagArray[0].
637 if ((classTable
->scriptFlags
& IndicClassTable::SF_REPH_AFTER_BELOW
) == 0) {
638 if (baseLimit
== prev
+ 2) {
639 output
.writeChar(chars
[prev
], prev
, &tagArray
[0]);
640 output
.writeChar(chars
[prev
+ 1], prev
+ 1, &tagArray
[0]);
643 output
.writeVMabove();
644 output
.writeSMabove();
647 output
.writeVMpost();
// Hand the fixup object (possibly NULL) back to the caller.
659 *outMPreFixups
= mpreFixups
;
661 return output
.getOutputIndex();
// Applies the recorded fixups to glyphStorage when mpreFixups is non-NULL;
// a NULL mpreFixups skips the call.
// NOTE(review): any cleanup of mpreFixups after apply() is not visible in this
// view.
664 void IndicReordering::adjustMPres(MPreFixups
*mpreFixups
, LEGlyphStorage
&glyphStorage
)
666 if (mpreFixups
!= NULL
) {
667 mpreFixups
->apply(glyphStorage
);