]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
729e4ab9 A |
2 | / * |
3 | * (C) Copyright IBM Corp. 1998-2009 - All Rights Reserved | |
b75a7d8f A |
4 | * |
5 | */ | |
6 | ||
7 | #include "LETypes.h" | |
8 | #include "OpenTypeTables.h" | |
9 | #include "OpenTypeUtilities.h" | |
10 | #include "IndicReordering.h" | |
374ca955 | 11 | #include "LEGlyphStorage.h" |
b75a7d8f A |
12 | #include "MPreFixups.h" |
13 | ||
14 | U_NAMESPACE_BEGIN | |
15 | ||
73c04bcf A |
// Short file-local aliases for the LE_*_FEATURE_TAG constants; each alias is
// paired with the like-named *FeatureMask bit in the feature maps further down.
16 | #define loclFeatureTag LE_LOCL_FEATURE_TAG |
17 | #define initFeatureTag LE_INIT_FEATURE_TAG | |
18 | #define nuktFeatureTag LE_NUKT_FEATURE_TAG | |
19 | #define akhnFeatureTag LE_AKHN_FEATURE_TAG | |
20 | #define rphfFeatureTag LE_RPHF_FEATURE_TAG | |
729e4ab9 | 21 | #define rkrfFeatureTag LE_RKRF_FEATURE_TAG |
73c04bcf A |
22 | #define blwfFeatureTag LE_BLWF_FEATURE_TAG |
23 | #define halfFeatureTag LE_HALF_FEATURE_TAG | |
24 | #define pstfFeatureTag LE_PSTF_FEATURE_TAG | |
25 | #define vatuFeatureTag LE_VATU_FEATURE_TAG | |
26 | #define presFeatureTag LE_PRES_FEATURE_TAG | |
27 | #define blwsFeatureTag LE_BLWS_FEATURE_TAG | |
28 | #define abvsFeatureTag LE_ABVS_FEATURE_TAG | |
29 | #define pstsFeatureTag LE_PSTS_FEATURE_TAG | |
30 | #define halnFeatureTag LE_HALN_FEATURE_TAG | |
729e4ab9 | 31 | #define cjctFeatureTag LE_CJCT_FEATURE_TAG |
73c04bcf A |
32 | #define blwmFeatureTag LE_BLWM_FEATURE_TAG |
33 | #define abvmFeatureTag LE_ABVM_FEATURE_TAG | |
34 | #define distFeatureTag LE_DIST_FEATURE_TAG | |
729e4ab9 A |
35 | #define caltFeatureTag LE_CALT_FEATURE_TAG |
36 | #define kernFeatureTag LE_KERN_FEATURE_TAG | |
73c04bcf A |
37 | |
// One distinct bit per OpenType feature, allocated downward from the top bit
// (0x80000000) to 0x00000800. These values are OR-ed together to form the
// FeatureMask stored per output character.
38 | #define loclFeatureMask 0x80000000UL | |
39 | #define rphfFeatureMask 0x40000000UL | |
40 | #define blwfFeatureMask 0x20000000UL | |
41 | #define halfFeatureMask 0x10000000UL | |
42 | #define pstfFeatureMask 0x08000000UL | |
43 | #define nuktFeatureMask 0x04000000UL | |
44 | #define akhnFeatureMask 0x02000000UL | |
45 | #define vatuFeatureMask 0x01000000UL | |
46 | #define presFeatureMask 0x00800000UL | |
47 | #define blwsFeatureMask 0x00400000UL | |
48 | #define abvsFeatureMask 0x00200000UL | |
729e4ab9 | 49 | #define pstsFeatureMask 0x00100000UL |
73c04bcf A |
50 | #define halnFeatureMask 0x00080000UL |
51 | #define blwmFeatureMask 0x00040000UL | |
52 | #define abvmFeatureMask 0x00020000UL | |
53 | #define distFeatureMask 0x00010000UL | |
54 | #define initFeatureMask 0x00008000UL | |
729e4ab9 A |
55 | #define cjctFeatureMask 0x00004000UL |
56 | #define rkrfFeatureMask 0x00002000UL | |
57 | #define caltFeatureMask 0x00001000UL | |
58 | #define kernFeatureMask 0x00000800UL | |
59 | ||
// The syllable structure bits below sit directly under the feature bits
// (0x00000400 and lower), so they do not collide with any *FeatureMask value.
60 | // Syllable structure bits | |
61 | #define baseConsonantMask 0x00000400UL | |
62 | #define consonantMask 0x00000200UL | |
63 | #define halfConsonantMask 0x00000100UL | |
64 | #define rephConsonantMask 0x00000080UL | |
65 | #define matraMask 0x00000040UL | |
66 | #define vowelModifierMask 0x00000020UL | |
67 | #define markPositionMask 0x00000018UL | |
68 | ||
// The four *BasePosition values are the possible contents of the two-bit
// field selected by markPositionMask (0x18).
69 | #define postBasePosition 0x00000000UL | |
70 | #define preBasePosition 0x00000008UL | |
71 | #define aboveBasePosition 0x00000010UL | |
72 | #define belowBasePosition 0x00000018UL | |
73 | ||
74 | #define repositionedGlyphMask 0x00000002UL | |
75 | ||
// Convenience unions of the feature bits above, grouped by shaping phase.
76 | #define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask ) | |
77 | #define positioningFormsMask ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask ) | |
78 | #define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask ) | |
79 | ||
80 | ||
// Code points and sentinels used by the reordering code in this file.
// C_DOTTED_CIRCLE is written by reorder() as a stand-in base for marks that
// start a syllable; C_MALAYALAM_VOWEL_SIGN_U is the one character after which
// reorder() accepts a lone virama without inserting a dotted circle.
81 | #define C_MALAYALAM_VOWEL_SIGN_U 0x0D41 | |
82 | #define C_DOTTED_CIRCLE 0x25CC | |
83 | #define NO_GLYPH 0xFFFF | |
84 | ||
// findSyllable() ends the syllable before the consonant that would exceed
// MAX_CONSONANTS_PER_SYLLABLE.
85 | // Some level of debate as to the proper value for MAX_CONSONANTS_PER_SYLLABLE. Ticket 5588 states that 4 | |
86 | // is the magic number according to ISCII, but 5 seems to be the more consistent with XP. | |
87 | #define MAX_CONSONANTS_PER_SYLLABLE 5 | |
88 | ||
89 | #define INDIC_BLOCK_SIZE 0x7F | |
73c04bcf A |
90 | |
91 | class IndicReorderingOutput : public UMemory { | |
b75a7d8f | 92 | private: |
46f4442e | 93 | le_int32 fSyllableCount; |
73c04bcf | 94 | le_int32 fOutIndex; |
b75a7d8f | 95 | LEUnicode *fOutChars; |
374ca955 A |
96 | |
97 | LEGlyphStorage &fGlyphStorage; | |
b75a7d8f | 98 | |
73c04bcf A |
99 | LEUnicode fMpre; |
100 | le_int32 fMpreIndex; | |
374ca955 | 101 | |
73c04bcf A |
102 | LEUnicode fMbelow; |
103 | le_int32 fMbelowIndex; | |
374ca955 | 104 | |
73c04bcf A |
105 | LEUnicode fMabove; |
106 | le_int32 fMaboveIndex; | |
374ca955 | 107 | |
73c04bcf A |
108 | LEUnicode fMpost; |
109 | le_int32 fMpostIndex; | |
374ca955 | 110 | |
73c04bcf A |
111 | LEUnicode fLengthMark; |
112 | le_int32 fLengthMarkIndex; | |
374ca955 | 113 | |
46f4442e A |
114 | LEUnicode fAlLakuna; |
115 | le_int32 fAlLakunaIndex; | |
73c04bcf A |
116 | |
117 | FeatureMask fMatraFeatures; | |
374ca955 | 118 | |
73c04bcf | 119 | le_int32 fMPreOutIndex; |
b75a7d8f | 120 | MPreFixups *fMPreFixups; |
374ca955 | 121 | |
73c04bcf A |
122 | LEUnicode fVMabove; |
123 | LEUnicode fVMpost; | |
124 | le_int32 fVMIndex; | |
125 | FeatureMask fVMFeatures; | |
374ca955 | 126 | |
73c04bcf A |
127 | LEUnicode fSMabove; |
128 | LEUnicode fSMbelow; | |
129 | le_int32 fSMIndex; | |
130 | FeatureMask fSMFeatures; | |
374ca955 | 131 | |
729e4ab9 A |
132 | LEUnicode fPreBaseConsonant; |
133 | LEUnicode fPreBaseVirama; | |
134 | le_int32 fPBCIndex; | |
135 | FeatureMask fPBCFeatures; | |
136 | ||
374ca955 | 137 | void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass) |
b75a7d8f A |
138 | { |
139 | // FIXME: check if already set, or if not a matra... | |
374ca955 | 140 | if (IndicClassTable::isLengthMark(matraClass)) { |
b75a7d8f | 141 | fLengthMark = matra; |
374ca955 | 142 | fLengthMarkIndex = matraIndex; |
46f4442e A |
143 | } else if (IndicClassTable::isAlLakuna(matraClass)) { |
144 | fAlLakuna = matra; | |
145 | fAlLakunaIndex = matraIndex; | |
374ca955 | 146 | } else { |
73c04bcf A |
147 | switch (matraClass & CF_POS_MASK) { |
148 | case CF_POS_BEFORE: | |
374ca955 A |
149 | fMpre = matra; |
150 | fMpreIndex = matraIndex; | |
151 | break; | |
152 | ||
73c04bcf | 153 | case CF_POS_BELOW: |
374ca955 A |
154 | fMbelow = matra; |
155 | fMbelowIndex = matraIndex; | |
156 | break; | |
157 | ||
73c04bcf | 158 | case CF_POS_ABOVE: |
374ca955 A |
159 | fMabove = matra; |
160 | fMaboveIndex = matraIndex; | |
161 | break; | |
162 | ||
73c04bcf | 163 | case CF_POS_AFTER: |
374ca955 A |
164 | fMpost = matra; |
165 | fMpostIndex = matraIndex; | |
166 | break; | |
167 | ||
168 | default: | |
169 | // can't get here... | |
170 | break; | |
171 | } | |
b75a7d8f A |
172 | } |
173 | } | |
174 | ||
175 | public: | |
73c04bcf | 176 | IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups) |
46f4442e | 177 | : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage), |
374ca955 | 178 | fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0), |
46f4442e | 179 | fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0), |
73c04bcf A |
180 | fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups), |
181 | fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0), | |
729e4ab9 A |
182 | fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0), |
183 | fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0) | |
b75a7d8f A |
184 | { |
185 | // nothing else to do... | |
186 | } | |
187 | ||
73c04bcf | 188 | ~IndicReorderingOutput() |
b75a7d8f A |
189 | { |
190 | // nothing to do here... | |
191 | } | |
192 | ||
374ca955 | 193 | void reset() |
b75a7d8f | 194 | { |
46f4442e A |
195 | fSyllableCount += 1; |
196 | ||
197 | fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0; | |
b75a7d8f | 198 | fMPreOutIndex = -1; |
374ca955 A |
199 | |
200 | fVMabove = fVMpost = 0; | |
201 | fSMabove = fSMbelow = 0; | |
729e4ab9 A |
202 | |
203 | fPreBaseConsonant = fPreBaseVirama = 0; | |
374ca955 A |
204 | } |
205 | ||
73c04bcf A |
206 | void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) |
207 | { | |
208 | LEErrorCode success = LE_NO_ERROR; | |
209 | ||
210 | fOutChars[fOutIndex] = ch; | |
211 | ||
212 | fGlyphStorage.setCharIndex(fOutIndex, charIndex, success); | |
46f4442e | 213 | fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success); |
73c04bcf A |
214 | |
215 | fOutIndex += 1; | |
216 | } | |
217 | ||
729e4ab9 A |
218 | void setFeatures ( le_uint32 charIndex, FeatureMask charFeatures) |
219 | { | |
220 | LEErrorCode success = LE_NO_ERROR; | |
221 | ||
222 | fGlyphStorage.setAuxData( charIndex, charFeatures, success ); | |
223 | ||
224 | } | |
225 | ||
226 | FeatureMask getFeatures ( le_uint32 charIndex ) | |
227 | { | |
228 | LEErrorCode success = LE_NO_ERROR; | |
229 | return fGlyphStorage.getAuxData(charIndex,success); | |
230 | } | |
231 | ||
232 | void decomposeReorderMatras ( const IndicClassTable *classTable, le_int32 beginSyllable, le_int32 nextSyllable, le_int32 inv_count ) { | |
233 | le_int32 i; | |
234 | LEErrorCode success = LE_NO_ERROR; | |
235 | ||
236 | for ( i = beginSyllable ; i < nextSyllable ; i++ ) { | |
237 | if ( classTable->isMatra(fOutChars[i+inv_count])) { | |
238 | IndicClassTable::CharClass matraClass = classTable->getCharClass(fOutChars[i+inv_count]); | |
239 | if ( classTable->isSplitMatra(matraClass)) { | |
240 | le_int32 saveIndex = fGlyphStorage.getCharIndex(i+inv_count,success); | |
241 | le_uint32 saveAuxData = fGlyphStorage.getAuxData(i+inv_count,success); | |
242 | const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); | |
243 | int j; | |
244 | for (j = 0 ; *(splitMatra)[j] != 0 ; j++) { | |
245 | LEUnicode piece = (*splitMatra)[j]; | |
246 | if ( j == 0 ) { | |
247 | fOutChars[i+inv_count] = piece; | |
248 | matraClass = classTable->getCharClass(piece); | |
249 | } else { | |
250 | insertCharacter(piece,i+1+inv_count,saveIndex,saveAuxData); | |
251 | nextSyllable++; | |
252 | } | |
253 | } | |
254 | } | |
255 | ||
256 | if ((matraClass & CF_POS_MASK) == CF_POS_BEFORE) { | |
257 | moveCharacter(i+inv_count,beginSyllable+inv_count); | |
258 | } | |
259 | } | |
260 | } | |
261 | } | |
262 | ||
263 | void moveCharacter( le_int32 fromPosition, le_int32 toPosition ) { | |
264 | le_int32 i,saveIndex; | |
265 | le_uint32 saveAuxData; | |
266 | LEUnicode saveChar = fOutChars[fromPosition]; | |
267 | LEErrorCode success = LE_NO_ERROR; | |
268 | LEErrorCode success2 = LE_NO_ERROR; | |
269 | saveIndex = fGlyphStorage.getCharIndex(fromPosition,success); | |
270 | saveAuxData = fGlyphStorage.getAuxData(fromPosition,success); | |
271 | ||
272 | if ( fromPosition > toPosition ) { | |
273 | for ( i = fromPosition ; i > toPosition ; i-- ) { | |
274 | fOutChars[i] = fOutChars[i-1]; | |
275 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success2),success); | |
276 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success2), success); | |
277 | ||
278 | } | |
279 | } else { | |
280 | for ( i = fromPosition ; i < toPosition ; i++ ) { | |
281 | fOutChars[i] = fOutChars[i+1]; | |
282 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success2),success); | |
283 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success2), success); | |
284 | } | |
285 | ||
286 | } | |
287 | fOutChars[toPosition] = saveChar; | |
288 | fGlyphStorage.setCharIndex(toPosition,saveIndex,success); | |
289 | fGlyphStorage.setAuxData(toPosition,saveAuxData,success); | |
290 | ||
291 | } | |
292 | void insertCharacter( LEUnicode ch, le_int32 toPosition, le_int32 charIndex, le_uint32 auxData ) { | |
293 | LEErrorCode success = LE_NO_ERROR; | |
294 | le_int32 i; | |
295 | fOutIndex += 1; | |
296 | ||
297 | for ( i = fOutIndex ; i > toPosition ; i--) { | |
298 | fOutChars[i] = fOutChars[i-1]; | |
299 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success),success); | |
300 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success), success); | |
301 | } | |
302 | ||
303 | fOutChars[toPosition] = ch; | |
304 | fGlyphStorage.setCharIndex(toPosition,charIndex,success); | |
305 | fGlyphStorage.setAuxData(toPosition,auxData,success); | |
306 | ||
307 | } | |
308 | void removeCharacter( le_int32 fromPosition ) { | |
309 | LEErrorCode success = LE_NO_ERROR; | |
310 | le_int32 i; | |
311 | fOutIndex -= 1; | |
312 | ||
313 | for ( i = fromPosition ; i < fOutIndex ; i--) { | |
314 | fOutChars[i] = fOutChars[i+1]; | |
315 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success),success); | |
316 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success), success); | |
317 | } | |
318 | } | |
319 | ||
73c04bcf | 320 | le_bool noteMatra(const IndicClassTable *classTable, LEUnicode matra, le_uint32 matraIndex, FeatureMask matraFeatures, le_bool wordStart) |
374ca955 A |
321 | { |
322 | IndicClassTable::CharClass matraClass = classTable->getCharClass(matra); | |
323 | ||
73c04bcf A |
324 | fMatraFeatures = matraFeatures; |
325 | ||
326 | if (wordStart) { | |
327 | fMatraFeatures |= initFeatureMask; | |
328 | } | |
b75a7d8f A |
329 | |
330 | if (IndicClassTable::isMatra(matraClass)) { | |
331 | if (IndicClassTable::isSplitMatra(matraClass)) { | |
332 | const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); | |
333 | int i; | |
334 | ||
335 | for (i = 0; i < 3 && (*splitMatra)[i] != 0; i += 1) { | |
336 | LEUnicode piece = (*splitMatra)[i]; | |
337 | IndicClassTable::CharClass pieceClass = classTable->getCharClass(piece); | |
338 | ||
374ca955 | 339 | saveMatra(piece, matraIndex, pieceClass); |
b75a7d8f A |
340 | } |
341 | } else { | |
374ca955 | 342 | saveMatra(matra, matraIndex, matraClass); |
b75a7d8f | 343 | } |
374ca955 A |
344 | |
345 | return TRUE; | |
346 | } | |
347 | ||
348 | return FALSE; | |
349 | } | |
350 | ||
73c04bcf | 351 | void noteVowelModifier(const IndicClassTable *classTable, LEUnicode vowelModifier, le_uint32 vowelModifierIndex, FeatureMask vowelModifierFeatures) |
374ca955 A |
352 | { |
353 | IndicClassTable::CharClass vmClass = classTable->getCharClass(vowelModifier); | |
354 | ||
355 | fVMIndex = vowelModifierIndex; | |
73c04bcf | 356 | fVMFeatures = vowelModifierFeatures; |
374ca955 A |
357 | |
358 | if (IndicClassTable::isVowelModifier(vmClass)) { | |
73c04bcf A |
359 | switch (vmClass & CF_POS_MASK) { |
360 | case CF_POS_ABOVE: | |
374ca955 A |
361 | fVMabove = vowelModifier; |
362 | break; | |
363 | ||
73c04bcf | 364 | case CF_POS_AFTER: |
374ca955 A |
365 | fVMpost = vowelModifier; |
366 | break; | |
367 | ||
368 | default: | |
369 | // FIXME: this is an error... | |
370 | break; | |
371 | } | |
372 | } | |
373 | } | |
374 | ||
73c04bcf | 375 | void noteStressMark(const IndicClassTable *classTable, LEUnicode stressMark, le_uint32 stressMarkIndex, FeatureMask stressMarkFeatures) |
374ca955 A |
376 | { |
377 | IndicClassTable::CharClass smClass = classTable->getCharClass(stressMark); | |
378 | ||
379 | fSMIndex = stressMarkIndex; | |
73c04bcf | 380 | fSMFeatures = stressMarkFeatures; |
374ca955 A |
381 | |
382 | if (IndicClassTable::isStressMark(smClass)) { | |
73c04bcf A |
383 | switch (smClass & CF_POS_MASK) { |
384 | case CF_POS_ABOVE: | |
374ca955 A |
385 | fSMabove = stressMark; |
386 | break; | |
387 | ||
73c04bcf | 388 | case CF_POS_BELOW: |
374ca955 A |
389 | fSMbelow = stressMark; |
390 | break; | |
391 | ||
392 | default: | |
393 | // FIXME: this is an error... | |
394 | break; | |
395 | } | |
b75a7d8f A |
396 | } |
397 | } | |
398 | ||
729e4ab9 A |
399 | void notePreBaseConsonant(le_uint32 index,LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features) |
400 | { | |
401 | fPBCIndex = index; | |
402 | fPreBaseConsonant = PBConsonant; | |
403 | fPreBaseVirama = PBVirama; | |
404 | fPBCFeatures = features; | |
405 | } | |
406 | ||
b75a7d8f A |
407 | void noteBaseConsonant() |
408 | { | |
409 | if (fMPreFixups != NULL && fMPreOutIndex >= 0) { | |
410 | fMPreFixups->add(fOutIndex, fMPreOutIndex); | |
411 | } | |
412 | } | |
413 | ||
46f4442e A |
414 | // Handles Al-Lakuna in Sinhala split vowels. |
415 | void writeAlLakuna() | |
73c04bcf | 416 | { |
46f4442e A |
417 | if (fAlLakuna != 0) { |
418 | writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures); | |
73c04bcf A |
419 | } |
420 | } | |
421 | ||
b75a7d8f A |
422 | void writeMpre() |
423 | { | |
424 | if (fMpre != 0) { | |
425 | fMPreOutIndex = fOutIndex; | |
73c04bcf | 426 | writeChar(fMpre, fMpreIndex, fMatraFeatures); |
b75a7d8f A |
427 | } |
428 | } | |
429 | ||
430 | void writeMbelow() | |
431 | { | |
432 | if (fMbelow != 0) { | |
73c04bcf | 433 | writeChar(fMbelow, fMbelowIndex, fMatraFeatures); |
b75a7d8f A |
434 | } |
435 | } | |
436 | ||
437 | void writeMabove() | |
438 | { | |
439 | if (fMabove != 0) { | |
73c04bcf | 440 | writeChar(fMabove, fMaboveIndex, fMatraFeatures); |
b75a7d8f A |
441 | } |
442 | } | |
443 | ||
444 | void writeMpost() | |
445 | { | |
446 | if (fMpost != 0) { | |
73c04bcf | 447 | writeChar(fMpost, fMpostIndex, fMatraFeatures); |
b75a7d8f A |
448 | } |
449 | } | |
450 | ||
451 | void writeLengthMark() | |
452 | { | |
453 | if (fLengthMark != 0) { | |
73c04bcf | 454 | writeChar(fLengthMark, fLengthMarkIndex, fMatraFeatures); |
b75a7d8f A |
455 | } |
456 | } | |
374ca955 A |
457 | |
458 | void writeVMabove() | |
459 | { | |
460 | if (fVMabove != 0) { | |
73c04bcf | 461 | writeChar(fVMabove, fVMIndex, fVMFeatures); |
374ca955 A |
462 | } |
463 | } | |
464 | ||
465 | void writeVMpost() | |
466 | { | |
467 | if (fVMpost != 0) { | |
73c04bcf | 468 | writeChar(fVMpost, fVMIndex, fVMFeatures); |
374ca955 A |
469 | } |
470 | } | |
471 | ||
472 | void writeSMabove() | |
473 | { | |
474 | if (fSMabove != 0) { | |
73c04bcf | 475 | writeChar(fSMabove, fSMIndex, fSMFeatures); |
374ca955 A |
476 | } |
477 | } | |
478 | ||
479 | void writeSMbelow() | |
480 | { | |
481 | if (fSMbelow != 0) { | |
73c04bcf | 482 | writeChar(fSMbelow, fSMIndex, fSMFeatures); |
374ca955 A |
483 | } |
484 | } | |
485 | ||
729e4ab9 A |
486 | void writePreBaseConsonant() |
487 | { | |
488 | // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However, | |
489 | // it seems that almost none of the fonts for Malayalam are set up to handle this. | |
490 | // So, we're going to force the issue here by using the rakar as defined with RA in most fonts. | |
491 | ||
492 | if (fPreBaseConsonant == 0x0d31) { // RRA | |
493 | fPreBaseConsonant = 0x0d30; // RA | |
494 | } | |
495 | ||
496 | if (fPreBaseConsonant != 0) { | |
497 | writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures); | |
498 | writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures); | |
499 | } | |
500 | } | |
501 | ||
b75a7d8f A |
502 | le_int32 getOutputIndex() |
503 | { | |
504 | return fOutIndex; | |
505 | } | |
506 | }; | |
507 | ||
729e4ab9 | 508 | |
b75a7d8f | 509 | |
73c04bcf A |
510 | // TODO: Find better names for these! |
// Nested feature sets: tagArray4 is the common core, and each lower-numbered
// macro adds exactly one more feature bit on top of the next one
// (tagArray3 adds pstf, tagArray2 adds half, tagArray1 adds blwf, tagArray0 adds rphf).
511 | #define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask) | |
512 | #define tagArray3 (pstfFeatureMask | tagArray4) | |
513 | #define tagArray2 (halfFeatureMask | tagArray3) | |
514 | #define tagArray1 (blwfFeatureMask | tagArray2) | |
515 | #define tagArray0 (rphfFeatureMask | tagArray1) | |
b75a7d8f | 516 | |
// Pairs each OpenType feature tag with its mask bit. This is the table handed
// out by IndicReordering::getFeatureMap(); featureCount is its entry count.
729e4ab9 | 517 | static const FeatureMap featureMap[] = {
73c04bcf A |
518 | {loclFeatureTag, loclFeatureMask}, |
519 | {initFeatureTag, initFeatureMask}, | |
520 | {nuktFeatureTag, nuktFeatureMask}, | |
521 | {akhnFeatureTag, akhnFeatureMask}, | |
522 | {rphfFeatureTag, rphfFeatureMask}, | |
523 | {blwfFeatureTag, blwfFeatureMask}, | |
524 | {halfFeatureTag, halfFeatureMask}, | |
525 | {pstfFeatureTag, pstfFeatureMask}, | |
526 | {vatuFeatureTag, vatuFeatureMask}, | |
527 | {presFeatureTag, presFeatureMask}, | |
528 | {blwsFeatureTag, blwsFeatureMask}, | |
529 | {abvsFeatureTag, abvsFeatureMask}, | |
530 | {pstsFeatureTag, pstsFeatureMask}, | |
531 | {halnFeatureTag, halnFeatureMask}, | |
532 | {blwmFeatureTag, blwmFeatureMask}, | |
533 | {abvmFeatureTag, abvmFeatureMask}, | |
534 | {distFeatureTag, distFeatureMask} | |
b75a7d8f A |
535 | }; |
536 | ||
73c04bcf | 537 | static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap);
b75a7d8f | 538 | |
729e4ab9 A |
// Tag-to-mask table handed out by IndicReordering::getv2FeatureMap().
// Compared with featureMap it adds rkrf, cjct, calt and kern and has no
// init or pstf entry; v2FeatureMapCount is its entry count.
539 | static const FeatureMap v2FeatureMap[] = { |
540 | {loclFeatureTag, loclFeatureMask}, | |
541 | {nuktFeatureTag, nuktFeatureMask}, | |
542 | {akhnFeatureTag, akhnFeatureMask}, | |
543 | {rphfFeatureTag, rphfFeatureMask}, | |
544 | {rkrfFeatureTag, rkrfFeatureMask}, | |
545 | {blwfFeatureTag, blwfFeatureMask}, | |
546 | {halfFeatureTag, halfFeatureMask}, | |
547 | {vatuFeatureTag, vatuFeatureMask}, | |
548 | {cjctFeatureTag, cjctFeatureMask}, | |
549 | {presFeatureTag, presFeatureMask}, | |
550 | {abvsFeatureTag, abvsFeatureMask}, | |
551 | {blwsFeatureTag, blwsFeatureMask}, | |
552 | {pstsFeatureTag, pstsFeatureMask}, | |
553 | {halnFeatureTag, halnFeatureMask}, | |
554 | {caltFeatureTag, caltFeatureMask}, | |
555 | {kernFeatureTag, kernFeatureMask}, | |
556 | {distFeatureTag, distFeatureMask}, | |
557 | {abvmFeatureTag, abvmFeatureMask}, | |
558 | {blwmFeatureTag, blwmFeatureMask} | |
559 | }; | |
560 | ||
561 | static const le_int32 v2FeatureMapCount = LE_ARRAY_SIZE(v2FeatureMap); | |
562 | ||
73c04bcf A |
// Syllable-recognition state machine used by findSyllable().
// Rows are states (0 is the starting state); columns are indexed by the
// character class (charClass & CF_CLASS_MASK). Each entry is the next state;
// a negative entry ends the syllable before the current character.
563 | static const le_int8 stateTable[][CC_COUNT] = |
564 | { | |
46f4442e A |
565 | // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al |
566 | { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state | |
567 | {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state | |
568 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta | |
569 | {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant | |
570 | {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama | |
571 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels | |
572 | {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark | |
573 | {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama | |
574 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama | |
575 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel | |
576 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel | |
577 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv | |
578 | {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?) | |
579 | {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant | |
b75a7d8f A |
580 | }; |
581 | ||
73c04bcf A |
582 | |
583 | const FeatureMap *IndicReordering::getFeatureMap(le_int32 &count) | |
b75a7d8f | 584 | { |
73c04bcf A |
585 | count = featureCount; |
586 | ||
587 | return featureMap; | |
b75a7d8f A |
588 | } |
589 | ||
729e4ab9 A |
590 | const FeatureMap *IndicReordering::getv2FeatureMap(le_int32 &count) |
591 | { | |
592 | count = v2FeatureMapCount; | |
593 | ||
594 | return v2FeatureMap; | |
595 | } | |
596 | ||
b75a7d8f A |
597 | le_int32 IndicReordering::findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount) |
598 | { | |
599 | le_int32 cursor = prev; | |
600 | le_int8 state = 0; | |
729e4ab9 | 601 | le_int8 consonant_count = 0; |
b75a7d8f A |
602 | |
603 | while (cursor < charCount) { | |
604 | IndicClassTable::CharClass charClass = classTable->getCharClass(chars[cursor]); | |
729e4ab9 A |
605 | |
606 | if ( IndicClassTable::isConsonant(charClass) ) { | |
607 | consonant_count++; | |
608 | if ( consonant_count > MAX_CONSONANTS_PER_SYLLABLE ) { | |
609 | break; | |
610 | } | |
611 | } | |
b75a7d8f | 612 | |
73c04bcf | 613 | state = stateTable[state][charClass & CF_CLASS_MASK]; |
b75a7d8f A |
614 | |
615 | if (state < 0) { | |
616 | break; | |
617 | } | |
618 | ||
619 | cursor += 1; | |
620 | } | |
621 | ||
622 | return cursor; | |
623 | } | |
624 | ||
625 | le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, | |
374ca955 | 626 | LEUnicode *outChars, LEGlyphStorage &glyphStorage, |
729e4ab9 | 627 | MPreFixups **outMPreFixups, LEErrorCode& success) |
b75a7d8f | 628 | { |
729e4ab9 A |
629 | if (LE_FAILURE(success)) { |
630 | return 0; | |
631 | } | |
632 | ||
b75a7d8f A |
633 | MPreFixups *mpreFixups = NULL; |
634 | const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); | |
635 | ||
73c04bcf | 636 | if (classTable->scriptFlags & SF_MPRE_FIXUP) { |
b75a7d8f | 637 | mpreFixups = new MPreFixups(charCount); |
729e4ab9 A |
638 | if (mpreFixups == NULL) { |
639 | success = LE_MEMORY_ALLOCATION_ERROR; | |
640 | return 0; | |
641 | } | |
b75a7d8f A |
642 | } |
643 | ||
73c04bcf | 644 | IndicReorderingOutput output(outChars, glyphStorage, mpreFixups); |
b75a7d8f | 645 | le_int32 i, prev = 0; |
73c04bcf | 646 | le_bool lastInWord = FALSE; |
b75a7d8f A |
647 | |
648 | while (prev < charCount) { | |
649 | le_int32 syllable = findSyllable(classTable, chars, prev, charCount); | |
374ca955 | 650 | le_int32 matra, markStart = syllable; |
b75a7d8f | 651 | |
374ca955 A |
652 | output.reset(); |
653 | ||
654 | if (classTable->isStressMark(chars[markStart - 1])) { | |
655 | markStart -= 1; | |
73c04bcf | 656 | output.noteStressMark(classTable, chars[markStart], markStart, tagArray1); |
b75a7d8f | 657 | } |
374ca955 | 658 | |
73c04bcf | 659 | if (markStart != prev && classTable->isVowelModifier(chars[markStart - 1])) { |
374ca955 | 660 | markStart -= 1; |
73c04bcf | 661 | output.noteVowelModifier(classTable, chars[markStart], markStart, tagArray1); |
b75a7d8f A |
662 | } |
663 | ||
374ca955 A |
664 | matra = markStart - 1; |
665 | ||
73c04bcf | 666 | while (output.noteMatra(classTable, chars[matra], matra, tagArray1, !lastInWord) && matra != prev) { |
374ca955 A |
667 | matra -= 1; |
668 | } | |
b75a7d8f | 669 | |
73c04bcf A |
670 | lastInWord = TRUE; |
671 | ||
672 | switch (classTable->getCharClass(chars[prev]) & CF_CLASS_MASK) { | |
673 | case CC_RESERVED: | |
674 | lastInWord = FALSE; | |
675 | /* fall through */ | |
676 | ||
677 | case CC_INDEPENDENT_VOWEL: | |
678 | case CC_ZERO_WIDTH_MARK: | |
b75a7d8f | 679 | for (i = prev; i < syllable; i += 1) { |
73c04bcf | 680 | output.writeChar(chars[i], i, tagArray1); |
b75a7d8f A |
681 | } |
682 | ||
683 | break; | |
684 | ||
46f4442e | 685 | case CC_AL_LAKUNA: |
73c04bcf | 686 | case CC_NUKTA: |
73c04bcf A |
687 | output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); |
688 | output.writeChar(chars[prev], prev, tagArray1); | |
b75a7d8f A |
689 | break; |
690 | ||
46f4442e A |
691 | case CC_VIRAMA: |
692 | // A lone virama is illegal unless it follows a | |
693 | // MALAYALAM_VOWEL_SIGN_U. Such a usage is called | |
694 | // "samvruthokaram". | |
695 | if (chars[prev - 1] != C_MALAYALAM_VOWEL_SIGN_U) { | |
696 | output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); | |
697 | } | |
698 | ||
699 | output.writeChar(chars[prev], prev, tagArray1); | |
700 | break; | |
701 | ||
73c04bcf A |
702 | case CC_DEPENDENT_VOWEL: |
703 | case CC_SPLIT_VOWEL_PIECE_1: | |
704 | case CC_SPLIT_VOWEL_PIECE_2: | |
705 | case CC_SPLIT_VOWEL_PIECE_3: | |
706 | case CC_VOWEL_MODIFIER: | |
707 | case CC_STRESS_MARK: | |
b75a7d8f | 708 | output.writeMpre(); |
374ca955 | 709 | |
73c04bcf | 710 | output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); |
374ca955 | 711 | |
b75a7d8f | 712 | output.writeMbelow(); |
374ca955 | 713 | output.writeSMbelow(); |
b75a7d8f | 714 | output.writeMabove(); |
374ca955 | 715 | |
73c04bcf | 716 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { |
374ca955 A |
717 | output.writeMpost(); |
718 | } | |
719 | ||
73c04bcf | 720 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { |
374ca955 A |
721 | output.writeVMabove(); |
722 | output.writeSMabove(); // FIXME: there are no SM's in these scripts... | |
723 | } | |
724 | ||
73c04bcf | 725 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { |
374ca955 A |
726 | output.writeMpost(); |
727 | } | |
728 | ||
b75a7d8f | 729 | output.writeLengthMark(); |
46f4442e | 730 | output.writeAlLakuna(); |
374ca955 | 731 | |
73c04bcf | 732 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { |
374ca955 A |
733 | output.writeVMabove(); |
734 | output.writeSMabove(); | |
735 | } | |
736 | ||
737 | output.writeVMpost(); | |
b75a7d8f A |
738 | break; |
739 | ||
73c04bcf A |
740 | case CC_INDEPENDENT_VOWEL_2: |
741 | case CC_INDEPENDENT_VOWEL_3: | |
742 | case CC_CONSONANT: | |
743 | case CC_CONSONANT_WITH_NUKTA: | |
b75a7d8f | 744 | { |
374ca955 A |
745 | le_uint32 length = markStart - prev; |
746 | le_int32 lastConsonant = markStart - 1; | |
b75a7d8f A |
747 | le_int32 baseLimit = prev; |
748 | ||
749 | // Check for REPH at front of syllable | |
729e4ab9 | 750 | if (length > 2 && classTable->isReph(chars[prev]) && classTable->isVirama(chars[prev + 1]) && chars[prev + 2] != C_SIGN_ZWNJ) { |
b75a7d8f A |
751 | baseLimit += 2; |
752 | ||
753 | // Check for eyelash RA, if the script supports it | |
73c04bcf | 754 | if ((classTable->scriptFlags & SF_EYELASH_RA) != 0 && |
b75a7d8f A |
755 | chars[baseLimit] == C_SIGN_ZWJ) { |
756 | if (length > 3) { | |
757 | baseLimit += 1; | |
758 | } else { | |
759 | baseLimit -= 2; | |
760 | } | |
761 | } | |
762 | } | |
763 | ||
764 | while (lastConsonant > baseLimit && !classTable->isConsonant(chars[lastConsonant])) { | |
765 | lastConsonant -= 1; | |
766 | } | |
767 | ||
729e4ab9 | 768 | |
46f4442e A |
769 | IndicClassTable::CharClass charClass = CC_RESERVED; |
770 | IndicClassTable::CharClass nextClass = CC_RESERVED; | |
b75a7d8f A |
771 | le_int32 baseConsonant = lastConsonant; |
772 | le_int32 postBase = lastConsonant + 1; | |
73c04bcf | 773 | le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK; |
374ca955 A |
774 | le_bool seenVattu = FALSE; |
775 | le_bool seenBelowBaseForm = FALSE; | |
729e4ab9 | 776 | le_bool seenPreBaseForm = FALSE; |
46f4442e A |
777 | le_bool hasNukta = FALSE; |
778 | le_bool hasBelowBaseForm = FALSE; | |
779 | le_bool hasPostBaseForm = FALSE; | |
729e4ab9 | 780 | le_bool hasPreBaseForm = FALSE; |
374ca955 | 781 | |
73c04bcf | 782 | if (postBase < markStart && classTable->isNukta(chars[postBase])) { |
46f4442e | 783 | charClass = CC_NUKTA; |
374ca955 A |
784 | postBase += 1; |
785 | } | |
b75a7d8f A |
786 | |
787 | while (baseConsonant > baseLimit) { | |
46f4442e A |
788 | nextClass = charClass; |
789 | hasNukta = IndicClassTable::isNukta(nextClass); | |
790 | charClass = classTable->getCharClass(chars[baseConsonant]); | |
791 | ||
792 | hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta; | |
793 | hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta; | |
729e4ab9 | 794 | hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta; |
b75a7d8f A |
795 | |
796 | if (IndicClassTable::isConsonant(charClass)) { | |
797 | if (postBaseLimit == 0 || seenVattu || | |
798 | (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) || | |
729e4ab9 | 799 | !(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) { |
b75a7d8f A |
800 | break; |
801 | } | |
802 | ||
729e4ab9 A |
803 | // Note any pre-base consonants |
804 | if ( baseConsonant == lastConsonant && lastConsonant > 0 && | |
805 | hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) { | |
806 | output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2); | |
807 | seenPreBaseForm = TRUE; | |
808 | ||
809 | } | |
46f4442e A |
810 | // consonants with nuktas are never vattus |
811 | seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta; | |
b75a7d8f | 812 | |
46f4442e A |
813 | // consonants with nuktas never have below- or post-base forms |
814 | if (hasPostBaseForm) { | |
b75a7d8f A |
815 | if (seenBelowBaseForm) { |
816 | break; | |
817 | } | |
818 | ||
819 | postBase = baseConsonant; | |
46f4442e | 820 | } else if (hasBelowBaseForm) { |
374ca955 | 821 | seenBelowBaseForm = TRUE; |
b75a7d8f A |
822 | } |
823 | ||
824 | postBaseLimit -= 1; | |
825 | } | |
826 | ||
827 | baseConsonant -= 1; | |
828 | } | |
829 | ||
830 | // Write Mpre | |
831 | output.writeMpre(); | |
832 | ||
833 | // Write eyelash RA | |
834 | // NOTE: baseLimit == prev + 3 iff eyelash RA present... | |
835 | if (baseLimit == prev + 3) { | |
73c04bcf A |
836 | output.writeChar(chars[prev], prev, tagArray2); |
837 | output.writeChar(chars[prev + 1], prev + 1, tagArray2); | |
838 | output.writeChar(chars[prev + 2], prev + 2, tagArray2); | |
b75a7d8f A |
839 | } |
840 | ||
841 | // write any pre-base consonants | |
729e4ab9 A |
842 | output.writePreBaseConsonant(); |
843 | ||
374ca955 | 844 | le_bool supressVattu = TRUE; |
b75a7d8f A |
845 | |
846 | for (i = baseLimit; i < baseConsonant; i += 1) { | |
847 | LEUnicode ch = chars[i]; | |
729e4ab9 A |
848 | // Don't put 'pstf' or 'blwf' on anything before the base consonant. |
849 | FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask ); | |
46f4442e A |
850 | |
851 | charClass = classTable->getCharClass(ch); | |
852 | nextClass = classTable->getCharClass(chars[i + 1]); | |
853 | hasNukta = IndicClassTable::isNukta(nextClass); | |
b75a7d8f A |
854 | |
855 | if (IndicClassTable::isConsonant(charClass)) { | |
46f4442e | 856 | if (IndicClassTable::isVattu(charClass) && !hasNukta && supressVattu) { |
73c04bcf | 857 | features = tagArray4; |
b75a7d8f A |
858 | } |
859 | ||
46f4442e | 860 | supressVattu = IndicClassTable::isVattu(charClass) && !hasNukta; |
b75a7d8f A |
861 | } else if (IndicClassTable::isVirama(charClass) && chars[i + 1] == C_SIGN_ZWNJ) |
862 | { | |
73c04bcf | 863 | features = tagArray4; |
b75a7d8f A |
864 | } |
865 | ||
73c04bcf | 866 | output.writeChar(ch, i, features); |
b75a7d8f A |
867 | } |
868 | ||
869 | le_int32 bcSpan = baseConsonant + 1; | |
870 | ||
374ca955 | 871 | if (bcSpan < markStart && classTable->isNukta(chars[bcSpan])) { |
b75a7d8f A |
872 | bcSpan += 1; |
873 | } | |
874 | ||
46f4442e A |
875 | if (baseConsonant == lastConsonant && bcSpan < markStart && |
876 | (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) { | |
b75a7d8f A |
877 | bcSpan += 1; |
878 | ||
374ca955 | 879 | if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) { |
b75a7d8f A |
880 | bcSpan += 1; |
881 | } | |
882 | } | |
883 | ||
884 | // note the base consonant for post-GSUB fixups | |
885 | output.noteBaseConsonant(); | |
886 | ||
887 | // write base consonant | |
888 | for (i = baseConsonant; i < bcSpan; i += 1) { | |
73c04bcf | 889 | output.writeChar(chars[i], i, tagArray4); |
b75a7d8f A |
890 | } |
891 | ||
73c04bcf | 892 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { |
b75a7d8f | 893 | output.writeMbelow(); |
374ca955 | 894 | output.writeSMbelow(); // FIXME: there are no SMs in these scripts... |
b75a7d8f A |
895 | output.writeMabove(); |
896 | output.writeMpost(); | |
897 | } | |
898 | ||
899 | // write below-base consonants | |
729e4ab9 | 900 | if (baseConsonant != lastConsonant && !seenPreBaseForm) { |
b75a7d8f | 901 | for (i = bcSpan + 1; i < postBase; i += 1) { |
73c04bcf | 902 | output.writeChar(chars[i], i, tagArray1); |
b75a7d8f A |
903 | } |
904 | ||
905 | if (postBase > lastConsonant) { | |
906 | // write halant that was after base consonant | |
73c04bcf | 907 | output.writeChar(chars[bcSpan], bcSpan, tagArray1); |
b75a7d8f A |
908 | } |
909 | } | |
910 | ||
374ca955 | 911 | // write Mbelow, SMbelow, Mabove |
73c04bcf | 912 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { |
b75a7d8f | 913 | output.writeMbelow(); |
374ca955 | 914 | output.writeSMbelow(); |
b75a7d8f A |
915 | output.writeMabove(); |
916 | } | |
917 | ||
73c04bcf | 918 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { |
b75a7d8f | 919 | if (baseLimit == prev + 2) { |
73c04bcf A |
920 | output.writeChar(chars[prev], prev, tagArray0); |
921 | output.writeChar(chars[prev + 1], prev + 1, tagArray0); | |
b75a7d8f A |
922 | } |
923 | ||
374ca955 A |
924 | output.writeVMabove(); |
925 | output.writeSMabove(); // FIXME: there are no SM's in these scripts... | |
b75a7d8f A |
926 | } |
927 | ||
928 | // write post-base consonants | |
929 | // FIXME: does this put the right tags on post-base consonants? | |
729e4ab9 | 930 | if (baseConsonant != lastConsonant && !seenPreBaseForm) { |
b75a7d8f A |
931 | if (postBase <= lastConsonant) { |
932 | for (i = postBase; i <= lastConsonant; i += 1) { | |
73c04bcf | 933 | output.writeChar(chars[i], i, tagArray3); |
b75a7d8f A |
934 | } |
935 | ||
936 | // write halant that was after base consonant | |
73c04bcf | 937 | output.writeChar(chars[bcSpan], bcSpan, tagArray1); |
b75a7d8f A |
938 | } |
939 | ||
940 | // write the trailing halant, if there is one | |
941 | if (lastConsonant < matra && classTable->isVirama(chars[matra])) { | |
73c04bcf | 942 | output.writeChar(chars[matra], matra, tagArray4); |
b75a7d8f A |
943 | } |
944 | } | |
945 | ||
946 | // write Mpost | |
73c04bcf | 947 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { |
b75a7d8f A |
948 | output.writeMpost(); |
949 | } | |
950 | ||
951 | output.writeLengthMark(); | |
46f4442e | 952 | output.writeAlLakuna(); |
b75a7d8f A |
953 | |
954 | // write reph | |
73c04bcf | 955 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { |
b75a7d8f | 956 | if (baseLimit == prev + 2) { |
73c04bcf A |
957 | output.writeChar(chars[prev], prev, tagArray0); |
958 | output.writeChar(chars[prev + 1], prev + 1, tagArray0); | |
b75a7d8f A |
959 | } |
960 | ||
374ca955 A |
961 | output.writeVMabove(); |
962 | output.writeSMabove(); | |
b75a7d8f A |
963 | } |
964 | ||
374ca955 | 965 | output.writeVMpost(); |
b75a7d8f A |
966 | |
967 | break; | |
968 | } | |
969 | ||
970 | default: | |
971 | break; | |
972 | } | |
973 | ||
974 | prev = syllable; | |
975 | } | |
976 | ||
977 | *outMPreFixups = mpreFixups; | |
978 | ||
979 | return output.getOutputIndex(); | |
980 | } | |
981 | ||
729e4ab9 | 982 | void IndicReordering::adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success) |
b75a7d8f A |
983 | { |
984 | if (mpreFixups != NULL) { | |
729e4ab9 | 985 | mpreFixups->apply(glyphStorage, success); |
b75a7d8f A |
986 | |
987 | delete mpreFixups; | |
988 | } | |
989 | } | |
990 | ||
729e4ab9 A |
991 | void IndicReordering::applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count) |
992 | { | |
993 | LEErrorCode success = LE_NO_ERROR; | |
994 | ||
995 | // This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the | |
996 | // GPOS table lookups | |
997 | ||
998 | for ( le_int32 i = 0 ; i < count ; i++ ) { | |
999 | glyphStorage.setAuxData(i, ( presentationFormsMask | positioningFormsMask ), success); | |
1000 | } | |
1001 | ||
1002 | } | |
1003 | void IndicReordering::finalReordering(LEGlyphStorage &glyphStorage, le_int32 count) | |
1004 | { | |
1005 | LEErrorCode success = LE_NO_ERROR; | |
1006 | ||
1007 | // Reposition REPH as appropriate | |
1008 | ||
1009 | for ( le_int32 i = 0 ; i < count ; i++ ) { | |
1010 | ||
1011 | le_int32 tmpAuxData = glyphStorage.getAuxData(i,success); | |
1012 | LEGlyphID tmpGlyph = glyphStorage.getGlyphID(i,success); | |
1013 | ||
1014 | if ( ( tmpGlyph != NO_GLYPH ) && (tmpAuxData & rephConsonantMask) && !(tmpAuxData & repositionedGlyphMask)) { | |
1015 | ||
1016 | le_bool targetPositionFound = false; | |
1017 | le_int32 targetPosition = i+1; | |
1018 | le_int32 baseConsonantData; | |
1019 | ||
1020 | while (!targetPositionFound) { | |
1021 | tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); | |
1022 | tmpAuxData = glyphStorage.getAuxData(targetPosition,success); | |
1023 | ||
1024 | if ( tmpAuxData & baseConsonantMask ) { | |
1025 | baseConsonantData = tmpAuxData; | |
1026 | targetPositionFound = true; | |
1027 | } else { | |
1028 | targetPosition++; | |
1029 | } | |
1030 | } | |
1031 | ||
1032 | // Make sure we are not putting the reph into an empty hole | |
1033 | ||
1034 | le_bool targetPositionHasGlyph = false; | |
1035 | while (!targetPositionHasGlyph) { | |
1036 | tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); | |
1037 | if ( tmpGlyph != NO_GLYPH ) { | |
1038 | targetPositionHasGlyph = true; | |
1039 | } else { | |
1040 | targetPosition--; | |
1041 | } | |
1042 | } | |
1043 | ||
1044 | // Make sure that REPH is positioned after any above base or post base matras | |
1045 | // | |
1046 | le_bool checkMatraDone = false; | |
1047 | le_int32 checkMatraPosition = targetPosition+1; | |
1048 | while ( !checkMatraDone ) { | |
1049 | tmpAuxData = glyphStorage.getAuxData(checkMatraPosition,success); | |
1050 | if ( checkMatraPosition >= count || ( (tmpAuxData ^ baseConsonantData) & LE_GLYPH_GROUP_MASK)) { | |
1051 | checkMatraDone = true; | |
1052 | continue; | |
1053 | } | |
1054 | if ( (tmpAuxData & matraMask) && | |
1055 | (((tmpAuxData & markPositionMask) == aboveBasePosition) || | |
1056 | ((tmpAuxData & markPositionMask) == postBasePosition))) { | |
1057 | targetPosition = checkMatraPosition; | |
1058 | } | |
1059 | checkMatraPosition++; | |
1060 | } | |
1061 | ||
1062 | glyphStorage.moveGlyph(i,targetPosition,repositionedGlyphMask); | |
1063 | } | |
1064 | } | |
1065 | } | |
1066 | ||
1067 | ||
1068 | le_int32 IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, | |
1069 | LEUnicode *outChars, LEGlyphStorage &glyphStorage) | |
1070 | { | |
1071 | const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); | |
1072 | ||
1073 | DynamicProperties dynProps[INDIC_BLOCK_SIZE]; | |
1074 | IndicReordering::getDynamicProperties(dynProps,classTable); | |
1075 | ||
1076 | IndicReorderingOutput output(outChars, glyphStorage, NULL); | |
1077 | le_int32 i, firstConsonant, baseConsonant, secondConsonant, inv_count = 0, beginSyllable = 0; | |
1078 | //le_bool lastInWord = FALSE; | |
1079 | ||
1080 | while (beginSyllable < charCount) { | |
1081 | le_int32 nextSyllable = findSyllable(classTable, chars, beginSyllable, charCount); | |
1082 | ||
1083 | output.reset(); | |
1084 | ||
1085 | // Find the First Consonant | |
1086 | for ( firstConsonant = beginSyllable ; firstConsonant < nextSyllable ; firstConsonant++ ) { | |
1087 | if ( classTable->isConsonant(chars[firstConsonant]) ) { | |
1088 | break; | |
1089 | } | |
1090 | } | |
1091 | ||
1092 | // Find the base consonant | |
1093 | ||
1094 | baseConsonant = nextSyllable - 1; | |
1095 | secondConsonant = firstConsonant; | |
1096 | ||
1097 | // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm() | |
1098 | ||
1099 | while ( baseConsonant > firstConsonant ) { | |
1100 | if ( classTable->isConsonant(chars[baseConsonant]) && | |
1101 | !classTable->hasBelowBaseForm(chars[baseConsonant]) && | |
1102 | !classTable->hasPostBaseForm(chars[baseConsonant]) ) { | |
1103 | break; | |
1104 | } | |
1105 | else { | |
1106 | if ( classTable->isConsonant(chars[baseConsonant]) ) { | |
1107 | secondConsonant = baseConsonant; | |
1108 | } | |
1109 | baseConsonant--; | |
1110 | } | |
1111 | } | |
1112 | ||
1113 | // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one | |
1114 | // consonant, Ra is excluced from candidates for base consonants | |
1115 | ||
1116 | if ( classTable->isReph(chars[beginSyllable]) && | |
1117 | beginSyllable+1 < nextSyllable && classTable->isVirama(chars[beginSyllable+1]) && | |
1118 | secondConsonant != firstConsonant) { | |
1119 | baseConsonant = secondConsonant; | |
1120 | } | |
1121 | ||
1122 | // Populate the output | |
1123 | for ( i = beginSyllable ; i < nextSyllable ; i++ ) { | |
1124 | ||
1125 | // Handle invalid combinartions | |
1126 | ||
1127 | if ( classTable->isVirama(chars[beginSyllable]) || | |
1128 | classTable->isMatra(chars[beginSyllable]) || | |
1129 | classTable->isVowelModifier(chars[beginSyllable]) || | |
1130 | classTable->isNukta(chars[beginSyllable]) ) { | |
1131 | output.writeChar(C_DOTTED_CIRCLE,beginSyllable,basicShapingFormsMask); | |
1132 | inv_count++; | |
1133 | } | |
1134 | output.writeChar(chars[i],i, basicShapingFormsMask); | |
1135 | ||
1136 | } | |
1137 | ||
1138 | // Adjust features and set syllable structure bits | |
1139 | ||
1140 | for ( i = beginSyllable ; i < nextSyllable ; i++ ) { | |
1141 | ||
1142 | FeatureMask outMask = output.getFeatures(i+inv_count); | |
1143 | FeatureMask saveMask = outMask; | |
1144 | ||
1145 | // Since reph can only validly occur at the beginning of a syllable | |
1146 | // We only apply it to the first 2 characters in the syllable, to keep it from | |
1147 | // conflicting with other features ( i.e. rkrf ) | |
1148 | ||
1149 | // TODO : Use the dynamic property for determining isREPH | |
1150 | if ( i == beginSyllable && i < baseConsonant && classTable->isReph(chars[i]) && | |
1151 | i+1 < nextSyllable && classTable->isVirama(chars[i+1])) { | |
1152 | outMask |= rphfFeatureMask; | |
1153 | outMask |= rephConsonantMask; | |
1154 | output.setFeatures(i+1+inv_count,outMask); | |
1155 | ||
1156 | } | |
1157 | ||
1158 | if ( i == baseConsonant ) { | |
1159 | outMask |= baseConsonantMask; | |
1160 | } | |
1161 | ||
1162 | if ( classTable->isMatra(chars[i])) { | |
1163 | outMask |= matraMask; | |
1164 | if ( classTable->hasAboveBaseForm(chars[i])) { | |
1165 | outMask |= aboveBasePosition; | |
1166 | } else if ( classTable->hasBelowBaseForm(chars[i])) { | |
1167 | outMask |= belowBasePosition; | |
1168 | } | |
1169 | } | |
1170 | ||
1171 | // Don't apply half form to virama that stands alone at the end of a syllable | |
1172 | // to prevent half forms from forming when syllable ends with virama | |
1173 | ||
1174 | if ( classTable->isVirama(chars[i]) && (i+1 == nextSyllable) ) { | |
1175 | outMask ^= halfFeatureMask; | |
1176 | if ( classTable->isConsonant(chars[i-1]) ) { | |
1177 | FeatureMask tmp = output.getFeatures(i-1+inv_count); | |
1178 | tmp ^= halfFeatureMask; | |
1179 | output.setFeatures(i-1+inv_count,tmp); | |
1180 | } | |
1181 | } | |
1182 | ||
1183 | if ( outMask != saveMask ) { | |
1184 | output.setFeatures(i+inv_count,outMask); | |
1185 | } | |
1186 | } | |
1187 | ||
1188 | output.decomposeReorderMatras(classTable,beginSyllable,nextSyllable,inv_count); | |
1189 | ||
1190 | beginSyllable = nextSyllable; | |
1191 | } | |
1192 | ||
1193 | ||
1194 | return output.getOutputIndex(); | |
1195 | } | |
1196 | ||
1197 | ||
1198 | void IndicReordering::getDynamicProperties( DynamicProperties *, const IndicClassTable *classTable ) { | |
1199 | ||
1200 | ||
1201 | LEUnicode currentChar; | |
1202 | LEUnicode virama; | |
1203 | LEUnicode workChars[2]; | |
1204 | LEGlyphStorage workGlyphs; | |
1205 | ||
1206 | IndicReorderingOutput workOutput(workChars, workGlyphs, NULL); | |
1207 | ||
1208 | //le_int32 offset = 0; | |
1209 | ||
1210 | // First find the relevant virama for the script we are dealing with | |
1211 | ||
1212 | for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { | |
1213 | if ( classTable->isVirama(currentChar)) { | |
1214 | virama = currentChar; | |
1215 | break; | |
1216 | } | |
1217 | } | |
1218 | ||
1219 | for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { | |
1220 | if ( classTable->isConsonant(currentChar)) { | |
1221 | workOutput.reset(); | |
1222 | } | |
1223 | } | |
1224 | ||
1225 | ||
1226 | } | |
1227 | ||
b75a7d8f | 1228 | U_NAMESPACE_END |