]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /* |
729e4ab9 | 2 | / * |
57a6839d | 3 | * (C) Copyright IBM Corp. 1998-2013 - All Rights Reserved |
b75a7d8f A |
4 | * |
5 | */ | |
6 | ||
7 | #include "LETypes.h" | |
8 | #include "OpenTypeTables.h" | |
9 | #include "OpenTypeUtilities.h" | |
10 | #include "IndicReordering.h" | |
374ca955 | 11 | #include "LEGlyphStorage.h" |
b75a7d8f A |
12 | #include "MPreFixups.h" |
13 | ||
14 | U_NAMESPACE_BEGIN | |
15 | ||
73c04bcf A |
16 | #define loclFeatureTag LE_LOCL_FEATURE_TAG |
17 | #define initFeatureTag LE_INIT_FEATURE_TAG | |
18 | #define nuktFeatureTag LE_NUKT_FEATURE_TAG | |
19 | #define akhnFeatureTag LE_AKHN_FEATURE_TAG | |
20 | #define rphfFeatureTag LE_RPHF_FEATURE_TAG | |
729e4ab9 | 21 | #define rkrfFeatureTag LE_RKRF_FEATURE_TAG |
73c04bcf A |
22 | #define blwfFeatureTag LE_BLWF_FEATURE_TAG |
23 | #define halfFeatureTag LE_HALF_FEATURE_TAG | |
24 | #define pstfFeatureTag LE_PSTF_FEATURE_TAG | |
25 | #define vatuFeatureTag LE_VATU_FEATURE_TAG | |
26 | #define presFeatureTag LE_PRES_FEATURE_TAG | |
27 | #define blwsFeatureTag LE_BLWS_FEATURE_TAG | |
28 | #define abvsFeatureTag LE_ABVS_FEATURE_TAG | |
29 | #define pstsFeatureTag LE_PSTS_FEATURE_TAG | |
30 | #define halnFeatureTag LE_HALN_FEATURE_TAG | |
729e4ab9 | 31 | #define cjctFeatureTag LE_CJCT_FEATURE_TAG |
73c04bcf A |
32 | #define blwmFeatureTag LE_BLWM_FEATURE_TAG |
33 | #define abvmFeatureTag LE_ABVM_FEATURE_TAG | |
34 | #define distFeatureTag LE_DIST_FEATURE_TAG | |
729e4ab9 A |
35 | #define caltFeatureTag LE_CALT_FEATURE_TAG |
36 | #define kernFeatureTag LE_KERN_FEATURE_TAG | |
73c04bcf A |
37 | |
38 | #define loclFeatureMask 0x80000000UL | |
39 | #define rphfFeatureMask 0x40000000UL | |
40 | #define blwfFeatureMask 0x20000000UL | |
41 | #define halfFeatureMask 0x10000000UL | |
42 | #define pstfFeatureMask 0x08000000UL | |
43 | #define nuktFeatureMask 0x04000000UL | |
44 | #define akhnFeatureMask 0x02000000UL | |
45 | #define vatuFeatureMask 0x01000000UL | |
46 | #define presFeatureMask 0x00800000UL | |
47 | #define blwsFeatureMask 0x00400000UL | |
48 | #define abvsFeatureMask 0x00200000UL | |
729e4ab9 | 49 | #define pstsFeatureMask 0x00100000UL |
73c04bcf A |
50 | #define halnFeatureMask 0x00080000UL |
51 | #define blwmFeatureMask 0x00040000UL | |
52 | #define abvmFeatureMask 0x00020000UL | |
53 | #define distFeatureMask 0x00010000UL | |
54 | #define initFeatureMask 0x00008000UL | |
729e4ab9 A |
55 | #define cjctFeatureMask 0x00004000UL |
56 | #define rkrfFeatureMask 0x00002000UL | |
57 | #define caltFeatureMask 0x00001000UL | |
58 | #define kernFeatureMask 0x00000800UL | |
59 | ||
60 | // Syllable structure bits | |
61 | #define baseConsonantMask 0x00000400UL | |
62 | #define consonantMask 0x00000200UL | |
63 | #define halfConsonantMask 0x00000100UL | |
64 | #define rephConsonantMask 0x00000080UL | |
65 | #define matraMask 0x00000040UL | |
66 | #define vowelModifierMask 0x00000020UL | |
67 | #define markPositionMask 0x00000018UL | |
68 | ||
69 | #define postBasePosition 0x00000000UL | |
70 | #define preBasePosition 0x00000008UL | |
71 | #define aboveBasePosition 0x00000010UL | |
72 | #define belowBasePosition 0x00000018UL | |
73 | ||
74 | #define repositionedGlyphMask 0x00000002UL | |
75 | ||
76 | #define basicShapingFormsMask ( loclFeatureMask | nuktFeatureMask | akhnFeatureMask | rkrfFeatureMask | blwfFeatureMask | halfFeatureMask | vatuFeatureMask | cjctFeatureMask ) | |
77 | #define positioningFormsMask ( kernFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask ) | |
78 | #define presentationFormsMask ( presFeatureMask | abvsFeatureMask | blwsFeatureMask | pstsFeatureMask | halnFeatureMask | caltFeatureMask ) | |
79 | ||
80 | ||
81 | #define C_MALAYALAM_VOWEL_SIGN_U 0x0D41 | |
82 | #define C_DOTTED_CIRCLE 0x25CC | |
83 | #define NO_GLYPH 0xFFFF | |
84 | ||
85 | // Some level of debate as to the proper value for MAX_CONSONANTS_PER_SYLLABLE. Ticket 5588 states that 4 | |
86 | // is the magic number according to ISCII, but 5 seems to be the more consistent with XP. | |
87 | #define MAX_CONSONANTS_PER_SYLLABLE 5 | |
88 | ||
89 | #define INDIC_BLOCK_SIZE 0x7F | |
73c04bcf A |
90 | |
91 | class IndicReorderingOutput : public UMemory { | |
b75a7d8f | 92 | private: |
46f4442e | 93 | le_int32 fSyllableCount; |
73c04bcf | 94 | le_int32 fOutIndex; |
b75a7d8f | 95 | LEUnicode *fOutChars; |
374ca955 A |
96 | |
97 | LEGlyphStorage &fGlyphStorage; | |
b75a7d8f | 98 | |
73c04bcf A |
99 | LEUnicode fMpre; |
100 | le_int32 fMpreIndex; | |
374ca955 | 101 | |
73c04bcf A |
102 | LEUnicode fMbelow; |
103 | le_int32 fMbelowIndex; | |
374ca955 | 104 | |
73c04bcf A |
105 | LEUnicode fMabove; |
106 | le_int32 fMaboveIndex; | |
374ca955 | 107 | |
73c04bcf A |
108 | LEUnicode fMpost; |
109 | le_int32 fMpostIndex; | |
374ca955 | 110 | |
73c04bcf A |
111 | LEUnicode fLengthMark; |
112 | le_int32 fLengthMarkIndex; | |
374ca955 | 113 | |
46f4442e A |
114 | LEUnicode fAlLakuna; |
115 | le_int32 fAlLakunaIndex; | |
73c04bcf A |
116 | |
117 | FeatureMask fMatraFeatures; | |
374ca955 | 118 | |
73c04bcf | 119 | le_int32 fMPreOutIndex; |
b75a7d8f | 120 | MPreFixups *fMPreFixups; |
374ca955 | 121 | |
73c04bcf A |
122 | LEUnicode fVMabove; |
123 | LEUnicode fVMpost; | |
124 | le_int32 fVMIndex; | |
125 | FeatureMask fVMFeatures; | |
374ca955 | 126 | |
73c04bcf A |
127 | LEUnicode fSMabove; |
128 | LEUnicode fSMbelow; | |
129 | le_int32 fSMIndex; | |
130 | FeatureMask fSMFeatures; | |
374ca955 | 131 | |
729e4ab9 A |
132 | LEUnicode fPreBaseConsonant; |
133 | LEUnicode fPreBaseVirama; | |
134 | le_int32 fPBCIndex; | |
135 | FeatureMask fPBCFeatures; | |
136 | ||
374ca955 | 137 | void saveMatra(LEUnicode matra, le_int32 matraIndex, IndicClassTable::CharClass matraClass) |
b75a7d8f A |
138 | { |
139 | // FIXME: check if already set, or if not a matra... | |
374ca955 | 140 | if (IndicClassTable::isLengthMark(matraClass)) { |
b75a7d8f | 141 | fLengthMark = matra; |
374ca955 | 142 | fLengthMarkIndex = matraIndex; |
46f4442e A |
143 | } else if (IndicClassTable::isAlLakuna(matraClass)) { |
144 | fAlLakuna = matra; | |
145 | fAlLakunaIndex = matraIndex; | |
374ca955 | 146 | } else { |
73c04bcf A |
147 | switch (matraClass & CF_POS_MASK) { |
148 | case CF_POS_BEFORE: | |
374ca955 A |
149 | fMpre = matra; |
150 | fMpreIndex = matraIndex; | |
151 | break; | |
152 | ||
73c04bcf | 153 | case CF_POS_BELOW: |
374ca955 A |
154 | fMbelow = matra; |
155 | fMbelowIndex = matraIndex; | |
156 | break; | |
157 | ||
73c04bcf | 158 | case CF_POS_ABOVE: |
374ca955 A |
159 | fMabove = matra; |
160 | fMaboveIndex = matraIndex; | |
161 | break; | |
162 | ||
73c04bcf | 163 | case CF_POS_AFTER: |
374ca955 A |
164 | fMpost = matra; |
165 | fMpostIndex = matraIndex; | |
166 | break; | |
167 | ||
168 | default: | |
169 | // can't get here... | |
170 | break; | |
171 | } | |
b75a7d8f A |
172 | } |
173 | } | |
174 | ||
175 | public: | |
73c04bcf | 176 | IndicReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage, MPreFixups *mpreFixups) |
46f4442e | 177 | : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage), |
374ca955 | 178 | fMpre(0), fMpreIndex(0), fMbelow(0), fMbelowIndex(0), fMabove(0), fMaboveIndex(0), |
46f4442e | 179 | fMpost(0), fMpostIndex(0), fLengthMark(0), fLengthMarkIndex(0), fAlLakuna(0), fAlLakunaIndex(0), |
73c04bcf A |
180 | fMatraFeatures(0), fMPreOutIndex(-1), fMPreFixups(mpreFixups), |
181 | fVMabove(0), fVMpost(0), fVMIndex(0), fVMFeatures(0), | |
729e4ab9 A |
182 | fSMabove(0), fSMbelow(0), fSMIndex(0), fSMFeatures(0), |
183 | fPreBaseConsonant(0), fPreBaseVirama(0), fPBCIndex(0), fPBCFeatures(0) | |
b75a7d8f A |
184 | { |
185 | // nothing else to do... | |
186 | } | |
187 | ||
73c04bcf | 188 | ~IndicReorderingOutput() |
b75a7d8f A |
189 | { |
190 | // nothing to do here... | |
191 | } | |
192 | ||
374ca955 | 193 | void reset() |
b75a7d8f | 194 | { |
46f4442e A |
195 | fSyllableCount += 1; |
196 | ||
197 | fMpre = fMbelow = fMabove = fMpost = fLengthMark = fAlLakuna = 0; | |
b75a7d8f | 198 | fMPreOutIndex = -1; |
374ca955 A |
199 | |
200 | fVMabove = fVMpost = 0; | |
201 | fSMabove = fSMbelow = 0; | |
729e4ab9 A |
202 | |
203 | fPreBaseConsonant = fPreBaseVirama = 0; | |
374ca955 A |
204 | } |
205 | ||
73c04bcf A |
206 | void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) |
207 | { | |
208 | LEErrorCode success = LE_NO_ERROR; | |
209 | ||
210 | fOutChars[fOutIndex] = ch; | |
211 | ||
212 | fGlyphStorage.setCharIndex(fOutIndex, charIndex, success); | |
46f4442e | 213 | fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success); |
73c04bcf A |
214 | |
215 | fOutIndex += 1; | |
216 | } | |
217 | ||
729e4ab9 A |
218 | void setFeatures ( le_uint32 charIndex, FeatureMask charFeatures) |
219 | { | |
220 | LEErrorCode success = LE_NO_ERROR; | |
221 | ||
222 | fGlyphStorage.setAuxData( charIndex, charFeatures, success ); | |
223 | ||
224 | } | |
225 | ||
226 | FeatureMask getFeatures ( le_uint32 charIndex ) | |
227 | { | |
228 | LEErrorCode success = LE_NO_ERROR; | |
229 | return fGlyphStorage.getAuxData(charIndex,success); | |
230 | } | |
231 | ||
232 | void decomposeReorderMatras ( const IndicClassTable *classTable, le_int32 beginSyllable, le_int32 nextSyllable, le_int32 inv_count ) { | |
233 | le_int32 i; | |
234 | LEErrorCode success = LE_NO_ERROR; | |
235 | ||
236 | for ( i = beginSyllable ; i < nextSyllable ; i++ ) { | |
237 | if ( classTable->isMatra(fOutChars[i+inv_count])) { | |
238 | IndicClassTable::CharClass matraClass = classTable->getCharClass(fOutChars[i+inv_count]); | |
239 | if ( classTable->isSplitMatra(matraClass)) { | |
240 | le_int32 saveIndex = fGlyphStorage.getCharIndex(i+inv_count,success); | |
241 | le_uint32 saveAuxData = fGlyphStorage.getAuxData(i+inv_count,success); | |
242 | const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); | |
243 | int j; | |
4388f060 | 244 | for (j = 0 ; j < SM_MAX_PIECES && *(splitMatra)[j] != 0 ; j++) { |
729e4ab9 A |
245 | LEUnicode piece = (*splitMatra)[j]; |
246 | if ( j == 0 ) { | |
247 | fOutChars[i+inv_count] = piece; | |
248 | matraClass = classTable->getCharClass(piece); | |
249 | } else { | |
250 | insertCharacter(piece,i+1+inv_count,saveIndex,saveAuxData); | |
251 | nextSyllable++; | |
252 | } | |
253 | } | |
254 | } | |
255 | ||
256 | if ((matraClass & CF_POS_MASK) == CF_POS_BEFORE) { | |
257 | moveCharacter(i+inv_count,beginSyllable+inv_count); | |
258 | } | |
259 | } | |
260 | } | |
261 | } | |
262 | ||
263 | void moveCharacter( le_int32 fromPosition, le_int32 toPosition ) { | |
264 | le_int32 i,saveIndex; | |
265 | le_uint32 saveAuxData; | |
266 | LEUnicode saveChar = fOutChars[fromPosition]; | |
267 | LEErrorCode success = LE_NO_ERROR; | |
268 | LEErrorCode success2 = LE_NO_ERROR; | |
269 | saveIndex = fGlyphStorage.getCharIndex(fromPosition,success); | |
270 | saveAuxData = fGlyphStorage.getAuxData(fromPosition,success); | |
271 | ||
272 | if ( fromPosition > toPosition ) { | |
273 | for ( i = fromPosition ; i > toPosition ; i-- ) { | |
274 | fOutChars[i] = fOutChars[i-1]; | |
275 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success2),success); | |
276 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success2), success); | |
277 | ||
278 | } | |
279 | } else { | |
280 | for ( i = fromPosition ; i < toPosition ; i++ ) { | |
281 | fOutChars[i] = fOutChars[i+1]; | |
282 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success2),success); | |
283 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success2), success); | |
284 | } | |
285 | ||
286 | } | |
287 | fOutChars[toPosition] = saveChar; | |
288 | fGlyphStorage.setCharIndex(toPosition,saveIndex,success); | |
289 | fGlyphStorage.setAuxData(toPosition,saveAuxData,success); | |
290 | ||
291 | } | |
292 | void insertCharacter( LEUnicode ch, le_int32 toPosition, le_int32 charIndex, le_uint32 auxData ) { | |
293 | LEErrorCode success = LE_NO_ERROR; | |
294 | le_int32 i; | |
295 | fOutIndex += 1; | |
296 | ||
297 | for ( i = fOutIndex ; i > toPosition ; i--) { | |
298 | fOutChars[i] = fOutChars[i-1]; | |
299 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i-1,success),success); | |
300 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i-1,success), success); | |
301 | } | |
302 | ||
303 | fOutChars[toPosition] = ch; | |
304 | fGlyphStorage.setCharIndex(toPosition,charIndex,success); | |
305 | fGlyphStorage.setAuxData(toPosition,auxData,success); | |
306 | ||
307 | } | |
308 | void removeCharacter( le_int32 fromPosition ) { | |
309 | LEErrorCode success = LE_NO_ERROR; | |
310 | le_int32 i; | |
311 | fOutIndex -= 1; | |
312 | ||
313 | for ( i = fromPosition ; i < fOutIndex ; i--) { | |
314 | fOutChars[i] = fOutChars[i+1]; | |
315 | fGlyphStorage.setCharIndex(i,fGlyphStorage.getCharIndex(i+1,success),success); | |
316 | fGlyphStorage.setAuxData(i,fGlyphStorage.getAuxData(i+1,success), success); | |
317 | } | |
318 | } | |
319 | ||
73c04bcf | 320 | le_bool noteMatra(const IndicClassTable *classTable, LEUnicode matra, le_uint32 matraIndex, FeatureMask matraFeatures, le_bool wordStart) |
374ca955 A |
321 | { |
322 | IndicClassTable::CharClass matraClass = classTable->getCharClass(matra); | |
323 | ||
73c04bcf A |
324 | fMatraFeatures = matraFeatures; |
325 | ||
326 | if (wordStart) { | |
327 | fMatraFeatures |= initFeatureMask; | |
328 | } | |
b75a7d8f A |
329 | |
330 | if (IndicClassTable::isMatra(matraClass)) { | |
331 | if (IndicClassTable::isSplitMatra(matraClass)) { | |
332 | const SplitMatra *splitMatra = classTable->getSplitMatra(matraClass); | |
333 | int i; | |
334 | ||
4388f060 | 335 | for (i = 0; i < SM_MAX_PIECES && (*splitMatra)[i] != 0; i += 1) { |
b75a7d8f A |
336 | LEUnicode piece = (*splitMatra)[i]; |
337 | IndicClassTable::CharClass pieceClass = classTable->getCharClass(piece); | |
338 | ||
374ca955 | 339 | saveMatra(piece, matraIndex, pieceClass); |
b75a7d8f A |
340 | } |
341 | } else { | |
374ca955 | 342 | saveMatra(matra, matraIndex, matraClass); |
b75a7d8f | 343 | } |
374ca955 A |
344 | |
345 | return TRUE; | |
346 | } | |
347 | ||
348 | return FALSE; | |
349 | } | |
350 | ||
73c04bcf | 351 | void noteVowelModifier(const IndicClassTable *classTable, LEUnicode vowelModifier, le_uint32 vowelModifierIndex, FeatureMask vowelModifierFeatures) |
374ca955 A |
352 | { |
353 | IndicClassTable::CharClass vmClass = classTable->getCharClass(vowelModifier); | |
354 | ||
355 | fVMIndex = vowelModifierIndex; | |
73c04bcf | 356 | fVMFeatures = vowelModifierFeatures; |
374ca955 A |
357 | |
358 | if (IndicClassTable::isVowelModifier(vmClass)) { | |
73c04bcf A |
359 | switch (vmClass & CF_POS_MASK) { |
360 | case CF_POS_ABOVE: | |
374ca955 A |
361 | fVMabove = vowelModifier; |
362 | break; | |
363 | ||
73c04bcf | 364 | case CF_POS_AFTER: |
374ca955 A |
365 | fVMpost = vowelModifier; |
366 | break; | |
367 | ||
368 | default: | |
369 | // FIXME: this is an error... | |
370 | break; | |
371 | } | |
372 | } | |
373 | } | |
374 | ||
73c04bcf | 375 | void noteStressMark(const IndicClassTable *classTable, LEUnicode stressMark, le_uint32 stressMarkIndex, FeatureMask stressMarkFeatures) |
374ca955 A |
376 | { |
377 | IndicClassTable::CharClass smClass = classTable->getCharClass(stressMark); | |
378 | ||
379 | fSMIndex = stressMarkIndex; | |
73c04bcf | 380 | fSMFeatures = stressMarkFeatures; |
374ca955 A |
381 | |
382 | if (IndicClassTable::isStressMark(smClass)) { | |
73c04bcf A |
383 | switch (smClass & CF_POS_MASK) { |
384 | case CF_POS_ABOVE: | |
374ca955 A |
385 | fSMabove = stressMark; |
386 | break; | |
387 | ||
73c04bcf | 388 | case CF_POS_BELOW: |
374ca955 A |
389 | fSMbelow = stressMark; |
390 | break; | |
391 | ||
392 | default: | |
393 | // FIXME: this is an error... | |
394 | break; | |
395 | } | |
b75a7d8f A |
396 | } |
397 | } | |
398 | ||
729e4ab9 A |
399 | void notePreBaseConsonant(le_uint32 index,LEUnicode PBConsonant, LEUnicode PBVirama, FeatureMask features) |
400 | { | |
401 | fPBCIndex = index; | |
402 | fPreBaseConsonant = PBConsonant; | |
403 | fPreBaseVirama = PBVirama; | |
404 | fPBCFeatures = features; | |
405 | } | |
406 | ||
b75a7d8f A |
407 | void noteBaseConsonant() |
408 | { | |
409 | if (fMPreFixups != NULL && fMPreOutIndex >= 0) { | |
410 | fMPreFixups->add(fOutIndex, fMPreOutIndex); | |
411 | } | |
412 | } | |
413 | ||
46f4442e A |
414 | // Handles Al-Lakuna in Sinhala split vowels. |
415 | void writeAlLakuna() | |
73c04bcf | 416 | { |
46f4442e A |
417 | if (fAlLakuna != 0) { |
418 | writeChar(fAlLakuna, fAlLakunaIndex, fMatraFeatures); | |
73c04bcf A |
419 | } |
420 | } | |
421 | ||
b75a7d8f A |
422 | void writeMpre() |
423 | { | |
424 | if (fMpre != 0) { | |
425 | fMPreOutIndex = fOutIndex; | |
73c04bcf | 426 | writeChar(fMpre, fMpreIndex, fMatraFeatures); |
b75a7d8f A |
427 | } |
428 | } | |
429 | ||
430 | void writeMbelow() | |
431 | { | |
432 | if (fMbelow != 0) { | |
73c04bcf | 433 | writeChar(fMbelow, fMbelowIndex, fMatraFeatures); |
b75a7d8f A |
434 | } |
435 | } | |
436 | ||
437 | void writeMabove() | |
438 | { | |
439 | if (fMabove != 0) { | |
73c04bcf | 440 | writeChar(fMabove, fMaboveIndex, fMatraFeatures); |
b75a7d8f A |
441 | } |
442 | } | |
443 | ||
444 | void writeMpost() | |
445 | { | |
446 | if (fMpost != 0) { | |
73c04bcf | 447 | writeChar(fMpost, fMpostIndex, fMatraFeatures); |
b75a7d8f A |
448 | } |
449 | } | |
450 | ||
451 | void writeLengthMark() | |
452 | { | |
453 | if (fLengthMark != 0) { | |
73c04bcf | 454 | writeChar(fLengthMark, fLengthMarkIndex, fMatraFeatures); |
b75a7d8f A |
455 | } |
456 | } | |
374ca955 A |
457 | |
458 | void writeVMabove() | |
459 | { | |
460 | if (fVMabove != 0) { | |
73c04bcf | 461 | writeChar(fVMabove, fVMIndex, fVMFeatures); |
374ca955 A |
462 | } |
463 | } | |
464 | ||
465 | void writeVMpost() | |
466 | { | |
467 | if (fVMpost != 0) { | |
73c04bcf | 468 | writeChar(fVMpost, fVMIndex, fVMFeatures); |
374ca955 A |
469 | } |
470 | } | |
471 | ||
472 | void writeSMabove() | |
473 | { | |
474 | if (fSMabove != 0) { | |
73c04bcf | 475 | writeChar(fSMabove, fSMIndex, fSMFeatures); |
374ca955 A |
476 | } |
477 | } | |
478 | ||
479 | void writeSMbelow() | |
480 | { | |
481 | if (fSMbelow != 0) { | |
73c04bcf | 482 | writeChar(fSMbelow, fSMIndex, fSMFeatures); |
374ca955 A |
483 | } |
484 | } | |
485 | ||
729e4ab9 A |
486 | void writePreBaseConsonant() |
487 | { | |
488 | // The TDIL spec says that consonant + virama + RRA should produce a rakar in Malayalam. However, | |
489 | // it seems that almost none of the fonts for Malayalam are set up to handle this. | |
490 | // So, we're going to force the issue here by using the rakar as defined with RA in most fonts. | |
491 | ||
492 | if (fPreBaseConsonant == 0x0d31) { // RRA | |
493 | fPreBaseConsonant = 0x0d30; // RA | |
494 | } | |
495 | ||
496 | if (fPreBaseConsonant != 0) { | |
497 | writeChar(fPreBaseConsonant, fPBCIndex, fPBCFeatures); | |
498 | writeChar(fPreBaseVirama,fPBCIndex-1,fPBCFeatures); | |
499 | } | |
500 | } | |
501 | ||
b75a7d8f A |
502 | le_int32 getOutputIndex() |
503 | { | |
504 | return fOutIndex; | |
505 | } | |
506 | }; | |
507 | ||
729e4ab9 | 508 | |
b75a7d8f | 509 | |
73c04bcf A |
510 | // TODO: Find better names for these! |
511 | #define tagArray4 (loclFeatureMask | nuktFeatureMask | akhnFeatureMask | vatuFeatureMask | presFeatureMask | blwsFeatureMask | abvsFeatureMask | pstsFeatureMask | halnFeatureMask | blwmFeatureMask | abvmFeatureMask | distFeatureMask) | |
512 | #define tagArray3 (pstfFeatureMask | tagArray4) | |
513 | #define tagArray2 (halfFeatureMask | tagArray3) | |
514 | #define tagArray1 (blwfFeatureMask | tagArray2) | |
515 | #define tagArray0 (rphfFeatureMask | tagArray1) | |
b75a7d8f | 516 | |
729e4ab9 | 517 | static const FeatureMap featureMap[] = { |
73c04bcf A |
518 | {loclFeatureTag, loclFeatureMask}, |
519 | {initFeatureTag, initFeatureMask}, | |
520 | {nuktFeatureTag, nuktFeatureMask}, | |
521 | {akhnFeatureTag, akhnFeatureMask}, | |
522 | {rphfFeatureTag, rphfFeatureMask}, | |
523 | {blwfFeatureTag, blwfFeatureMask}, | |
524 | {halfFeatureTag, halfFeatureMask}, | |
525 | {pstfFeatureTag, pstfFeatureMask}, | |
526 | {vatuFeatureTag, vatuFeatureMask}, | |
527 | {presFeatureTag, presFeatureMask}, | |
528 | {blwsFeatureTag, blwsFeatureMask}, | |
529 | {abvsFeatureTag, abvsFeatureMask}, | |
530 | {pstsFeatureTag, pstsFeatureMask}, | |
531 | {halnFeatureTag, halnFeatureMask}, | |
532 | {blwmFeatureTag, blwmFeatureMask}, | |
533 | {abvmFeatureTag, abvmFeatureMask}, | |
534 | {distFeatureTag, distFeatureMask} | |
b75a7d8f A |
535 | }; |
536 | ||
73c04bcf | 537 | static const le_int32 featureCount = LE_ARRAY_SIZE(featureMap); |
b75a7d8f | 538 | |
729e4ab9 A |
539 | static const FeatureMap v2FeatureMap[] = { |
540 | {loclFeatureTag, loclFeatureMask}, | |
541 | {nuktFeatureTag, nuktFeatureMask}, | |
542 | {akhnFeatureTag, akhnFeatureMask}, | |
543 | {rphfFeatureTag, rphfFeatureMask}, | |
544 | {rkrfFeatureTag, rkrfFeatureMask}, | |
545 | {blwfFeatureTag, blwfFeatureMask}, | |
546 | {halfFeatureTag, halfFeatureMask}, | |
547 | {vatuFeatureTag, vatuFeatureMask}, | |
548 | {cjctFeatureTag, cjctFeatureMask}, | |
549 | {presFeatureTag, presFeatureMask}, | |
550 | {abvsFeatureTag, abvsFeatureMask}, | |
551 | {blwsFeatureTag, blwsFeatureMask}, | |
552 | {pstsFeatureTag, pstsFeatureMask}, | |
553 | {halnFeatureTag, halnFeatureMask}, | |
554 | {caltFeatureTag, caltFeatureMask}, | |
555 | {kernFeatureTag, kernFeatureMask}, | |
556 | {distFeatureTag, distFeatureMask}, | |
557 | {abvmFeatureTag, abvmFeatureMask}, | |
558 | {blwmFeatureTag, blwmFeatureMask} | |
559 | }; | |
560 | ||
561 | static const le_int32 v2FeatureMapCount = LE_ARRAY_SIZE(v2FeatureMap); | |
562 | ||
73c04bcf A |
563 | static const le_int8 stateTable[][CC_COUNT] = |
564 | { | |
46f4442e A |
565 | // xx vm sm iv i2 i3 ct cn nu dv s1 s2 s3 vr zw al |
566 | { 1, 6, 1, 5, 8, 11, 3, 2, 1, 5, 9, 5, 5, 1, 1, 1}, // 0 - ground state | |
567 | {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state | |
568 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, 12, -1}, // 2 - consonant with nukta | |
569 | {-1, 6, 1, -1, -1, -1, -1, -1, 2, 5, 9, 5, 5, 4, 12, 13}, // 3 - consonant | |
570 | {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, 7, -1}, // 4 - consonant virama | |
571 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 5 - dependent vowels | |
572 | {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - vowel mark | |
573 | {-1, -1, -1, -1, -1, -1, 3, 2, -1, -1, -1, -1, -1, -1, -1, -1}, // 7 - consonant virama ZWJ, consonant ZWJ virama | |
574 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1}, // 8 - independent vowels that can take a virama | |
575 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 5, -1, -1, -1}, // 9 - first part of split vowel | |
576 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 5, -1, -1, -1}, // 10 - second part of split vowel | |
577 | {-1, 6, 1, -1, -1, -1, -1, -1, -1, 5, 9, 5, 5, 4, -1, -1}, // 11 - independent vowels that can take an iv | |
578 | {-1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, 7}, // 12 - consonant ZWJ (TODO: Take everything else that can be after a consonant?) | |
579 | {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1} // 13 - consonant al-lakuna ZWJ consonant | |
b75a7d8f A |
580 | }; |
581 | ||
73c04bcf A |
582 | |
583 | const FeatureMap *IndicReordering::getFeatureMap(le_int32 &count) | |
b75a7d8f | 584 | { |
73c04bcf A |
585 | count = featureCount; |
586 | ||
587 | return featureMap; | |
b75a7d8f A |
588 | } |
589 | ||
729e4ab9 A |
590 | const FeatureMap *IndicReordering::getv2FeatureMap(le_int32 &count) |
591 | { | |
592 | count = v2FeatureMapCount; | |
593 | ||
594 | return v2FeatureMap; | |
595 | } | |
596 | ||
b75a7d8f A |
597 | le_int32 IndicReordering::findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount) |
598 | { | |
599 | le_int32 cursor = prev; | |
600 | le_int8 state = 0; | |
729e4ab9 | 601 | le_int8 consonant_count = 0; |
b75a7d8f A |
602 | |
603 | while (cursor < charCount) { | |
604 | IndicClassTable::CharClass charClass = classTable->getCharClass(chars[cursor]); | |
729e4ab9 A |
605 | |
606 | if ( IndicClassTable::isConsonant(charClass) ) { | |
607 | consonant_count++; | |
608 | if ( consonant_count > MAX_CONSONANTS_PER_SYLLABLE ) { | |
609 | break; | |
610 | } | |
611 | } | |
b75a7d8f | 612 | |
73c04bcf | 613 | state = stateTable[state][charClass & CF_CLASS_MASK]; |
b75a7d8f A |
614 | |
615 | if (state < 0) { | |
616 | break; | |
617 | } | |
618 | ||
619 | cursor += 1; | |
620 | } | |
621 | ||
622 | return cursor; | |
623 | } | |
624 | ||
625 | le_int32 IndicReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, | |
374ca955 | 626 | LEUnicode *outChars, LEGlyphStorage &glyphStorage, |
729e4ab9 | 627 | MPreFixups **outMPreFixups, LEErrorCode& success) |
b75a7d8f | 628 | { |
729e4ab9 A |
629 | if (LE_FAILURE(success)) { |
630 | return 0; | |
631 | } | |
632 | ||
b75a7d8f A |
633 | MPreFixups *mpreFixups = NULL; |
634 | const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); | |
635 | ||
57a6839d A |
636 | if(classTable==NULL) { |
637 | success = LE_MEMORY_ALLOCATION_ERROR; | |
638 | return 0; | |
639 | } | |
640 | ||
73c04bcf | 641 | if (classTable->scriptFlags & SF_MPRE_FIXUP) { |
b75a7d8f | 642 | mpreFixups = new MPreFixups(charCount); |
729e4ab9 A |
643 | if (mpreFixups == NULL) { |
644 | success = LE_MEMORY_ALLOCATION_ERROR; | |
645 | return 0; | |
646 | } | |
b75a7d8f A |
647 | } |
648 | ||
73c04bcf | 649 | IndicReorderingOutput output(outChars, glyphStorage, mpreFixups); |
b75a7d8f | 650 | le_int32 i, prev = 0; |
73c04bcf | 651 | le_bool lastInWord = FALSE; |
b75a7d8f A |
652 | |
653 | while (prev < charCount) { | |
654 | le_int32 syllable = findSyllable(classTable, chars, prev, charCount); | |
374ca955 | 655 | le_int32 matra, markStart = syllable; |
b75a7d8f | 656 | |
374ca955 A |
657 | output.reset(); |
658 | ||
659 | if (classTable->isStressMark(chars[markStart - 1])) { | |
660 | markStart -= 1; | |
73c04bcf | 661 | output.noteStressMark(classTable, chars[markStart], markStart, tagArray1); |
b75a7d8f | 662 | } |
374ca955 | 663 | |
73c04bcf | 664 | if (markStart != prev && classTable->isVowelModifier(chars[markStart - 1])) { |
374ca955 | 665 | markStart -= 1; |
73c04bcf | 666 | output.noteVowelModifier(classTable, chars[markStart], markStart, tagArray1); |
b75a7d8f A |
667 | } |
668 | ||
374ca955 A |
669 | matra = markStart - 1; |
670 | ||
73c04bcf | 671 | while (output.noteMatra(classTable, chars[matra], matra, tagArray1, !lastInWord) && matra != prev) { |
374ca955 A |
672 | matra -= 1; |
673 | } | |
b75a7d8f | 674 | |
73c04bcf A |
675 | lastInWord = TRUE; |
676 | ||
677 | switch (classTable->getCharClass(chars[prev]) & CF_CLASS_MASK) { | |
678 | case CC_RESERVED: | |
679 | lastInWord = FALSE; | |
680 | /* fall through */ | |
681 | ||
682 | case CC_INDEPENDENT_VOWEL: | |
683 | case CC_ZERO_WIDTH_MARK: | |
b75a7d8f | 684 | for (i = prev; i < syllable; i += 1) { |
73c04bcf | 685 | output.writeChar(chars[i], i, tagArray1); |
b75a7d8f A |
686 | } |
687 | ||
688 | break; | |
689 | ||
46f4442e | 690 | case CC_AL_LAKUNA: |
73c04bcf | 691 | case CC_NUKTA: |
73c04bcf A |
692 | output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); |
693 | output.writeChar(chars[prev], prev, tagArray1); | |
b75a7d8f A |
694 | break; |
695 | ||
46f4442e A |
696 | case CC_VIRAMA: |
697 | // A lone virama is illegal unless it follows a | |
698 | // MALAYALAM_VOWEL_SIGN_U. Such a usage is called | |
699 | // "samvruthokaram". | |
700 | if (chars[prev - 1] != C_MALAYALAM_VOWEL_SIGN_U) { | |
701 | output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); | |
702 | } | |
703 | ||
704 | output.writeChar(chars[prev], prev, tagArray1); | |
705 | break; | |
706 | ||
73c04bcf A |
707 | case CC_DEPENDENT_VOWEL: |
708 | case CC_SPLIT_VOWEL_PIECE_1: | |
709 | case CC_SPLIT_VOWEL_PIECE_2: | |
710 | case CC_SPLIT_VOWEL_PIECE_3: | |
711 | case CC_VOWEL_MODIFIER: | |
712 | case CC_STRESS_MARK: | |
b75a7d8f | 713 | output.writeMpre(); |
374ca955 | 714 | |
73c04bcf | 715 | output.writeChar(C_DOTTED_CIRCLE, prev, tagArray1); |
374ca955 | 716 | |
b75a7d8f | 717 | output.writeMbelow(); |
374ca955 | 718 | output.writeSMbelow(); |
b75a7d8f | 719 | output.writeMabove(); |
374ca955 | 720 | |
73c04bcf | 721 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { |
374ca955 A |
722 | output.writeMpost(); |
723 | } | |
724 | ||
73c04bcf | 725 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { |
374ca955 A |
726 | output.writeVMabove(); |
727 | output.writeSMabove(); // FIXME: there are no SM's in these scripts... | |
728 | } | |
729 | ||
73c04bcf | 730 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { |
374ca955 A |
731 | output.writeMpost(); |
732 | } | |
733 | ||
b75a7d8f | 734 | output.writeLengthMark(); |
46f4442e | 735 | output.writeAlLakuna(); |
374ca955 | 736 | |
73c04bcf | 737 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { |
374ca955 A |
738 | output.writeVMabove(); |
739 | output.writeSMabove(); | |
740 | } | |
741 | ||
742 | output.writeVMpost(); | |
b75a7d8f A |
743 | break; |
744 | ||
73c04bcf A |
745 | case CC_INDEPENDENT_VOWEL_2: |
746 | case CC_INDEPENDENT_VOWEL_3: | |
747 | case CC_CONSONANT: | |
748 | case CC_CONSONANT_WITH_NUKTA: | |
b75a7d8f | 749 | { |
374ca955 A |
750 | le_uint32 length = markStart - prev; |
751 | le_int32 lastConsonant = markStart - 1; | |
b75a7d8f A |
752 | le_int32 baseLimit = prev; |
753 | ||
754 | // Check for REPH at front of syllable | |
729e4ab9 | 755 | if (length > 2 && classTable->isReph(chars[prev]) && classTable->isVirama(chars[prev + 1]) && chars[prev + 2] != C_SIGN_ZWNJ) { |
b75a7d8f A |
756 | baseLimit += 2; |
757 | ||
758 | // Check for eyelash RA, if the script supports it | |
73c04bcf | 759 | if ((classTable->scriptFlags & SF_EYELASH_RA) != 0 && |
b75a7d8f A |
760 | chars[baseLimit] == C_SIGN_ZWJ) { |
761 | if (length > 3) { | |
762 | baseLimit += 1; | |
763 | } else { | |
764 | baseLimit -= 2; | |
765 | } | |
766 | } | |
767 | } | |
768 | ||
769 | while (lastConsonant > baseLimit && !classTable->isConsonant(chars[lastConsonant])) { | |
770 | lastConsonant -= 1; | |
771 | } | |
772 | ||
729e4ab9 | 773 | |
46f4442e A |
774 | IndicClassTable::CharClass charClass = CC_RESERVED; |
775 | IndicClassTable::CharClass nextClass = CC_RESERVED; | |
b75a7d8f A |
776 | le_int32 baseConsonant = lastConsonant; |
777 | le_int32 postBase = lastConsonant + 1; | |
73c04bcf | 778 | le_int32 postBaseLimit = classTable->scriptFlags & SF_POST_BASE_LIMIT_MASK; |
374ca955 A |
779 | le_bool seenVattu = FALSE; |
780 | le_bool seenBelowBaseForm = FALSE; | |
729e4ab9 | 781 | le_bool seenPreBaseForm = FALSE; |
46f4442e A |
782 | le_bool hasNukta = FALSE; |
783 | le_bool hasBelowBaseForm = FALSE; | |
784 | le_bool hasPostBaseForm = FALSE; | |
729e4ab9 | 785 | le_bool hasPreBaseForm = FALSE; |
374ca955 | 786 | |
73c04bcf | 787 | if (postBase < markStart && classTable->isNukta(chars[postBase])) { |
46f4442e | 788 | charClass = CC_NUKTA; |
374ca955 A |
789 | postBase += 1; |
790 | } | |
b75a7d8f A |
791 | |
792 | while (baseConsonant > baseLimit) { | |
46f4442e A |
793 | nextClass = charClass; |
794 | hasNukta = IndicClassTable::isNukta(nextClass); | |
795 | charClass = classTable->getCharClass(chars[baseConsonant]); | |
796 | ||
797 | hasBelowBaseForm = IndicClassTable::hasBelowBaseForm(charClass) && !hasNukta; | |
798 | hasPostBaseForm = IndicClassTable::hasPostBaseForm(charClass) && !hasNukta; | |
729e4ab9 | 799 | hasPreBaseForm = IndicClassTable::hasPreBaseForm(charClass) && !hasNukta; |
b75a7d8f A |
800 | |
801 | if (IndicClassTable::isConsonant(charClass)) { | |
802 | if (postBaseLimit == 0 || seenVattu || | |
803 | (baseConsonant > baseLimit && !classTable->isVirama(chars[baseConsonant - 1])) || | |
729e4ab9 | 804 | !(hasBelowBaseForm || hasPostBaseForm || hasPreBaseForm)) { |
b75a7d8f A |
805 | break; |
806 | } | |
807 | ||
729e4ab9 A |
808 | // Note any pre-base consonants |
809 | if ( baseConsonant == lastConsonant && lastConsonant > 0 && | |
810 | hasPreBaseForm && classTable->isVirama(chars[baseConsonant - 1])) { | |
811 | output.notePreBaseConsonant(lastConsonant,chars[lastConsonant],chars[lastConsonant-1],tagArray2); | |
812 | seenPreBaseForm = TRUE; | |
813 | ||
814 | } | |
46f4442e A |
815 | // consonants with nuktas are never vattus |
816 | seenVattu = IndicClassTable::isVattu(charClass) && !hasNukta; | |
b75a7d8f | 817 | |
46f4442e A |
818 | // consonants with nuktas never have below- or post-base forms |
819 | if (hasPostBaseForm) { | |
b75a7d8f A |
820 | if (seenBelowBaseForm) { |
821 | break; | |
822 | } | |
823 | ||
824 | postBase = baseConsonant; | |
46f4442e | 825 | } else if (hasBelowBaseForm) { |
374ca955 | 826 | seenBelowBaseForm = TRUE; |
b75a7d8f A |
827 | } |
828 | ||
829 | postBaseLimit -= 1; | |
830 | } | |
831 | ||
832 | baseConsonant -= 1; | |
833 | } | |
834 | ||
835 | // Write Mpre | |
836 | output.writeMpre(); | |
837 | ||
838 | // Write eyelash RA | |
839 | // NOTE: baseLimit == prev + 3 iff eyelash RA present... | |
840 | if (baseLimit == prev + 3) { | |
73c04bcf A |
841 | output.writeChar(chars[prev], prev, tagArray2); |
842 | output.writeChar(chars[prev + 1], prev + 1, tagArray2); | |
843 | output.writeChar(chars[prev + 2], prev + 2, tagArray2); | |
b75a7d8f A |
844 | } |
845 | ||
846 | // write any pre-base consonants | |
729e4ab9 A |
847 | output.writePreBaseConsonant(); |
848 | ||
374ca955 | 849 | le_bool supressVattu = TRUE; |
b75a7d8f A |
850 | |
851 | for (i = baseLimit; i < baseConsonant; i += 1) { | |
852 | LEUnicode ch = chars[i]; | |
729e4ab9 A |
853 | // Don't put 'pstf' or 'blwf' on anything before the base consonant. |
854 | FeatureMask features = tagArray1 & ~( pstfFeatureMask | blwfFeatureMask ); | |
46f4442e A |
855 | |
856 | charClass = classTable->getCharClass(ch); | |
857 | nextClass = classTable->getCharClass(chars[i + 1]); | |
858 | hasNukta = IndicClassTable::isNukta(nextClass); | |
b75a7d8f A |
859 | |
860 | if (IndicClassTable::isConsonant(charClass)) { | |
46f4442e | 861 | if (IndicClassTable::isVattu(charClass) && !hasNukta && supressVattu) { |
73c04bcf | 862 | features = tagArray4; |
b75a7d8f A |
863 | } |
864 | ||
46f4442e | 865 | supressVattu = IndicClassTable::isVattu(charClass) && !hasNukta; |
b75a7d8f A |
866 | } else if (IndicClassTable::isVirama(charClass) && chars[i + 1] == C_SIGN_ZWNJ) |
867 | { | |
73c04bcf | 868 | features = tagArray4; |
b75a7d8f A |
869 | } |
870 | ||
73c04bcf | 871 | output.writeChar(ch, i, features); |
b75a7d8f A |
872 | } |
873 | ||
874 | le_int32 bcSpan = baseConsonant + 1; | |
875 | ||
374ca955 | 876 | if (bcSpan < markStart && classTable->isNukta(chars[bcSpan])) { |
b75a7d8f A |
877 | bcSpan += 1; |
878 | } | |
879 | ||
46f4442e A |
880 | if (baseConsonant == lastConsonant && bcSpan < markStart && |
881 | (classTable->isVirama(chars[bcSpan]) || classTable->isAlLakuna(chars[bcSpan]))) { | |
b75a7d8f A |
882 | bcSpan += 1; |
883 | ||
374ca955 | 884 | if (bcSpan < markStart && chars[bcSpan] == C_SIGN_ZWNJ) { |
b75a7d8f A |
885 | bcSpan += 1; |
886 | } | |
887 | } | |
888 | ||
889 | // note the base consonant for post-GSUB fixups | |
890 | output.noteBaseConsonant(); | |
891 | ||
892 | // write base consonant | |
893 | for (i = baseConsonant; i < bcSpan; i += 1) { | |
73c04bcf | 894 | output.writeChar(chars[i], i, tagArray4); |
b75a7d8f A |
895 | } |
896 | ||
73c04bcf | 897 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) != 0) { |
b75a7d8f | 898 | output.writeMbelow(); |
374ca955 | 899 | output.writeSMbelow(); // FIXME: there are no SMs in these scripts... |
b75a7d8f A |
900 | output.writeMabove(); |
901 | output.writeMpost(); | |
902 | } | |
903 | ||
904 | // write below-base consonants | |
729e4ab9 | 905 | if (baseConsonant != lastConsonant && !seenPreBaseForm) { |
b75a7d8f | 906 | for (i = bcSpan + 1; i < postBase; i += 1) { |
73c04bcf | 907 | output.writeChar(chars[i], i, tagArray1); |
b75a7d8f A |
908 | } |
909 | ||
910 | if (postBase > lastConsonant) { | |
911 | // write halant that was after base consonant | |
73c04bcf | 912 | output.writeChar(chars[bcSpan], bcSpan, tagArray1); |
b75a7d8f A |
913 | } |
914 | } | |
915 | ||
374ca955 | 916 | // write Mbelow, SMbelow, Mabove |
73c04bcf | 917 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { |
b75a7d8f | 918 | output.writeMbelow(); |
374ca955 | 919 | output.writeSMbelow(); |
b75a7d8f A |
920 | output.writeMabove(); |
921 | } | |
922 | ||
73c04bcf | 923 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) != 0) { |
b75a7d8f | 924 | if (baseLimit == prev + 2) { |
73c04bcf A |
925 | output.writeChar(chars[prev], prev, tagArray0); |
926 | output.writeChar(chars[prev + 1], prev + 1, tagArray0); | |
b75a7d8f A |
927 | } |
928 | ||
374ca955 A |
929 | output.writeVMabove(); |
930 | output.writeSMabove(); // FIXME: there are no SM's in these scripts... | |
b75a7d8f A |
931 | } |
932 | ||
933 | // write post-base consonants | |
934 | // FIXME: does this put the right tags on post-base consonants? | |
729e4ab9 | 935 | if (baseConsonant != lastConsonant && !seenPreBaseForm) { |
b75a7d8f A |
936 | if (postBase <= lastConsonant) { |
937 | for (i = postBase; i <= lastConsonant; i += 1) { | |
73c04bcf | 938 | output.writeChar(chars[i], i, tagArray3); |
b75a7d8f A |
939 | } |
940 | ||
941 | // write halant that was after base consonant | |
73c04bcf | 942 | output.writeChar(chars[bcSpan], bcSpan, tagArray1); |
b75a7d8f A |
943 | } |
944 | ||
945 | // write the trailing halant, if there is one | |
946 | if (lastConsonant < matra && classTable->isVirama(chars[matra])) { | |
73c04bcf | 947 | output.writeChar(chars[matra], matra, tagArray4); |
b75a7d8f A |
948 | } |
949 | } | |
950 | ||
951 | // write Mpost | |
73c04bcf | 952 | if ((classTable->scriptFlags & SF_MATRAS_AFTER_BASE) == 0) { |
b75a7d8f A |
953 | output.writeMpost(); |
954 | } | |
955 | ||
956 | output.writeLengthMark(); | |
46f4442e | 957 | output.writeAlLakuna(); |
b75a7d8f A |
958 | |
959 | // write reph | |
73c04bcf | 960 | if ((classTable->scriptFlags & SF_REPH_AFTER_BELOW) == 0) { |
b75a7d8f | 961 | if (baseLimit == prev + 2) { |
73c04bcf A |
962 | output.writeChar(chars[prev], prev, tagArray0); |
963 | output.writeChar(chars[prev + 1], prev + 1, tagArray0); | |
b75a7d8f A |
964 | } |
965 | ||
374ca955 A |
966 | output.writeVMabove(); |
967 | output.writeSMabove(); | |
b75a7d8f A |
968 | } |
969 | ||
374ca955 | 970 | output.writeVMpost(); |
b75a7d8f A |
971 | |
972 | break; | |
973 | } | |
974 | ||
975 | default: | |
976 | break; | |
977 | } | |
978 | ||
979 | prev = syllable; | |
980 | } | |
981 | ||
982 | *outMPreFixups = mpreFixups; | |
983 | ||
984 | return output.getOutputIndex(); | |
985 | } | |
986 | ||
729e4ab9 | 987 | void IndicReordering::adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success) |
b75a7d8f A |
988 | { |
989 | if (mpreFixups != NULL) { | |
729e4ab9 | 990 | mpreFixups->apply(glyphStorage, success); |
b75a7d8f A |
991 | |
992 | delete mpreFixups; | |
993 | } | |
994 | } | |
995 | ||
729e4ab9 A |
996 | void IndicReordering::applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count) |
997 | { | |
998 | LEErrorCode success = LE_NO_ERROR; | |
999 | ||
1000 | // This sets us up for 2nd pass of glyph substitution as well as setting the feature masks for the | |
1001 | // GPOS table lookups | |
1002 | ||
1003 | for ( le_int32 i = 0 ; i < count ; i++ ) { | |
1004 | glyphStorage.setAuxData(i, ( presentationFormsMask | positioningFormsMask ), success); | |
1005 | } | |
1006 | ||
1007 | } | |
1008 | void IndicReordering::finalReordering(LEGlyphStorage &glyphStorage, le_int32 count) | |
1009 | { | |
1010 | LEErrorCode success = LE_NO_ERROR; | |
1011 | ||
1012 | // Reposition REPH as appropriate | |
1013 | ||
1014 | for ( le_int32 i = 0 ; i < count ; i++ ) { | |
1015 | ||
1016 | le_int32 tmpAuxData = glyphStorage.getAuxData(i,success); | |
1017 | LEGlyphID tmpGlyph = glyphStorage.getGlyphID(i,success); | |
1018 | ||
1019 | if ( ( tmpGlyph != NO_GLYPH ) && (tmpAuxData & rephConsonantMask) && !(tmpAuxData & repositionedGlyphMask)) { | |
1020 | ||
1021 | le_bool targetPositionFound = false; | |
1022 | le_int32 targetPosition = i+1; | |
1023 | le_int32 baseConsonantData; | |
1024 | ||
1025 | while (!targetPositionFound) { | |
1026 | tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); | |
1027 | tmpAuxData = glyphStorage.getAuxData(targetPosition,success); | |
1028 | ||
1029 | if ( tmpAuxData & baseConsonantMask ) { | |
1030 | baseConsonantData = tmpAuxData; | |
1031 | targetPositionFound = true; | |
1032 | } else { | |
1033 | targetPosition++; | |
1034 | } | |
1035 | } | |
1036 | ||
1037 | // Make sure we are not putting the reph into an empty hole | |
1038 | ||
1039 | le_bool targetPositionHasGlyph = false; | |
1040 | while (!targetPositionHasGlyph) { | |
1041 | tmpGlyph = glyphStorage.getGlyphID(targetPosition,success); | |
1042 | if ( tmpGlyph != NO_GLYPH ) { | |
1043 | targetPositionHasGlyph = true; | |
1044 | } else { | |
1045 | targetPosition--; | |
1046 | } | |
1047 | } | |
1048 | ||
1049 | // Make sure that REPH is positioned after any above base or post base matras | |
1050 | // | |
1051 | le_bool checkMatraDone = false; | |
1052 | le_int32 checkMatraPosition = targetPosition+1; | |
1053 | while ( !checkMatraDone ) { | |
1054 | tmpAuxData = glyphStorage.getAuxData(checkMatraPosition,success); | |
1055 | if ( checkMatraPosition >= count || ( (tmpAuxData ^ baseConsonantData) & LE_GLYPH_GROUP_MASK)) { | |
1056 | checkMatraDone = true; | |
1057 | continue; | |
1058 | } | |
1059 | if ( (tmpAuxData & matraMask) && | |
1060 | (((tmpAuxData & markPositionMask) == aboveBasePosition) || | |
1061 | ((tmpAuxData & markPositionMask) == postBasePosition))) { | |
1062 | targetPosition = checkMatraPosition; | |
1063 | } | |
1064 | checkMatraPosition++; | |
1065 | } | |
1066 | ||
1067 | glyphStorage.moveGlyph(i,targetPosition,repositionedGlyphMask); | |
1068 | } | |
1069 | } | |
1070 | } | |
1071 | ||
1072 | ||
1073 | le_int32 IndicReordering::v2process(const LEUnicode *chars, le_int32 charCount, le_int32 scriptCode, | |
1074 | LEUnicode *outChars, LEGlyphStorage &glyphStorage) | |
1075 | { | |
1076 | const IndicClassTable *classTable = IndicClassTable::getScriptClassTable(scriptCode); | |
1077 | ||
1078 | DynamicProperties dynProps[INDIC_BLOCK_SIZE]; | |
1079 | IndicReordering::getDynamicProperties(dynProps,classTable); | |
1080 | ||
1081 | IndicReorderingOutput output(outChars, glyphStorage, NULL); | |
1082 | le_int32 i, firstConsonant, baseConsonant, secondConsonant, inv_count = 0, beginSyllable = 0; | |
1083 | //le_bool lastInWord = FALSE; | |
1084 | ||
1085 | while (beginSyllable < charCount) { | |
1086 | le_int32 nextSyllable = findSyllable(classTable, chars, beginSyllable, charCount); | |
1087 | ||
1088 | output.reset(); | |
1089 | ||
1090 | // Find the First Consonant | |
1091 | for ( firstConsonant = beginSyllable ; firstConsonant < nextSyllable ; firstConsonant++ ) { | |
1092 | if ( classTable->isConsonant(chars[firstConsonant]) ) { | |
1093 | break; | |
1094 | } | |
1095 | } | |
1096 | ||
1097 | // Find the base consonant | |
1098 | ||
1099 | baseConsonant = nextSyllable - 1; | |
1100 | secondConsonant = firstConsonant; | |
1101 | ||
1102 | // TODO: Use Dynamic Properties for hasBelowBaseForm and hasPostBaseForm() | |
1103 | ||
1104 | while ( baseConsonant > firstConsonant ) { | |
1105 | if ( classTable->isConsonant(chars[baseConsonant]) && | |
1106 | !classTable->hasBelowBaseForm(chars[baseConsonant]) && | |
1107 | !classTable->hasPostBaseForm(chars[baseConsonant]) ) { | |
1108 | break; | |
1109 | } | |
1110 | else { | |
1111 | if ( classTable->isConsonant(chars[baseConsonant]) ) { | |
1112 | secondConsonant = baseConsonant; | |
1113 | } | |
1114 | baseConsonant--; | |
1115 | } | |
1116 | } | |
1117 | ||
1118 | // If the syllable starts with Ra + Halant ( in a script that has Reph ) and has more than one | |
1119 | // consonant, Ra is excluced from candidates for base consonants | |
1120 | ||
1121 | if ( classTable->isReph(chars[beginSyllable]) && | |
1122 | beginSyllable+1 < nextSyllable && classTable->isVirama(chars[beginSyllable+1]) && | |
1123 | secondConsonant != firstConsonant) { | |
1124 | baseConsonant = secondConsonant; | |
1125 | } | |
1126 | ||
1127 | // Populate the output | |
1128 | for ( i = beginSyllable ; i < nextSyllable ; i++ ) { | |
1129 | ||
1130 | // Handle invalid combinartions | |
1131 | ||
1132 | if ( classTable->isVirama(chars[beginSyllable]) || | |
1133 | classTable->isMatra(chars[beginSyllable]) || | |
1134 | classTable->isVowelModifier(chars[beginSyllable]) || | |
1135 | classTable->isNukta(chars[beginSyllable]) ) { | |
1136 | output.writeChar(C_DOTTED_CIRCLE,beginSyllable,basicShapingFormsMask); | |
1137 | inv_count++; | |
1138 | } | |
1139 | output.writeChar(chars[i],i, basicShapingFormsMask); | |
1140 | ||
1141 | } | |
1142 | ||
1143 | // Adjust features and set syllable structure bits | |
1144 | ||
1145 | for ( i = beginSyllable ; i < nextSyllable ; i++ ) { | |
1146 | ||
1147 | FeatureMask outMask = output.getFeatures(i+inv_count); | |
1148 | FeatureMask saveMask = outMask; | |
1149 | ||
1150 | // Since reph can only validly occur at the beginning of a syllable | |
1151 | // We only apply it to the first 2 characters in the syllable, to keep it from | |
1152 | // conflicting with other features ( i.e. rkrf ) | |
1153 | ||
1154 | // TODO : Use the dynamic property for determining isREPH | |
1155 | if ( i == beginSyllable && i < baseConsonant && classTable->isReph(chars[i]) && | |
1156 | i+1 < nextSyllable && classTable->isVirama(chars[i+1])) { | |
1157 | outMask |= rphfFeatureMask; | |
1158 | outMask |= rephConsonantMask; | |
1159 | output.setFeatures(i+1+inv_count,outMask); | |
1160 | ||
1161 | } | |
1162 | ||
1163 | if ( i == baseConsonant ) { | |
1164 | outMask |= baseConsonantMask; | |
1165 | } | |
1166 | ||
1167 | if ( classTable->isMatra(chars[i])) { | |
1168 | outMask |= matraMask; | |
1169 | if ( classTable->hasAboveBaseForm(chars[i])) { | |
1170 | outMask |= aboveBasePosition; | |
1171 | } else if ( classTable->hasBelowBaseForm(chars[i])) { | |
1172 | outMask |= belowBasePosition; | |
1173 | } | |
1174 | } | |
1175 | ||
1176 | // Don't apply half form to virama that stands alone at the end of a syllable | |
1177 | // to prevent half forms from forming when syllable ends with virama | |
1178 | ||
1179 | if ( classTable->isVirama(chars[i]) && (i+1 == nextSyllable) ) { | |
1180 | outMask ^= halfFeatureMask; | |
1181 | if ( classTable->isConsonant(chars[i-1]) ) { | |
1182 | FeatureMask tmp = output.getFeatures(i-1+inv_count); | |
1183 | tmp ^= halfFeatureMask; | |
1184 | output.setFeatures(i-1+inv_count,tmp); | |
1185 | } | |
1186 | } | |
1187 | ||
1188 | if ( outMask != saveMask ) { | |
1189 | output.setFeatures(i+inv_count,outMask); | |
1190 | } | |
1191 | } | |
1192 | ||
1193 | output.decomposeReorderMatras(classTable,beginSyllable,nextSyllable,inv_count); | |
1194 | ||
1195 | beginSyllable = nextSyllable; | |
1196 | } | |
1197 | ||
1198 | ||
1199 | return output.getOutputIndex(); | |
1200 | } | |
1201 | ||
1202 | ||
1203 | void IndicReordering::getDynamicProperties( DynamicProperties *, const IndicClassTable *classTable ) { | |
1204 | ||
1205 | ||
1206 | LEUnicode currentChar; | |
729e4ab9 A |
1207 | LEUnicode workChars[2]; |
1208 | LEGlyphStorage workGlyphs; | |
1209 | ||
1210 | IndicReorderingOutput workOutput(workChars, workGlyphs, NULL); | |
1211 | ||
1212 | //le_int32 offset = 0; | |
1213 | ||
4388f060 A |
1214 | #if 0 |
1215 | // TODO: Should this section of code have actually been doing something? | |
729e4ab9 | 1216 | // First find the relevant virama for the script we are dealing with |
4388f060 | 1217 | LEUnicode virama; |
729e4ab9 A |
1218 | for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { |
1219 | if ( classTable->isVirama(currentChar)) { | |
1220 | virama = currentChar; | |
1221 | break; | |
1222 | } | |
1223 | } | |
4388f060 | 1224 | #endif |
729e4ab9 A |
1225 | |
1226 | for ( currentChar = classTable->firstChar ; currentChar <= classTable->lastChar ; currentChar++ ) { | |
1227 | if ( classTable->isConsonant(currentChar)) { | |
1228 | workOutput.reset(); | |
1229 | } | |
1230 | } | |
1231 | ||
1232 | ||
1233 | } | |
1234 | ||
b75a7d8f | 1235 | U_NAMESPACE_END |