]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * | |
3 | * (C) Copyright IBM Corp. 1998-2011 - All Rights Reserved | |
4 | * | |
5 | */ | |
6 | ||
7 | #ifndef __INDICREORDERING_H | |
8 | #define __INDICREORDERING_H | |
9 | ||
10 | /** | |
11 | * \file | |
12 | * \internal | |
13 | */ | |
14 | ||
15 | #include "LETypes.h" | |
16 | #include "OpenTypeTables.h" | |
17 | ||
18 | U_NAMESPACE_BEGIN | |
19 | ||
// Characters that get referred to by name...
// U+200C, the zero width non-joiner.
#define C_SIGN_ZWNJ 0x200C
// U+200D, the zero width joiner.
#define C_SIGN_ZWJ 0x200D
23 | ||
// Character class values
//
// A class value is what remains of a CharClass word after masking it with
// CF_CLASS_MASK; the static IndicClassTable predicates compare that masked value
// against the constants below.
#define CC_RESERVED 0U
#define CC_VOWEL_MODIFIER 1U
#define CC_STRESS_MARK 2U
#define CC_INDEPENDENT_VOWEL 3U
#define CC_INDEPENDENT_VOWEL_2 4U
#define CC_INDEPENDENT_VOWEL_3 5U
#define CC_CONSONANT 6U
#define CC_CONSONANT_WITH_NUKTA 7U
#define CC_NUKTA 8U
// CC_DEPENDENT_VOWEL through CC_SPLIT_VOWEL_PIECE_3 must stay contiguous:
// IndicClassTable::isMatra() tests for membership with a range comparison.
#define CC_DEPENDENT_VOWEL 9U
#define CC_SPLIT_VOWEL_PIECE_1 10U
#define CC_SPLIT_VOWEL_PIECE_2 11U
#define CC_SPLIT_VOWEL_PIECE_3 12U
#define CC_VIRAMA 13U
#define CC_ZERO_WIDTH_MARK 14U
#define CC_AL_LAKUNA 15U
// One past the highest class value defined above.
#define CC_COUNT 16U
42 | ||
// Character class flags
//
// These share a 32-bit CharClass word with the CC_* class value.

// Low 16 bits: selects the CC_* class value.
#define CF_CLASS_MASK 0x0000FFFFU

// Tested by IndicClassTable::isConsonant().
#define CF_CONSONANT 0x80000000U

// Single-bit properties, each tested by the IndicClassTable predicate of the
// matching name (isReph, isVattu, hasBelowBaseForm, hasPostBaseForm,
// isLengthMark, hasPreBaseForm).
#define CF_REPH 0x40000000U
#define CF_VATTU 0x20000000U
#define CF_BELOW_BASE 0x10000000U
#define CF_POST_BASE 0x08000000U
#define CF_LENGTH_MARK 0x04000000U
#define CF_PRE_BASE 0x02000000U

// Two-bit position field selected by CF_POS_MASK. CF_POS_AFTER is the all-zero
// encoding, so it cannot be detected with a plain bit test; compare the masked
// field instead, as IndicClassTable::hasAboveBaseForm() does for CF_POS_ABOVE.
#define CF_POS_BEFORE 0x00300000U
#define CF_POS_BELOW 0x00200000U
#define CF_POS_ABOVE 0x00100000U
#define CF_POS_AFTER 0x00000000U
#define CF_POS_MASK 0x00300000U

// Four-bit index field. A non-zero value marks a split matra
// (IndicClassTable::isSplitMatra()); IndicClassTable::getSplitMatra() shifts the
// field down by CF_INDEX_SHIFT and uses (index - 1) as the splitMatraTable entry.
#define CF_INDEX_MASK 0x000F0000U
#define CF_INDEX_SHIFT 16
63 | ||
// Script flag bits
//
// Stored in IndicClassTable::scriptFlags. Within this header only
// SF_FILTER_ZERO_WIDTH is read (by IndicClassTable::getFilterZeroWidth()); the
// remaining bits are consumed outside this file.
#define SF_MATRAS_AFTER_BASE 0x80000000U
#define SF_REPH_AFTER_BELOW 0x40000000U
#define SF_EYELASH_RA 0x20000000U
#define SF_MPRE_FIXUP 0x10000000U
#define SF_FILTER_ZERO_WIDTH 0x08000000U

// Low 16 bits of the script flags.
// NOTE(review): SF_NO_POST_BASE_LIMIT looks like a sentinel stored in that
// field meaning "no limit" - confirm against the reordering implementation.
#define SF_POST_BASE_LIMIT_MASK 0x0000FFFFU
#define SF_NO_POST_BASE_LIMIT 0x00007FFFU
73 | ||
// Number of LEUnicode slots in one SplitMatra entry.
#define SM_MAX_PIECES 3

// One split-matra entry: a fixed-size array of SM_MAX_PIECES code units.
// IndicClassTable::splitMatraTable points at an array of these.
typedef LEUnicode SplitMatra[SM_MAX_PIECES];

// Forward declarations: only used by pointer or reference in this header.
class MPreFixups;
class LEGlyphStorage;
80 | ||
// Dynamic Properties ( v2 fonts only )
//
// A DynamicProperties value is a 32-bit word of DP_* flag bits;
// IndicReordering::getDynamicProperties() receives a pointer to these.
typedef le_uint32 DynamicProperties;

// NOTE(review): the names match the OpenType feature tags rphf/half/pref/blwf/pstf;
// the exact meaning of each bit is defined by the implementation, not this header.
#define DP_REPH 0x80000000U
#define DP_HALF 0x40000000U
#define DP_PREF 0x20000000U
#define DP_BLWF 0x10000000U
#define DP_PSTF 0x08000000U
89 | ||
90 | struct IndicClassTable | |
91 | { | |
92 | typedef le_uint32 CharClass; | |
93 | typedef le_uint32 ScriptFlags; | |
94 | ||
95 | LEUnicode firstChar; | |
96 | LEUnicode lastChar; | |
97 | le_int32 worstCaseExpansion; | |
98 | ScriptFlags scriptFlags; | |
99 | const CharClass *classTable; | |
100 | const SplitMatra *splitMatraTable; | |
101 | ||
102 | inline le_int32 getWorstCaseExpansion() const; | |
103 | inline le_bool getFilterZeroWidth() const; | |
104 | ||
105 | CharClass getCharClass(LEUnicode ch) const; | |
106 | ||
107 | inline const SplitMatra *getSplitMatra(CharClass charClass) const; | |
108 | ||
109 | inline le_bool isVowelModifier(LEUnicode ch) const; | |
110 | inline le_bool isStressMark(LEUnicode ch) const; | |
111 | inline le_bool isConsonant(LEUnicode ch) const; | |
112 | inline le_bool isReph(LEUnicode ch) const; | |
113 | inline le_bool isVirama(LEUnicode ch) const; | |
114 | inline le_bool isAlLakuna(LEUnicode ch) const; | |
115 | inline le_bool isNukta(LEUnicode ch) const; | |
116 | inline le_bool isVattu(LEUnicode ch) const; | |
117 | inline le_bool isMatra(LEUnicode ch) const; | |
118 | inline le_bool isSplitMatra(LEUnicode ch) const; | |
119 | inline le_bool isLengthMark(LEUnicode ch) const; | |
120 | inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const; | |
121 | inline le_bool hasPostBaseForm(LEUnicode ch) const; | |
122 | inline le_bool hasBelowBaseForm(LEUnicode ch) const; | |
123 | inline le_bool hasAboveBaseForm(LEUnicode ch) const; | |
124 | inline le_bool hasPreBaseForm(LEUnicode ch) const; | |
125 | ||
126 | inline static le_bool isVowelModifier(CharClass charClass); | |
127 | inline static le_bool isStressMark(CharClass charClass); | |
128 | inline static le_bool isConsonant(CharClass charClass); | |
129 | inline static le_bool isReph(CharClass charClass); | |
130 | inline static le_bool isVirama(CharClass charClass); | |
131 | inline static le_bool isAlLakuna(CharClass charClass); | |
132 | inline static le_bool isNukta(CharClass charClass); | |
133 | inline static le_bool isVattu(CharClass charClass); | |
134 | inline static le_bool isMatra(CharClass charClass); | |
135 | inline static le_bool isSplitMatra(CharClass charClass); | |
136 | inline static le_bool isLengthMark(CharClass charClass); | |
137 | inline static le_bool hasPostOrBelowBaseForm(CharClass charClass); | |
138 | inline static le_bool hasPostBaseForm(CharClass charClass); | |
139 | inline static le_bool hasBelowBaseForm(CharClass charClass); | |
140 | inline static le_bool hasAboveBaseForm(CharClass charClass); | |
141 | inline static le_bool hasPreBaseForm(CharClass charClass); | |
142 | ||
143 | static const IndicClassTable *getScriptClassTable(le_int32 scriptCode); | |
144 | }; | |
145 | ||
146 | class IndicReordering /* not : public UObject because all methods are static */ { | |
147 | public: | |
148 | static le_int32 getWorstCaseExpansion(le_int32 scriptCode); | |
149 | ||
150 | static le_bool getFilterZeroWidth(le_int32 scriptCode); | |
151 | ||
152 | static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, | |
153 | LEUnicode *outChars, LEGlyphStorage &glyphStorage, | |
154 | MPreFixups **outMPreFixups, LEErrorCode& success); | |
155 | ||
156 | static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, LEErrorCode& success); | |
157 | ||
158 | static le_int32 v2process(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, | |
159 | LEUnicode *outChars, LEGlyphStorage &glyphStorage); | |
160 | ||
161 | static const FeatureMap *getFeatureMap(le_int32 &count); | |
162 | ||
163 | static const FeatureMap *getv2FeatureMap(le_int32 &count); | |
164 | ||
165 | static void applyPresentationForms(LEGlyphStorage &glyphStorage, le_int32 count); | |
166 | ||
167 | static void finalReordering(LEGlyphStorage &glyphStorage, le_int32 count); | |
168 | ||
169 | static void getDynamicProperties(DynamicProperties *dProps, const IndicClassTable *classTable); | |
170 | ||
171 | private: | |
172 | // do not instantiate | |
173 | IndicReordering(); | |
174 | ||
175 | static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); | |
176 | ||
177 | }; | |
178 | ||
179 | inline le_int32 IndicClassTable::getWorstCaseExpansion() const | |
180 | { | |
181 | return worstCaseExpansion; | |
182 | } | |
183 | ||
184 | inline le_bool IndicClassTable::getFilterZeroWidth() const | |
185 | { | |
186 | return (scriptFlags & SF_FILTER_ZERO_WIDTH) != 0; | |
187 | } | |
188 | ||
189 | inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const | |
190 | { | |
191 | le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT; | |
192 | ||
193 | return &splitMatraTable[index - 1]; | |
194 | } | |
195 | ||
196 | inline le_bool IndicClassTable::isVowelModifier(CharClass charClass) | |
197 | { | |
198 | return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER; | |
199 | } | |
200 | ||
201 | inline le_bool IndicClassTable::isStressMark(CharClass charClass) | |
202 | { | |
203 | return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK; | |
204 | } | |
205 | ||
206 | inline le_bool IndicClassTable::isConsonant(CharClass charClass) | |
207 | { | |
208 | return (charClass & CF_CONSONANT) != 0; | |
209 | } | |
210 | ||
211 | inline le_bool IndicClassTable::isReph(CharClass charClass) | |
212 | { | |
213 | return (charClass & CF_REPH) != 0; | |
214 | } | |
215 | ||
216 | inline le_bool IndicClassTable::isNukta(CharClass charClass) | |
217 | { | |
218 | return (charClass & CF_CLASS_MASK) == CC_NUKTA; | |
219 | } | |
220 | ||
221 | inline le_bool IndicClassTable::isVirama(CharClass charClass) | |
222 | { | |
223 | return (charClass & CF_CLASS_MASK) == CC_VIRAMA; | |
224 | } | |
225 | ||
226 | inline le_bool IndicClassTable::isAlLakuna(CharClass charClass) | |
227 | { | |
228 | return (charClass & CF_CLASS_MASK) == CC_AL_LAKUNA; | |
229 | } | |
230 | ||
231 | inline le_bool IndicClassTable::isVattu(CharClass charClass) | |
232 | { | |
233 | return (charClass & CF_VATTU) != 0; | |
234 | } | |
235 | ||
236 | inline le_bool IndicClassTable::isMatra(CharClass charClass) | |
237 | { | |
238 | charClass &= CF_CLASS_MASK; | |
239 | ||
240 | return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3; | |
241 | } | |
242 | ||
243 | inline le_bool IndicClassTable::isSplitMatra(CharClass charClass) | |
244 | { | |
245 | return (charClass & CF_INDEX_MASK) != 0; | |
246 | } | |
247 | ||
248 | inline le_bool IndicClassTable::isLengthMark(CharClass charClass) | |
249 | { | |
250 | return (charClass & CF_LENGTH_MARK) != 0; | |
251 | } | |
252 | ||
253 | inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass) | |
254 | { | |
255 | return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0; | |
256 | } | |
257 | ||
258 | inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass) | |
259 | { | |
260 | return (charClass & CF_POST_BASE) != 0; | |
261 | } | |
262 | ||
263 | inline le_bool IndicClassTable::hasPreBaseForm(CharClass charClass) | |
264 | { | |
265 | return (charClass & CF_PRE_BASE) != 0; | |
266 | } | |
267 | ||
268 | inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass) | |
269 | { | |
270 | return (charClass & CF_BELOW_BASE) != 0; | |
271 | } | |
272 | ||
273 | inline le_bool IndicClassTable::hasAboveBaseForm(CharClass charClass) | |
274 | { | |
275 | return ((charClass & CF_POS_MASK) == CF_POS_ABOVE); | |
276 | } | |
277 | ||
278 | inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const | |
279 | { | |
280 | return isVowelModifier(getCharClass(ch)); | |
281 | } | |
282 | ||
283 | inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const | |
284 | { | |
285 | return isStressMark(getCharClass(ch)); | |
286 | } | |
287 | ||
288 | inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const | |
289 | { | |
290 | return isConsonant(getCharClass(ch)); | |
291 | } | |
292 | ||
293 | inline le_bool IndicClassTable::isReph(LEUnicode ch) const | |
294 | { | |
295 | return isReph(getCharClass(ch)); | |
296 | } | |
297 | ||
298 | inline le_bool IndicClassTable::isVirama(LEUnicode ch) const | |
299 | { | |
300 | return isVirama(getCharClass(ch)); | |
301 | } | |
302 | ||
303 | inline le_bool IndicClassTable::isAlLakuna(LEUnicode ch) const | |
304 | { | |
305 | return isAlLakuna(getCharClass(ch)); | |
306 | } | |
307 | ||
308 | inline le_bool IndicClassTable::isNukta(LEUnicode ch) const | |
309 | { | |
310 | return isNukta(getCharClass(ch)); | |
311 | } | |
312 | ||
313 | inline le_bool IndicClassTable::isVattu(LEUnicode ch) const | |
314 | { | |
315 | return isVattu(getCharClass(ch)); | |
316 | } | |
317 | ||
318 | inline le_bool IndicClassTable::isMatra(LEUnicode ch) const | |
319 | { | |
320 | return isMatra(getCharClass(ch)); | |
321 | } | |
322 | ||
323 | inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const | |
324 | { | |
325 | return isSplitMatra(getCharClass(ch)); | |
326 | } | |
327 | ||
328 | inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const | |
329 | { | |
330 | return isLengthMark(getCharClass(ch)); | |
331 | } | |
332 | ||
333 | inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const | |
334 | { | |
335 | return hasPostOrBelowBaseForm(getCharClass(ch)); | |
336 | } | |
337 | ||
338 | inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const | |
339 | { | |
340 | return hasPostBaseForm(getCharClass(ch)); | |
341 | } | |
342 | ||
343 | inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const | |
344 | { | |
345 | return hasBelowBaseForm(getCharClass(ch)); | |
346 | } | |
347 | ||
348 | inline le_bool IndicClassTable::hasPreBaseForm(LEUnicode ch) const | |
349 | { | |
350 | return hasPreBaseForm(getCharClass(ch)); | |
351 | } | |
352 | ||
353 | inline le_bool IndicClassTable::hasAboveBaseForm(LEUnicode ch) const | |
354 | { | |
355 | return hasAboveBaseForm(getCharClass(ch)); | |
356 | } | |
357 | U_NAMESPACE_END | |
358 | #endif |