]> git.saurik.com Git - apple/icu.git/blob - icuSources/layout/IndicReordering.h
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / layout / IndicReordering.h
1 /*
2 *
3 * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved
4 *
5 */
6
7 #ifndef __INDICREORDERING_H
8 #define __INDICREORDERING_H
9
10 /**
11 * \file
12 * \internal
13 */
14
15 #include "LETypes.h"
16 #include "OpenTypeTables.h"
17
18 U_NAMESPACE_BEGIN
19
// Characters that get referred to by name...
/**
 * Code points that the reordering code refers to by name.
 */
enum
{
    C_SIGN_ZWNJ = 0x200C,   // U+200C ZERO WIDTH NON-JOINER
    C_SIGN_ZWJ  = 0x200D    // U+200D ZERO WIDTH JOINER
};
26
27 typedef LEUnicode SplitMatra[3];
28
29 class MPreFixups;
30 class LEGlyphStorage;
31
32 struct IndicClassTable
33 {
34 enum CharClassValues
35 {
36 CC_RESERVED = 0,
37 CC_VOWEL_MODIFIER = 1,
38 CC_STRESS_MARK = 2,
39 CC_INDEPENDENT_VOWEL = 3,
40 CC_INDEPENDENT_VOWEL_2 = 4,
41 CC_CONSONANT = 5,
42 CC_CONSONANT_WITH_NUKTA = 6,
43 CC_NUKTA = 7,
44 CC_DEPENDENT_VOWEL = 8,
45 CC_SPLIT_VOWEL_PIECE_1 = 9,
46 CC_SPLIT_VOWEL_PIECE_2 = 10,
47 CC_SPLIT_VOWEL_PIECE_3 = 11,
48 CC_VIRAMA = 12,
49 CC_ZERO_WIDTH_MARK = 13,
50 CC_COUNT = 14
51 };
52
53 enum CharClassFlags
54 {
55 CF_CLASS_MASK = 0x0000FFFF,
56
57 CF_CONSONANT = 0x80000000,
58
59 CF_REPH = 0x40000000,
60 CF_VATTU = 0x20000000,
61 CF_BELOW_BASE = 0x10000000,
62 CF_POST_BASE = 0x08000000,
63 CF_LENGTH_MARK = 0x04000000,
64
65 CF_POS_BEFORE = 0x00300000,
66 CF_POS_BELOW = 0x00200000,
67 CF_POS_ABOVE = 0x00100000,
68 CF_POS_AFTER = 0x00000000,
69 CF_POS_MASK = 0x00300000,
70
71 CF_INDEX_MASK = 0x000F0000,
72 CF_INDEX_SHIFT = 16
73 };
74
75 typedef le_uint32 CharClass;
76
77 enum ScriptFlagBits
78 {
79 SF_MATRAS_AFTER_BASE = 0x80000000,
80 SF_REPH_AFTER_BELOW = 0x40000000,
81 SF_EYELASH_RA = 0x20000000,
82 SF_MPRE_FIXUP = 0x10000000,
83
84 SF_POST_BASE_LIMIT_MASK = 0x0000FFFF,
85 SF_NO_POST_BASE_LIMIT = 0x00007FFF
86 };
87
88 typedef le_int32 ScriptFlags;
89
90 LEUnicode firstChar;
91 LEUnicode lastChar;
92 le_int32 worstCaseExpansion;
93 ScriptFlags scriptFlags;
94 const CharClass *classTable;
95 const SplitMatra *splitMatraTable;
96
97 le_int32 getWorstCaseExpansion() const;
98
99 CharClass getCharClass(LEUnicode ch) const;
100 const SplitMatra *getSplitMatra(CharClass charClass) const;
101
102 le_bool isVowelModifier(LEUnicode ch) const;
103 le_bool isStressMark(LEUnicode ch) const;
104 le_bool isConsonant(LEUnicode ch) const;
105 le_bool isReph(LEUnicode ch) const;
106 le_bool isVirama(LEUnicode ch) const;
107 le_bool isNukta(LEUnicode ch) const;
108 le_bool isVattu(LEUnicode ch) const;
109 le_bool isMatra(LEUnicode ch) const;
110 le_bool isSplitMatra(LEUnicode ch) const;
111 le_bool isLengthMark(LEUnicode ch) const;
112 le_bool hasPostOrBelowBaseForm(LEUnicode ch) const;
113 le_bool hasPostBaseForm(LEUnicode ch) const;
114 le_bool hasBelowBaseForm(LEUnicode ch) const;
115
116 static le_bool isVowelModifier(CharClass charClass);
117 static le_bool isStressMark(CharClass charClass);
118 static le_bool isConsonant(CharClass charClass);
119 static le_bool isReph(CharClass charClass);
120 static le_bool isVirama(CharClass charClass);
121 static le_bool isNukta(CharClass charClass);
122 static le_bool isVattu(CharClass charClass);
123 static le_bool isMatra(CharClass charClass);
124 static le_bool isSplitMatra(CharClass charClass);
125 static le_bool isLengthMark(CharClass charClass);
126 static le_bool hasPostOrBelowBaseForm(CharClass charClass);
127 static le_bool hasPostBaseForm(CharClass charClass);
128 static le_bool hasBelowBaseForm(CharClass charClass);
129
130 static const IndicClassTable *getScriptClassTable(le_int32 scriptCode);
131 };
132
133 class IndicReordering /* not : public UObject because all methods are static */ {
134 public:
135 static le_int32 getWorstCaseExpansion(le_int32 scriptCode);
136
137 static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
138 LEUnicode *outChars, LEGlyphStorage &glyphStorage,
139 MPreFixups **outMPreFixups);
140
141 static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage);
142
143 static const LETag *getFeatureOrder();
144
145 private:
146 // do not instantiate
147 IndicReordering();
148
149 static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
150
151 };
152
153 inline le_int32 IndicClassTable::getWorstCaseExpansion() const
154 {
155 return worstCaseExpansion;
156 }
157
158 inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const
159 {
160 le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT;
161
162 return &splitMatraTable[index - 1];
163 }
164
165 inline le_bool IndicClassTable::isVowelModifier(CharClass charClass)
166 {
167 return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER;
168 }
169
170 inline le_bool IndicClassTable::isStressMark(CharClass charClass)
171 {
172 return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK;
173 }
174
175 inline le_bool IndicClassTable::isConsonant(CharClass charClass)
176 {
177 return (charClass & CF_CONSONANT) != 0;
178 }
179
180 inline le_bool IndicClassTable::isReph(CharClass charClass)
181 {
182 return (charClass & CF_REPH) != 0;
183 }
184
185 inline le_bool IndicClassTable::isNukta(CharClass charClass)
186 {
187 return (charClass & CF_CLASS_MASK) == CC_NUKTA;
188 }
189
190 inline le_bool IndicClassTable::isVirama(CharClass charClass)
191 {
192 return (charClass & CF_CLASS_MASK) == CC_VIRAMA;
193 }
194
195 inline le_bool IndicClassTable::isVattu(CharClass charClass)
196 {
197 return (charClass & CF_VATTU) != 0;
198 }
199
200 inline le_bool IndicClassTable::isMatra(CharClass charClass)
201 {
202 charClass &= CF_CLASS_MASK;
203
204 return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3;
205 }
206
207 inline le_bool IndicClassTable::isSplitMatra(CharClass charClass)
208 {
209 return (charClass & CF_INDEX_MASK) != 0;
210 }
211
212 inline le_bool IndicClassTable::isLengthMark(CharClass charClass)
213 {
214 return (charClass & CF_LENGTH_MARK) != 0;
215 }
216
217 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass)
218 {
219 return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0;
220 }
221
222 inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass)
223 {
224 return (charClass & CF_POST_BASE) != 0;
225 }
226
227 inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass)
228 {
229 return (charClass & CF_BELOW_BASE) != 0;
230 }
231
232 inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const
233 {
234 return isVowelModifier(getCharClass(ch));
235 }
236
237 inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const
238 {
239 return isStressMark(getCharClass(ch));
240 }
241
242 inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const
243 {
244 return isConsonant(getCharClass(ch));
245 }
246
247 inline le_bool IndicClassTable::isReph(LEUnicode ch) const
248 {
249 return isReph(getCharClass(ch));
250 }
251
252 inline le_bool IndicClassTable::isVirama(LEUnicode ch) const
253 {
254 return isVirama(getCharClass(ch));
255 }
256
257 inline le_bool IndicClassTable::isNukta(LEUnicode ch) const
258 {
259 return isNukta(getCharClass(ch));
260 }
261
262 inline le_bool IndicClassTable::isVattu(LEUnicode ch) const
263 {
264 return isVattu(getCharClass(ch));
265 }
266
267 inline le_bool IndicClassTable::isMatra(LEUnicode ch) const
268 {
269 return isMatra(getCharClass(ch));
270 }
271
272 inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const
273 {
274 return isSplitMatra(getCharClass(ch));
275 }
276
277 inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const
278 {
279 return isLengthMark(getCharClass(ch));
280 }
281
282 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const
283 {
284 return hasPostOrBelowBaseForm(getCharClass(ch));
285 }
286
287 inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const
288 {
289 return hasPostBaseForm(getCharClass(ch));
290 }
291
292 inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const
293 {
294 return hasBelowBaseForm(getCharClass(ch));
295 }
296
297 U_NAMESPACE_END
298 #endif