// (C) Copyright IBM Corp. 1998-2014 - All Rights Reserved
9 #include "OpenTypeTables.h"
10 #include "OpenTypeUtilities.h"
11 #include "IndicReordering.h"
// Split matra table indices
//
// _xN encodes the index N shifted left by CF_INDEX_SHIFT so that it can be
// OR-ed into a character class value (see the _s1.._s9 classes).
#define _x1 (1 << CF_INDEX_SHIFT)
#define _x2 (2 << CF_INDEX_SHIFT)
#define _x3 (3 << CF_INDEX_SHIFT)
#define _x4 (4 << CF_INDEX_SHIFT)
#define _x5 (5 << CF_INDEX_SHIFT)
#define _x6 (6 << CF_INDEX_SHIFT)
#define _x7 (7 << CF_INDEX_SHIFT)
#define _x8 (8 << CF_INDEX_SHIFT)
#define _x9 (9 << CF_INDEX_SHIFT)
// Shorthand names for the character classes used in the tables below.
// Each one is a CC_* class code, optionally OR-ed with CF_* flags
// (position, consonant, length mark).
#define _xx (CC_RESERVED)
#define _ma (CC_VOWEL_MODIFIER | CF_POS_ABOVE)
#define _mp (CC_VOWEL_MODIFIER | CF_POS_AFTER)
#define _sa (CC_STRESS_MARK | CF_POS_ABOVE)
#define _sb (CC_STRESS_MARK | CF_POS_BELOW)
#define _iv (CC_INDEPENDENT_VOWEL)
#define _i2 (CC_INDEPENDENT_VOWEL_2)
#define _i3 (CC_INDEPENDENT_VOWEL_3)
#define _ct (CC_CONSONANT | CF_CONSONANT)
#define _cn (CC_CONSONANT_WITH_NUKTA | CF_CONSONANT)
#define _nu (CC_NUKTA)
#define _dv (CC_DEPENDENT_VOWEL)
// dependent vowels by position: left (before), below, above, right (after)
#define _dl (_dv | CF_POS_BEFORE)
#define _db (_dv | CF_POS_BELOW)
#define _da (_dv | CF_POS_ABOVE)
#define _dr (_dv | CF_POS_AFTER)
#define _lm (_dv | CF_LENGTH_MARK)
// pieces of split vowels
#define _l1 (CC_SPLIT_VOWEL_PIECE_1 | CF_POS_BEFORE)
#define _a1 (CC_SPLIT_VOWEL_PIECE_1 | CF_POS_ABOVE)
#define _b2 (CC_SPLIT_VOWEL_PIECE_2 | CF_POS_BELOW)
#define _r2 (CC_SPLIT_VOWEL_PIECE_2 | CF_POS_AFTER)
#define _m2 (CC_SPLIT_VOWEL_PIECE_2 | CF_LENGTH_MARK)
#define _m3 (CC_SPLIT_VOWEL_PIECE_3 | CF_LENGTH_MARK)
#define _vr (CC_VIRAMA)
#define _al (CC_AL_LAKUNA)
// Split matra classes: a dependent vowel (_dv) carrying a split matra
// table index (_x1.._x9).
#define _s1 (_dv | _x1)
#define _s2 (_dv | _x2)
#define _s3 (_dv | _x3)
#define _s4 (_dv | _x4)
#define _s5 (_dv | _x5)
#define _s6 (_dv | _x6)
#define _s7 (_dv | _x7)
#define _s8 (_dv | _x8)
#define _s9 (_dv | _x9)
// consonants with special forms
// NOTE: this assumes that no consonants with nukta have
// special forms... (Bengali RA?)
//
// Each class is a plain consonant (_ct) with extra CF_* form flags OR-ed in.
#define _bb (_ct | CF_BELOW_BASE)
#define _pb (_ct | CF_POST_BASE)
#define _fb (_ct | CF_PRE_BASE)
#define _vt (_bb | CF_VATTU)
#define _rv (_vt | CF_REPH)
#define _rp (_pb | CF_REPH)
#define _rb (_bb | CF_REPH)
76 // Character class tables
78 static const IndicClassTable::CharClass devaCharClasses
[] =
80 _xx
, _ma
, _ma
, _mp
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, // 0900 - 090F
81 _iv
, _iv
, _iv
, _iv
, _iv
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0910 - 091F
82 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _cn
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0920 - 092F
83 _rv
, _cn
, _ct
, _ct
, _cn
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _nu
, _xx
, _dr
, _dl
, // 0930 - 093F
84 _dr
, _db
, _db
, _db
, _db
, _da
, _da
, _da
, _da
, _dr
, _dr
, _dr
, _dr
, _vr
, _xx
, _xx
, // 0940 - 094F
85 _xx
, _sa
, _sb
, _sa
, _sa
, _xx
, _xx
, _xx
, _cn
, _cn
, _cn
, _cn
, _cn
, _cn
, _cn
, _cn
, // 0950 - 095F
86 _iv
, _iv
, _db
, _db
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0960 - 096F
90 static const IndicClassTable::CharClass bengCharClasses
[] =
92 _xx
, _ma
, _mp
, _mp
, _xx
, _i2
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _xx
, _i2
, // 0980 - 098F
93 _iv
, _xx
, _xx
, _iv
, _iv
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0990 - 099F
94 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _ct
, _ct
, _bb
, _ct
, _ct
, _pb
, // 09A0 - 09AF
95 _rv
, _xx
, _ct
, _xx
, _xx
, _xx
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _nu
, _xx
, _r2
, _dl
, // 09B0 - 09BF
96 _dr
, _db
, _db
, _db
, _db
, _xx
, _xx
, _l1
, _dl
, _xx
, _xx
, _s1
, _s2
, _vr
, _xx
, _xx
, // 09C0 - 09CF
97 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _m2
, _xx
, _xx
, _xx
, _xx
, _cn
, _cn
, _xx
, _cn
, // 09D0 - 09DF
98 _iv
, _iv
, _dv
, _dv
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 09E0 - 09EF
99 _rv
, _ct
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
// 09F0 - 09FA
102 static const IndicClassTable::CharClass punjCharClasses
[] =
104 _xx
, _ma
, _ma
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _xx
, _xx
, _xx
, _iv
, // 0A00 - 0A0F
105 _iv
, _xx
, _xx
, _i3
, _iv
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0A10 - 0A1F
106 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _bb
, // 0A20 - 0A2F
107 _vt
, _xx
, _ct
, _cn
, _xx
, _bb
, _cn
, _xx
, _ct
, _bb
, _xx
, _xx
, _nu
, _xx
, _dr
, _dl
, // 0A30 - 0A3F
108 _dr
, _b2
, _db
, _xx
, _xx
, _xx
, _xx
, _da
, _da
, _xx
, _xx
, _a1
, _da
, _vr
, _xx
, _xx
, // 0A40 - 0A4F
109 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _cn
, _cn
, _cn
, _ct
, _xx
, _cn
, _xx
, // 0A50 - 0A5F
110 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0A60 - 0A6F
111 _ma
, _ma
, _xx
, _xx
, _xx
// 0A70 - 0A74
114 static const IndicClassTable::CharClass gujrCharClasses
[] =
116 _xx
, _ma
, _ma
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _iv
, _xx
, _iv
, // 0A80 - 0A8F
117 _iv
, _iv
, _xx
, _iv
, _iv
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0A90 - 0A9F
118 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0AA0 - 0AAF
119 _rv
, _xx
, _ct
, _ct
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _nu
, _xx
, _dr
, _dl
, // 0AB0 - 0ABF
120 _dr
, _db
, _db
, _db
, _db
, _da
, _xx
, _da
, _da
, _dr
, _xx
, _dr
, _dr
, _vr
, _xx
, _xx
, // 0AC0 - 0ACF
121 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0AD0 - 0ADF
122 _iv
, _iv
, _db
, _db
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
// 0AE0 - 0AEF
126 static const IndicClassTable::CharClass oryaCharClasses
[] =
128 _xx
, _ma
, _mp
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _xx
, _iv
, /* 0B00 - 0B0F */
129 _iv
, _xx
, _xx
, _iv
, _iv
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _ct
, _bb
, /* 0B10 - 0B1F */
130 _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _xx
, _bb
, _bb
, _bb
, _bb
, _bb
, _pb
, /* 0B20 - 0B2F */
131 _rb
, _xx
, _bb
, _bb
, _xx
, _bb
, _bb
, _bb
, _bb
, _bb
, _xx
, _xx
, _nu
, _xx
, _dr
, _da
, /* 0B30 - 0B3F */
132 _dr
, _db
, _db
, _db
, _xx
, _xx
, _xx
, _dl
, _s1
, _xx
, _xx
, _s2
, _s3
, _vr
, _xx
, _xx
, /* 0B40 - 0B4F */
133 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _da
, _dr
, _xx
, _xx
, _xx
, _xx
, _cn
, _cn
, _xx
, _pb
, /* 0B50 - 0B5F */
134 _iv
, _iv
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, /* 0B60 - 0B6F */
135 _xx
, _bb
/* 0B70 - 0B71 */
138 static const IndicClassTable::CharClass oryaCharClasses
[] =
140 _xx
, _ma
, _mp
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _xx
, _iv
, // 0B00 - 0B0F
141 _iv
, _xx
, _xx
, _iv
, _iv
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0B10 - 0B1F
142 _ct
, _ct
, _ct
, _ct
, _bb
, _ct
, _ct
, _ct
, _bb
, _xx
, _ct
, _ct
, _bb
, _bb
, _bb
, _pb
, // 0B20 - 0B2F
143 _rb
, _xx
, _bb
, _bb
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _nu
, _xx
, _r2
, _da
, // 0B30 - 0B3F
144 _dr
, _db
, _db
, _db
, _xx
, _xx
, _xx
, _l1
, _s1
, _xx
, _xx
, _s2
, _s3
, _vr
, _xx
, _xx
, // 0B40 - 0B4F
145 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _m2
, _m2
, _xx
, _xx
, _xx
, _xx
, _cn
, _cn
, _xx
, _cn
, // 0B50 - 0B5F
146 _iv
, _iv
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0B60 - 0B6F
147 _xx
, _ct
// 0B70 - 0B71
151 static const IndicClassTable::CharClass tamlCharClasses
[] =
153 _xx
, _xx
, _ma
, _xx
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _xx
, _xx
, _iv
, _iv
, // 0B80 - 0B8F
154 _iv
, _xx
, _iv
, _iv
, _iv
, _ct
, _xx
, _xx
, _xx
, _ct
, _ct
, _xx
, _ct
, _xx
, _ct
, _ct
, // 0B90 - 0B9F
155 _xx
, _xx
, _xx
, _ct
, _ct
, _xx
, _xx
, _xx
, _ct
, _ct
, _ct
, _xx
, _xx
, _xx
, _ct
, _ct
, // 0BA0 - 0BAF
156 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _xx
, _xx
, _r2
, _dr
, // 0BB0 - 0BBF
157 _da
, _dr
, _dr
, _xx
, _xx
, _xx
, _l1
, _l1
, _dl
, _xx
, _s1
, _s2
, _s3
, _vr
, _xx
, _xx
, // 0BC0 - 0BCF
158 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _m2
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0BD0 - 0BDF
159 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0BE0 - 0BEF
160 _xx
, _xx
, _xx
// 0BF0 - 0BF2
163 // FIXME: Should some of the bb's be pb's? (KA, NA, MA, YA, VA, etc. (approx 13))
164 // U+C43 and U+C44 are _lm here not _dr. Similar to the situation with U+CC3 and
165 // U+CC4 in Kannada below.
166 static const IndicClassTable::CharClass teluCharClasses
[] =
168 _xx
, _mp
, _mp
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _iv
, _iv
, // 0C00 - 0C0F
169 _iv
, _xx
, _iv
, _iv
, _iv
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, // 0C10 - 0C1F
170 _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _xx
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, // 0C20 - 0C2F
171 _bb
, _bb
, _bb
, _bb
, _xx
, _bb
, _bb
, _bb
, _bb
, _bb
, _xx
, _xx
, _xx
, _xx
, _da
, _da
, // 0C30 - 0C3F
172 _da
, _dr
, _dr
, _lm
, _lm
, _xx
, _a1
, _da
, _s1
, _xx
, _da
, _da
, _da
, _vr
, _xx
, _xx
, // 0C40 - 0C4F
173 _xx
, _xx
, _xx
, _xx
, _xx
, _da
, _m2
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0C50 - 0C5F
174 _iv
, _iv
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
// 0C60 - 0C6F
177 // U+CC3 and U+CC4 are _lm here not _dr since the Kannada rendering
178 // rules want them below and to the right of the entire cluster
180 // There's some information about this in:
182 // http://brahmi.sourceforge.net/docs/KannadaComputing.html
183 static const IndicClassTable::CharClass kndaCharClasses
[] =
185 _xx
, _xx
, _mp
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _iv
, _iv
, // 0C80 - 0C8F
186 _iv
, _xx
, _iv
, _iv
, _iv
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, // 0C90 - 0C9F
187 _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, _xx
, _bb
, _bb
, _bb
, _bb
, _bb
, _bb
, // 0CA0 - 0CAF
188 _rb
, _ct
, _bb
, _bb
, _xx
, _bb
, _bb
, _bb
, _bb
, _bb
, _xx
, _xx
, _xx
, _xx
, _dr
, _da
, // 0CB0 - 0CBF
189 _s1
, _dr
, _r2
, _lm
, _lm
, _xx
, _a1
, _s2
, _s3
, _xx
, _s4
, _s5
, _da
, _vr
, _xx
, _xx
, // 0CC0 - 0CCF
190 _xx
, _xx
, _xx
, _xx
, _xx
, _m3
, _m2
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _ct
, _xx
, // 0CD0 - 0CDF
191 _iv
, _iv
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
// 0CE0 - 0CEF
194 // FIXME: this is correct for old-style Malayalam (MAL) but not for reformed Malayalam (MLR)
195 // FIXME: should there be a REPH for old-style Malayalam?
196 static const IndicClassTable::CharClass mlymCharClasses
[] =
198 _xx
, _xx
, _mp
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _iv
, _iv
, // 0D00 - 0D0F
199 _iv
, _xx
, _iv
, _iv
, _iv
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0D10 - 0D1F
200 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _pb
, // 0D20 - 0D2F
201 _fb
, _fb
, _bb
, _ct
, _ct
, _pb
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _xx
, _xx
, _r2
, _dr
, // 0D30 - 0D3F
202 _dr
, _dr
, _dr
, _dr
, _xx
, _xx
, _l1
, _l1
, _dl
, _xx
, _s1
, _s2
, _s3
, _vr
, _xx
, _xx
, // 0D40 - 0D4F
203 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _m2
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0D50 - 0D5F
204 _iv
, _iv
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
// 0D60 - 0D6F
207 static const IndicClassTable::CharClass sinhCharClasses
[] =
209 _xx
, _xx
, _mp
, _mp
, _xx
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, // 0D80 - 0D8F
210 _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _iv
, _xx
, _xx
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0D90 - 0D9F
211 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, // 0DA0 - 0DAF
212 _ct
, _ct
, _xx
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _ct
, _xx
, _xx
, // 0DB0 - 0DBF
213 _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _ct
, _xx
, _xx
, _xx
, _al
, _xx
, _xx
, _xx
, _xx
, _dr
, // 0DC0 - 0DCF
214 _dr
, _dr
, _da
, _da
, _db
, _xx
, _db
, _xx
, _dr
, _dl
, _s1
, _dl
, _s2
, _s3
, _s4
, _dr
, // 0DD0 - 0DDF
215 _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, _xx
, // 0DE0 - 0DEF
216 _xx
, _xx
, _dr
, _dr
, _xx
// 0DF0 - 0DF4
220 // Split matra tables
222 static const SplitMatra bengSplitTable
[] = {{0x09C7, 0x09BE}, {0x09C7, 0x09D7}};
224 static const SplitMatra oryaSplitTable
[] = {{0x0B47, 0x0B56}, {0x0B47, 0x0B3E}, {0x0B47, 0x0B57}};
226 static const SplitMatra tamlSplitTable
[] = {{0x0BC6, 0x0BBE}, {0x0BC7, 0x0BBE}, {0x0BC6, 0x0BD7}};
228 static const SplitMatra teluSplitTable
[] = {{0x0C46, 0x0C56}};
230 static const SplitMatra kndaSplitTable
[] = {{0x0CBF, 0x0CD5}, {0x0CC6, 0x0CD5}, {0x0CC6, 0x0CD6}, {0x0CC6, 0x0CC2},
231 {0x0CC6, 0x0CC2, 0x0CD5}};
233 static const SplitMatra mlymSplitTable
[] = {{0x0D46, 0x0D3E}, {0x0D47, 0x0D3E}, {0x0D46, 0x0D57}};
236 static const SplitMatra sinhSplitTable
[] = {{0x0DD9, 0x0DCA}, {0x0DD9, 0x0DCF}, {0x0DD9, 0x0DCF, 0x0DCA},
// Per-script flag words, built from SF_* flags, used to initialise the
// class tables below.
// NOTE(review): the literal 3 OR-ed into the Telugu and Kannada flags is
// presumably a post-base limit (the other scripts use
// SF_NO_POST_BASE_LIMIT instead) - confirm against the SF_* definitions.
//
// FIXME: post 'GSUB' reordering of MATRA_PRE's for Malayalam and Tamil
// FIXME: reformed Malayalam needs to reorder VATTU to before base glyph...
// FIXME: not sure passing ZWJ/ZWNJ is best way to render Malayalam Cillu...
// FIXME: eyelash RA only for Devanagari??
#define DEVA_SCRIPT_FLAGS (SF_EYELASH_RA | SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
#define BENG_SCRIPT_FLAGS (SF_REPH_AFTER_BELOW | SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
#define PUNJ_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
#define GUJR_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
#define ORYA_SCRIPT_FLAGS (SF_REPH_AFTER_BELOW | SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
#define TAML_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT | SF_FILTER_ZERO_WIDTH)
#define TELU_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
#define KNDA_SCRIPT_FLAGS (SF_MATRAS_AFTER_BASE | SF_FILTER_ZERO_WIDTH | 3)
#define MLYM_SCRIPT_FLAGS (SF_MPRE_FIXUP | SF_NO_POST_BASE_LIMIT /*| SF_FILTER_ZERO_WIDTH*/)
#define SINH_SCRIPT_FLAGS (SF_NO_POST_BASE_LIMIT)
258 // Indic Class Tables
260 static const IndicClassTable devaClassTable
= {0x0900, 0x0970, 2, DEVA_SCRIPT_FLAGS
, devaCharClasses
, NULL
};
262 static const IndicClassTable bengClassTable
= {0x0980, 0x09FA, 3, BENG_SCRIPT_FLAGS
, bengCharClasses
, bengSplitTable
};
264 static const IndicClassTable punjClassTable
= {0x0A00, 0x0A74, 2, PUNJ_SCRIPT_FLAGS
, punjCharClasses
, NULL
};
266 static const IndicClassTable gujrClassTable
= {0x0A80, 0x0AEF, 2, GUJR_SCRIPT_FLAGS
, gujrCharClasses
, NULL
};
268 static const IndicClassTable oryaClassTable
= {0x0B00, 0x0B71, 3, ORYA_SCRIPT_FLAGS
, oryaCharClasses
, oryaSplitTable
};
270 static const IndicClassTable tamlClassTable
= {0x0B80, 0x0BF2, 3, TAML_SCRIPT_FLAGS
, tamlCharClasses
, tamlSplitTable
};
272 static const IndicClassTable teluClassTable
= {0x0C00, 0x0C6F, 3, TELU_SCRIPT_FLAGS
, teluCharClasses
, teluSplitTable
};
274 static const IndicClassTable kndaClassTable
= {0x0C80, 0x0CEF, 4, KNDA_SCRIPT_FLAGS
, kndaCharClasses
, kndaSplitTable
};
276 static const IndicClassTable mlymClassTable
= {0x0D00, 0x0D6F, 4, MLYM_SCRIPT_FLAGS
, mlymCharClasses
, mlymSplitTable
};
278 static const IndicClassTable sinhClassTable
= {0x0D80, 0x0DF4, 4, SINH_SCRIPT_FLAGS
, sinhCharClasses
, sinhSplitTable
};
281 // IndicClassTable addresses
283 static const IndicClassTable
* const indicClassTables
[scriptCodeCount
] = {
284 NULL
, /* 'zyyy' (COMMON) */
285 NULL
, /* 'qaai' (INHERITED) */
286 NULL
, /* 'arab' (ARABIC) */
287 NULL
, /* 'armn' (ARMENIAN) */
288 &bengClassTable
, /* 'beng' (BENGALI) */
289 NULL
, /* 'bopo' (BOPOMOFO) */
290 NULL
, /* 'cher' (CHEROKEE) */
291 NULL
, /* 'copt' (COPTIC) */
292 NULL
, /* 'cyrl' (CYRILLIC) */
293 NULL
, /* 'dsrt' (DESERET) */
294 &devaClassTable
, /* 'deva' (DEVANAGARI) */
295 NULL
, /* 'ethi' (ETHIOPIC) */
296 NULL
, /* 'geor' (GEORGIAN) */
297 NULL
, /* 'goth' (GOTHIC) */
298 NULL
, /* 'grek' (GREEK) */
299 &gujrClassTable
, /* 'gujr' (GUJARATI) */
300 &punjClassTable
, /* 'guru' (GURMUKHI) */
301 NULL
, /* 'hani' (HAN) */
302 NULL
, /* 'hang' (HANGUL) */
303 NULL
, /* 'hebr' (HEBREW) */
304 NULL
, /* 'hira' (HIRAGANA) */
305 &kndaClassTable
, /* 'knda' (KANNADA) */
306 NULL
, /* 'kata' (KATAKANA) */
307 NULL
, /* 'khmr' (KHMER) */
308 NULL
, /* 'laoo' (LAO) */
309 NULL
, /* 'latn' (LATIN) */
310 &mlymClassTable
, /* 'mlym' (MALAYALAM) */
311 NULL
, /* 'mong' (MONGOLIAN) */
312 NULL
, /* 'mymr' (MYANMAR) */
313 NULL
, /* 'ogam' (OGHAM) */
314 NULL
, /* 'ital' (OLD-ITALIC) */
315 &oryaClassTable
, /* 'orya' (ORIYA) */
316 NULL
, /* 'runr' (RUNIC) */
317 &sinhClassTable
, /* 'sinh' (SINHALA) */
318 NULL
, /* 'syrc' (SYRIAC) */
319 &tamlClassTable
, /* 'taml' (TAMIL) */
320 &teluClassTable
, /* 'telu' (TELUGU) */
321 NULL
, /* 'thaa' (THAANA) */
322 NULL
, /* 'thai' (THAI) */
323 NULL
, /* 'tibt' (TIBETAN) */
324 NULL
, /* 'cans' (CANADIAN-ABORIGINAL) */
325 NULL
, /* 'yiii' (YI) */
326 NULL
, /* 'tglg' (TAGALOG) */
327 NULL
, /* 'hano' (HANUNOO) */
328 NULL
, /* 'buhd' (BUHID) */
329 NULL
, /* 'tagb' (TAGBANWA) */
330 NULL
, /* 'brai' (BRAILLE) */
331 NULL
, /* 'cprt' (CYPRIOT) */
332 NULL
, /* 'limb' (LIMBU) */
333 NULL
, /* 'linb' (LINEAR_B) */
334 NULL
, /* 'osma' (OSMANYA) */
335 NULL
, /* 'shaw' (SHAVIAN) */
336 NULL
, /* 'tale' (TAI_LE) */
337 NULL
, /* 'ugar' (UGARITIC) */
338 NULL
, /* 'hrkt' (KATAKANA_OR_HIRAGANA) */
339 NULL
, /* 'bugi' (BUGINESE) */
340 NULL
, /* 'glag' (GLAGOLITIC) */
341 NULL
, /* 'khar' (KHAROSHTHI) */
342 NULL
, /* 'sylo' (SYLOTI_NAGRI) */
343 NULL
, /* 'talu' (NEW_TAI_LUE) */
344 NULL
, /* 'tfng' (TIFINAGH) */
345 NULL
, /* 'xpeo' (OLD_PERSIAN) */
346 NULL
, /* 'bali' (BALINESE) */
347 NULL
, /* 'batk' (BATK) */
348 NULL
, /* 'blis' (BLIS) */
349 NULL
, /* 'brah' (BRAH) */
350 NULL
, /* 'cham' (CHAM) */
351 NULL
, /* 'cirt' (CIRT) */
352 NULL
, /* 'cyrs' (CYRS) */
353 NULL
, /* 'egyd' (EGYD) */
354 NULL
, /* 'egyh' (EGYH) */
355 NULL
, /* 'egyp' (EGYP) */
356 NULL
, /* 'geok' (GEOK) */
357 NULL
, /* 'hans' (HANS) */
358 NULL
, /* 'hant' (HANT) */
359 NULL
, /* 'hmng' (HMNG) */
360 NULL
, /* 'hung' (HUNG) */
361 NULL
, /* 'inds' (INDS) */
362 NULL
, /* 'java' (JAVA) */
363 NULL
, /* 'kali' (KALI) */
364 NULL
, /* 'latf' (LATF) */
365 NULL
, /* 'latg' (LATG) */
366 NULL
, /* 'lepc' (LEPC) */
367 NULL
, /* 'lina' (LINA) */
368 NULL
, /* 'mand' (MAND) */
369 NULL
, /* 'maya' (MAYA) */
370 NULL
, /* 'mero' (MERO) */
371 NULL
, /* 'nko ' (NKO) */
372 NULL
, /* 'orkh' (ORKH) */
373 NULL
, /* 'perm' (PERM) */
374 NULL
, /* 'phag' (PHAGS_PA) */
375 NULL
, /* 'phnx' (PHOENICIAN) */
376 NULL
, /* 'plrd' (PLRD) */
377 NULL
, /* 'roro' (RORO) */
378 NULL
, /* 'sara' (SARA) */
379 NULL
, /* 'syre' (SYRE) */
380 NULL
, /* 'syrj' (SYRJ) */
381 NULL
, /* 'syrn' (SYRN) */
382 NULL
, /* 'teng' (TENG) */
383 NULL
, /* 'vai ' (VAII) */
384 NULL
, /* 'visp' (VISP) */
385 NULL
, /* 'xsux' (CUNEIFORM) */
386 NULL
, /* 'zxxx' (ZXXX) */
387 NULL
, /* 'zzzz' (UNKNOWN) */
388 NULL
, /* 'cari' (CARI) */
389 NULL
, /* 'jpan' (JPAN) */
390 NULL
, /* 'lana' (LANA) */
391 NULL
, /* 'lyci' (LYCI) */
392 NULL
, /* 'lydi' (LYDI) */
393 NULL
, /* 'olck' (OLCK) */
394 NULL
, /* 'rjng' (RJNG) */
395 NULL
, /* 'saur' (SAUR) */
396 NULL
, /* 'sgnw' (SGNW) */
397 NULL
, /* 'sund' (SUND) */
398 NULL
, /* 'moon' (MOON) */
399 NULL
, /* 'mtei' (MTEI) */
400 NULL
, /* 'armi' (ARMI) */
401 NULL
, /* 'avst' (AVST) */
402 NULL
, /* 'cakm' (CAKM) */
403 NULL
, /* 'kore' (KORE) */
404 NULL
, /* 'kthi' (KTHI) */
405 NULL
, /* 'mani' (MANI) */
406 NULL
, /* 'phli' (PHLI) */
407 NULL
, /* 'phlp' (PHLP) */
408 NULL
, /* 'phlv' (PHLV) */
409 NULL
, /* 'prti' (PRTI) */
410 NULL
, /* 'samr' (SAMR) */
411 NULL
, /* 'tavt' (TAVT) */
412 NULL
, /* 'zmth' (ZMTH) */
413 NULL
, /* 'zsym' (ZSYM) */
414 NULL
, /* 'bamu' (BAMUM) */
415 NULL
, /* 'lisu' (LISU) */
416 NULL
, /* 'nkgb' (NKGB) */
417 NULL
/* 'sarb' (OLD_SOUTH_ARABIAN) */
420 IndicClassTable::CharClass
IndicClassTable::getCharClass(LEUnicode ch
) const
422 if (ch
== C_SIGN_ZWJ
) {
423 return CF_CONSONANT
| CC_ZERO_WIDTH_MARK
;
426 if (ch
== C_SIGN_ZWNJ
) {
427 return CC_ZERO_WIDTH_MARK
;
430 if (ch
< firstChar
|| ch
> lastChar
) {
434 return classTable
[ch
- firstChar
];
437 const IndicClassTable
*IndicClassTable::getScriptClassTable(le_int32 scriptCode
)
439 if (scriptCode
< 0 || scriptCode
>= scriptCodeCount
) {
443 return indicClassTables
[scriptCode
];
446 le_int32
IndicReordering::getWorstCaseExpansion(le_int32 scriptCode
)
448 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
450 if (classTable
== NULL
) {
454 return classTable
->getWorstCaseExpansion();
457 le_bool
IndicReordering::getFilterZeroWidth(le_int32 scriptCode
)
459 const IndicClassTable
*classTable
= IndicClassTable::getScriptClassTable(scriptCode
);
461 if (classTable
== NULL
) {
465 return classTable
->getFilterZeroWidth();