]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved | |
3 | * | |
4 | * This file is a modification of the ICU file IndicReordering.h | |
5 | * by Jens Herden and Javier Sola for Khmer language | |
6 | * | |
7 | */ | |
8 | ||
9 | #ifndef __KHMERREORDERING_H | |
10 | #define __KHMERREORDERING_H | |
11 | ||
12 | /** | |
13 | * \file | |
14 | * \internal | |
15 | */ | |
16 | ||
17 | #include "LETypes.h" | |
18 | #include "OpenTypeTables.h" | |
19 | ||
20 | U_NAMESPACE_BEGIN | |
21 | ||
22 | class LEGlyphStorage; | |
23 | ||
24 | // Vocabulary | |
25 | // Base -> A consonant or an independent vowel in its full (not subscript) form. It is the | |
26 | // center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels, | |
27 | // split vowels, signs... but there is only one base in a syllable, and it has to be coded as | |
28 | // the first character of the syllable. | |
29 | // split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant). | |
30 | // The Khmer language has five of them. Khmer split vowels either have one part before the | |
31 | // base and one after the base, or they have a part before the base and a part above the base. | |
32 | // The first part of all Khmer split vowels is the same character, identical to | |
33 | // the glyph of Khmer dependent vowel SRA EI. | |
34 | // coeng --> modifier used in Khmer to construct coeng (subscript) consonants. | |
35 | // Unlike in Indian languages, the coeng modifies the consonant that follows it, | |
36 | // not the one preceding it. Each consonant has two forms, the base form and the subscript form: | |
37 | // the base form is the normal one (using the consonant's code point), the subscript form is | |
38 | // displayed when the combination coeng + consonant is encountered. | |
39 | // Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant | |
40 | // Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO) | |
41 | // Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA) | |
42 | // Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds | |
43 | // depending on whether it is attached to a consonant of the first series or of the second series. | |
44 | // Most consonants have an equivalent in the other series, but some of them exist only in | |
45 | // one series (for example SA). If we want to use the consonant SA with a vowel sound that | |
46 | // is only available as the vowel sound accompanying a consonant | |
47 | // of the other series, then we need to use a consonant shifter: TRIISAP (x17CA) or | |
48 | // MUSIKATOAN (x17C9). TRIISAP changes a first series consonant to a second series sound and | |
49 | // MUSIKATOAN changes a second series consonant to have a first series vowel sound. | |
50 | // Consonant shifters are both normally superscript marks, but when they are followed by a | |
51 | // superscript, they change shape and take the form of the subscript dependent vowel SRA U. | |
52 | // If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they | |
53 | // should be typed before the coeng. Unicode 4.0 breaks the standard and says that they should | |
54 | // be placed after the coeng consonant. | |
55 | // Dependent vowel -> In Khmer, dependent vowels can be placed above, below, before or after the base. | |
56 | // Each vowel has its own position. Only one vowel per syllable is allowed. | |
57 | // Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign is | |
58 | // allowed in a syllable. | |
59 | // | |
60 | // | |
61 | ||
62 | struct KhmerClassTable // Character-class table for Khmer. The class list must include all types of components that can be used inside a syllable | |
63 | { | |
64 | enum CharClassValues // Order is important here! It must be the same as the order found in each horizontal | |
65 | // line of the state table for Khmer (file KhmerReordering.cpp). | |
66 | { | |
67 | CC_RESERVED = 0, | |
68 | CC_CONSONANT = 1, // Consonant of type 1 or independent vowel | |
69 | CC_CONSONANT2 = 2, // Consonant of type 2 | |
70 | CC_CONSONANT3 = 3, // Consonant of type 3 | |
71 | CC_ZERO_WIDTH_NJ_MARK = 4, // Zero width non-joiner character (0x200C) | |
72 | CC_CONSONANT_SHIFTER = 5, | |
73 | CC_ROBAT = 6, // Khmer special diacritic accent - treated differently in the state table | |
74 | CC_COENG = 7, // Subscript consonant combining character | |
75 | CC_DEPENDENT_VOWEL = 8, | |
76 | CC_SIGN_ABOVE = 9, | |
77 | CC_SIGN_AFTER = 10, | |
78 | CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character | |
79 | CC_COUNT = 12 // Number of character classes; keep in sync when adding a class above | |
80 | }; | |
81 | ||
82 | enum CharClassFlags /* bit flags; none of the flag values below overlaps CF_CLASS_MASK */ | |
83 | { | |
84 | CF_CLASS_MASK = 0x0000FFFF, /* low 16 bits; presumably selects the CharClassValues part of a CharClass - TODO confirm in KhmerReordering.cpp */ | |
85 | ||
86 | CF_CONSONANT = 0x01000000, // flag to speed up comparing | |
87 | CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable | |
88 | CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable | |
89 | CF_COENG = 0x08000000, // flag to speed up comparing | |
90 | CF_SHIFTER = 0x10000000, // flag to speed up comparing | |
91 | CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing | |
92 | ||
93 | // position flags (one bit each, all inside CF_POS_MASK) | |
94 | CF_POS_BEFORE = 0x00080000, | |
95 | CF_POS_BELOW = 0x00040000, | |
96 | CF_POS_ABOVE = 0x00020000, | |
97 | CF_POS_AFTER = 0x00010000, | |
98 | CF_POS_MASK = 0x000f0000 /* bitwise OR of the four CF_POS_* flags above */ | |
99 | }; | |
100 | ||
101 | typedef le_uint32 CharClass; /* presumably a CharClassValues value combined with CharClassFlags bits - TODO confirm */ | |
102 | ||
103 | typedef le_int32 ScriptFlags; | |
104 | ||
105 | LEUnicode firstChar; // for Khmer this will become x1780 | |
106 | LEUnicode lastChar; // and this x17DF | |
107 | const CharClass *classTable; /* class entries; presumably one per code point in [firstChar, lastChar] - TODO confirm */ | |
108 | ||
109 | CharClass getCharClass(LEUnicode ch) const; /* returns the CharClass for ch; only declared in this header */ | |
110 | ||
111 | static const KhmerClassTable *getKhmerClassTable(); /* accessor for the Khmer table; only declared in this header */ | |
112 | }; | |
113 | ||
114 | ||
115 | class KhmerReordering /* static-only helper class; not : public UObject because all methods are static */ { | |
116 | public: | |
117 | static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, | |
118 | LEUnicode *outChars, LEGlyphStorage &glyphStorage); /* theChars is const input of charCount characters; outChars and glyphStorage are writable. NOTE(review): the return value is presumably the output character count - confirm in KhmerReordering.cpp */ | |
119 | ||
120 | static const FeatureMap *getFeatureMap(le_int32 &count); /* count is passed by non-const reference; presumably receives the number of map entries - TODO confirm */ | |
121 | ||
122 | private: | |
123 | // do not instantiate: the constructor is private and only declared here | |
124 | KhmerReordering(); | |
125 | ||
126 | static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); /* private helper; NOTE(review): presumably returns the index where the syllable starting at prev ends - confirm in KhmerReordering.cpp */ | |
127 | ||
128 | }; | |
129 | ||
130 | ||
131 | U_NAMESPACE_END | |
132 | #endif |