]>
git.saurik.com Git - apple/icu.git/blob - icuSources/layout/KhmerReordering.h
2 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
4 * This file is a modification of the ICU file IndicReordering.h
5 * by Jens Herden and Javier Sola for Khmer language
9 #ifndef __KHMERREORDERING_H
10 #define __KHMERREORDERING_H
18 #include "OpenTypeTables.h"
25 // Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
26 // center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
27 // split vowels, signs... but there is only one base in a syllable, it has to be coded as
28 // the first character of the syllable.
29 // split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
30 // Khmer language has five of them. Khmer split vowels either have one part before the
31 // base and one after the base or they have a part before the base and a part above the base.
32 // The first part of all Khmer split vowels is the same character, identical to
33 // the glyph of Khmer dependent vowel SRA EI
34 // coeng --> modifier used in Khmer to construct coeng (subscript) consonants
35 // Unlike in Indian languages, the coeng modifies the consonant that follows it,
36 // not the one preceding it. Each consonant has two forms, the base form and the subscript form:
37 // the base form is the normal one (using the consonants code-point), the subscript form is
38 // displayed when the combination coeng + consonant is encountered.
39 // Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
40 // Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
41 // Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
42 // Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
43 // if it is attached to a consonant of the first series or a consonant of the second series.
44 // Most consonants have an equivalent in the other series, but some of them exist only in
45 // one series (for example SA). If we want to use the consonant SA with a vowel sound that
46 // is only available as the vowel sound accompanying a consonant
47 // of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
48 // (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and
49 // MUSIKATOAN a second series consonant to have a first series vowel sound.
50 // Consonant shifters are both normally superscript marks, but, when they are followed by a
51 // superscript, they change shape and take the form of subscript dependent vowel SRA U.
52 // If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
53 // should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
54 // be placed after the coeng consonant.
55 // Dependent vowel -> In Khmer, dependent vowels can be placed above, below, before or after the base.
56 // Each vowel has its own position. Only one vowel per syllable is allowed.
57 // Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign is
58 // allowed in a syllable.
// Character-class table for Khmer. Declares the class values and flag bits used in a
// CharClass, the code-point range covered by the table, and the per-character lookup.
// NOTE(review): this copy of the header appears to have lost lines during extraction
// (braces, some enum entries, stray leading numbers) -- compare with the complete
// KhmerReordering.h before relying on it.
62 struct KhmerClassTable
// This list must include all types of components that can be used inside a syllable
64 enum CharClassValues
// Order is important here! This order must be the same as the one found in each horizontal
65 // line in the state table for Khmer (file KhmerReordering.cpp).
68 CC_CONSONANT
= 1, // consonant of type 1 or independent vowel
69 CC_CONSONANT2
= 2, // Consonant of type 2
70 CC_CONSONANT3
= 3, // Consonant of type 3
71 CC_ZERO_WIDTH_NJ_MARK
= 4, // Zero Width non joiner character (0x200C)
72 CC_CONSONANT_SHIFTER
= 5,
73 CC_ROBAT
= 6, // Khmer special diacritic accent -treated differently in state table
74 CC_COENG
= 7, // Subscript consonant combining character
75 CC_DEPENDENT_VOWEL
= 8,
// NOTE(review): no entries for the values 0, 9 and 10 are visible here although CC_COUNT
// is 12 -- presumably dropped from this copy; confirm against the complete header.
78 CC_ZERO_WIDTH_J_MARK
= 11, // Zero width joiner character
79 CC_COUNT
= 12 // This is the number of character classes
// Flag and mask bits follow.
// NOTE(review): these presumably belong to a separate enum whose header line is not
// visible in this copy -- confirm.
84 CF_CLASS_MASK
= 0x0000FFFF, // low 16 bits; presumably selects the class value part of a CharClass -- confirm
86 CF_CONSONANT
= 0x01000000, // flag to speed up comparing
87 CF_SPLIT_VOWEL
= 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable
88 CF_DOTTED_CIRCLE
= 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
89 CF_COENG
= 0x08000000, // flag to speed up comparing
90 CF_SHIFTER
= 0x10000000, // flag to speed up comparing
91 CF_ABOVE_VOWEL
= 0x20000000, // flag to speed up comparing
// Position bits: one bit each for before / below / above / after.
// CF_POS_MASK is the bitwise OR of the four CF_POS_* values.
94 CF_POS_BEFORE
= 0x00080000,
95 CF_POS_BELOW
= 0x00040000,
96 CF_POS_ABOVE
= 0x00020000,
97 CF_POS_AFTER
= 0x00010000,
98 CF_POS_MASK
= 0x000f0000
// 32-bit unsigned value type returned by getCharClass() and stored in classTable;
// presumably a class value combined with the CF_* flag bits -- confirm.
101 typedef le_uint32 CharClass
;
// NOTE(review): ScriptFlags is declared here but not used by any declaration visible
// in this header.
103 typedef le_int32 ScriptFlags
;
// First and last code points of the range the table refers to (see the notes on each field).
105 LEUnicode firstChar
; // for Khmer this will become x1780
106 LEUnicode lastChar
; // and this x17DF
// Pointer to the CharClass entries; presumably one entry per code point from firstChar
// to lastChar -- confirm against KhmerReordering.cpp.
107 const CharClass
*classTable
;
// Returns the CharClass for the character ch.
// NOTE(review): the handling of characters outside the firstChar..lastChar range is
// defined in the implementation and cannot be seen from this header.
109 CharClass
getCharClass(LEUnicode ch
) const;
// Static accessor returning a pointer to a const KhmerClassTable.
111 static const KhmerClassTable
*getKhmerClassTable();
// Collection of static entry points for Khmer reordering; per the note below, the class
// is not meant to be instantiated.
// NOTE(review): access specifiers, the constructor declaration and the closing brace are
// not visible in this copy of the header -- compare with the complete file.
115 class KhmerReordering
/* not : public UObject because all methods are static */ {
// Reorders the charCount characters in theChars, writing into outChars and glyphStorage.
// NOTE(review): the return value is presumably the number of characters written to
// outChars, and the required capacity of outChars is not stated here -- confirm both
// against KhmerReordering.cpp.
117 static le_int32
reorder(const LEUnicode
*theChars
, le_int32 charCount
, le_int32 scriptCode
,
118 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
);
// Returns a pointer to a const FeatureMap; count is passed by reference and is presumably
// set to the number of entries -- confirm.
120 static const FeatureMap
*getFeatureMap(le_int32
&count
);
123 // do not instantiate
// Helper that takes the class table, the character array, a starting index (prev) and
// the total character count.
// NOTE(review): presumably returns the index just past the syllable that starts at
// prev -- confirm against the implementation.
126 static le_int32
findSyllable(const KhmerClassTable
*classTable
, const LEUnicode
*chars
, le_int32 prev
, le_int32 charCount
);