]>
git.saurik.com Git - apple/icu.git/blob - icuSources/layout/TibetanReordering.h
3 * (C) Copyright IBM Corp. 1998-2013 - All Rights Reserved
5 * Developed at DIT - Government of Bhutan
7 * Contact person: Pema Geyleg - <pema_geyleg@druknet.bt>
9 * This file is a modification of the ICU file KhmerReordering.h
10 * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
11 * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
12 * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
16 #ifndef __TIBETANREORDERING_H
17 #define __TIBETANREORDERING_H
24 // #include "LETypes.h"
25 // #include "OpenTypeTables.h"
32 // Base -> A consonant in its full (not subscript) form. It is the
33 // center of the syllable, it can be surrounded by subjoined consonants, vowels,
34 // signs... but there is only one base in a stack, it has to be coded as
35 // the first character of the syllable. Included here are also groups of base + subjoined
36 // which are represented by one single code point in Unicode (e.g. 0F43). Also other characters that might take
37 // subjoined consonants or other combining characters.
38 // Subjoined -> Subjoined consonants and groups of subjoined consonants which have a single code-point
39 // to represent the group (even if each subjoined consonant is represented independently
40 // by another code-point).
41 // Tsa Phru --> Tsa Phru character, Bhutanese people will always place it right after the base, but sometimes, due to
43 // is placed after all the subjoined consonants, and it is also permitted there.
44 // A Chung Vowel lengthening mark --> 0F71. It is placed after the base and any subjoined consonants but before any vowels.
45 // Precomposed Sanskrit vowels --> These are combinations of subjoined consonants + vowels that have been assigned
46 // a given code-point (in spite of each single part of them also having a code-point).
47 // They are avoided, and users are encouraged to use the combination of code-points that
48 // represents the same sound instead of using these combined characters. This is included here
49 // for compatibility with possible texts that use them (they are not in the Dzongkha keyboard).
50 // Halanta -> The Halanta or Virama character 0F84 indicates that a consonant should not use its inherent vowel,
51 // in spite of not having other vowels present. It is usually placed immediately after a base consonant,
52 // but in some special cases it can also be placed after a subjoined consonant, so this is also
53 // permitted in this algorithm. (Halanta is always displayed in Tibetan, not used as a connecting char.)
55 // Subjoined vowels -> Dependent vowels (matras) placed below the base and below all subjoined consonants. There
56 // might be as many as three subjoined vowels in a given stack (only one in general text, but up
57 // to three for abbreviations, so they have to be permitted).
58 // Superscript vowels -> There are three superscript vowels, and they can be repeated or combined (up to three
59 // times). They can combine with subjoined vowels, and are always coded after these.
60 // Anusvara --> Nasalisation sign. Traditionally placed in absence of vowels, but also after vowels. In some
61 // special cases it can be placed before a vowel, so this is also permitted.
62 // Candrabindu -> Forms of the Anusvara with different glyphs (and different in identity) which can be placed
63 // without vowel or after the vowel, but never before. Cannot combine with Anusvara.
64 // Stress marks -> Marks placed above or below a syllable, affecting the whole syllable. They are combining
65 // marks, so they have to be attached to a specific stack. They are used to emphasise a syllable.
67 // Digits -> Digits are not considered as non-combining characters because there are a few characters which
68 // combine with them, so they have to be considered independently.
69 // Digit combining marks -> dependent marks that combine with digits.
72 // There are a number of characters in the CJK block that are used in Tibetan script; two of these are symbols that
73 // are used as bases for combining glyphs, and have not been encoded in Tibetan. As these characters are outside
74 // of the Tibetan block, they have not been treated in this program.
// Character-class table for Tibetan syllable analysis. It declares the CC_*
// character-class numbers, the CF_* mask/flag constants, the CharClass and
// ScriptFlags typedefs, the covered code point range (firstChar..lastChar),
// a pointer to the class data, and two accessors that are only declared here.
// NOTE(review): this copy of the header shows no braces around the struct or
// its enumerations -- confirm against the upstream file before compiling.
77 struct TibetanClassTable
// This list must include all types of components that can be used inside a syllable
79 enum CharClassValues
// Order is important here! This order must be the same as the one found in each horizontal
80 // line in the state table for Tibetan (file TibetanReordering.cpp). It assigns one number
81 // to each type of character that has to be considered when analysing the order in which
82 // characters can be placed.
84 CC_RESERVED
= 0, // Non Combining Characters
85 CC_BASE
= 1, // Base Consonants, Base Consonants with Subjoined attached in code point, Sanskrit base marks
86 CC_SUBJOINED
= 2, // Subjoined Consonants, combination of more than Subjoined Consonants in the code point
87 CC_TSA_PHRU
= 3, // Tsa-Phru character 0F39
88 CC_A_CHUNG
= 4, // Vowel Lengthening a-chung mark 0F71
89 CC_COMP_SANSKRIT
= 5, // Precomposed Sanskrit vowels including Subjoined characters and vowels
90 CC_HALANTA
= 6, // Halanta Character 0F84
91 CC_BELOW_VOWEL
= 7, // Subjoined vowels
92 CC_ABOVE_VOWEL
= 8, // Superscript vowels
93 CC_ANUSVARA
= 9, // Tibetan sign Rjes Su Nga Ro 0F7E
94 CC_CANDRABINDU
= 10, // Tibetan sign Sna Ldan and Nyi Zla Naa Da 0F82, 0F83
95 CC_VISARGA
= 11, // Tibetan sign Rnam Bcad (0F7F)
96 CC_ABOVE_S_MARK
= 12, // Stress Marks placed above the text
97 CC_BELOW_S_MARK
= 13, // Stress Marks placed below the text
98 CC_DIGIT
= 14, // Dzongkha Digits
99 CC_PRE_DIGIT_MARK
= 15, // Mark placed before the digit
100 CC_POST_BELOW_DIGIT_M
= 16, // Mark placed below or after the digit
101 CC_COUNT
= 17 // This is the number of character classes (CC_RESERVED .. CC_POST_BELOW_DIGIT_M)
// Mask and flag constants (CF_*).
// NOTE(review): no enum declaration line for this group is visible in this copy;
// presumably a second enumeration starts here -- confirm against upstream.
106 CF_CLASS_MASK
= 0x0000FFFF, // low 16 bits; presumably selects the CC_* class number -- TODO confirm
108 CF_DOTTED_CIRCLE
= 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable
109 CF_DIGIT
= 0x01000000, // flag to speed up comparison
110 CF_PREDIGIT
= 0x02000000, // flag to detect pre-digit marks for reordering
// Position bits, one per CF_POS_* name; CF_POS_MASK equals the OR of all four.
113 CF_POS_BEFORE
= 0x00080000,
114 CF_POS_BELOW
= 0x00040000,
115 CF_POS_ABOVE
= 0x00020000,
116 CF_POS_AFTER
= 0x00010000,
117 CF_POS_MASK
= 0x000f0000
// Element type pointed to by classTable and returned by getCharClass().
120 typedef le_uint32 CharClass
;
122 typedef le_int32 ScriptFlags
;
124 LEUnicode firstChar
; // for Tibetan this will become 0x0F00
125 LEUnicode lastChar
; // and this 0x0FFF
// Pointer to read-only CharClass data.
// NOTE(review): presumably indexed by (ch - firstChar) -- verify in TibetanReordering.cpp.
126 const CharClass
*classTable
;
// Takes a code point and yields a CharClass; declared here, defined elsewhere.
128 CharClass
getCharClass(LEUnicode ch
) const;
// Static accessor returning a pointer to a const TibetanClassTable; defined elsewhere.
130 static const TibetanClassTable
*getTibetanClassTable();
134 class TibetanReordering
/* not : public UObject because all methods are static */ {
136 static le_int32
reorder(const LEUnicode
*theChars
, le_int32 charCount
, le_int32 scriptCode
,
137 LEUnicode
*outChars
, LEGlyphStorage
&glyphStorage
);
139 static const FeatureMap
*getFeatureMap(le_int32
&count
);
142 // do not instantiate
145 static le_int32
findSyllable(const TibetanClassTable
*classTable
, const LEUnicode
*chars
, le_int32 prev
, le_int32 charCount
);