]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrsbcs.h
ICU-511.27.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #ifndef __CSRSBCS_H
9 #define __CSRSBCS_H
10
11 #include "unicode/uobject.h"
12
13 #if !UCONFIG_NO_CONVERSION
14
15 #include "csrecog.h"
16
17 U_NAMESPACE_BEGIN
18
19 class NGramParser : public UMemory
20 {
21 private:
22 int32_t byteIndex;
23 int32_t ngram;
24
25 const int32_t *ngramList;
26 const uint8_t *charMap;
27
28 int32_t ngramCount;
29 int32_t hitCount;
30
31 public:
32 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
33
34 private:
35 /*
36 * Binary search for value in table, which must have exactly 64 entries.
37 */
38 int32_t search(const int32_t *table, int32_t value);
39
40 void lookup(int32_t thisNgram);
41 void addByte(int32_t b);
42 int32_t nextByte(InputText *det);
43
44 public:
45 int32_t parse(InputText *det);
46
47 };
48
49
50 class CharsetRecog_sbcs : public CharsetRecognizer
51 {
52 public:
53 CharsetRecog_sbcs();
54 virtual ~CharsetRecog_sbcs();
55 virtual const char *getName() const = 0;
56 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
57 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
58 };
59
60 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
61 {
62 public:
63 virtual ~CharsetRecog_8859_1();
64 const char *getName() const;
65 virtual UBool match(InputText *det, CharsetMatch *results) const;
66 };
67
68 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
69 {
70 public:
71 virtual ~CharsetRecog_8859_2();
72 const char *getName() const;
73 virtual UBool match(InputText *det, CharsetMatch *results) const;
74 };
75
76 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
77 {
78 public:
79 virtual ~CharsetRecog_8859_5();
80 const char *getName() const;
81 };
82
83 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
84 {
85 public:
86 virtual ~CharsetRecog_8859_6();
87
88 const char *getName() const;
89 };
90
91 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
92 {
93 public:
94 virtual ~CharsetRecog_8859_7();
95
96 const char *getName() const;
97 };
98
99 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
100 {
101 public:
102 virtual ~CharsetRecog_8859_8();
103
104 virtual const char *getName() const;
105 };
106
107 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
108 {
109 public:
110 virtual ~CharsetRecog_8859_9();
111
112 const char *getName() const;
113 };
114
115
116
117 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
118 {
119 public:
120 virtual ~CharsetRecog_8859_5_ru();
121
122 const char *getLanguage() const;
123
124 virtual UBool match(InputText *det, CharsetMatch *results) const;
125 };
126
127 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
128 {
129 public:
130 virtual ~CharsetRecog_8859_6_ar();
131
132 const char *getLanguage() const;
133
134 virtual UBool match(InputText *det, CharsetMatch *results) const;
135 };
136
137 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
138 {
139 public:
140 virtual ~CharsetRecog_8859_7_el();
141
142 const char *getLanguage() const;
143
144 virtual UBool match(InputText *det, CharsetMatch *results) const;
145 };
146
147 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
148 {
149 public:
150 virtual ~CharsetRecog_8859_8_I_he();
151
152 const char *getName() const;
153
154 const char *getLanguage() const;
155
156 virtual UBool match(InputText *det, CharsetMatch *results) const;
157 };
158
159 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
160 {
161 public:
162 virtual ~CharsetRecog_8859_8_he ();
163
164 const char *getLanguage() const;
165
166 virtual UBool match(InputText *det, CharsetMatch *results) const;
167 };
168
169 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
170 {
171 public:
172 virtual ~CharsetRecog_8859_9_tr ();
173
174 const char *getLanguage() const;
175
176 virtual UBool match(InputText *det, CharsetMatch *results) const;
177 };
178
179 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
180 {
181 public:
182 virtual ~CharsetRecog_windows_1256();
183
184 const char *getName() const;
185
186 const char *getLanguage() const;
187
188 virtual UBool match(InputText *det, CharsetMatch *results) const;
189 };
190
191 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
192 {
193 public:
194 virtual ~CharsetRecog_windows_1251();
195
196 const char *getName() const;
197
198 const char *getLanguage() const;
199
200 virtual UBool match(InputText *det, CharsetMatch *results) const;
201 };
202
203
204 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
205 {
206 public:
207 virtual ~CharsetRecog_KOI8_R();
208
209 const char *getName() const;
210
211 const char *getLanguage() const;
212
213 virtual UBool match(InputText *det, CharsetMatch *results) const;
214 };
215
216 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
217 {
218 public:
219 virtual ~CharsetRecog_IBM424_he();
220
221 const char *getLanguage() const;
222 };
223
224 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
225 public:
226 virtual ~CharsetRecog_IBM424_he_rtl();
227
228 const char *getName() const;
229
230 virtual UBool match(InputText *det, CharsetMatch *results) const;
231 };
232
233 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
234 virtual ~CharsetRecog_IBM424_he_ltr();
235
236 const char *getName() const;
237
238 virtual UBool match(InputText *det, CharsetMatch *results) const;
239 };
240
241 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
242 {
243 public:
244 virtual ~CharsetRecog_IBM420_ar();
245
246 const char *getLanguage() const;
247
248 protected:
249 void matchInit(InputText *textIn);
250 void matchFinish(InputText *textIn);
251
252 private:
253 uint8_t *prev_fInputBytes;
254 int32_t prev_fInputBytesLength;
255 UBool deleteBuffer;
256
257 UBool isLamAlef(uint8_t b);
258 uint8_t *unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
259 uint8_t *unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
260 };
261
262 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
263 public:
264 virtual ~CharsetRecog_IBM420_ar_rtl();
265
266 const char *getName() const;
267
268 virtual UBool match(InputText *det, CharsetMatch *results) const;
269 };
270
271 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
272 virtual ~CharsetRecog_IBM420_ar_ltr();
273
274 const char *getName() const;
275
276 virtual UBool match(InputText *det, CharsetMatch *results) const;
277 };
278
279 U_NAMESPACE_END
280
281 #endif /* !UCONFIG_NO_CONVERSION */
282 #endif /* __CSRSBCS_H */