]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrsbcs.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #ifndef __CSRSBCS_H
9 #define __CSRSBCS_H
10
11 #include "unicode/uobject.h"
12
13 #if !UCONFIG_NO_CONVERSION
14
15 #include "csrecog.h"
16
17 U_NAMESPACE_BEGIN
18
19 class NGramParser : public UMemory
20 {
21 private:
22 int32_t ngram;
23 const int32_t *ngramList;
24
25 int32_t ngramCount;
26 int32_t hitCount;
27
28 protected:
29 int32_t byteIndex;
30 const uint8_t *charMap;
31
32 void addByte(int32_t b);
33
34 public:
35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
36 virtual ~NGramParser();
37
38 private:
39 /*
40 * Binary search for value in table, which must have exactly 64 entries.
41 */
42 int32_t search(const int32_t *table, int32_t value);
43
44 void lookup(int32_t thisNgram);
45
46 virtual int32_t nextByte(InputText *det);
47 virtual void parseCharacters(InputText *det);
48
49 public:
50 int32_t parse(InputText *det);
51
52 };
53
54 #if !UCONFIG_ONLY_HTML_CONVERSION
55 class NGramParser_IBM420 : public NGramParser
56 {
57 public:
58 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
59 ~NGramParser_IBM420();
60
61 private:
62 int32_t alef;
63 int32_t isLamAlef(int32_t b);
64 int32_t nextByte(InputText *det);
65 void parseCharacters(InputText *det);
66 };
67 #endif
68
69
70 class CharsetRecog_sbcs : public CharsetRecognizer
71 {
72 public:
73 CharsetRecog_sbcs();
74 virtual ~CharsetRecog_sbcs();
75 virtual const char *getName() const = 0;
76 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
77 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
78 };
79
80 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
81 {
82 public:
83 virtual ~CharsetRecog_8859_1();
84 const char *getName() const;
85 virtual UBool match(InputText *det, CharsetMatch *results) const;
86 };
87
88 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
89 {
90 public:
91 virtual ~CharsetRecog_8859_2();
92 const char *getName() const;
93 virtual UBool match(InputText *det, CharsetMatch *results) const;
94 };
95
96 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
97 {
98 public:
99 virtual ~CharsetRecog_8859_5();
100 const char *getName() const;
101 };
102
103 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
104 {
105 public:
106 virtual ~CharsetRecog_8859_6();
107
108 const char *getName() const;
109 };
110
111 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
112 {
113 public:
114 virtual ~CharsetRecog_8859_7();
115
116 const char *getName() const;
117 };
118
119 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
120 {
121 public:
122 virtual ~CharsetRecog_8859_8();
123
124 virtual const char *getName() const;
125 };
126
127 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
128 {
129 public:
130 virtual ~CharsetRecog_8859_9();
131
132 const char *getName() const;
133 };
134
135
136
137 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
138 {
139 public:
140 virtual ~CharsetRecog_8859_5_ru();
141
142 const char *getLanguage() const;
143
144 virtual UBool match(InputText *det, CharsetMatch *results) const;
145 };
146
147 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
148 {
149 public:
150 virtual ~CharsetRecog_8859_6_ar();
151
152 const char *getLanguage() const;
153
154 virtual UBool match(InputText *det, CharsetMatch *results) const;
155 };
156
157 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
158 {
159 public:
160 virtual ~CharsetRecog_8859_7_el();
161
162 const char *getLanguage() const;
163
164 virtual UBool match(InputText *det, CharsetMatch *results) const;
165 };
166
167 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
168 {
169 public:
170 virtual ~CharsetRecog_8859_8_I_he();
171
172 const char *getName() const;
173
174 const char *getLanguage() const;
175
176 virtual UBool match(InputText *det, CharsetMatch *results) const;
177 };
178
179 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
180 {
181 public:
182 virtual ~CharsetRecog_8859_8_he ();
183
184 const char *getLanguage() const;
185
186 virtual UBool match(InputText *det, CharsetMatch *results) const;
187 };
188
189 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
190 {
191 public:
192 virtual ~CharsetRecog_8859_9_tr ();
193
194 const char *getLanguage() const;
195
196 virtual UBool match(InputText *det, CharsetMatch *results) const;
197 };
198
199 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
200 {
201 public:
202 virtual ~CharsetRecog_windows_1256();
203
204 const char *getName() const;
205
206 const char *getLanguage() const;
207
208 virtual UBool match(InputText *det, CharsetMatch *results) const;
209 };
210
211 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
212 {
213 public:
214 virtual ~CharsetRecog_windows_1251();
215
216 const char *getName() const;
217
218 const char *getLanguage() const;
219
220 virtual UBool match(InputText *det, CharsetMatch *results) const;
221 };
222
223
224 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
225 {
226 public:
227 virtual ~CharsetRecog_KOI8_R();
228
229 const char *getName() const;
230
231 const char *getLanguage() const;
232
233 virtual UBool match(InputText *det, CharsetMatch *results) const;
234 };
235
236 #if !UCONFIG_ONLY_HTML_CONVERSION
237 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
238 {
239 public:
240 virtual ~CharsetRecog_IBM424_he();
241
242 const char *getLanguage() const;
243 };
244
245 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
246 public:
247 virtual ~CharsetRecog_IBM424_he_rtl();
248
249 const char *getName() const;
250
251 virtual UBool match(InputText *det, CharsetMatch *results) const;
252 };
253
254 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
255 virtual ~CharsetRecog_IBM424_he_ltr();
256
257 const char *getName() const;
258
259 virtual UBool match(InputText *det, CharsetMatch *results) const;
260 };
261
262 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
263 {
264 public:
265 virtual ~CharsetRecog_IBM420_ar();
266
267 const char *getLanguage() const;
268 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
269
270 };
271
272 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
273 public:
274 virtual ~CharsetRecog_IBM420_ar_rtl();
275
276 const char *getName() const;
277
278 virtual UBool match(InputText *det, CharsetMatch *results) const;
279 };
280
281 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
282 virtual ~CharsetRecog_IBM420_ar_ltr();
283
284 const char *getName() const;
285
286 virtual UBool match(InputText *det, CharsetMatch *results) const;
287 };
288 #endif
289
290 U_NAMESPACE_END
291
292 #endif /* !UCONFIG_NO_CONVERSION */
293 #endif /* __CSRSBCS_H */