]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrsbcs.h
ICU-551.51.3.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #ifndef __CSRSBCS_H
9 #define __CSRSBCS_H
10
11 #include "unicode/uobject.h"
12
13 #if !UCONFIG_NO_CONVERSION
14
15 #include "csrecog.h"
16
17 U_NAMESPACE_BEGIN
18
19 class NGramParser : public UMemory
20 {
21 private:
22 int32_t ngram;
23 const int32_t *ngramList;
24
25 int32_t ngramCount;
26 int32_t hitCount;
27
28 protected:
29 int32_t byteIndex;
30 const uint8_t *charMap;
31
32 void addByte(int32_t b);
33
34 public:
35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
36 virtual ~NGramParser();
37
38 private:
39 /*
40 * Binary search for value in table, which must have exactly 64 entries.
41 */
42 int32_t search(const int32_t *table, int32_t value);
43
44 void lookup(int32_t thisNgram);
45
46 virtual int32_t nextByte(InputText *det);
47 virtual void parseCharacters(InputText *det);
48
49 public:
50 int32_t parse(InputText *det);
51
52 };
53
54 #if !UCONFIG_ONLY_HTML_CONVERSION
55 class NGramParser_IBM420 : public NGramParser
56 {
57 private:
58 int32_t alef;
59 int32_t isLamAlef(int32_t b);
60 int32_t nextByte(InputText *det);
61 void parseCharacters(InputText *det);
62
63 public:
64 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
65 };
66 #endif
67
68
69 class CharsetRecog_sbcs : public CharsetRecognizer
70 {
71 public:
72 CharsetRecog_sbcs();
73 virtual ~CharsetRecog_sbcs();
74 virtual const char *getName() const = 0;
75 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
76 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
77 };
78
79 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
80 {
81 public:
82 virtual ~CharsetRecog_8859_1();
83 const char *getName() const;
84 virtual UBool match(InputText *det, CharsetMatch *results) const;
85 };
86
87 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
88 {
89 public:
90 virtual ~CharsetRecog_8859_2();
91 const char *getName() const;
92 virtual UBool match(InputText *det, CharsetMatch *results) const;
93 };
94
95 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
96 {
97 public:
98 virtual ~CharsetRecog_8859_5();
99 const char *getName() const;
100 };
101
102 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
103 {
104 public:
105 virtual ~CharsetRecog_8859_6();
106
107 const char *getName() const;
108 };
109
110 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
111 {
112 public:
113 virtual ~CharsetRecog_8859_7();
114
115 const char *getName() const;
116 };
117
118 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
119 {
120 public:
121 virtual ~CharsetRecog_8859_8();
122
123 virtual const char *getName() const;
124 };
125
126 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
127 {
128 public:
129 virtual ~CharsetRecog_8859_9();
130
131 const char *getName() const;
132 };
133
134
135
136 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
137 {
138 public:
139 virtual ~CharsetRecog_8859_5_ru();
140
141 const char *getLanguage() const;
142
143 virtual UBool match(InputText *det, CharsetMatch *results) const;
144 };
145
146 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
147 {
148 public:
149 virtual ~CharsetRecog_8859_6_ar();
150
151 const char *getLanguage() const;
152
153 virtual UBool match(InputText *det, CharsetMatch *results) const;
154 };
155
156 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
157 {
158 public:
159 virtual ~CharsetRecog_8859_7_el();
160
161 const char *getLanguage() const;
162
163 virtual UBool match(InputText *det, CharsetMatch *results) const;
164 };
165
166 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
167 {
168 public:
169 virtual ~CharsetRecog_8859_8_I_he();
170
171 const char *getName() const;
172
173 const char *getLanguage() const;
174
175 virtual UBool match(InputText *det, CharsetMatch *results) const;
176 };
177
178 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
179 {
180 public:
181 virtual ~CharsetRecog_8859_8_he ();
182
183 const char *getLanguage() const;
184
185 virtual UBool match(InputText *det, CharsetMatch *results) const;
186 };
187
188 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
189 {
190 public:
191 virtual ~CharsetRecog_8859_9_tr ();
192
193 const char *getLanguage() const;
194
195 virtual UBool match(InputText *det, CharsetMatch *results) const;
196 };
197
198 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
199 {
200 public:
201 virtual ~CharsetRecog_windows_1256();
202
203 const char *getName() const;
204
205 const char *getLanguage() const;
206
207 virtual UBool match(InputText *det, CharsetMatch *results) const;
208 };
209
210 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
211 {
212 public:
213 virtual ~CharsetRecog_windows_1251();
214
215 const char *getName() const;
216
217 const char *getLanguage() const;
218
219 virtual UBool match(InputText *det, CharsetMatch *results) const;
220 };
221
222
223 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
224 {
225 public:
226 virtual ~CharsetRecog_KOI8_R();
227
228 const char *getName() const;
229
230 const char *getLanguage() const;
231
232 virtual UBool match(InputText *det, CharsetMatch *results) const;
233 };
234
235 #if !UCONFIG_ONLY_HTML_CONVERSION
236 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
237 {
238 public:
239 virtual ~CharsetRecog_IBM424_he();
240
241 const char *getLanguage() const;
242 };
243
244 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
245 public:
246 virtual ~CharsetRecog_IBM424_he_rtl();
247
248 const char *getName() const;
249
250 virtual UBool match(InputText *det, CharsetMatch *results) const;
251 };
252
253 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
254 virtual ~CharsetRecog_IBM424_he_ltr();
255
256 const char *getName() const;
257
258 virtual UBool match(InputText *det, CharsetMatch *results) const;
259 };
260
261 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
262 {
263 public:
264 virtual ~CharsetRecog_IBM420_ar();
265
266 const char *getLanguage() const;
267 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
268
269 };
270
271 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
272 public:
273 virtual ~CharsetRecog_IBM420_ar_rtl();
274
275 const char *getName() const;
276
277 virtual UBool match(InputText *det, CharsetMatch *results) const;
278 };
279
280 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
281 virtual ~CharsetRecog_IBM420_ar_ltr();
282
283 const char *getName() const;
284
285 virtual UBool match(InputText *det, CharsetMatch *results) const;
286 };
287 #endif
288
289 U_NAMESPACE_END
290
291 #endif /* !UCONFIG_NO_CONVERSION */
292 #endif /* __CSRSBCS_H */