]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrsbcs.h
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2005-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10 #ifndef __CSRSBCS_H
11 #define __CSRSBCS_H
12
13 #include "unicode/uobject.h"
14
15 #if !UCONFIG_NO_CONVERSION
16
17 #include "csrecog.h"
18
19 U_NAMESPACE_BEGIN
20
21 class NGramParser : public UMemory
22 {
23 private:
24 int32_t ngram;
25 const int32_t *ngramList;
26
27 int32_t ngramCount;
28 int32_t hitCount;
29
30 protected:
31 int32_t byteIndex;
32 const uint8_t *charMap;
33
34 void addByte(int32_t b);
35
36 public:
37 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
38 virtual ~NGramParser();
39
40 private:
41 /*
42 * Binary search for value in table, which must have exactly 64 entries.
43 */
44 int32_t search(const int32_t *table, int32_t value);
45
46 void lookup(int32_t thisNgram);
47
48 virtual int32_t nextByte(InputText *det);
49 virtual void parseCharacters(InputText *det);
50
51 public:
52 int32_t parse(InputText *det);
53
54 };
55
56 #if !UCONFIG_ONLY_HTML_CONVERSION
57 class NGramParser_IBM420 : public NGramParser
58 {
59 public:
60 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
61 ~NGramParser_IBM420();
62
63 private:
64 int32_t alef;
65 int32_t isLamAlef(int32_t b);
66 int32_t nextByte(InputText *det);
67 void parseCharacters(InputText *det);
68 };
69 #endif
70
71
72 class CharsetRecog_sbcs : public CharsetRecognizer
73 {
74 public:
75 CharsetRecog_sbcs();
76 virtual ~CharsetRecog_sbcs();
77 virtual const char *getName() const = 0;
78 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
79 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
80 };
81
82 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
83 {
84 public:
85 virtual ~CharsetRecog_8859_1();
86 const char *getName() const;
87 virtual UBool match(InputText *det, CharsetMatch *results) const;
88 };
89
90 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
91 {
92 public:
93 virtual ~CharsetRecog_8859_2();
94 const char *getName() const;
95 virtual UBool match(InputText *det, CharsetMatch *results) const;
96 };
97
98 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
99 {
100 public:
101 virtual ~CharsetRecog_8859_5();
102 const char *getName() const;
103 };
104
105 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
106 {
107 public:
108 virtual ~CharsetRecog_8859_6();
109
110 const char *getName() const;
111 };
112
113 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
114 {
115 public:
116 virtual ~CharsetRecog_8859_7();
117
118 const char *getName() const;
119 };
120
121 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
122 {
123 public:
124 virtual ~CharsetRecog_8859_8();
125
126 virtual const char *getName() const;
127 };
128
129 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
130 {
131 public:
132 virtual ~CharsetRecog_8859_9();
133
134 const char *getName() const;
135 };
136
137
138
139 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
140 {
141 public:
142 virtual ~CharsetRecog_8859_5_ru();
143
144 const char *getLanguage() const;
145
146 virtual UBool match(InputText *det, CharsetMatch *results) const;
147 };
148
149 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
150 {
151 public:
152 virtual ~CharsetRecog_8859_6_ar();
153
154 const char *getLanguage() const;
155
156 virtual UBool match(InputText *det, CharsetMatch *results) const;
157 };
158
159 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
160 {
161 public:
162 virtual ~CharsetRecog_8859_7_el();
163
164 const char *getLanguage() const;
165
166 virtual UBool match(InputText *det, CharsetMatch *results) const;
167 };
168
169 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
170 {
171 public:
172 virtual ~CharsetRecog_8859_8_I_he();
173
174 const char *getName() const;
175
176 const char *getLanguage() const;
177
178 virtual UBool match(InputText *det, CharsetMatch *results) const;
179 };
180
181 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
182 {
183 public:
184 virtual ~CharsetRecog_8859_8_he ();
185
186 const char *getLanguage() const;
187
188 virtual UBool match(InputText *det, CharsetMatch *results) const;
189 };
190
191 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
192 {
193 public:
194 virtual ~CharsetRecog_8859_9_tr ();
195
196 const char *getLanguage() const;
197
198 virtual UBool match(InputText *det, CharsetMatch *results) const;
199 };
200
201 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
202 {
203 public:
204 virtual ~CharsetRecog_windows_1256();
205
206 const char *getName() const;
207
208 const char *getLanguage() const;
209
210 virtual UBool match(InputText *det, CharsetMatch *results) const;
211 };
212
213 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
214 {
215 public:
216 virtual ~CharsetRecog_windows_1251();
217
218 const char *getName() const;
219
220 const char *getLanguage() const;
221
222 virtual UBool match(InputText *det, CharsetMatch *results) const;
223 };
224
225
226 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
227 {
228 public:
229 virtual ~CharsetRecog_KOI8_R();
230
231 const char *getName() const;
232
233 const char *getLanguage() const;
234
235 virtual UBool match(InputText *det, CharsetMatch *results) const;
236 };
237
238 #if !UCONFIG_ONLY_HTML_CONVERSION
239 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
240 {
241 public:
242 virtual ~CharsetRecog_IBM424_he();
243
244 const char *getLanguage() const;
245 };
246
247 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
248 public:
249 virtual ~CharsetRecog_IBM424_he_rtl();
250
251 const char *getName() const;
252
253 virtual UBool match(InputText *det, CharsetMatch *results) const;
254 };
255
256 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
257 virtual ~CharsetRecog_IBM424_he_ltr();
258
259 const char *getName() const;
260
261 virtual UBool match(InputText *det, CharsetMatch *results) const;
262 };
263
264 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
265 {
266 public:
267 virtual ~CharsetRecog_IBM420_ar();
268
269 const char *getLanguage() const;
270 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
271
272 };
273
274 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
275 public:
276 virtual ~CharsetRecog_IBM420_ar_rtl();
277
278 const char *getName() const;
279
280 virtual UBool match(InputText *det, CharsetMatch *results) const;
281 };
282
283 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
284 virtual ~CharsetRecog_IBM420_ar_ltr();
285
286 const char *getName() const;
287
288 virtual UBool match(InputText *det, CharsetMatch *results) const;
289 };
290 #endif
291
292 U_NAMESPACE_END
293
294 #endif /* !UCONFIG_NO_CONVERSION */
295 #endif /* __CSRSBCS_H */