]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrsbcs.h
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2009, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #ifndef __CSRSBCS_H
9 #define __CSRSBCS_H
10
11 #include "unicode/uobject.h"
12
13 #if !UCONFIG_NO_CONVERSION
14
15 #include "csrecog.h"
16
17 U_NAMESPACE_BEGIN
18
/*
 * N-gram parsing helper. It is constructed from a table of n-gram values
 * and a byte-mapping table, and exposes a single public operation,
 * parse(), which takes the InputText to examine.
 * Only declarations appear in this header; the meaning of parse()'s
 * return value is defined by the implementation file.
 */
19 class NGramParser : public UMemory
20 {
21 private:
/* NOTE(review): presumably the read position in the input and the
 * n-gram currently being accumulated - confirm in the implementation. */
22 int32_t byteIndex;
23 int32_t ngram;
24
/* Tables supplied to the constructor; held as pointers to const data. */
25 const int32_t *ngramList;
26 const uint8_t *charMap;
27
/* NOTE(review): presumably running totals of n-grams seen and n-grams
 * found in ngramList - confirm in the implementation. */
28 int32_t ngramCount;
29 int32_t hitCount;
30
31 public:
32 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
33
34 private:
35 /*
36 * Binary search for value in table, which must have exactly 64 entries.
37 */
38 int32_t search(const int32_t *table, int32_t value);
39
/* Private helpers used by the implementation; their behavior is not
 * visible from this header. */
40 void lookup(int32_t thisNgram);
41 void addByte(int32_t b);
42 int32_t nextByte(InputText *det);
43
44 public:
/* Public entry point operating on the given InputText. */
45 int32_t parse(InputText *det);
46
47 };
48
/*
 * Abstract base class for the recognizers declared in this header.
 * getName() and match() are pure virtual, so subclasses must supply both
 * before they can be instantiated. match_sbcs() is a non-virtual helper
 * that takes an n-gram table and a byte-mapping table in addition to the
 * input text.
 * NOTE(review): "sbcs" presumably stands for single-byte character set.
 */
49 class CharsetRecog_sbcs : public CharsetRecognizer
50 {
51 protected:
/* Flag available to subclasses. NOTE(review): presumably records whether
 * bytes in the C1 range were seen in the input - confirm in the
 * implementation. */
52 UBool haveC1Bytes;
53
54 public:
55 CharsetRecog_sbcs();
56
57 virtual ~CharsetRecog_sbcs();
58
59 virtual const char *getName() const = 0;
60
61 virtual int32_t match(InputText *det) = 0;
62
/* Shared matching helper; array parameters are the tables a caller
 * selects for a particular charset/language. */
63 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]);
64 };
65
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * NOTE(review): presumably reports an ISO-8859-1 related charset name -
 * confirm in the implementation.
 */
66 class CharsetRecog_8859_1 : public CharsetRecog_sbcs
67 {
68 public:
69 virtual ~CharsetRecog_8859_1();
70
71 const char *getName() const;
72 };
73
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * NOTE(review): presumably reports an ISO-8859-2 related charset name -
 * confirm in the implementation.
 */
74 class CharsetRecog_8859_2 : public CharsetRecog_sbcs
75 {
76 public:
77 virtual ~CharsetRecog_8859_2();
78
79 const char *getName() const;
80 };
81
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * NOTE(review): presumably reports the ISO-8859-5 charset name - confirm
 * in the implementation.
 */
82 class CharsetRecog_8859_5 : public CharsetRecog_sbcs
83 {
84 public:
85 virtual ~CharsetRecog_8859_5();
86
87 const char *getName() const;
88 };
89
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * NOTE(review): presumably reports the ISO-8859-6 charset name - confirm
 * in the implementation.
 */
90 class CharsetRecog_8859_6 : public CharsetRecog_sbcs
91 {
92 public:
93 virtual ~CharsetRecog_8859_6();
94
95 const char *getName() const;
96 };
97
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * NOTE(review): presumably reports an ISO-8859-7 related charset name -
 * confirm in the implementation.
 */
98 class CharsetRecog_8859_7 : public CharsetRecog_sbcs
99 {
100 public:
101 virtual ~CharsetRecog_8859_7();
102
103 const char *getName() const;
104 };
105
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * getName() is spelled with an explicit `virtual` here, and the subclass
 * CharsetRecog_8859_8_I_he declares its own getName() as well.
 * NOTE(review): presumably reports an ISO-8859-8 related charset name -
 * confirm in the implementation.
 */
106 class CharsetRecog_8859_8 : public CharsetRecog_sbcs
107 {
108 public:
109 virtual ~CharsetRecog_8859_8();
110
111 virtual const char *getName() const;
112 };
113
/*
 * Intermediate recognizer base derived from CharsetRecog_sbcs.
 * Declares getName() (pure virtual in CharsetRecog_sbcs) but not match(),
 * so this class is still abstract; its subclasses declare match().
 * NOTE(review): presumably reports an ISO-8859-9 related charset name -
 * confirm in the implementation.
 */
114 class CharsetRecog_8859_9 : public CharsetRecog_sbcs
115 {
116 public:
117 virtual ~CharsetRecog_8859_9();
118
119 const char *getName() const;
120 };
121
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_en" suffix presumably denotes English - confirm
 * against the getLanguage() implementation.
 */
122 class CharsetRecog_8859_1_en : public CharsetRecog_8859_1
123 {
124 public:
125 virtual ~CharsetRecog_8859_1_en();
126
127 const char *getLanguage() const;
128
129 int32_t match(InputText *textIn);
130 };
131
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_da" suffix presumably denotes Danish - confirm
 * against the getLanguage() implementation.
 */
132 class CharsetRecog_8859_1_da : public CharsetRecog_8859_1
133 {
134 public:
135 virtual ~CharsetRecog_8859_1_da();
136
137 const char *getLanguage() const;
138
139 int32_t match(InputText *textIn);
140 };
141
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_de" suffix presumably denotes German - confirm
 * against the getLanguage() implementation.
 */
142 class CharsetRecog_8859_1_de : public CharsetRecog_8859_1
143 {
144 public:
145 virtual ~CharsetRecog_8859_1_de();
146
147 const char *getLanguage() const;
148
149 int32_t match(InputText *textIn);
150 };
151
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_es" suffix presumably denotes Spanish - confirm
 * against the getLanguage() implementation.
 */
152 class CharsetRecog_8859_1_es : public CharsetRecog_8859_1
153 {
154 public:
155 virtual ~CharsetRecog_8859_1_es();
156
157 const char *getLanguage() const;
158
159 int32_t match(InputText *textIn);
160 };
161
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_fr" suffix presumably denotes French - confirm
 * against the getLanguage() implementation.
 */
162 class CharsetRecog_8859_1_fr : public CharsetRecog_8859_1
163 {
164 public:
165 virtual ~CharsetRecog_8859_1_fr();
166
167 const char *getLanguage() const;
168
169 int32_t match(InputText *textIn);
170 };
171
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_it" suffix presumably denotes Italian - confirm
 * against the getLanguage() implementation.
 */
172 class CharsetRecog_8859_1_it : public CharsetRecog_8859_1
173 {
174 public:
175 virtual ~CharsetRecog_8859_1_it();
176
177 const char *getLanguage() const;
178
179 int32_t match(InputText *textIn);
180 };
181
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_nl" suffix presumably denotes Dutch - confirm
 * against the getLanguage() implementation.
 */
182 class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1
183 {
184 public:
185 virtual ~CharsetRecog_8859_1_nl();
186
187 const char *getLanguage() const;
188
189 int32_t match(InputText *textIn);
190 };
191
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_no" suffix presumably denotes Norwegian - confirm
 * against the getLanguage() implementation.
 */
192 class CharsetRecog_8859_1_no : public CharsetRecog_8859_1
193 {
194 public:
195 virtual ~CharsetRecog_8859_1_no();
196
197 const char *getLanguage() const;
198
199 int32_t match(InputText *textIn);
200 };
201
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_pt" suffix presumably denotes Portuguese - confirm
 * against the getLanguage() implementation.
 */
202 class CharsetRecog_8859_1_pt : public CharsetRecog_8859_1
203 {
204 public:
205 virtual ~CharsetRecog_8859_1_pt();
206
207 const char *getLanguage() const;
208
209 int32_t match(InputText *textIn);
210 };
211
/*
 * Language-specific recognizer derived from CharsetRecog_8859_1.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_sv" suffix presumably denotes Swedish - confirm
 * against the getLanguage() implementation.
 */
212 class CharsetRecog_8859_1_sv : public CharsetRecog_8859_1
213 {
214 public:
215 virtual ~CharsetRecog_8859_1_sv();
216
217 const char *getLanguage() const;
218
219 int32_t match(InputText *textIn);
220 };
221
/*
 * Language-specific recognizer derived from CharsetRecog_8859_2.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_cs" suffix presumably denotes Czech - confirm
 * against the getLanguage() implementation.
 */
222 class CharsetRecog_8859_2_cs : public CharsetRecog_8859_2
223 {
224 public:
225 virtual ~CharsetRecog_8859_2_cs();
226
227 const char *getLanguage() const;
228
229 int32_t match(InputText *textIn);
230 };
231
/*
 * Language-specific recognizer derived from CharsetRecog_8859_2.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_hu" suffix presumably denotes Hungarian - confirm
 * against the getLanguage() implementation.
 */
232 class CharsetRecog_8859_2_hu : public CharsetRecog_8859_2
233 {
234 public:
235 virtual ~CharsetRecog_8859_2_hu();
236
237 const char *getLanguage() const;
238
239 int32_t match(InputText *textIn);
240 };
241
/*
 * Language-specific recognizer derived from CharsetRecog_8859_2.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_pl" suffix presumably denotes Polish - confirm
 * against the getLanguage() implementation.
 */
242 class CharsetRecog_8859_2_pl : public CharsetRecog_8859_2
243 {
244 public:
245 virtual ~CharsetRecog_8859_2_pl();
246
247 const char *getLanguage() const;
248
249 int32_t match(InputText *textIn);
250 };
251
/*
 * Language-specific recognizer derived from CharsetRecog_8859_2.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_ro" suffix presumably denotes Romanian - confirm
 * against the getLanguage() implementation.
 */
252 class CharsetRecog_8859_2_ro : public CharsetRecog_8859_2
253 {
254 public:
255 virtual ~CharsetRecog_8859_2_ro();
256
257 const char *getLanguage() const;
258
259 int32_t match(InputText *textIn);
260 };
261
/*
 * Language-specific recognizer derived from CharsetRecog_8859_5.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_ru" suffix presumably denotes Russian - confirm
 * against the getLanguage() implementation.
 */
262 class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
263 {
264 public:
265 virtual ~CharsetRecog_8859_5_ru();
266
267 const char *getLanguage() const;
268
269 int32_t match(InputText *textIn);
270 };
271
/*
 * Language-specific recognizer derived from CharsetRecog_8859_6.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_ar" suffix presumably denotes Arabic - confirm
 * against the getLanguage() implementation.
 */
272 class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
273 {
274 public:
275 virtual ~CharsetRecog_8859_6_ar();
276
277 const char *getLanguage() const;
278
279 int32_t match(InputText *textIn);
280 };
281
/*
 * Language-specific recognizer derived from CharsetRecog_8859_7.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_el" suffix presumably denotes Greek - confirm
 * against the getLanguage() implementation.
 */
282 class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
283 {
284 public:
285 virtual ~CharsetRecog_8859_7_el();
286
287 const char *getLanguage() const;
288
289 int32_t match(InputText *textIn);
290 };
291
/*
 * Language-specific recognizer derived from CharsetRecog_8859_8.
 * Unlike its sibling CharsetRecog_8859_8_he, this class also declares its
 * own getName(), replacing the one declared in CharsetRecog_8859_8.
 * It further declares match(), which CharsetRecog_sbcs leaves pure
 * virtual, and getLanguage() (presumably overriding a CharsetRecognizer
 * virtual; that class is declared in csrecog.h, not here).
 * NOTE(review): presumably the "ISO-8859-8-I" Hebrew variant - confirm
 * against the getName()/getLanguage() implementations.
 */
292 class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
293 {
294 public:
295 virtual ~CharsetRecog_8859_8_I_he();
296
297 const char *getName() const;
298
299 const char *getLanguage() const;
300
301 int32_t match(InputText *textIn);
302 };
303
/*
 * Language-specific recognizer derived from CharsetRecog_8859_8.
 * Inherits getName() from CharsetRecog_8859_8 and declares match(), which
 * CharsetRecog_sbcs leaves pure virtual, plus getLanguage() (presumably
 * overriding a CharsetRecognizer virtual; that class is declared in
 * csrecog.h, not here).
 * NOTE(review): the "_he" suffix presumably denotes Hebrew - confirm
 * against the getLanguage() implementation.
 */
304 class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
305 {
306 public:
307 virtual ~CharsetRecog_8859_8_he ();
308
309 const char *getLanguage() const;
310
311 int32_t match(InputText *textIn);
312 };
313
/*
 * Language-specific recognizer derived from CharsetRecog_8859_9.
 * Declares match(), which CharsetRecog_sbcs leaves pure virtual, and
 * getLanguage() (presumably overriding a CharsetRecognizer virtual;
 * that class is declared in csrecog.h, not here).
 * NOTE(review): the "_tr" suffix presumably denotes Turkish - confirm
 * against the getLanguage() implementation.
 */
314 class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
315 {
316 public:
317 virtual ~CharsetRecog_8859_9_tr ();
318
319 const char *getLanguage() const;
320
321 int32_t match(InputText *textIn);
322 };
323
/*
 * Recognizer derived directly from CharsetRecog_sbcs. It declares both
 * members that the base leaves pure virtual, getName() and match(), and
 * additionally getLanguage() (presumably overriding a CharsetRecognizer
 * virtual; that class is declared in csrecog.h, not here).
 * NOTE(review): presumably targets the windows-1256 code page - confirm
 * against the getName() implementation.
 */
324 class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
325 {
326 public:
327 virtual ~CharsetRecog_windows_1256();
328
329 const char *getName() const;
330
331 const char *getLanguage() const;
332
333 int32_t match(InputText *textIn);
334 };
335
/*
 * Recognizer derived directly from CharsetRecog_sbcs. It declares both
 * members that the base leaves pure virtual, getName() and match(), and
 * additionally getLanguage() (presumably overriding a CharsetRecognizer
 * virtual; that class is declared in csrecog.h, not here).
 * NOTE(review): presumably targets the windows-1251 code page - confirm
 * against the getName() implementation.
 */
336 class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
337 {
338 public:
339 virtual ~CharsetRecog_windows_1251();
340
341 const char *getName() const;
342
343 const char *getLanguage() const;
344
345 int32_t match(InputText *textIn);
346 };
347
348
/*
 * Recognizer derived directly from CharsetRecog_sbcs. It declares both
 * members that the base leaves pure virtual, getName() and match(), and
 * additionally getLanguage() (presumably overriding a CharsetRecognizer
 * virtual; that class is declared in csrecog.h, not here).
 * NOTE(review): presumably targets the KOI8-R encoding - confirm against
 * the getName() implementation.
 */
349 class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
350 {
351 public:
352 virtual ~CharsetRecog_KOI8_R();
353
354 const char *getName() const;
355
356 const char *getLanguage() const;
357
358 int32_t match(InputText *textIn);
359 };
360
/*
 * Intermediate base derived from CharsetRecog_sbcs that declares only
 * getLanguage(). Neither getName() nor match() is declared here, so both
 * stay pure virtual and this class is abstract; the _rtl and _ltr
 * subclasses declare them.
 * NOTE(review): presumably covers Hebrew text in the IBM424 code page -
 * confirm against the implementation.
 */
361 class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
362 {
363 public:
364 virtual ~CharsetRecog_IBM424_he();
365
366 const char *getLanguage() const;
367 };
368
/*
 * Recognizer derived from CharsetRecog_IBM424_he. Declares getName() and
 * match(), the two members CharsetRecog_sbcs leaves pure virtual;
 * getLanguage() is inherited from CharsetRecog_IBM424_he.
 * NOTE(review): "_rtl" presumably means right-to-left byte order of the
 * text - confirm against the implementation.
 */
369 class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
370 public:
371 virtual ~CharsetRecog_IBM424_he_rtl();
372
373 const char *getName() const;
374
375 int32_t match(InputText *textIn);
376 };
377
/*
 * Recognizer derived from CharsetRecog_IBM424_he. Declares getName() and
 * match(), the two members CharsetRecog_sbcs leaves pure virtual;
 * getLanguage() is inherited from CharsetRecog_IBM424_he.
 * The explicit `public:` label keeps this class consistent with
 * CharsetRecog_IBM424_he_rtl: without it, class default access makes the
 * destructor, getName() and match() private, so they could only be
 * reached through a base-class pointer or reference.
 * NOTE(review): "_ltr" presumably means left-to-right byte order of the
 * text - confirm against the implementation.
 */
378 class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
public:
379 virtual ~CharsetRecog_IBM424_he_ltr();
380
381 const char *getName() const;
382
383 int32_t match(InputText *textIn);
384 };
385
/*
 * Intermediate base derived from CharsetRecog_sbcs. It declares
 * getLanguage() publicly and a pair of protected hooks, matchInit() and
 * matchFinish(), for use by subclasses. Neither getName() nor match() is
 * declared here, so both stay pure virtual and this class is abstract;
 * the _rtl and _ltr subclasses declare them.
 * NOTE(review): presumably covers Arabic text in the IBM420 code page -
 * confirm against the implementation.
 */
386 class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
387 {
388 public:
389 virtual ~CharsetRecog_IBM420_ar();
390
391 const char *getLanguage() const;
392
393 protected:
/* NOTE(review): presumably called by subclasses before and after
 * matching to swap in and then restore a transformed input buffer -
 * confirm in the implementation. */
394 void matchInit(InputText *textIn);
395 void matchFinish(InputText *textIn);
396
397 private:
/* NOTE(review): the names suggest a saved copy of the input's byte
 * buffer and its length, plus a flag for whether a buffer must be freed.
 * No constructor is declared in this class, so how these fields get
 * their initial values is not visible here - confirm. */
398 uint8_t *prev_fInputBytes;
399 int32_t prev_fInputBytesLength;
400 UBool deleteBuffer;
401
/* Private helpers. The two unshape functions take an input byte range
 * and report a length through the `length` reference parameter;
 * ownership of the returned pointer is not visible from this header. */
402 UBool isLamAlef(uint8_t b);
403 uint8_t *unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
404 uint8_t *unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
405 };
406
/*
 * Recognizer derived from CharsetRecog_IBM420_ar. Declares getName() and
 * match(), the two members CharsetRecog_sbcs leaves pure virtual;
 * getLanguage() is inherited from CharsetRecog_IBM420_ar.
 * NOTE(review): "_rtl" presumably means right-to-left byte order of the
 * text - confirm against the implementation.
 */
407 class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
408 public:
409 virtual ~CharsetRecog_IBM420_ar_rtl();
410
411 const char *getName() const;
412
413 int32_t match(InputText *textIn);
414 };
415
/*
 * Recognizer derived from CharsetRecog_IBM420_ar. Declares getName() and
 * match(), the two members CharsetRecog_sbcs leaves pure virtual;
 * getLanguage() is inherited from CharsetRecog_IBM420_ar.
 * The explicit `public:` label keeps this class consistent with
 * CharsetRecog_IBM420_ar_rtl: without it, class default access makes the
 * destructor, getName() and match() private, so they could only be
 * reached through a base-class pointer or reference.
 * NOTE(review): "_ltr" presumably means left-to-right byte order of the
 * text - confirm against the implementation.
 */
416 class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
public:
417 virtual ~CharsetRecog_IBM420_ar_ltr();
418
419 const char *getName() const;
420
421 int32_t match(InputText *textIn);
422 };
423
424 U_NAMESPACE_END
425
426 #endif
427 #endif /* __CSRSBCS_H */