]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/csrsbcs.h
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
CommitLineData
73c04bcf
A
/*
 **********************************************************************
 *   Copyright (C) 2005-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
7
8#ifndef __CSRSBCS_H
9#define __CSRSBCS_H
10
11#include "unicode/uobject.h"
12
13#if !UCONFIG_NO_CONVERSION
14
15#include "csrecog.h"
16
17U_NAMESPACE_BEGIN
18
19class NGramParser : public UMemory
20{
21private:
22 int32_t byteIndex;
23 int32_t ngram;
24
25 const int32_t *ngramList;
26 const uint8_t *charMap;
27
28 int32_t ngramCount;
29 int32_t hitCount;
30
31public:
32 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
33
34private:
35 /*
36 * Binary search for value in table, which must have exactly 64 entries.
37 */
38 int32_t search(const int32_t *table, int32_t value);
39
40 void lookup(int32_t thisNgram);
41 void addByte(int32_t b);
42 int32_t nextByte(InputText *det);
43
44public:
45 int32_t parse(InputText *det);
46
47};
48
49class CharsetRecog_sbcs : public CharsetRecognizer
50{
51protected:
52 UBool haveC1Bytes;
53
54public:
55 CharsetRecog_sbcs();
56
57 virtual ~CharsetRecog_sbcs();
58
59 virtual const char *getName() const = 0;
60
61 virtual int32_t match(InputText *det) = 0;
62
63 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]);
64};
65
66class CharsetRecog_8859_1 : public CharsetRecog_sbcs
67{
68public:
69 virtual ~CharsetRecog_8859_1();
70
71 const char *getName() const;
72};
73
74class CharsetRecog_8859_2 : public CharsetRecog_sbcs
75{
76public:
77 virtual ~CharsetRecog_8859_2();
78
79 const char *getName() const;
80};
81
82class CharsetRecog_8859_5 : public CharsetRecog_sbcs
83{
84public:
85 virtual ~CharsetRecog_8859_5();
86
87 const char *getName() const;
88};
89
90class CharsetRecog_8859_6 : public CharsetRecog_sbcs
91{
92public:
93 virtual ~CharsetRecog_8859_6();
94
95 const char *getName() const;
96};
97
98class CharsetRecog_8859_7 : public CharsetRecog_sbcs
99{
100public:
101 virtual ~CharsetRecog_8859_7();
102
103 const char *getName() const;
104};
105
106class CharsetRecog_8859_8 : public CharsetRecog_sbcs
107{
108public:
109 virtual ~CharsetRecog_8859_8();
110
111 virtual const char *getName() const;
112};
113
114class CharsetRecog_8859_9 : public CharsetRecog_sbcs
115{
116public:
117 virtual ~CharsetRecog_8859_9();
118
119 const char *getName() const;
120};
121
122class CharsetRecog_8859_1_en : public CharsetRecog_8859_1
123{
124public:
125 virtual ~CharsetRecog_8859_1_en();
126
127 const char *getLanguage() const;
128
129 int32_t match(InputText *textIn);
130};
131
132class CharsetRecog_8859_1_da : public CharsetRecog_8859_1
133{
134public:
135 virtual ~CharsetRecog_8859_1_da();
136
137 const char *getLanguage() const;
138
139 int32_t match(InputText *textIn);
140};
141
142class CharsetRecog_8859_1_de : public CharsetRecog_8859_1
143{
144public:
145 virtual ~CharsetRecog_8859_1_de();
146
147 const char *getLanguage() const;
148
149 int32_t match(InputText *textIn);
150};
151
152class CharsetRecog_8859_1_es : public CharsetRecog_8859_1
153{
154public:
155 virtual ~CharsetRecog_8859_1_es();
156
157 const char *getLanguage() const;
158
159 int32_t match(InputText *textIn);
160};
161
162class CharsetRecog_8859_1_fr : public CharsetRecog_8859_1
163{
164public:
165 virtual ~CharsetRecog_8859_1_fr();
166
167 const char *getLanguage() const;
168
169 int32_t match(InputText *textIn);
170};
171
172class CharsetRecog_8859_1_it : public CharsetRecog_8859_1
173{
174public:
175 virtual ~CharsetRecog_8859_1_it();
176
177 const char *getLanguage() const;
178
179 int32_t match(InputText *textIn);
180};
181
182class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1
183{
184public:
185 virtual ~CharsetRecog_8859_1_nl();
186
187 const char *getLanguage() const;
188
189 int32_t match(InputText *textIn);
190};
191
192class CharsetRecog_8859_1_no : public CharsetRecog_8859_1
193{
194public:
195 virtual ~CharsetRecog_8859_1_no();
196
197 const char *getLanguage() const;
198
199 int32_t match(InputText *textIn);
200};
201
202class CharsetRecog_8859_1_pt : public CharsetRecog_8859_1
203{
204public:
205 virtual ~CharsetRecog_8859_1_pt();
206
207 const char *getLanguage() const;
208
209 int32_t match(InputText *textIn);
210};
211
212class CharsetRecog_8859_1_sv : public CharsetRecog_8859_1
213{
214public:
215 virtual ~CharsetRecog_8859_1_sv();
216
217 const char *getLanguage() const;
218
219 int32_t match(InputText *textIn);
220};
221
222class CharsetRecog_8859_2_cs : public CharsetRecog_8859_2
223{
224public:
225 virtual ~CharsetRecog_8859_2_cs();
226
227 const char *getLanguage() const;
228
229 int32_t match(InputText *textIn);
230};
231
232class CharsetRecog_8859_2_hu : public CharsetRecog_8859_2
233{
234public:
235 virtual ~CharsetRecog_8859_2_hu();
236
237 const char *getLanguage() const;
238
239 int32_t match(InputText *textIn);
240};
241
242class CharsetRecog_8859_2_pl : public CharsetRecog_8859_2
243{
244public:
245 virtual ~CharsetRecog_8859_2_pl();
246
247 const char *getLanguage() const;
248
249 int32_t match(InputText *textIn);
250};
251
252class CharsetRecog_8859_2_ro : public CharsetRecog_8859_2
253{
254public:
255 virtual ~CharsetRecog_8859_2_ro();
256
257 const char *getLanguage() const;
258
259 int32_t match(InputText *textIn);
260};
261
262class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
263{
264public:
265 virtual ~CharsetRecog_8859_5_ru();
266
267 const char *getLanguage() const;
268
269 int32_t match(InputText *textIn);
270};
271
272class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
273{
274public:
275 virtual ~CharsetRecog_8859_6_ar();
276
277 const char *getLanguage() const;
278
279 int32_t match(InputText *textIn);
280};
281
282class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
283{
284public:
285 virtual ~CharsetRecog_8859_7_el();
286
287 const char *getLanguage() const;
288
289 int32_t match(InputText *textIn);
290};
291
292class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
293{
294public:
295 virtual ~CharsetRecog_8859_8_I_he();
296
297 const char *getName() const;
298
299 const char *getLanguage() const;
300
301 int32_t match(InputText *textIn);
302};
303
304class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
305{
306public:
307 virtual ~CharsetRecog_8859_8_he ();
308
309 const char *getLanguage() const;
310
311 int32_t match(InputText *textIn);
312};
313
314class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
315{
316public:
317 virtual ~CharsetRecog_8859_9_tr ();
318
319 const char *getLanguage() const;
320
321 int32_t match(InputText *textIn);
322};
323
324class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
325{
326public:
327 virtual ~CharsetRecog_windows_1256();
328
329 const char *getName() const;
330
331 const char *getLanguage() const;
332
333 int32_t match(InputText *textIn);
334};
335
336class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
337{
338public:
339 virtual ~CharsetRecog_windows_1251();
340
341 const char *getName() const;
342
343 const char *getLanguage() const;
344
345 int32_t match(InputText *textIn);
346};
347
348
349class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
350{
351public:
352 virtual ~CharsetRecog_KOI8_R();
353
354 const char *getName() const;
355
356 const char *getLanguage() const;
357
358 int32_t match(InputText *textIn);
359};
360
729e4ab9
A
361class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
362{
363public:
364 virtual ~CharsetRecog_IBM424_he();
365
366 const char *getLanguage() const;
367};
368
369class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
370public:
371 virtual ~CharsetRecog_IBM424_he_rtl();
372
373 const char *getName() const;
374
375 int32_t match(InputText *textIn);
376};
377
378class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
379 virtual ~CharsetRecog_IBM424_he_ltr();
380
381 const char *getName() const;
382
383 int32_t match(InputText *textIn);
384};
385
386class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
387{
388public:
389 virtual ~CharsetRecog_IBM420_ar();
390
391 const char *getLanguage() const;
392
393protected:
394 void matchInit(InputText *textIn);
395 void matchFinish(InputText *textIn);
396
397private:
398 uint8_t *prev_fInputBytes;
399 int32_t prev_fInputBytesLength;
400 UBool deleteBuffer;
401
402 UBool isLamAlef(uint8_t b);
403 uint8_t *unshapeLamAlef(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
404 uint8_t *unshape(const uint8_t *inputBytes, int32_t inputBytesLength, int32_t &length);
405};
406
407class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
408public:
409 virtual ~CharsetRecog_IBM420_ar_rtl();
410
411 const char *getName() const;
412
413 int32_t match(InputText *textIn);
414};
415
416class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
417 virtual ~CharsetRecog_IBM420_ar_ltr();
418
419 const char *getName() const;
420
421 int32_t match(InputText *textIn);
422};
423
73c04bcf
A
424U_NAMESPACE_END
425
426#endif
427#endif /* __CSRSBCS_H */