]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/csrsbcs.h
ICU-551.24.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
CommitLineData
/*
 **********************************************************************
 * Copyright (C) 2005-2015, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 */
7
8#ifndef __CSRSBCS_H
9#define __CSRSBCS_H
10
11#include "unicode/uobject.h"
12
13#if !UCONFIG_NO_CONVERSION
14
15#include "csrecog.h"
16
17U_NAMESPACE_BEGIN
18
19class NGramParser : public UMemory
20{
21private:
73c04bcf 22 int32_t ngram;
57a6839d 23 const int32_t *ngramList;
73c04bcf
A
24
25 int32_t ngramCount;
26 int32_t hitCount;
27
57a6839d
A
28protected:
29 int32_t byteIndex;
30 const uint8_t *charMap;
31
32 void addByte(int32_t b);
33
73c04bcf
A
34public:
35 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
b331163b 36 virtual ~NGramParser();
73c04bcf
A
37
38private:
39 /*
40 * Binary search for value in table, which must have exactly 64 entries.
41 */
42 int32_t search(const int32_t *table, int32_t value);
43
44 void lookup(int32_t thisNgram);
57a6839d
A
45
46 virtual int32_t nextByte(InputText *det);
47 virtual void parseCharacters(InputText *det);
73c04bcf
A
48
49public:
50 int32_t parse(InputText *det);
51
52};
53
b331163b 54#if !UCONFIG_ONLY_HTML_CONVERSION
57a6839d
A
55class NGramParser_IBM420 : public NGramParser
56{
57private:
58 int32_t alef;
59 int32_t isLamAlef(int32_t b);
60 int32_t nextByte(InputText *det);
61 void parseCharacters(InputText *det);
62
63public:
64 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
65};
b331163b 66#endif
57a6839d 67
51004dcb 68
73c04bcf
A
69class CharsetRecog_sbcs : public CharsetRecognizer
70{
73c04bcf
A
71public:
72 CharsetRecog_sbcs();
73c04bcf 73 virtual ~CharsetRecog_sbcs();
73c04bcf 74 virtual const char *getName() const = 0;
51004dcb
A
75 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
76 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
73c04bcf
A
77};
78
79class CharsetRecog_8859_1 : public CharsetRecog_sbcs
80{
81public:
82 virtual ~CharsetRecog_8859_1();
73c04bcf 83 const char *getName() const;
51004dcb 84 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
85};
86
87class CharsetRecog_8859_2 : public CharsetRecog_sbcs
88{
89public:
90 virtual ~CharsetRecog_8859_2();
73c04bcf 91 const char *getName() const;
51004dcb 92 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
93};
94
95class CharsetRecog_8859_5 : public CharsetRecog_sbcs
96{
97public:
98 virtual ~CharsetRecog_8859_5();
73c04bcf
A
99 const char *getName() const;
100};
101
102class CharsetRecog_8859_6 : public CharsetRecog_sbcs
103{
104public:
105 virtual ~CharsetRecog_8859_6();
106
107 const char *getName() const;
108};
109
110class CharsetRecog_8859_7 : public CharsetRecog_sbcs
111{
112public:
113 virtual ~CharsetRecog_8859_7();
114
115 const char *getName() const;
116};
117
118class CharsetRecog_8859_8 : public CharsetRecog_sbcs
119{
120public:
121 virtual ~CharsetRecog_8859_8();
122
123 virtual const char *getName() const;
124};
125
126class CharsetRecog_8859_9 : public CharsetRecog_sbcs
127{
128public:
129 virtual ~CharsetRecog_8859_9();
130
131 const char *getName() const;
132};
133
73c04bcf 134
73c04bcf
A
135
136class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
137{
138public:
139 virtual ~CharsetRecog_8859_5_ru();
140
141 const char *getLanguage() const;
142
51004dcb 143 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
144};
145
146class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
147{
148public:
149 virtual ~CharsetRecog_8859_6_ar();
150
151 const char *getLanguage() const;
152
51004dcb 153 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
154};
155
156class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
157{
158public:
159 virtual ~CharsetRecog_8859_7_el();
160
161 const char *getLanguage() const;
162
51004dcb 163 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
164};
165
166class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
167{
168public:
169 virtual ~CharsetRecog_8859_8_I_he();
170
171 const char *getName() const;
172
173 const char *getLanguage() const;
174
51004dcb 175 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
176};
177
178class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
179{
180public:
181 virtual ~CharsetRecog_8859_8_he ();
182
183 const char *getLanguage() const;
184
51004dcb 185 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
186};
187
188class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
189{
190public:
191 virtual ~CharsetRecog_8859_9_tr ();
192
193 const char *getLanguage() const;
194
51004dcb 195 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
196};
197
198class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
199{
200public:
201 virtual ~CharsetRecog_windows_1256();
202
203 const char *getName() const;
204
205 const char *getLanguage() const;
206
51004dcb 207 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
208};
209
210class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
211{
212public:
213 virtual ~CharsetRecog_windows_1251();
214
215 const char *getName() const;
216
217 const char *getLanguage() const;
218
51004dcb 219 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
220};
221
222
223class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
224{
225public:
226 virtual ~CharsetRecog_KOI8_R();
227
228 const char *getName() const;
229
230 const char *getLanguage() const;
231
51004dcb 232 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
233};
234
b331163b 235#if !UCONFIG_ONLY_HTML_CONVERSION
729e4ab9
A
236class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
237{
238public:
239 virtual ~CharsetRecog_IBM424_he();
240
241 const char *getLanguage() const;
242};
243
244class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
245public:
246 virtual ~CharsetRecog_IBM424_he_rtl();
247
248 const char *getName() const;
249
51004dcb 250 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9
A
251};
252
253class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
254 virtual ~CharsetRecog_IBM424_he_ltr();
255
256 const char *getName() const;
257
51004dcb 258 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9
A
259};
260
261class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
262{
263public:
264 virtual ~CharsetRecog_IBM420_ar();
265
266 const char *getLanguage() const;
57a6839d 267 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
729e4ab9 268
729e4ab9
A
269};
270
271class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
272public:
273 virtual ~CharsetRecog_IBM420_ar_rtl();
274
275 const char *getName() const;
276
51004dcb 277 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9
A
278};
279
280class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
281 virtual ~CharsetRecog_IBM420_ar_ltr();
282
283 const char *getName() const;
284
51004dcb 285 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9 286};
b331163b 287#endif
729e4ab9 288
73c04bcf
A
289U_NAMESPACE_END
290
51004dcb 291#endif /* !UCONFIG_NO_CONVERSION */
73c04bcf 292#endif /* __CSRSBCS_H */