]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/csrsbcs.h
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
73c04bcf
A
3/*
4 **********************************************************************
b331163b 5 * Copyright (C) 2005-2015, International Business Machines
73c04bcf
A
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10#ifndef __CSRSBCS_H
11#define __CSRSBCS_H
12
13#include "unicode/uobject.h"
14
15#if !UCONFIG_NO_CONVERSION
16
17#include "csrecog.h"
18
19U_NAMESPACE_BEGIN
20
21class NGramParser : public UMemory
22{
23private:
73c04bcf 24 int32_t ngram;
57a6839d 25 const int32_t *ngramList;
73c04bcf
A
26
27 int32_t ngramCount;
28 int32_t hitCount;
29
57a6839d
A
30protected:
31 int32_t byteIndex;
32 const uint8_t *charMap;
33
34 void addByte(int32_t b);
35
73c04bcf
A
36public:
37 NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap);
b331163b 38 virtual ~NGramParser();
73c04bcf
A
39
40private:
41 /*
42 * Binary search for value in table, which must have exactly 64 entries.
43 */
44 int32_t search(const int32_t *table, int32_t value);
45
46 void lookup(int32_t thisNgram);
57a6839d
A
47
48 virtual int32_t nextByte(InputText *det);
49 virtual void parseCharacters(InputText *det);
73c04bcf
A
50
51public:
52 int32_t parse(InputText *det);
53
54};
55
b331163b 56#if !UCONFIG_ONLY_HTML_CONVERSION
57a6839d
A
57class NGramParser_IBM420 : public NGramParser
58{
57a6839d
A
59public:
60 NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap);
2ca993e8
A
61 ~NGramParser_IBM420();
62
63private:
64 int32_t alef;
65 int32_t isLamAlef(int32_t b);
66 int32_t nextByte(InputText *det);
67 void parseCharacters(InputText *det);
57a6839d 68};
b331163b 69#endif
57a6839d 70
51004dcb 71
73c04bcf
A
72class CharsetRecog_sbcs : public CharsetRecognizer
73{
73c04bcf
A
74public:
75 CharsetRecog_sbcs();
73c04bcf 76 virtual ~CharsetRecog_sbcs();
73c04bcf 77 virtual const char *getName() const = 0;
51004dcb
A
78 virtual UBool match(InputText *det, CharsetMatch *results) const = 0;
79 virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
73c04bcf
A
80};
81
82class CharsetRecog_8859_1 : public CharsetRecog_sbcs
83{
84public:
85 virtual ~CharsetRecog_8859_1();
73c04bcf 86 const char *getName() const;
51004dcb 87 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
88};
89
90class CharsetRecog_8859_2 : public CharsetRecog_sbcs
91{
92public:
93 virtual ~CharsetRecog_8859_2();
73c04bcf 94 const char *getName() const;
51004dcb 95 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
96};
97
98class CharsetRecog_8859_5 : public CharsetRecog_sbcs
99{
100public:
101 virtual ~CharsetRecog_8859_5();
73c04bcf
A
102 const char *getName() const;
103};
104
105class CharsetRecog_8859_6 : public CharsetRecog_sbcs
106{
107public:
108 virtual ~CharsetRecog_8859_6();
109
110 const char *getName() const;
111};
112
113class CharsetRecog_8859_7 : public CharsetRecog_sbcs
114{
115public:
116 virtual ~CharsetRecog_8859_7();
117
118 const char *getName() const;
119};
120
121class CharsetRecog_8859_8 : public CharsetRecog_sbcs
122{
123public:
124 virtual ~CharsetRecog_8859_8();
125
126 virtual const char *getName() const;
127};
128
129class CharsetRecog_8859_9 : public CharsetRecog_sbcs
130{
131public:
132 virtual ~CharsetRecog_8859_9();
133
134 const char *getName() const;
135};
136
73c04bcf 137
73c04bcf
A
138
139class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5
140{
141public:
142 virtual ~CharsetRecog_8859_5_ru();
143
144 const char *getLanguage() const;
145
51004dcb 146 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
147};
148
149class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6
150{
151public:
152 virtual ~CharsetRecog_8859_6_ar();
153
154 const char *getLanguage() const;
155
51004dcb 156 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
157};
158
159class CharsetRecog_8859_7_el : public CharsetRecog_8859_7
160{
161public:
162 virtual ~CharsetRecog_8859_7_el();
163
164 const char *getLanguage() const;
165
51004dcb 166 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
167};
168
169class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8
170{
171public:
172 virtual ~CharsetRecog_8859_8_I_he();
173
174 const char *getName() const;
175
176 const char *getLanguage() const;
177
51004dcb 178 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
179};
180
181class CharsetRecog_8859_8_he : public CharsetRecog_8859_8
182{
183public:
184 virtual ~CharsetRecog_8859_8_he ();
185
186 const char *getLanguage() const;
187
51004dcb 188 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
189};
190
191class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9
192{
193public:
194 virtual ~CharsetRecog_8859_9_tr ();
195
196 const char *getLanguage() const;
197
51004dcb 198 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
199};
200
201class CharsetRecog_windows_1256 : public CharsetRecog_sbcs
202{
203public:
204 virtual ~CharsetRecog_windows_1256();
205
206 const char *getName() const;
207
208 const char *getLanguage() const;
209
51004dcb 210 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
211};
212
213class CharsetRecog_windows_1251 : public CharsetRecog_sbcs
214{
215public:
216 virtual ~CharsetRecog_windows_1251();
217
218 const char *getName() const;
219
220 const char *getLanguage() const;
221
51004dcb 222 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
223};
224
225
226class CharsetRecog_KOI8_R : public CharsetRecog_sbcs
227{
228public:
229 virtual ~CharsetRecog_KOI8_R();
230
231 const char *getName() const;
232
233 const char *getLanguage() const;
234
51004dcb 235 virtual UBool match(InputText *det, CharsetMatch *results) const;
73c04bcf
A
236};
237
b331163b 238#if !UCONFIG_ONLY_HTML_CONVERSION
729e4ab9
A
239class CharsetRecog_IBM424_he : public CharsetRecog_sbcs
240{
241public:
242 virtual ~CharsetRecog_IBM424_he();
243
244 const char *getLanguage() const;
245};
246
247class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he {
248public:
249 virtual ~CharsetRecog_IBM424_he_rtl();
250
251 const char *getName() const;
252
51004dcb 253 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9
A
254};
255
256class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he {
257 virtual ~CharsetRecog_IBM424_he_ltr();
258
259 const char *getName() const;
260
51004dcb 261 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9
A
262};
263
264class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs
265{
266public:
267 virtual ~CharsetRecog_IBM420_ar();
268
269 const char *getLanguage() const;
57a6839d 270 int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const;
729e4ab9 271
729e4ab9
A
272};
273
274class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar {
275public:
276 virtual ~CharsetRecog_IBM420_ar_rtl();
277
278 const char *getName() const;
279
51004dcb 280 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9
A
281};
282
283class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar {
284 virtual ~CharsetRecog_IBM420_ar_ltr();
285
286 const char *getName() const;
287
51004dcb 288 virtual UBool match(InputText *det, CharsetMatch *results) const;
729e4ab9 289};
b331163b 290#endif
729e4ab9 291
73c04bcf
A
292U_NAMESPACE_END
293
51004dcb 294#endif /* !UCONFIG_NO_CONVERSION */
73c04bcf 295#endif /* __CSRSBCS_H */