2 **********************************************************************
3 * Copyright (C) 2005-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
11 #include "unicode/uobject.h"
13 #if !UCONFIG_NO_CONVERSION
// NGramParser: helper class derived from UMemory, declared for use by the
// single-byte charset recognizers in this header.
// NOTE(review): the embedded original line numbers jump (19 -> 23 -> 30 ...),
// so braces, access specifiers and some data members are not visible here.
// Presumably it walks the bytes of an InputText, builds n-grams and counts
// how many appear in ngramList -- confirm against the implementation file.
19 class NGramParser
: public UMemory
// Read-only table of n-gram values (presumably set from theNgramList).
23 const int32_t *ngramList
;
// Read-only byte table (presumably set from theCharMap).
30 const uint8_t *charMap
;
// Takes one byte value; its effect is defined in the implementation file.
32 void addByte(int32_t b
);
// Constructs a parser from an n-gram table and a byte table; neither table
// can be modified through the pointers passed here.
35 NGramParser(const int32_t *theNgramList
, const uint8_t *theCharMap
);
// Virtual destructor: the class has virtual members and a visible subclass
// (NGramParser_IBM420).
36 virtual ~NGramParser();
// The next line is the surviving fragment of the original doc comment for
// search(): the table passed in must have exactly 64 entries.
40 * Binary search for value in table, which must have exactly 64 entries.
42 int32_t search(const int32_t *table
, int32_t value
);
// Takes a single n-gram value; presumably checks it against ngramList and
// records a hit -- TODO confirm.
44 void lookup(int32_t thisNgram
);
// Virtual hook taking the input text and returning an int32_t; presumably the
// next byte to process. Redeclared by NGramParser_IBM420.
46 virtual int32_t nextByte(InputText
*det
);
// Virtual hook taking the input text; redeclared by NGramParser_IBM420.
47 virtual void parseCharacters(InputText
*det
);
// Non-virtual entry point taking the input text and returning an int32_t
// (presumably a confidence score -- confirm against the implementation).
50 int32_t parse(InputText
*det
);
// The declaration below is compiled only when UCONFIG_ONLY_HTML_CONVERSION is
// not set. NOTE(review): the matching #endif is not visible in this listing.
54 #if !UCONFIG_ONLY_HTML_CONVERSION
// NGramParser_IBM420: NGramParser subclass that redeclares the two virtual
// hooks nextByte() and parseCharacters() and adds isLamAlef().
// Presumably the IBM420 (EBCDIC Arabic) variant, judging by the name.
55 class NGramParser_IBM420
: public NGramParser
// Same parameter list as the NGramParser constructor.
58 NGramParser_IBM420(const int32_t *theNgramList
, const uint8_t *theCharMap
);
// Destructor; virtual because the base class destructor is virtual.
59 ~NGramParser_IBM420();
// Takes a byte value and returns an int32_t; presumably identifies Lam-Alef
// ligature bytes -- TODO confirm what the return value encodes.
63 int32_t isLamAlef(int32_t b
);
// Overrides NGramParser::nextByte (identical signature, implicitly virtual).
64 int32_t nextByte(InputText
*det
);
// Overrides NGramParser::parseCharacters (identical signature, implicitly virtual).
65 void parseCharacters(InputText
*det
);
// CharsetRecog_sbcs: abstract base, derived from CharsetRecognizer, for every
// other recognizer class declared in this header.
// It is abstract because getName() and match() are pure virtual here.
// NOTE(review): braces and access specifiers are missing from this listing.
70 class CharsetRecog_sbcs
: public CharsetRecognizer
74 virtual ~CharsetRecog_sbcs();
// Pure virtual; returns a C string (presumably the charset name).
75 virtual const char *getName() const = 0;
// Pure virtual; takes the input text and a writable CharsetMatch and returns
// a UBool. Presumably reports whether the input matched -- TODO confirm.
76 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const = 0;
// Virtual helper with a default implementation (not pure). Takes the input
// text, an n-gram table and a byte table, and returns an int32_t (presumably
// a confidence value). CharsetRecog_IBM420_ar redeclares it.
77 virtual int32_t match_sbcs(InputText
*det
, const int32_t ngrams
[], const uint8_t charMap
[]) const;
// CharsetRecog_8859_1: concrete recognizer derived directly from
// CharsetRecog_sbcs (ISO-8859-1, judging by the class name).
// It declares both functions that are pure virtual in the base.
80 class CharsetRecog_8859_1
: public CharsetRecog_sbcs
83 virtual ~CharsetRecog_8859_1();
// Overrides CharsetRecog_sbcs::getName.
84 const char *getName() const;
// Overrides CharsetRecog_sbcs::match.
85 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_2: concrete recognizer derived directly from
// CharsetRecog_sbcs (ISO-8859-2, judging by the class name).
// It declares both functions that are pure virtual in the base.
88 class CharsetRecog_8859_2
: public CharsetRecog_sbcs
91 virtual ~CharsetRecog_8859_2();
// Overrides CharsetRecog_sbcs::getName.
92 const char *getName() const;
// Overrides CharsetRecog_sbcs::match.
93 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_5: intermediate recognizer derived from CharsetRecog_sbcs
// (ISO-8859-5, judging by the class name).
// Only getName() is declared in the visible lines; match() is declared by the
// subclass CharsetRecog_8859_5_ru.
96 class CharsetRecog_8859_5
: public CharsetRecog_sbcs
99 virtual ~CharsetRecog_8859_5();
// Overrides CharsetRecog_sbcs::getName.
100 const char *getName() const;
// CharsetRecog_8859_6: intermediate recognizer derived from CharsetRecog_sbcs
// (ISO-8859-6, judging by the class name).
// Only getName() is declared in the visible lines; match() is declared by the
// subclass CharsetRecog_8859_6_ar.
103 class CharsetRecog_8859_6
: public CharsetRecog_sbcs
106 virtual ~CharsetRecog_8859_6();
// Overrides CharsetRecog_sbcs::getName.
108 const char *getName() const;
// CharsetRecog_8859_7: intermediate recognizer derived from CharsetRecog_sbcs
// (ISO-8859-7, judging by the class name).
// Only getName() is declared in the visible lines; match() is declared by the
// subclass CharsetRecog_8859_7_el.
111 class CharsetRecog_8859_7
: public CharsetRecog_sbcs
114 virtual ~CharsetRecog_8859_7();
// Overrides CharsetRecog_sbcs::getName.
116 const char *getName() const;
// CharsetRecog_8859_8: intermediate recognizer derived from CharsetRecog_sbcs
// (ISO-8859-8, judging by the class name).
// Only getName() is declared in the visible lines; match() is declared by the
// subclasses CharsetRecog_8859_8_I_he and CharsetRecog_8859_8_he.
119 class CharsetRecog_8859_8
: public CharsetRecog_sbcs
122 virtual ~CharsetRecog_8859_8();
// Overrides CharsetRecog_sbcs::getName; CharsetRecog_8859_8_I_he redeclares it.
124 virtual const char *getName() const;
// CharsetRecog_8859_9: intermediate recognizer derived from CharsetRecog_sbcs
// (ISO-8859-9, judging by the class name).
// Only getName() is declared in the visible lines; match() is declared by the
// subclass CharsetRecog_8859_9_tr.
127 class CharsetRecog_8859_9
: public CharsetRecog_sbcs
130 virtual ~CharsetRecog_8859_9();
// Overrides CharsetRecog_sbcs::getName.
132 const char *getName() const;
// CharsetRecog_8859_5_ru: language-specific recognizer derived from
// CharsetRecog_8859_5 (Russian, judging by the "_ru" suffix).
// getName() is inherited from CharsetRecog_8859_5; this class adds
// getLanguage() and match().
137 class CharsetRecog_8859_5_ru
: public CharsetRecog_8859_5
140 virtual ~CharsetRecog_8859_5_ru();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
142 const char *getLanguage() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
144 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_6_ar: language-specific recognizer derived from
// CharsetRecog_8859_6 (Arabic, judging by the "_ar" suffix).
// getName() is inherited from CharsetRecog_8859_6; this class adds
// getLanguage() and match().
147 class CharsetRecog_8859_6_ar
: public CharsetRecog_8859_6
150 virtual ~CharsetRecog_8859_6_ar();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
152 const char *getLanguage() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
154 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_7_el: language-specific recognizer derived from
// CharsetRecog_8859_7 (Greek, judging by the "_el" suffix).
// getName() is inherited from CharsetRecog_8859_7; this class adds
// getLanguage() and match().
157 class CharsetRecog_8859_7_el
: public CharsetRecog_8859_7
160 virtual ~CharsetRecog_8859_7_el();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
162 const char *getLanguage() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
164 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_8_I_he: language-specific recognizer derived from
// CharsetRecog_8859_8 (Hebrew, judging by the "_he" suffix).
// Unlike its sibling CharsetRecog_8859_8_he, it redeclares getName() --
// presumably to report a distinct "-I" charset name; TODO confirm.
167 class CharsetRecog_8859_8_I_he
: public CharsetRecog_8859_8
170 virtual ~CharsetRecog_8859_8_I_he();
// Overrides CharsetRecog_8859_8::getName.
172 const char *getName() const;
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
174 const char *getLanguage() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
176 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_8_he: language-specific recognizer derived from
// CharsetRecog_8859_8 (Hebrew, judging by the "_he" suffix).
// getName() is inherited from CharsetRecog_8859_8; this class adds
// getLanguage() and match().
179 class CharsetRecog_8859_8_he
: public CharsetRecog_8859_8
182 virtual ~CharsetRecog_8859_8_he ();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
184 const char *getLanguage() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
186 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_9_tr: language-specific recognizer derived from
// CharsetRecog_8859_9 (Turkish, judging by the "_tr" suffix).
// getName() is inherited from CharsetRecog_8859_9; this class adds
// getLanguage() and match().
189 class CharsetRecog_8859_9_tr
: public CharsetRecog_8859_9
192 virtual ~CharsetRecog_8859_9_tr ();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
194 const char *getLanguage() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
196 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_windows_1256: concrete recognizer derived directly from
// CharsetRecog_sbcs (windows-1256, judging by the class name).
// It declares getName(), getLanguage() and match() itself, with no
// intermediate per-charset class.
199 class CharsetRecog_windows_1256
: public CharsetRecog_sbcs
202 virtual ~CharsetRecog_windows_1256();
// Overrides CharsetRecog_sbcs::getName.
204 const char *getName() const;
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
206 const char *getLanguage() const;
// Overrides CharsetRecog_sbcs::match.
208 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_windows_1251: concrete recognizer derived directly from
// CharsetRecog_sbcs (windows-1251, judging by the class name).
// It declares getName(), getLanguage() and match() itself, with no
// intermediate per-charset class.
211 class CharsetRecog_windows_1251
: public CharsetRecog_sbcs
214 virtual ~CharsetRecog_windows_1251();
// Overrides CharsetRecog_sbcs::getName.
216 const char *getName() const;
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
218 const char *getLanguage() const;
// Overrides CharsetRecog_sbcs::match.
220 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_KOI8_R: concrete recognizer derived directly from
// CharsetRecog_sbcs (KOI8-R, judging by the class name).
// It declares getName(), getLanguage() and match() itself, with no
// intermediate per-charset class.
224 class CharsetRecog_KOI8_R
: public CharsetRecog_sbcs
227 virtual ~CharsetRecog_KOI8_R();
// Overrides CharsetRecog_sbcs::getName.
229 const char *getName() const;
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
231 const char *getLanguage() const;
// Overrides CharsetRecog_sbcs::match.
233 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// The declaration below is compiled only when UCONFIG_ONLY_HTML_CONVERSION is
// not set. NOTE(review): the matching #endif is not visible in this listing,
// so the extent of the guarded region cannot be confirmed from here.
236 #if !UCONFIG_ONLY_HTML_CONVERSION
// CharsetRecog_IBM424_he: intermediate recognizer derived from
// CharsetRecog_sbcs (IBM424 Hebrew, judging by the class name).
// It declares only getLanguage(); getName() and match() are declared by the
// subclasses CharsetRecog_IBM424_he_rtl and CharsetRecog_IBM424_he_ltr.
237 class CharsetRecog_IBM424_he
: public CharsetRecog_sbcs
240 virtual ~CharsetRecog_IBM424_he();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
242 const char *getLanguage() const;
// CharsetRecog_IBM424_he_rtl: concrete recognizer derived from
// CharsetRecog_IBM424_he (presumably for right-to-left text, per the "_rtl"
// suffix -- confirm against the implementation).
// getLanguage() is inherited; this class adds getName() and match().
245 class CharsetRecog_IBM424_he_rtl
: public CharsetRecog_IBM424_he
{
247 virtual ~CharsetRecog_IBM424_he_rtl();
// Overrides the pure-virtual getName() inherited from CharsetRecog_sbcs.
249 const char *getName() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
251 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM424_he_ltr: concrete recognizer derived from
// CharsetRecog_IBM424_he (presumably for left-to-right text, per the "_ltr"
// suffix -- confirm against the implementation).
// getLanguage() is inherited; this class adds getName() and match().
254 class CharsetRecog_IBM424_he_ltr
: public CharsetRecog_IBM424_he
{
255 virtual ~CharsetRecog_IBM424_he_ltr();
// Overrides the pure-virtual getName() inherited from CharsetRecog_sbcs.
257 const char *getName() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
259 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM420_ar: intermediate recognizer derived from
// CharsetRecog_sbcs (IBM420 Arabic, judging by the class name).
// It is the only class in this header that redeclares match_sbcs();
// presumably so matching can use NGramParser_IBM420 -- TODO confirm.
// getName() and match() are declared by the _rtl and _ltr subclasses.
262 class CharsetRecog_IBM420_ar
: public CharsetRecog_sbcs
265 virtual ~CharsetRecog_IBM420_ar();
// Returns a C string; presumably a language code and an override of a virtual
// in CharsetRecognizer, which is not visible in this file.
267 const char *getLanguage() const;
// Overrides CharsetRecog_sbcs::match_sbcs (identical signature, implicitly virtual).
268 int32_t match_sbcs(InputText
*det
, const int32_t ngrams
[], const uint8_t charMap
[]) const;
// CharsetRecog_IBM420_ar_rtl: concrete recognizer derived from
// CharsetRecog_IBM420_ar (presumably for right-to-left text, per the "_rtl"
// suffix -- confirm against the implementation).
// getLanguage() and match_sbcs() are inherited; this class adds getName()
// and match().
272 class CharsetRecog_IBM420_ar_rtl
: public CharsetRecog_IBM420_ar
{
274 virtual ~CharsetRecog_IBM420_ar_rtl();
// Overrides the pure-virtual getName() inherited from CharsetRecog_sbcs.
276 const char *getName() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
278 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM420_ar_ltr: concrete recognizer derived from
// CharsetRecog_IBM420_ar (presumably for left-to-right text, per the "_ltr"
// suffix -- confirm against the implementation).
// getLanguage() and match_sbcs() are inherited; this class adds getName()
// and match().
281 class CharsetRecog_IBM420_ar_ltr
: public CharsetRecog_IBM420_ar
{
282 virtual ~CharsetRecog_IBM420_ar_ltr();
// Overrides the pure-virtual getName() inherited from CharsetRecog_sbcs.
284 const char *getName() const;
// Overrides the pure-virtual match() inherited from CharsetRecog_sbcs.
286 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
292 #endif /* !UCONFIG_NO_CONVERSION */
293 #endif /* __CSRSBCS_H */