2 **********************************************************************
3 * Copyright (C) 2005-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
11 #include "unicode/uobject.h"
13 #if !UCONFIG_NO_CONVERSION
/*
 * NGramParser (derives from UMemory).
 *
 * As listed here it declares two table pointers (ngramList, charMap), a
 * constructor taking an n-gram list and a char map, a virtual destructor,
 * the helpers addByte(), search() and lookup(), the virtual hooks
 * nextByte() / parseCharacters(), and a non-virtual parse() taking an
 * InputText.
 *
 * NOTE(review): every line of this listing starts with what appears to be
 * the original header's line number, and the class braces, access
 * specifiers and possibly further data members are not shown. Confirm
 * against the real header before relying on this declaration.
 */
19 class NGramParser
: public UMemory
/* presumably set from the constructor's theNgramList argument - TODO confirm */
23 const int32_t *ngramList
;
/* presumably set from the constructor's theCharMap argument - TODO confirm */
30 const uint8_t *charMap
;
32 void addByte(int32_t b
);
35 NGramParser(const int32_t *theNgramList
, const uint8_t *theCharMap
);
36 virtual ~NGramParser();
/* The next line is the remnant of a comment describing search(): the table passed in must have exactly 64 entries. */
40 * Binary search for value in table, which must have exactly 64 entries.
42 int32_t search(const int32_t *table
, int32_t value
);
44 void lookup(int32_t thisNgram
);
/* Virtual hooks; NGramParser_IBM420 redeclares both with the same signatures. */
46 virtual int32_t nextByte(InputText
*det
);
47 virtual void parseCharacters(InputText
*det
);
50 int32_t parse(InputText
*det
);
54 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * NGramParser_IBM420: NGramParser subclass, declared only when
 * UCONFIG_ONLY_HTML_CONVERSION is not set.
 *
 * Adds isLamAlef(int32_t) and redeclares nextByte() / parseCharacters()
 * with the same signatures as the virtual functions in NGramParser, so
 * they override them. The constructor takes the same two tables as the
 * base-class constructor.
 *
 * NOTE(review): braces and access specifiers are missing from this listing.
 */
55 class NGramParser_IBM420
: public NGramParser
/* NOTE(review): the meaning of the int32_t result is not visible here - check the implementation. */
59 int32_t isLamAlef(int32_t b
);
60 int32_t nextByte(InputText
*det
);
61 void parseCharacters(InputText
*det
);
64 NGramParser_IBM420(const int32_t *theNgramList
, const uint8_t *theCharMap
);
/*
 * CharsetRecog_sbcs: abstract base (derived from CharsetRecognizer) for the
 * recognizer classes declared in the rest of this file.
 *
 * getName() and match() are pure virtual here and must be supplied by
 * subclasses. match_sbcs() is a virtual helper taking an n-gram table and
 * a char map; among the classes listed in this file only
 * CharsetRecog_IBM420_ar redeclares it.
 *
 * NOTE(review): braces and access specifiers are missing from this listing.
 */
69 class CharsetRecog_sbcs
: public CharsetRecognizer
73 virtual ~CharsetRecog_sbcs();
74 virtual const char *getName() const = 0;
75 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const = 0;
/* NOTE(review): the meaning of the int32_t result is not visible in this header - presumably a confidence score; confirm in the implementation. */
76 virtual int32_t match_sbcs(InputText
*det
, const int32_t ngrams
[], const uint8_t charMap
[]) const;
/*
 * CharsetRecog_8859_1: CharsetRecog_sbcs subclass that declares both
 * getName() and match(), i.e. overrides for the two pure virtuals of the
 * base class. The definitions are not part of this header listing.
 */
79 class CharsetRecog_8859_1
: public CharsetRecog_sbcs
82 virtual ~CharsetRecog_8859_1();
83 const char *getName() const;
84 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_2: CharsetRecog_sbcs subclass that declares both
 * getName() and match(), i.e. overrides for the two pure virtuals of the
 * base class. The definitions are not part of this header listing.
 */
87 class CharsetRecog_8859_2
: public CharsetRecog_sbcs
90 virtual ~CharsetRecog_8859_2();
91 const char *getName() const;
92 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_5: intermediate CharsetRecog_sbcs subclass that
 * declares only a destructor and getName(). No match() is declared here,
 * so as listed that is left to subclasses (CharsetRecog_8859_5_ru in this
 * file).
 */
95 class CharsetRecog_8859_5
: public CharsetRecog_sbcs
98 virtual ~CharsetRecog_8859_5();
99 const char *getName() const;
/*
 * CharsetRecog_8859_6: intermediate CharsetRecog_sbcs subclass that
 * declares only a destructor and getName(). No match() is declared here,
 * so as listed that is left to subclasses (CharsetRecog_8859_6_ar in this
 * file).
 */
102 class CharsetRecog_8859_6
: public CharsetRecog_sbcs
105 virtual ~CharsetRecog_8859_6();
107 const char *getName() const;
/*
 * CharsetRecog_8859_7: intermediate CharsetRecog_sbcs subclass that
 * declares only a destructor and getName(). No match() is declared here,
 * so as listed that is left to subclasses (CharsetRecog_8859_7_el in this
 * file).
 */
110 class CharsetRecog_8859_7
: public CharsetRecog_sbcs
113 virtual ~CharsetRecog_8859_7();
115 const char *getName() const;
/*
 * CharsetRecog_8859_8: intermediate CharsetRecog_sbcs subclass that
 * declares only a destructor and getName(). getName() is explicitly
 * marked virtual here; CharsetRecog_8859_8_I_he redeclares it, while
 * CharsetRecog_8859_8_he does not. No match() is declared here, so as
 * listed that is left to those two subclasses.
 */
118 class CharsetRecog_8859_8
: public CharsetRecog_sbcs
121 virtual ~CharsetRecog_8859_8();
123 virtual const char *getName() const;
/*
 * CharsetRecog_8859_9: intermediate CharsetRecog_sbcs subclass that
 * declares only a destructor and getName(). No match() is declared here,
 * so as listed that is left to subclasses (CharsetRecog_8859_9_tr in this
 * file).
 */
126 class CharsetRecog_8859_9
: public CharsetRecog_sbcs
129 virtual ~CharsetRecog_8859_9();
131 const char *getName() const;
/*
 * CharsetRecog_8859_5_ru: subclass of CharsetRecog_8859_5 that adds
 * getLanguage() and declares match(), the function left pure virtual by
 * CharsetRecog_sbcs. getName() is inherited from CharsetRecog_8859_5.
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
136 class CharsetRecog_8859_5_ru
: public CharsetRecog_8859_5
139 virtual ~CharsetRecog_8859_5_ru();
141 const char *getLanguage() const;
143 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_6_ar: subclass of CharsetRecog_8859_6 that adds
 * getLanguage() and declares match(), the function left pure virtual by
 * CharsetRecog_sbcs. getName() is inherited from CharsetRecog_8859_6.
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
146 class CharsetRecog_8859_6_ar
: public CharsetRecog_8859_6
149 virtual ~CharsetRecog_8859_6_ar();
151 const char *getLanguage() const;
153 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_7_el: subclass of CharsetRecog_8859_7 that adds
 * getLanguage() and declares match(), the function left pure virtual by
 * CharsetRecog_sbcs. getName() is inherited from CharsetRecog_8859_7.
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
156 class CharsetRecog_8859_7_el
: public CharsetRecog_8859_7
159 virtual ~CharsetRecog_8859_7_el();
161 const char *getLanguage() const;
163 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_8_I_he: subclass of CharsetRecog_8859_8 that
 * redeclares getName() (overriding the virtual one in its base), adds
 * getLanguage(), and declares match(). Unlike CharsetRecog_8859_8_he it
 * does not reuse the base-class getName().
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
166 class CharsetRecog_8859_8_I_he
: public CharsetRecog_8859_8
169 virtual ~CharsetRecog_8859_8_I_he();
171 const char *getName() const;
173 const char *getLanguage() const;
175 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_8_he: subclass of CharsetRecog_8859_8 that adds
 * getLanguage() and declares match(). getName() is not redeclared, so the
 * CharsetRecog_8859_8 version is inherited.
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
178 class CharsetRecog_8859_8_he
: public CharsetRecog_8859_8
181 virtual ~CharsetRecog_8859_8_he ();
183 const char *getLanguage() const;
185 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_8859_9_tr: subclass of CharsetRecog_8859_9 that adds
 * getLanguage() and declares match(), the function left pure virtual by
 * CharsetRecog_sbcs. getName() is inherited from CharsetRecog_8859_9.
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
188 class CharsetRecog_8859_9_tr
: public CharsetRecog_8859_9
191 virtual ~CharsetRecog_8859_9_tr ();
193 const char *getLanguage() const;
195 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_windows_1256: direct CharsetRecog_sbcs subclass declaring
 * getName(), getLanguage() and match() in one class (no intermediate
 * per-charset base as used for the 8859_x recognizers).
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
198 class CharsetRecog_windows_1256
: public CharsetRecog_sbcs
201 virtual ~CharsetRecog_windows_1256();
203 const char *getName() const;
205 const char *getLanguage() const;
207 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_windows_1251: direct CharsetRecog_sbcs subclass declaring
 * getName(), getLanguage() and match() in one class (no intermediate
 * per-charset base as used for the 8859_x recognizers).
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
210 class CharsetRecog_windows_1251
: public CharsetRecog_sbcs
213 virtual ~CharsetRecog_windows_1251();
215 const char *getName() const;
217 const char *getLanguage() const;
219 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_KOI8_R: direct CharsetRecog_sbcs subclass declaring
 * getName(), getLanguage() and match() in one class (no intermediate
 * per-charset base as used for the 8859_x recognizers).
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
223 class CharsetRecog_KOI8_R
: public CharsetRecog_sbcs
226 virtual ~CharsetRecog_KOI8_R();
228 const char *getName() const;
230 const char *getLanguage() const;
232 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
235 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * CharsetRecog_IBM424_he: intermediate CharsetRecog_sbcs subclass, declared
 * only when UCONFIG_ONLY_HTML_CONVERSION is not set. It declares a
 * destructor and getLanguage() only; getName() and match() are declared
 * by its _rtl and _ltr subclasses.
 * NOTE(review): getLanguage() is not declared in CharsetRecog_sbcs;
 * presumably it overrides a virtual in CharsetRecognizer - confirm.
 */
236 class CharsetRecog_IBM424_he
: public CharsetRecog_sbcs
239 virtual ~CharsetRecog_IBM424_he();
241 const char *getLanguage() const;
/*
 * CharsetRecog_IBM424_he_rtl: subclass of CharsetRecog_IBM424_he that
 * declares getName() and match(); getLanguage() is inherited from its
 * base. NOTE(review): "rtl" presumably means right-to-left text order -
 * confirm in the implementation.
 */
244 class CharsetRecog_IBM424_he_rtl
: public CharsetRecog_IBM424_he
{
246 virtual ~CharsetRecog_IBM424_he_rtl();
248 const char *getName() const;
250 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_IBM424_he_ltr: subclass of CharsetRecog_IBM424_he that
 * declares getName() and match(); getLanguage() is inherited from its
 * base. NOTE(review): "ltr" presumably means left-to-right text order -
 * confirm in the implementation.
 */
253 class CharsetRecog_IBM424_he_ltr
: public CharsetRecog_IBM424_he
{
254 virtual ~CharsetRecog_IBM424_he_ltr();
256 const char *getName() const;
258 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_IBM420_ar: intermediate CharsetRecog_sbcs subclass that
 * declares a destructor, getLanguage(), and its own match_sbcs(). The
 * match_sbcs() signature matches the virtual one in CharsetRecog_sbcs, so
 * it overrides it. getName() and match() are declared by the _rtl and
 * _ltr subclasses.
 * NOTE(review): the override presumably exists so an NGramParser_IBM420
 * can be used in place of the plain NGramParser - confirm in the
 * implementation.
 */
261 class CharsetRecog_IBM420_ar
: public CharsetRecog_sbcs
264 virtual ~CharsetRecog_IBM420_ar();
266 const char *getLanguage() const;
267 int32_t match_sbcs(InputText
*det
, const int32_t ngrams
[], const uint8_t charMap
[]) const;
/*
 * CharsetRecog_IBM420_ar_rtl: subclass of CharsetRecog_IBM420_ar that
 * declares getName() and match(); getLanguage() and match_sbcs() are
 * inherited from its base. NOTE(review): "rtl" presumably means
 * right-to-left text order - confirm in the implementation.
 */
271 class CharsetRecog_IBM420_ar_rtl
: public CharsetRecog_IBM420_ar
{
273 virtual ~CharsetRecog_IBM420_ar_rtl();
275 const char *getName() const;
277 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
/*
 * CharsetRecog_IBM420_ar_ltr: subclass of CharsetRecog_IBM420_ar that
 * declares getName() and match(); getLanguage() and match_sbcs() are
 * inherited from its base. NOTE(review): "ltr" presumably means
 * left-to-right text order - confirm in the implementation.
 */
280 class CharsetRecog_IBM420_ar_ltr
: public CharsetRecog_IBM420_ar
{
281 virtual ~CharsetRecog_IBM420_ar_ltr();
283 const char *getName() const;
285 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
291 #endif /* !UCONFIG_NO_CONVERSION */
292 #endif /* __CSRSBCS_H */