2 **********************************************************************
3 * Copyright (C) 2005-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
11 #include "unicode/uobject.h"
13 #if !UCONFIG_NO_CONVERSION
// NGramParser: helper class declared as a UMemory subclass.
// NOTE(review): this listing shows no braces or access specifiers for the
// class, and every line carries a leading number that looks like residue from
// a source-listing export. Members may be missing — confirm against the
// original header before relying on this declaration.
19 class NGramParser
: public UMemory
// Pointer to a read-only int32_t table (presumably the n-gram table handed to
// the constructor — confirm in the implementation).
25 const int32_t *ngramList
;
// Pointer to a read-only byte table (presumably the character map handed to
// the constructor — confirm in the implementation).
26 const uint8_t *charMap
;
// Constructor: takes an n-gram table and a character map, both read-only.
32 NGramParser(const int32_t *theNgramList
, const uint8_t *theCharMap
);
36 * Binary search for value in table, which must have exactly 64 entries.
// search(): looks for `value` in `table` and returns an int32_t. The meaning
// of the return value is not shown here — TODO confirm.
38 int32_t search(const int32_t *table
, int32_t value
);
// lookup(): takes a single n-gram value; its effect is defined in the
// implementation (not visible here).
40 void lookup(int32_t thisNgram
);
// addByte(): takes one value `b`; presumably folds it into the current
// n-gram — TODO confirm.
41 void addByte(int32_t b
);
// nextByte(): takes the input text `det` and returns an int32_t; presumably
// the next input byte or an end marker — TODO confirm.
42 int32_t nextByte(InputText
*det
);
// parse(): entry point that takes the input text `det` and returns an
// int32_t. NOTE(review): looks like a confidence/score value — confirm.
45 int32_t parse(InputText
*det
);
// CharsetRecog_sbcs: abstract base class for the recognizers declared in this
// header; derives from CharsetRecognizer.
// NOTE(review): braces and access specifiers are not visible in this listing
// and the leading numbers look like listing residue — confirm against the
// original header.
50 class CharsetRecog_sbcs
: public CharsetRecognizer
// Virtual destructor, so derived recognizers can be destroyed through a
// base-class pointer.
54 virtual ~CharsetRecog_sbcs();
// Pure virtual: each concrete recognizer supplies its own name string.
55 virtual const char *getName() const = 0;
// Pure virtual: each concrete recognizer supplies its own match(). Takes the
// input text `det` and a `results` object; how `results` is filled and what
// the UBool return means are defined by the implementations (not shown here).
56 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const = 0;
// Shared helper taking the input text plus an n-gram table and a character
// map; returns an int32_t. NOTE(review): presumably a confidence value built
// with NGramParser — confirm in the implementation.
57 virtual int32_t match_sbcs(InputText
*det
, const int32_t ngrams
[], const uint8_t charMap
[]) const;
// CharsetRecog_8859_1: recognizer derived from CharsetRecog_sbcs. Declares a
// virtual destructor, getName() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing; the name string returned by getName() lives in the implementation.
60 class CharsetRecog_8859_1
: public CharsetRecog_sbcs
63 virtual ~CharsetRecog_8859_1();
// Returns the recognizer's name as a C string.
64 const char *getName() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
65 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_2: recognizer derived from CharsetRecog_sbcs. Declares a
// virtual destructor, getName() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing; the name string returned by getName() lives in the implementation.
68 class CharsetRecog_8859_2
: public CharsetRecog_sbcs
71 virtual ~CharsetRecog_8859_2();
// Returns the recognizer's name as a C string.
72 const char *getName() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
73 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_5: intermediate class derived from CharsetRecog_sbcs.
// This listing shows only a virtual destructor and getName(); no match()
// declaration is visible here.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
76 class CharsetRecog_8859_5
: public CharsetRecog_sbcs
79 virtual ~CharsetRecog_8859_5();
// Returns the recognizer's name as a C string (value defined elsewhere).
80 const char *getName() const;
// CharsetRecog_8859_6: intermediate class derived from CharsetRecog_sbcs.
// This listing shows only a virtual destructor and getName(); no match()
// declaration is visible here.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
83 class CharsetRecog_8859_6
: public CharsetRecog_sbcs
86 virtual ~CharsetRecog_8859_6();
// Returns the recognizer's name as a C string (value defined elsewhere).
88 const char *getName() const;
// CharsetRecog_8859_7: intermediate class derived from CharsetRecog_sbcs.
// This listing shows only a virtual destructor and getName(); no match()
// declaration is visible here.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
91 class CharsetRecog_8859_7
: public CharsetRecog_sbcs
94 virtual ~CharsetRecog_8859_7();
// Returns the recognizer's name as a C string (value defined elsewhere).
96 const char *getName() const;
// CharsetRecog_8859_8: intermediate class derived from CharsetRecog_sbcs.
// This listing shows only a virtual destructor and getName(); no match()
// declaration is visible here.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
99 class CharsetRecog_8859_8
: public CharsetRecog_sbcs
102 virtual ~CharsetRecog_8859_8();
// Explicitly marked virtual here; CharsetRecog_8859_8_I_he declares its own
// getName() further down in this header.
104 virtual const char *getName() const;
// CharsetRecog_8859_9: intermediate class derived from CharsetRecog_sbcs.
// This listing shows only a virtual destructor and getName(); no match()
// declaration is visible here.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
107 class CharsetRecog_8859_9
: public CharsetRecog_sbcs
110 virtual ~CharsetRecog_8859_9();
// Returns the recognizer's name as a C string (value defined elsewhere).
112 const char *getName() const;
// CharsetRecog_8859_5_ru: recognizer derived from CharsetRecog_8859_5.
// Declares a virtual destructor, getLanguage() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
117 class CharsetRecog_8859_5_ru
: public CharsetRecog_8859_5
120 virtual ~CharsetRecog_8859_5_ru();
// Returns a C string; presumably a language code matching the "_ru" suffix —
// TODO confirm in the implementation.
122 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
124 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_6_ar: recognizer derived from CharsetRecog_8859_6.
// Declares a virtual destructor, getLanguage() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
127 class CharsetRecog_8859_6_ar
: public CharsetRecog_8859_6
130 virtual ~CharsetRecog_8859_6_ar();
// Returns a C string; presumably a language code matching the "_ar" suffix —
// TODO confirm in the implementation.
132 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
134 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_7_el: recognizer derived from CharsetRecog_8859_7.
// Declares a virtual destructor, getLanguage() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
137 class CharsetRecog_8859_7_el
: public CharsetRecog_8859_7
140 virtual ~CharsetRecog_8859_7_el();
// Returns a C string; presumably a language code matching the "_el" suffix —
// TODO confirm in the implementation.
142 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
144 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_8_I_he: recognizer derived from CharsetRecog_8859_8.
// Unlike its sibling CharsetRecog_8859_8_he, it also declares its own
// getName() in addition to getLanguage() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
147 class CharsetRecog_8859_8_I_he
: public CharsetRecog_8859_8
150 virtual ~CharsetRecog_8859_8_I_he();
// Redeclares getName(), which the base class CharsetRecog_8859_8 marks
// virtual; the returned string is defined in the implementation.
152 const char *getName() const;
// Returns a C string; presumably a language code matching the "_he" suffix —
// TODO confirm in the implementation.
154 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
156 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_8_he: recognizer derived from CharsetRecog_8859_8.
// Declares a virtual destructor, getLanguage() and match(); it does not
// redeclare getName() in this listing.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
159 class CharsetRecog_8859_8_he
: public CharsetRecog_8859_8
162 virtual ~CharsetRecog_8859_8_he ();
// Returns a C string; presumably a language code matching the "_he" suffix —
// TODO confirm in the implementation.
164 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
166 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_8859_9_tr: recognizer derived from CharsetRecog_8859_9.
// Declares a virtual destructor, getLanguage() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
169 class CharsetRecog_8859_9_tr
: public CharsetRecog_8859_9
172 virtual ~CharsetRecog_8859_9_tr ();
// Returns a C string; presumably a language code matching the "_tr" suffix —
// TODO confirm in the implementation.
174 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
176 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_windows_1256: recognizer derived directly from
// CharsetRecog_sbcs. Declares a virtual destructor, getName(), getLanguage()
// and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
179 class CharsetRecog_windows_1256
: public CharsetRecog_sbcs
182 virtual ~CharsetRecog_windows_1256();
// Returns the recognizer's name as a C string (value defined elsewhere).
184 const char *getName() const;
// Returns a C string; presumably a language code — the value is defined in
// the implementation, TODO confirm.
186 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
188 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_windows_1251: recognizer derived directly from
// CharsetRecog_sbcs. Declares a virtual destructor, getName(), getLanguage()
// and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
191 class CharsetRecog_windows_1251
: public CharsetRecog_sbcs
194 virtual ~CharsetRecog_windows_1251();
// Returns the recognizer's name as a C string (value defined elsewhere).
196 const char *getName() const;
// Returns a C string; presumably a language code — the value is defined in
// the implementation, TODO confirm.
198 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
200 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_KOI8_R: recognizer derived directly from CharsetRecog_sbcs.
// Declares a virtual destructor, getName(), getLanguage() and match().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
204 class CharsetRecog_KOI8_R
: public CharsetRecog_sbcs
207 virtual ~CharsetRecog_KOI8_R();
// Returns the recognizer's name as a C string (value defined elsewhere).
209 const char *getName() const;
// Returns a C string; presumably a language code — the value is defined in
// the implementation, TODO confirm.
211 const char *getLanguage() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
213 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM424_he: intermediate class derived from CharsetRecog_sbcs
// and used as the base of the _rtl and _ltr variants declared after it. This
// listing shows only a virtual destructor and getLanguage().
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
216 class CharsetRecog_IBM424_he
: public CharsetRecog_sbcs
219 virtual ~CharsetRecog_IBM424_he();
// Returns a C string; presumably a language code matching the "_he" suffix —
// TODO confirm in the implementation.
221 const char *getLanguage() const;
// CharsetRecog_IBM424_he_rtl: recognizer derived from CharsetRecog_IBM424_he.
// Declares a virtual destructor, getName() and match().
// NOTE(review): the "_rtl" suffix presumably means right-to-left text order —
// confirm. The closing brace and access specifiers are not visible in this
// listing.
224 class CharsetRecog_IBM424_he_rtl
: public CharsetRecog_IBM424_he
{
226 virtual ~CharsetRecog_IBM424_he_rtl();
// Returns the recognizer's name as a C string (value defined elsewhere).
228 const char *getName() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
230 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM424_he_ltr: recognizer derived from CharsetRecog_IBM424_he.
// Declares a virtual destructor, getName() and match().
// NOTE(review): the "_ltr" suffix presumably means left-to-right text order —
// confirm. The closing brace and access specifiers are not visible in this
// listing.
233 class CharsetRecog_IBM424_he_ltr
: public CharsetRecog_IBM424_he
{
234 virtual ~CharsetRecog_IBM424_he_ltr();
// Returns the recognizer's name as a C string (value defined elsewhere).
236 const char *getName() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
238 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM420_ar: intermediate class derived from CharsetRecog_sbcs
// and used as the base of the _rtl and _ltr variants declared after it. In
// addition to getLanguage() it declares input pre/post-processing helpers and
// two data members.
// NOTE(review): class braces/access specifiers are not visible in this
// listing — confirm against the original header.
241 class CharsetRecog_IBM420_ar
: public CharsetRecog_sbcs
244 virtual ~CharsetRecog_IBM420_ar();
// Returns a C string; presumably a language code matching the "_ar" suffix —
// TODO confirm in the implementation.
246 const char *getLanguage() const;
// matchInit()/matchFinish(): both take the input text. NOTE(review):
// presumably they swap in a processed copy of the input bytes before matching
// and restore the original afterwards, using the two prev_* members below —
// confirm in the implementation.
249 void matchInit(InputText
*textIn
);
250 void matchFinish(InputText
*textIn
);
// Mutable byte pointer and length. Presumably the saved original input buffer
// and its size; ownership is not shown here — TODO confirm.
253 uint8_t *prev_fInputBytes
;
254 int32_t prev_fInputBytesLength
;
// Predicate on a single byte; presumably tests for a Lam-Alef ligature code —
// TODO confirm.
257 UBool
isLamAlef(uint8_t b
);
// Both helpers take a read-only input buffer with its length, report a length
// through the `length` reference parameter, and return a mutable byte pointer.
// NOTE(review): who frees the returned buffer is not visible here — confirm
// before using.
258 uint8_t *unshapeLamAlef(const uint8_t *inputBytes
, int32_t inputBytesLength
, int32_t &length
);
259 uint8_t *unshape(const uint8_t *inputBytes
, int32_t inputBytesLength
, int32_t &length
);
// CharsetRecog_IBM420_ar_rtl: recognizer derived from CharsetRecog_IBM420_ar.
// Declares a virtual destructor, getName() and match().
// NOTE(review): the "_rtl" suffix presumably means right-to-left text order —
// confirm. The closing brace and access specifiers are not visible in this
// listing.
262 class CharsetRecog_IBM420_ar_rtl
: public CharsetRecog_IBM420_ar
{
264 virtual ~CharsetRecog_IBM420_ar_rtl();
// Returns the recognizer's name as a C string (value defined elsewhere).
266 const char *getName() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
268 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
// CharsetRecog_IBM420_ar_ltr: recognizer derived from CharsetRecog_IBM420_ar.
// Declares a virtual destructor, getName() and match().
// NOTE(review): the "_ltr" suffix presumably means left-to-right text order —
// confirm. The closing brace and access specifiers are not visible in this
// listing.
271 class CharsetRecog_IBM420_ar_ltr
: public CharsetRecog_IBM420_ar
{
272 virtual ~CharsetRecog_IBM420_ar_ltr();
// Returns the recognizer's name as a C string (value defined elsewhere).
274 const char *getName() const;
// Takes the input text `det` and a `results` object and returns a UBool;
// exact semantics are defined in the implementation — TODO confirm.
276 virtual UBool
match(InputText
*det
, CharsetMatch
*results
) const;
281 #endif /* !UCONFIG_NO_CONVERSION */
282 #endif /* __CSRSBCS_H */