]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrsbcs.cpp
2 **********************************************************************
3 * Copyright (C) 2005-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_CONVERSION
16 #define N_GRAM_MASK 0xFFFFFF
20 NGramParser::NGramParser(const int32_t *theNgramList
, const uint8_t *theCharMap
)
21 :byteIndex(0), ngram(0)
23 ngramList
= theNgramList
;
26 ngramCount
= hitCount
= 0;
30 * Binary search for value in table, which must have exactly 64 entries.
33 int32_t NGramParser::search(const int32_t *table
, int32_t value
)
37 if (table
[index
+ 32] <= value
) {
41 if (table
[index
+ 16] <= value
) {
45 if (table
[index
+ 8] <= value
) {
49 if (table
[index
+ 4] <= value
) {
53 if (table
[index
+ 2] <= value
) {
57 if (table
[index
+ 1] <= value
) {
61 if (table
[index
] > value
) {
65 if (index
< 0 || table
[index
] != value
) {
72 void NGramParser::lookup(int32_t thisNgram
)
76 if (search(ngramList
, thisNgram
) >= 0) {
82 void NGramParser::addByte(int32_t b
)
84 ngram
= ((ngram
<< 8) + b
) & N_GRAM_MASK
;
88 int32_t NGramParser::nextByte(InputText
*det
)
90 if (byteIndex
>= det
->fInputLen
) {
94 return det
->fInputBytes
[byteIndex
++];
97 int32_t NGramParser::parse(InputText
*det
)
100 bool ignoreSpace
= FALSE
;
102 while ((b
= nextByte(det
)) >= 0) {
103 uint8_t mb
= charMap
[b
];
105 // TODO: 0x20 might not be a space in all character sets...
107 if (!(mb
== 0x20 && ignoreSpace
)) {
111 ignoreSpace
= (mb
== 0x20);
115 // TODO: Is this OK? The buffer could have ended in the middle of a word...
118 double rawPercent
= (double) hitCount
/ (double) ngramCount
;
120 // if (rawPercent <= 2.0) {
124 // TODO - This is a bit of a hack to take care of a case
125 // were we were getting a confidence of 135...
126 if (rawPercent
> 0.33) {
130 return (int32_t) (rawPercent
* 300.0);
133 CharsetRecog_sbcs::CharsetRecog_sbcs()
136 // nothing else to do
139 CharsetRecog_sbcs::~CharsetRecog_sbcs()
144 int32_t CharsetRecog_sbcs::match_sbcs(InputText
*det
, const int32_t ngrams
[], const uint8_t byteMap
[])
146 NGramParser
*parser
= new NGramParser(ngrams
, byteMap
);
149 haveC1Bytes
= det
->fC1Bytes
;
150 result
= parser
->parse(det
);
156 static const uint8_t charMap_8859_1
[] = {
157 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
158 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
159 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
160 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
161 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
162 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
163 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
164 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
165 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
166 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
167 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
168 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
169 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
170 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
171 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
172 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
173 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
174 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
175 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
176 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
177 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
178 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
179 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
180 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
181 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
182 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
183 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
184 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
185 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
186 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
187 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
188 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
191 static const uint8_t charMap_8859_2
[] = {
192 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
193 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
194 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
195 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
196 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
197 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
198 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
199 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
200 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
201 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
202 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
203 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
204 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
205 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
206 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
207 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
208 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
209 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
210 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
211 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
212 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
213 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
214 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
215 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
216 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
217 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
218 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
219 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
220 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
221 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
222 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
223 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
226 static const uint8_t charMap_8859_5
[] = {
227 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
228 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
229 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
230 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
231 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
232 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
233 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
234 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
235 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
236 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
237 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
238 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
239 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
240 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
241 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
242 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
243 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
244 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
245 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
246 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
247 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
248 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
249 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
250 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
251 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
252 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
253 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
254 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
255 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
256 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
257 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
258 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
261 static const uint8_t charMap_8859_6
[] = {
262 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
263 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
264 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
265 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
266 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
267 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
268 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
269 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
270 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
271 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
272 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
274 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
275 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
276 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
277 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
278 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
279 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
280 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
281 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
282 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
283 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
284 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
285 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
286 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
287 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
288 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
289 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
290 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
291 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
292 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
293 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
296 static const uint8_t charMap_8859_7
[] = {
297 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
298 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
299 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
300 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
301 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
302 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
303 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
304 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
305 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
306 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
307 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
308 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
309 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
310 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
311 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
312 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
313 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
314 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
315 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
316 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
317 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
318 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
319 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
320 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
321 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
322 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
323 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
324 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
325 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
326 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
327 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
328 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
331 static const uint8_t charMap_8859_8
[] = {
332 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
333 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
334 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
335 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
336 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
337 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
338 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
339 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
340 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
341 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
342 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
343 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
344 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
345 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
346 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
347 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
348 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
349 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
350 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
351 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
352 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
353 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
354 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
355 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
356 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
357 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
358 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
359 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
360 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
361 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
362 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
363 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
366 static const uint8_t charMap_8859_9
[] = {
367 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
368 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
369 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
370 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
371 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
372 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
373 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
374 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
375 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
378 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
379 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
380 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
381 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
382 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
383 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
384 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
385 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
386 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
387 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
388 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
389 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
390 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
391 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
392 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
393 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
394 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
395 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
396 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
397 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
398 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
401 static const int32_t ngrams_windows_1251
[] = {
402 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
403 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
404 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
405 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
408 static const uint8_t charMap_windows_1251
[] = {
409 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
410 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
411 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
412 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
413 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
414 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
415 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
416 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
417 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
418 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
419 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
420 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
421 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
422 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
423 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
424 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
425 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
426 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
427 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
428 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
429 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
430 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
431 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
432 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
433 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
434 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
435 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
436 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
437 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
438 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
439 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
440 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
443 static const int32_t ngrams_windows_1256
[] = {
444 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
445 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
446 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
447 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
450 static const uint8_t charMap_windows_1256
[] = {
451 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
452 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
453 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
454 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
455 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
456 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
457 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
458 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
459 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
460 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
461 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
462 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
463 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
464 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
465 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
466 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
467 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
468 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
469 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
470 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
471 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
472 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
473 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
474 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
475 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
476 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
477 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
478 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
479 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
480 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
481 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
482 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
485 static const int32_t ngrams_KOI8_R
[] = {
486 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
487 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
488 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
489 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
492 static const uint8_t charMap_KOI8_R
[] = {
493 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
494 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
495 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
496 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
497 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
498 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
499 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
500 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
501 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
502 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
503 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
504 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
505 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
506 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
507 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
508 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
509 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
510 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
511 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
512 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
513 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
514 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
515 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
516 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
517 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
518 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
519 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
520 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
521 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
522 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
523 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
524 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
527 //ISO-8859-1,2,5,6,7,8,9 Ngrams
528 static const int32_t ngrams_8859_1_en
[] = {
529 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
530 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
531 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
532 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
535 static const int32_t ngrams_8859_1_da
[] = {
536 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
537 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
538 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
539 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
542 static const int32_t ngrams_8859_1_de
[] = {
543 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
544 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
545 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
546 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
549 static const int32_t ngrams_8859_1_es
[] = {
550 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
551 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
552 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
553 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
556 static const int32_t ngrams_8859_1_fr
[] = {
557 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
558 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
559 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
560 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
563 static const int32_t ngrams_8859_1_it
[] = {
564 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
565 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
566 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
567 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
570 static const int32_t ngrams_8859_1_nl
[] = {
571 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
572 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
573 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
574 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
577 static const int32_t ngrams_8859_1_no
[] = {
578 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
579 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
580 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
581 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
584 static const int32_t ngrams_8859_1_pt
[] = {
585 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
586 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
587 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
588 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
591 static const int32_t ngrams_8859_1_sv
[] = {
592 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
593 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
594 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
595 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
598 static const int32_t ngrams_8859_2_cs
[] = {
599 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
600 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
601 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
602 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
605 static const int32_t ngrams_8859_2_hu
[] = {
606 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
607 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
608 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
609 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
612 static const int32_t ngrams_8859_2_pl
[] = {
613 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
614 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
615 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
616 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
619 static const int32_t ngrams_8859_2_ro
[] = {
620 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
621 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
622 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
623 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
// N-gram table handed to match_sbcs() together with charMap_8859_5 by
// CharsetRecog_8859_5_ru::match().
// Holds 64 entries (4 rows of 16) listed in ascending order; NGramParser::search()
// is a binary search that requires exactly 64 entries.
// Every value fits in 24 bits, the width NGramParser keeps via N_GRAM_MASK.
626 static const int32_t ngrams_8859_5_ru
[] = {
627 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
628 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
629 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
630 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
// N-gram table handed to match_sbcs() together with charMap_8859_6 by
// CharsetRecog_8859_6_ar::match().
// Holds 64 entries (4 rows of 16) listed in ascending order; NGramParser::search()
// is a binary search that requires exactly 64 entries.
// Every value fits in 24 bits, the width NGramParser keeps via N_GRAM_MASK.
633 static const int32_t ngrams_8859_6_ar
[] = {
634 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
635 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
636 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
637 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
// N-gram table handed to match_sbcs() together with charMap_8859_7 by
// CharsetRecog_8859_7_el::match().
// Holds 64 entries (4 rows of 16) listed in ascending order; NGramParser::search()
// is a binary search that requires exactly 64 entries.
// Every value fits in 24 bits, the width NGramParser keeps via N_GRAM_MASK.
640 static const int32_t ngrams_8859_7_el
[] = {
641 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
642 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
643 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
644 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
// N-gram table handed to match_sbcs() together with charMap_8859_8 by
// CharsetRecog_8859_8_I_he::match().
// Holds 64 entries (4 rows of 16) listed in ascending order; NGramParser::search()
// is a binary search that requires exactly 64 entries.
// Every value fits in 24 bits, the width NGramParser keeps via N_GRAM_MASK.
647 static const int32_t ngrams_8859_8_I_he
[] = {
648 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
649 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
650 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
651 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
// N-gram table handed to match_sbcs() together with charMap_8859_8 by
// CharsetRecog_8859_8_he::match().
// Holds 64 entries (4 rows of 16) listed in ascending order; NGramParser::search()
// is a binary search that requires exactly 64 entries.
// Every value fits in 24 bits, the width NGramParser keeps via N_GRAM_MASK.
654 static const int32_t ngrams_8859_8_he
[] = {
655 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
656 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
657 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
658 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
// N-gram table handed to match_sbcs() together with charMap_8859_9 by
// CharsetRecog_8859_9_tr::match().
// Holds 64 entries (4 rows of 16) listed in ascending order; NGramParser::search()
// is a binary search that requires exactly 64 entries.
// Every value fits in 24 bits, the width NGramParser keeps via N_GRAM_MASK.
661 static const int32_t ngrams_8859_9_tr
[] = {
662 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
663 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
664 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
665 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
// Destructor of CharsetRecog_8859_1. NOTE(review): its body is not present in this extract.
668 CharsetRecog_8859_1::~CharsetRecog_8859_1()
673 const char *CharsetRecog_8859_1::getName() const
675 return haveC1Bytes
? "windows-1252" : "ISO-8859-1";
// getLanguage() of CharsetRecog_8859_1_en. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
678 const char *CharsetRecog_8859_1_en::getLanguage() const
// Destructor of CharsetRecog_8859_1_en. NOTE(review): its body is not present in this extract.
683 CharsetRecog_8859_1_en::~CharsetRecog_8859_1_en()
688 int32_t CharsetRecog_8859_1_en::match(InputText
*textIn
)
690 int32_t result
= match_sbcs(textIn
, ngrams_8859_1_en
, charMap_8859_1
);
692 // printf("8859_1_en: result = %d\n", result);
693 return result
; //match_sbcs(textIn, ngrams, charMap);
// Destructor of CharsetRecog_8859_1_da. NOTE(review): its body is not present in this extract.
696 CharsetRecog_8859_1_da::~CharsetRecog_8859_1_da()
// getLanguage() of CharsetRecog_8859_1_da. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
701 const char *CharsetRecog_8859_1_da::getLanguage() const
706 int32_t CharsetRecog_8859_1_da::match(InputText
*textIn
)
708 return match_sbcs(textIn
, ngrams_8859_1_da
, charMap_8859_1
);
711 CharsetRecog_8859_1_de::~CharsetRecog_8859_1_de() {}
// getLanguage() of CharsetRecog_8859_1_de. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
713 const char *CharsetRecog_8859_1_de::getLanguage() const
718 int32_t CharsetRecog_8859_1_de::match(InputText
*textIn
)
720 return match_sbcs(textIn
, ngrams_8859_1_de
, charMap_8859_1
);
// Destructor of CharsetRecog_8859_1_es. NOTE(review): its body is not present in this extract.
723 CharsetRecog_8859_1_es::~CharsetRecog_8859_1_es()
// getLanguage() of CharsetRecog_8859_1_es. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
728 const char *CharsetRecog_8859_1_es::getLanguage() const
733 int32_t CharsetRecog_8859_1_es::match(InputText
*textIn
)
735 return match_sbcs(textIn
, ngrams_8859_1_es
, charMap_8859_1
);
// Destructor of CharsetRecog_8859_1_fr. NOTE(review): its body is not present in this extract.
738 CharsetRecog_8859_1_fr::~CharsetRecog_8859_1_fr()
// getLanguage() of CharsetRecog_8859_1_fr. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
743 const char *CharsetRecog_8859_1_fr::getLanguage() const
748 int32_t CharsetRecog_8859_1_fr::match(InputText
*textIn
)
750 return match_sbcs(textIn
, ngrams_8859_1_fr
, charMap_8859_1
);
// Destructor of CharsetRecog_8859_1_it. NOTE(review): its body is not present in this extract.
753 CharsetRecog_8859_1_it::~CharsetRecog_8859_1_it()
// getLanguage() of CharsetRecog_8859_1_it. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
758 const char *CharsetRecog_8859_1_it::getLanguage() const
763 int32_t CharsetRecog_8859_1_it::match(InputText
*textIn
)
765 return match_sbcs(textIn
, ngrams_8859_1_it
, charMap_8859_1
);
// Destructor of CharsetRecog_8859_1_nl. NOTE(review): its body is not present in this extract.
768 CharsetRecog_8859_1_nl::~CharsetRecog_8859_1_nl()
// getLanguage() of CharsetRecog_8859_1_nl. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
773 const char *CharsetRecog_8859_1_nl::getLanguage() const
778 int32_t CharsetRecog_8859_1_nl::match(InputText
*textIn
)
780 return match_sbcs(textIn
, ngrams_8859_1_nl
, charMap_8859_1
);
783 CharsetRecog_8859_1_no::~CharsetRecog_8859_1_no() {}
// getLanguage() of CharsetRecog_8859_1_no. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
785 const char *CharsetRecog_8859_1_no::getLanguage() const
790 int32_t CharsetRecog_8859_1_no::match(InputText
*textIn
)
792 return match_sbcs(textIn
, ngrams_8859_1_no
, charMap_8859_1
);
// Destructor of CharsetRecog_8859_1_pt. NOTE(review): its body is not present in this extract.
795 CharsetRecog_8859_1_pt::~CharsetRecog_8859_1_pt()
// getLanguage() of CharsetRecog_8859_1_pt. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
800 const char *CharsetRecog_8859_1_pt::getLanguage() const
805 int32_t CharsetRecog_8859_1_pt::match(InputText
*textIn
)
807 return match_sbcs(textIn
, ngrams_8859_1_pt
, charMap_8859_1
);
810 CharsetRecog_8859_1_sv::~CharsetRecog_8859_1_sv() {}
// getLanguage() of CharsetRecog_8859_1_sv. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
812 const char *CharsetRecog_8859_1_sv::getLanguage() const
817 int32_t CharsetRecog_8859_1_sv::match(InputText
*textIn
)
819 return match_sbcs(textIn
, ngrams_8859_1_sv
, charMap_8859_1
);
// Destructor of CharsetRecog_8859_2. NOTE(review): its body is not present in this extract.
822 CharsetRecog_8859_2::~CharsetRecog_8859_2()
827 const char *CharsetRecog_8859_2::getName() const
829 return haveC1Bytes
? "windows-1250" : "ISO-8859-2";
// Destructor of CharsetRecog_8859_2_cs. NOTE(review): its body is not present in this extract.
832 CharsetRecog_8859_2_cs::~CharsetRecog_8859_2_cs()
// getLanguage() of CharsetRecog_8859_2_cs. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
837 const char *CharsetRecog_8859_2_cs::getLanguage() const
842 int32_t CharsetRecog_8859_2_cs::match(InputText
*textIn
)
844 return match_sbcs(textIn
, ngrams_8859_2_cs
, charMap_8859_2
);
// Destructor of CharsetRecog_8859_2_hu. NOTE(review): its body is not present in this extract.
847 CharsetRecog_8859_2_hu::~CharsetRecog_8859_2_hu()
// getLanguage() of CharsetRecog_8859_2_hu. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
852 const char *CharsetRecog_8859_2_hu::getLanguage() const
857 int32_t CharsetRecog_8859_2_hu::match(InputText
*textIn
)
859 return match_sbcs(textIn
, ngrams_8859_2_hu
, charMap_8859_2
);
// Destructor of CharsetRecog_8859_2_pl. NOTE(review): its body is not present in this extract.
862 CharsetRecog_8859_2_pl::~CharsetRecog_8859_2_pl()
// getLanguage() of CharsetRecog_8859_2_pl. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
867 const char *CharsetRecog_8859_2_pl::getLanguage() const
872 int32_t CharsetRecog_8859_2_pl::match(InputText
*textIn
)
874 return match_sbcs(textIn
, ngrams_8859_2_pl
, charMap_8859_2
);
// Destructor of CharsetRecog_8859_2_ro. NOTE(review): its body is not present in this extract.
877 CharsetRecog_8859_2_ro::~CharsetRecog_8859_2_ro()
// getLanguage() of CharsetRecog_8859_2_ro. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
882 const char *CharsetRecog_8859_2_ro::getLanguage() const
887 int32_t CharsetRecog_8859_2_ro::match(InputText
*textIn
)
889 return match_sbcs(textIn
, ngrams_8859_2_ro
, charMap_8859_2
);
// Destructor of CharsetRecog_8859_5. NOTE(review): its body is not present in this extract.
892 CharsetRecog_8859_5::~CharsetRecog_8859_5()
// getName() of CharsetRecog_8859_5. NOTE(review): the returned name is not
// present in this extract; confirm it against the full file.
897 const char *CharsetRecog_8859_5::getName() const
// Destructor of CharsetRecog_8859_5_ru. NOTE(review): its body is not present in this extract.
902 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
// getLanguage() of CharsetRecog_8859_5_ru. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
907 const char *CharsetRecog_8859_5_ru::getLanguage() const
912 int32_t CharsetRecog_8859_5_ru::match(InputText
*textIn
)
914 return match_sbcs(textIn
, ngrams_8859_5_ru
, charMap_8859_5
);
// Destructor of CharsetRecog_8859_6. NOTE(review): its body is not present in this extract.
917 CharsetRecog_8859_6::~CharsetRecog_8859_6()
// getName() of CharsetRecog_8859_6. NOTE(review): the returned name is not
// present in this extract; confirm it against the full file.
922 const char *CharsetRecog_8859_6::getName() const
// Destructor of CharsetRecog_8859_6_ar. NOTE(review): its body is not present in this extract.
927 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
// getLanguage() of CharsetRecog_8859_6_ar. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
932 const char *CharsetRecog_8859_6_ar::getLanguage() const
937 int32_t CharsetRecog_8859_6_ar::match(InputText
*textIn
)
939 return match_sbcs(textIn
, ngrams_8859_6_ar
, charMap_8859_6
);
// Destructor of CharsetRecog_8859_7. NOTE(review): its body is not present in this extract.
942 CharsetRecog_8859_7::~CharsetRecog_8859_7()
947 const char *CharsetRecog_8859_7::getName() const
949 return haveC1Bytes
? "windows-1253" : "ISO-8859-7";
// Destructor of CharsetRecog_8859_7_el. NOTE(review): its body is not present in this extract.
952 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
// getLanguage() of CharsetRecog_8859_7_el. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
957 const char *CharsetRecog_8859_7_el::getLanguage() const
962 int32_t CharsetRecog_8859_7_el::match(InputText
*textIn
)
964 return match_sbcs(textIn
, ngrams_8859_7_el
, charMap_8859_7
);
// Destructor of CharsetRecog_8859_8. NOTE(review): its body is not present in this extract.
967 CharsetRecog_8859_8::~CharsetRecog_8859_8()
972 const char *CharsetRecog_8859_8::getName() const
974 return haveC1Bytes
? "windows-1255" : "ISO-8859-8";
// Destructor of CharsetRecog_8859_8_I_he. NOTE(review): its body is not present in this extract.
977 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
982 const char *CharsetRecog_8859_8_I_he::getName() const
984 return haveC1Bytes
? "windows-1255" : "ISO-8859-8-I";
// getLanguage() of CharsetRecog_8859_8_I_he. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
987 const char *CharsetRecog_8859_8_I_he::getLanguage() const
992 int32_t CharsetRecog_8859_8_I_he::match(InputText
*textIn
)
994 return match_sbcs(textIn
, ngrams_8859_8_I_he
, charMap_8859_8
);
// Destructor of CharsetRecog_8859_8_he. NOTE(review): its body is not present in this extract.
997 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
// getLanguage() of CharsetRecog_8859_8_he. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
1002 const char *CharsetRecog_8859_8_he::getLanguage() const
1007 int32_t CharsetRecog_8859_8_he::match(InputText
*textIn
)
1009 return match_sbcs(textIn
, ngrams_8859_8_he
, charMap_8859_8
);
// Destructor of CharsetRecog_8859_9. NOTE(review): its body is not present in this extract.
1012 CharsetRecog_8859_9::~CharsetRecog_8859_9()
1017 const char *CharsetRecog_8859_9::getName() const
1019 return haveC1Bytes
? "windows-1254" : "ISO-8859-9";
// Destructor of CharsetRecog_8859_9_tr. NOTE(review): its body is not present in this extract.
1022 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
// getLanguage() of CharsetRecog_8859_9_tr. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
1027 const char *CharsetRecog_8859_9_tr::getLanguage() const
1032 int32_t CharsetRecog_8859_9_tr::match(InputText
*textIn
)
1034 return match_sbcs(textIn
, ngrams_8859_9_tr
, charMap_8859_9
);
// Destructor of CharsetRecog_windows_1256. NOTE(review): its body is not present in this extract.
1037 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1042 const char *CharsetRecog_windows_1256::getName() const
1044 return "windows-1256";
// getLanguage() of CharsetRecog_windows_1256. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
1047 const char *CharsetRecog_windows_1256::getLanguage() const
1052 int32_t CharsetRecog_windows_1256::match(InputText
*textIn
)
1054 return match_sbcs(textIn
, ngrams_windows_1256
, charMap_windows_1256
);
// Destructor of CharsetRecog_windows_1251. NOTE(review): its body is not present in this extract.
1057 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1062 const char *CharsetRecog_windows_1251::getName() const
1064 return "windows-1251";
// getLanguage() of CharsetRecog_windows_1251. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
1067 const char *CharsetRecog_windows_1251::getLanguage() const
1072 int32_t CharsetRecog_windows_1251::match(InputText
*textIn
)
1074 return match_sbcs(textIn
, ngrams_windows_1251
, charMap_windows_1251
);
// Destructor of CharsetRecog_KOI8_R. NOTE(review): its body is not present in this extract.
1077 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
// getName() of CharsetRecog_KOI8_R. NOTE(review): the returned name is not
// present in this extract; confirm it against the full file.
1082 const char *CharsetRecog_KOI8_R::getName() const
// getLanguage() of CharsetRecog_KOI8_R. NOTE(review): the returned string is not
// present in this extract; confirm it against the full file.
1087 const char *CharsetRecog_KOI8_R::getLanguage() const
1092 int32_t CharsetRecog_KOI8_R::match(InputText
*textIn
)
1094 return match_sbcs(textIn
, ngrams_KOI8_R
, charMap_KOI8_R
);