]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/csrsbcs.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / i18n / csrsbcs.cpp
CommitLineData
73c04bcf
A
1/*
2 **********************************************************************
3 * Copyright (C) 2005-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_CONVERSION
11#include "csrsbcs.h"
12
13#include <stdio.h>
14
15#define N_GRAM_SIZE 3
16#define N_GRAM_MASK 0xFFFFFF
17
18U_NAMESPACE_BEGIN
19
20NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
21 :byteIndex(0), ngram(0)
22{
23 ngramList = theNgramList;
24 charMap = theCharMap;
25
26 ngramCount = hitCount = 0;
27}
28
29/*
30 * Binary search for value in table, which must have exactly 64 entries.
31 */
32
33int32_t NGramParser::search(const int32_t *table, int32_t value)
34{
35 int32_t index = 0;
36
37 if (table[index + 32] <= value) {
38 index += 32;
39 }
40
41 if (table[index + 16] <= value) {
42 index += 16;
43 }
44
45 if (table[index + 8] <= value) {
46 index += 8;
47 }
48
49 if (table[index + 4] <= value) {
50 index += 4;
51 }
52
53 if (table[index + 2] <= value) {
54 index += 2;
55 }
56
57 if (table[index + 1] <= value) {
58 index += 1;
59 }
60
61 if (table[index] > value) {
62 index -= 1;
63 }
64
65 if (index < 0 || table[index] != value) {
66 return -1;
67 }
68
69 return index;
70}
71
72void NGramParser::lookup(int32_t thisNgram)
73{
74 ngramCount += 1;
75
76 if (search(ngramList, thisNgram) >= 0) {
77 hitCount += 1;
78 }
79
80}
81
82void NGramParser::addByte(int32_t b)
83{
84 ngram = ((ngram << 8) + b) & N_GRAM_MASK;
85 lookup(ngram);
86}
87
88int32_t NGramParser::nextByte(InputText *det)
89{
90 if (byteIndex >= det->fInputLen) {
91 return -1;
92 }
93
94 return det->fInputBytes[byteIndex++];
95}
96
97int32_t NGramParser::parse(InputText *det)
98{
99 int32_t b;
100 bool ignoreSpace = FALSE;
101
102 while ((b = nextByte(det)) >= 0) {
103 uint8_t mb = charMap[b];
104
105 // TODO: 0x20 might not be a space in all character sets...
106 if (mb != 0) {
107 if (!(mb == 0x20 && ignoreSpace)) {
108 addByte(mb);
109 }
110
111 ignoreSpace = (mb == 0x20);
112 }
113 }
114
115 // TODO: Is this OK? The buffer could have ended in the middle of a word...
116 addByte(0x20);
117
118 double rawPercent = (double) hitCount / (double) ngramCount;
119
120 // if (rawPercent <= 2.0) {
121 // return 0;
122 // }
123
124 // TODO - This is a bit of a hack to take care of a case
125 // were we were getting a confidence of 135...
126 if (rawPercent > 0.33) {
127 return 98;
128 }
129
130 return (int32_t) (rawPercent * 300.0);
131}
132
133CharsetRecog_sbcs::CharsetRecog_sbcs()
134: haveC1Bytes(FALSE)
135{
136 // nothing else to do
137}
138
139CharsetRecog_sbcs::~CharsetRecog_sbcs()
140{
141 // nothing to do
142}
143
144int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t byteMap[])
145{
146 NGramParser *parser = new NGramParser(ngrams, byteMap);
147 int32_t result;
148
149 haveC1Bytes = det->fC1Bytes;
150 result = parser->parse(det);
151 delete parser;
152
153 return result;
154}
155
// Byte map for the ISO-8859-1 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A; rows 0xC0-0xDF mostly reuse the 0xE0-0xFF codes.
156static const uint8_t charMap_8859_1[] = {
157 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
158 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
159 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
160 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
161 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
162 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
163 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
164 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
165 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
166 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
167 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
168 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
169 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
170 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
171 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
172 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
173 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
174 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
175 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
176 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
177 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
178 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
179 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
180 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
181 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
182 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
183 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
184 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
185 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
186 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
187 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
188 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
189};
190
// Byte map for the ISO-8859-2 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A.
191static const uint8_t charMap_8859_2[] = {
192 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
193 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
194 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
195 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
196 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
197 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
198 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
199 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
200 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
201 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
202 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
203 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
204 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
205 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
206 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
207 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
208 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
209 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
210 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
211 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
212 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
213 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
214 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
215 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
216 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
217 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
218 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
219 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
220 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
221 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
222 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
223 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
224};
225
// Byte map for the ISO-8859-5 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A.
226static const uint8_t charMap_8859_5[] = {
227 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
228 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
229 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
230 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
231 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
232 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
233 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
234 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
235 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
236 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
237 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
238 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
239 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
240 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
241 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
242 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
243 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
244 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
245 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
246 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
247 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
248 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
249 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
250 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
251 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
252 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
253 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
254 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
255 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
256 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
257 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
258 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
259};
260
// Byte map for the ISO-8859-6 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A.
261static const uint8_t charMap_8859_6[] = {
262 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
263 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
264 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
265 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
266 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
267 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
268 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
269 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
270 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
271 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
272 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
274 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
275 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
276 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
277 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
278 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
279 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
280 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
281 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
282 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
283 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
284 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
285 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
286 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
287 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
288 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
289 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
290 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
291 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
292 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
293 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
294};
295
// Byte map for the ISO-8859-7 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A.
296static const uint8_t charMap_8859_7[] = {
297 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
298 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
299 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
300 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
301 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
302 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
303 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
304 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
305 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
306 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
307 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
308 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
309 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
310 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
311 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
312 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
313 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
314 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
315 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
316 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
317 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
318 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
319 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
320 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
321 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
322 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
323 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
324 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
325 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
326 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
327 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
328 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
329};
330
// Byte map for the ISO-8859-8 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A.
331static const uint8_t charMap_8859_8[] = {
332 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
333 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
334 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
335 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
336 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
337 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
338 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
339 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
340 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
341 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
342 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
343 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
344 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
345 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
346 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
347 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
348 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
349 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
350 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
351 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
352 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
353 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
354 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
355 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
356 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
357 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
358 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
359 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
360 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
361 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
362 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
363 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
364};
365
// Byte map for the ISO-8859-9 models: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A. Note that index 0xDD maps to ASCII 0x69.
366static const uint8_t charMap_8859_9[] = {
367 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
368 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
369 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
370 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
371 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
372 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
373 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
374 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
375 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
378 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
379 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
380 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
381 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
382 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
383 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
384 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
385 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
386 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
387 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
388 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
389 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
390 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
391 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
392 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
393 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
394 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
395 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
396 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
397 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
398 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
399};
400
// Trigram table for the windows-1251 model. Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
401static const int32_t ngrams_windows_1251[] = {
402 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
403 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
404 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
405 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
406};
407
// Byte map for the windows-1251 model: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A; rows 0xC0-0xDF reuse the 0xE0-0xFF codes.
408static const uint8_t charMap_windows_1251[] = {
409 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
410 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
411 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
412 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
413 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
414 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
415 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
416 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
417 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
418 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
419 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
420 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
421 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
422 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
423 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
424 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
425 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
426 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
427 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
428 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
429 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
430 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
431 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
432 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
433 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
434 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
435 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
436 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
437 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
438 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
439 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
440 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
441};
442
// Trigram table for the windows-1256 model. Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
443static const int32_t ngrams_windows_1256[] = {
444 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
445 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
446 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
447 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
448};
449
// Byte map for the windows-1256 model: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A.
450static const uint8_t charMap_windows_1256[] = {
451 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
452 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
453 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
454 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
455 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
456 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
457 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
458 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
459 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
460 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
461 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
462 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
463 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
464 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
465 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
466 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
467 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
468 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
469 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
470 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
471 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
472 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
473 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
474 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
475 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
476 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
477 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
478 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
479 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
480 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
481 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
482 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
483};
484
// Trigram table for the KOI8-R model. Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
485static const int32_t ngrams_KOI8_R[] = {
486 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
487 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
488 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
489 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
490};
491
// Byte map for the KOI8-R model: 256 entries, indexed by input byte
// (NGramParser::parse() reads charMap[b]). A value of 0x20 is handled as a
// space (runs are collapsed), 0x00 - only index 0x27 here - makes parse()
// skip the byte, and any other value is fed into the n-gram. ASCII A-Z and
// a-z both map to 0x61-0x7A; rows 0xE0-0xFF reuse the 0xC0-0xDF codes.
492static const uint8_t charMap_KOI8_R[] = {
493 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
494 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
495 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
496 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
497 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
498 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
499 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
500 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
501 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
502 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
503 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
504 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
505 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
506 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
507 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
508 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
509 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
510 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
511 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
512 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
513 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
514 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
515 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
516 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
517 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
518 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
519 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
520 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
521 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
522 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
523 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
524 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
525};
526
527//ISO-8859-1,2,5,6,7,8,9 Ngrams
// Trigram table for the ISO-8859-1 "en" model (presumably English - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
528static const int32_t ngrams_8859_1_en[] = {
529 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
530 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
531 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
532 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
533};
534
// Trigram table for the ISO-8859-1 "da" model (presumably Danish - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
535static const int32_t ngrams_8859_1_da[] = {
536 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
537 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
538 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
539 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
540};
541
// Trigram table for the ISO-8859-1 "de" model (presumably German - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
542static const int32_t ngrams_8859_1_de[] = {
543 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
544 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
545 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
546 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
547};
548
// Trigram table for the ISO-8859-1 "es" model (presumably Spanish - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
549static const int32_t ngrams_8859_1_es[] = {
550 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
551 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
552 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
553 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
554};
555
// Trigram table for the ISO-8859-1 "fr" model (presumably French - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
556static const int32_t ngrams_8859_1_fr[] = {
557 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
558 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
559 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
560 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
561};
562
// Trigram table for the ISO-8859-1 "it" model (presumably Italian - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
563static const int32_t ngrams_8859_1_it[] = {
564 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
565 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
566 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
567 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
568};
569
// Trigram table for the ISO-8859-1 "nl" model (presumably Dutch - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
570static const int32_t ngrams_8859_1_nl[] = {
571 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
572 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
573 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
574 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
575};
576
// Trigram table for the ISO-8859-1 "no" model (presumably Norwegian - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
577static const int32_t ngrams_8859_1_no[] = {
578 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
579 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
580 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
581 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
582};
583
// Trigram table for the ISO-8859-1 "pt" model (presumably Portuguese - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
584static const int32_t ngrams_8859_1_pt[] = {
585 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
586 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
587 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
588 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
589};
590
// Trigram table for the ISO-8859-1 "sv" model (presumably Swedish - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
591static const int32_t ngrams_8859_1_sv[] = {
592 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
593 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
594 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
595 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
596};
597
// Trigram table for the ISO-8859-2 "cs" model (presumably Czech - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
598static const int32_t ngrams_8859_2_cs[] = {
599 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
600 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
601 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
602 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
603};
604
// Trigram table for the ISO-8859-2 "hu" model (presumably Hungarian - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
605static const int32_t ngrams_8859_2_hu[] = {
606 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
607 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
608 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
609 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
610};
611
// Trigram table for the ISO-8859-2 "pl" model (presumably Polish - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
612static const int32_t ngrams_8859_2_pl[] = {
613 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
614 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
615 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
616 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
617};
618
// Trigram table for the ISO-8859-2 "ro" model (presumably Romanian - the
// language is inferred from the name suffix). Each entry packs three mapped
// bytes into 24 bits, oldest byte highest (see NGramParser::addByte()).
// NGramParser::search() requires exactly 64 entries in ascending order.
619static const int32_t ngrams_8859_2_ro[] = {
620 0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
621 0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
622 0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
623 0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
624};
625
/*
 * N-gram table handed to match_sbcs() by CharsetRecog_8859_5_ru::match()
 * (the recognizer whose getLanguage() returns "ru").
 * Contains 64 values in ascending order, each fitting in 24 bits; this matches
 * the 3-byte keys that NGramParser::addByte() assembles one byte at a time.
 * NOTE(review): presumably searched by NGramParser::search(), which requires
 * exactly 64 sorted entries - confirm before adding, removing or reordering values.
 */
626static const int32_t ngrams_8859_5_ru[] = {
627 0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
628 0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
629 0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
630 0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
631};
632
/*
 * N-gram table handed to match_sbcs() by CharsetRecog_8859_6_ar::match()
 * (the recognizer whose getLanguage() returns "ar").
 * Contains 64 values in ascending order, each fitting in 24 bits; this matches
 * the 3-byte keys that NGramParser::addByte() assembles one byte at a time.
 * NOTE(review): presumably searched by NGramParser::search(), which requires
 * exactly 64 sorted entries - confirm before adding, removing or reordering values.
 */
633static const int32_t ngrams_8859_6_ar[] = {
634 0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
635 0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
636 0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
637 0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
638};
639
/*
 * N-gram table handed to match_sbcs() by CharsetRecog_8859_7_el::match()
 * (the recognizer whose getLanguage() returns "el").
 * Contains 64 values in ascending order, each fitting in 24 bits; this matches
 * the 3-byte keys that NGramParser::addByte() assembles one byte at a time.
 * NOTE(review): presumably searched by NGramParser::search(), which requires
 * exactly 64 sorted entries - confirm before adding, removing or reordering values.
 */
640static const int32_t ngrams_8859_7_el[] = {
641 0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
642 0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
643 0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
644 0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
645};
646
/*
 * N-gram table handed to match_sbcs() by CharsetRecog_8859_8_I_he::match()
 * (the recognizer whose getLanguage() returns "he").
 * Contains 64 values in ascending order, each fitting in 24 bits; this matches
 * the 3-byte keys that NGramParser::addByte() assembles one byte at a time.
 * NOTE(review): presumably searched by NGramParser::search(), which requires
 * exactly 64 sorted entries - confirm before adding, removing or reordering values.
 */
647static const int32_t ngrams_8859_8_I_he[] = {
648 0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
649 0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
650 0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
651 0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
652};
653
/*
 * N-gram table handed to match_sbcs() by CharsetRecog_8859_8_he::match()
 * (the recognizer whose getLanguage() returns "he").
 * Contains 64 values in ascending order, each fitting in 24 bits; this matches
 * the 3-byte keys that NGramParser::addByte() assembles one byte at a time.
 * NOTE(review): presumably searched by NGramParser::search(), which requires
 * exactly 64 sorted entries - confirm before adding, removing or reordering values.
 */
654static const int32_t ngrams_8859_8_he[] = {
655 0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
656 0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
657 0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
658 0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
659};
660
/*
 * N-gram table handed to match_sbcs() by CharsetRecog_8859_9_tr::match()
 * (the recognizer whose getLanguage() returns "tr").
 * Contains 64 values in ascending order, each fitting in 24 bits; this matches
 * the 3-byte keys that NGramParser::addByte() assembles one byte at a time.
 * NOTE(review): presumably searched by NGramParser::search(), which requires
 * exactly 64 sorted entries - confirm before adding, removing or reordering values.
 */
661static const int32_t ngrams_8859_9_tr[] = {
662 0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
663 0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
664 0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
665 0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
666};
667
668CharsetRecog_8859_1::~CharsetRecog_8859_1()
669{
670 // nothing to do
671}
672
673const char *CharsetRecog_8859_1::getName() const
674{
675 return haveC1Bytes? "windows-1252" : "ISO-8859-1";
676}
677
678const char *CharsetRecog_8859_1_en::getLanguage() const
679{
680 return "en";
681}
682
683CharsetRecog_8859_1_en::~CharsetRecog_8859_1_en()
684{
685 // nothing to do
686}
687
688int32_t CharsetRecog_8859_1_en::match(InputText *textIn)
689{
690 int32_t result = match_sbcs(textIn, ngrams_8859_1_en, charMap_8859_1);
691
692 // printf("8859_1_en: result = %d\n", result);
693 return result; //match_sbcs(textIn, ngrams, charMap);
694}
695
696CharsetRecog_8859_1_da::~CharsetRecog_8859_1_da()
697{
698 // nothing to do
699}
700
701const char *CharsetRecog_8859_1_da::getLanguage() const
702{
703 return "da";
704}
705
706int32_t CharsetRecog_8859_1_da::match(InputText *textIn)
707{
708 return match_sbcs(textIn, ngrams_8859_1_da, charMap_8859_1);
709}
710
711CharsetRecog_8859_1_de::~CharsetRecog_8859_1_de() {}
712
713const char *CharsetRecog_8859_1_de::getLanguage() const
714{
715 return "de";
716}
717
718int32_t CharsetRecog_8859_1_de::match(InputText *textIn)
719{
720 return match_sbcs(textIn, ngrams_8859_1_de, charMap_8859_1);
721}
722
723CharsetRecog_8859_1_es::~CharsetRecog_8859_1_es()
724{
725 // nothing to do
726}
727
728const char *CharsetRecog_8859_1_es::getLanguage() const
729{
730 return "es";
731}
732
733int32_t CharsetRecog_8859_1_es::match(InputText *textIn)
734{
735 return match_sbcs(textIn, ngrams_8859_1_es, charMap_8859_1);
736}
737
738CharsetRecog_8859_1_fr::~CharsetRecog_8859_1_fr()
739{
740 // nothing to do
741}
742
743const char *CharsetRecog_8859_1_fr::getLanguage() const
744{
745 return "fr";
746}
747
748int32_t CharsetRecog_8859_1_fr::match(InputText *textIn)
749{
750 return match_sbcs(textIn, ngrams_8859_1_fr, charMap_8859_1);
751}
752
753CharsetRecog_8859_1_it::~CharsetRecog_8859_1_it()
754{
755 // nothing to do
756}
757
758const char *CharsetRecog_8859_1_it::getLanguage() const
759{
760 return "it";
761}
762
763int32_t CharsetRecog_8859_1_it::match(InputText *textIn)
764{
765 return match_sbcs(textIn, ngrams_8859_1_it, charMap_8859_1);
766}
767
768CharsetRecog_8859_1_nl::~CharsetRecog_8859_1_nl()
769{
770 // nothing to do
771}
772
773const char *CharsetRecog_8859_1_nl::getLanguage() const
774{
775 return "nl";
776}
777
778int32_t CharsetRecog_8859_1_nl::match(InputText *textIn)
779{
780 return match_sbcs(textIn, ngrams_8859_1_nl, charMap_8859_1);
781}
782
783CharsetRecog_8859_1_no::~CharsetRecog_8859_1_no() {}
784
785const char *CharsetRecog_8859_1_no::getLanguage() const
786{
787 return "no";
788}
789
790int32_t CharsetRecog_8859_1_no::match(InputText *textIn)
791{
792 return match_sbcs(textIn, ngrams_8859_1_no, charMap_8859_1);
793}
794
795CharsetRecog_8859_1_pt::~CharsetRecog_8859_1_pt()
796{
797 // nothing to do
798}
799
800const char *CharsetRecog_8859_1_pt::getLanguage() const
801{
802 return "pt";
803}
804
805int32_t CharsetRecog_8859_1_pt::match(InputText *textIn)
806{
807 return match_sbcs(textIn, ngrams_8859_1_pt, charMap_8859_1);
808}
809
810CharsetRecog_8859_1_sv::~CharsetRecog_8859_1_sv() {}
811
812const char *CharsetRecog_8859_1_sv::getLanguage() const
813{
814 return "sv";
815}
816
817int32_t CharsetRecog_8859_1_sv::match(InputText *textIn)
818{
819 return match_sbcs(textIn, ngrams_8859_1_sv, charMap_8859_1);
820}
821
822CharsetRecog_8859_2::~CharsetRecog_8859_2()
823{
824 // nothing to do
825}
826
827const char *CharsetRecog_8859_2::getName() const
828{
829 return haveC1Bytes? "windows-1250" : "ISO-8859-2";
830}
831
832CharsetRecog_8859_2_cs::~CharsetRecog_8859_2_cs()
833{
834 // nothing to do
835}
836
837const char *CharsetRecog_8859_2_cs::getLanguage() const
838{
839 return "cs";
840}
841
842int32_t CharsetRecog_8859_2_cs::match(InputText *textIn)
843{
844 return match_sbcs(textIn, ngrams_8859_2_cs, charMap_8859_2);
845}
846
847CharsetRecog_8859_2_hu::~CharsetRecog_8859_2_hu()
848{
849 // nothing to do
850}
851
852const char *CharsetRecog_8859_2_hu::getLanguage() const
853{
854 return "hu";
855}
856
857int32_t CharsetRecog_8859_2_hu::match(InputText *textIn)
858{
859 return match_sbcs(textIn, ngrams_8859_2_hu, charMap_8859_2);
860}
861
862CharsetRecog_8859_2_pl::~CharsetRecog_8859_2_pl()
863{
864 // nothing to do
865}
866
867const char *CharsetRecog_8859_2_pl::getLanguage() const
868{
869 return "pl";
870}
871
872int32_t CharsetRecog_8859_2_pl::match(InputText *textIn)
873{
874 return match_sbcs(textIn, ngrams_8859_2_pl, charMap_8859_2);
875}
876
877CharsetRecog_8859_2_ro::~CharsetRecog_8859_2_ro()
878{
879 // nothing to do
880}
881
882const char *CharsetRecog_8859_2_ro::getLanguage() const
883{
884 return "ro";
885}
886
887int32_t CharsetRecog_8859_2_ro::match(InputText *textIn)
888{
889 return match_sbcs(textIn, ngrams_8859_2_ro, charMap_8859_2);
890}
891
892CharsetRecog_8859_5::~CharsetRecog_8859_5()
893{
894 // nothing to do
895}
896
897const char *CharsetRecog_8859_5::getName() const
898{
899 return "ISO-8859-5";
900}
901
902CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
903{
904 // nothing to do
905}
906
907const char *CharsetRecog_8859_5_ru::getLanguage() const
908{
909 return "ru";
910}
911
912int32_t CharsetRecog_8859_5_ru::match(InputText *textIn)
913{
914 return match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
915}
916
917CharsetRecog_8859_6::~CharsetRecog_8859_6()
918{
919 // nothing to do
920}
921
922const char *CharsetRecog_8859_6::getName() const
923{
924 return "ISO-8859-6";
925}
926
927CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
928{
929 // nothing to do
930}
931
932const char *CharsetRecog_8859_6_ar::getLanguage() const
933{
934 return "ar";
935}
936
937int32_t CharsetRecog_8859_6_ar::match(InputText *textIn)
938{
939 return match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
940}
941
942CharsetRecog_8859_7::~CharsetRecog_8859_7()
943{
944 // nothing to do
945}
946
947const char *CharsetRecog_8859_7::getName() const
948{
949 return haveC1Bytes? "windows-1253" : "ISO-8859-7";
950}
951
952CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
953{
954 // nothing to do
955}
956
957const char *CharsetRecog_8859_7_el::getLanguage() const
958{
959 return "el";
960}
961
962int32_t CharsetRecog_8859_7_el::match(InputText *textIn)
963{
964 return match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
965}
966
967CharsetRecog_8859_8::~CharsetRecog_8859_8()
968{
969 // nothing to do
970}
971
972const char *CharsetRecog_8859_8::getName() const
973{
974 return haveC1Bytes? "windows-1255" : "ISO-8859-8";
975}
976
977CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
978{
979 // nothing to do
980}
981
982const char *CharsetRecog_8859_8_I_he::getName() const
983{
984 return haveC1Bytes? "windows-1255" : "ISO-8859-8-I";
985}
986
987const char *CharsetRecog_8859_8_I_he::getLanguage() const
988{
989 return "he";
990}
991
992int32_t CharsetRecog_8859_8_I_he::match(InputText *textIn)
993{
994 return match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
995}
996
997CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
998{
999 // od ot gnihton
1000}
1001
1002const char *CharsetRecog_8859_8_he::getLanguage() const
1003{
1004 return "he";
1005}
1006
1007int32_t CharsetRecog_8859_8_he::match(InputText *textIn)
1008{
1009 return match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
1010}
1011
1012CharsetRecog_8859_9::~CharsetRecog_8859_9()
1013{
1014 // nothing to do
1015}
1016
1017const char *CharsetRecog_8859_9::getName() const
1018{
1019 return haveC1Bytes? "windows-1254" : "ISO-8859-9";
1020}
1021
1022CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
1023{
1024 // nothing to do
1025}
1026
1027const char *CharsetRecog_8859_9_tr::getLanguage() const
1028{
1029 return "tr";
1030}
1031
1032int32_t CharsetRecog_8859_9_tr::match(InputText *textIn)
1033{
1034 return match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
1035}
1036
1037CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1038{
1039 // nothing to do
1040}
1041
1042const char *CharsetRecog_windows_1256::getName() const
1043{
1044 return "windows-1256";
1045}
1046
1047const char *CharsetRecog_windows_1256::getLanguage() const
1048{
1049 return "ar";
1050}
1051
1052int32_t CharsetRecog_windows_1256::match(InputText *textIn)
1053{
1054 return match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
1055}
1056
1057CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1058{
1059 // nothing to do
1060}
1061
1062const char *CharsetRecog_windows_1251::getName() const
1063{
1064 return "windows-1251";
1065}
1066
1067const char *CharsetRecog_windows_1251::getLanguage() const
1068{
1069 return "ru";
1070}
1071
1072int32_t CharsetRecog_windows_1251::match(InputText *textIn)
1073{
1074 return match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
1075}
1076
1077CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
1078{
1079 // nothing to do
1080}
1081
1082const char *CharsetRecog_KOI8_R::getName() const
1083{
1084 return "KOI8-R";
1085}
1086
1087const char *CharsetRecog_KOI8_R::getLanguage() const
1088{
1089 return "ru";
1090}
1091
1092int32_t CharsetRecog_KOI8_R::match(InputText *textIn)
1093{
1094 return match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1095}
1096
1097U_NAMESPACE_END
1098#endif
1099