]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrmbcs.cpp
2 **********************************************************************
3 * Copyright (C) 2005-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_CONVERSION
20 #define min(x,y) (((x)<(y))?(x):(y))
// Frequently occurring Shift-JIS double-byte character codes.
// CharsetRecog_sjis::match() hands this table to match_mbcs(), which looks up
// each decoded IteratedChar::charValue with binarySearch(); the entries must
// therefore stay in ascending order.
22 static const uint16_t commonChars_sjis
[] = {
23 // TODO: This set of data comes from the character frequency-
24 // of-occurrence analysis tool. The data needs to be moved
25 // into a resource and loaded from there.
26 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
27 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
28 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
29 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
30 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
31 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa};
// Frequently occurring EUC-JP double-byte character codes.
// CharsetRecog_euc_jp::match() hands this table to match_mbcs(), which looks up
// each decoded IteratedChar::charValue with binarySearch(); the entries must
// therefore stay in ascending order.
33 static const uint16_t commonChars_euc_jp
[] = {
34 // TODO: This set of data comes from the character frequency-
35 // of-occurrence analysis tool. The data needs to be moved
36 // into a resource and loaded from there.
37 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
38 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
39 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
40 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
41 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
42 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
43 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
44 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
45 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
46 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
// Frequently occurring EUC-KR double-byte character codes.
// CharsetRecog_euc_kr::match() hands this table to match_mbcs(), which looks up
// each decoded IteratedChar::charValue with binarySearch(); the entries must
// therefore stay in ascending order.
48 static const uint16_t commonChars_euc_kr
[] = {
49 // TODO: This set of data comes from the character frequency-
50 // of-occurrence analysis tool. The data needs to be moved
51 // into a resource and loaded from there.
52 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
53 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
54 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
55 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
56 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
57 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
58 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
59 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
60 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
61 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad};
// Frequently occurring Big5 double-byte character codes.
// CharsetRecog_big5::match() hands this table to match_mbcs(), which looks up
// each decoded IteratedChar::charValue with binarySearch(); the entries must
// therefore stay in ascending order.
63 static const uint16_t commonChars_big5
[] = {
64 // TODO: This set of data comes from the character frequency-
65 // of-occurrence analysis tool. The data needs to be moved
66 // into a resource and loaded from there.
67 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
68 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
69 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
70 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
71 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
72 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
73 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
74 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
75 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
76 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f};
// Frequently occurring GB18030 double-byte character codes.
// CharsetRecog_gb_18030::match() hands this table to match_mbcs(), which looks
// up each decoded IteratedChar::charValue with binarySearch(); the entries must
// therefore stay in ascending order.
78 static const uint16_t commonChars_gb_18030
[] = {
79 // TODO: This set of data comes from the character frequency-
80 // of-occurrence analysis tool. The data needs to be moved
81 // into a resource and loaded from there.
82 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
83 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
84 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
85 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
86 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
87 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
88 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
89 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
90 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
91 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0};
93 #if U_PLATFORM_IS_DARWIN_BASED
94 static const uint8_t keyStrings_sjis
[][MAX_KEY_STRING_WITH_NULL
] = {
95 {0x82,0xa9,0x82,0xe7,0x91,0x97,0x90,0x4d,0}, // Signatures - Sent from my ...
96 {0x93,0x5d,0x91,0x97,0x83,0x81,0x83,0x62,0x83,0x5a,0x81,0x5b,0x83,0x57,0}, // forward
99 static const uint8_t keyStrings_euc_jp
[][MAX_KEY_STRING_WITH_NULL
] = {
100 {0xa4,0xab,0xa4,0xe9,0xc1,0xf7,0xbf,0xae,0}, // Signatures - Sent from my ...
101 {0xc5,0xbe,0xc1,0xf7,0xa5,0xe1,0xa5,0xc3,0xa5,0xbb,0xa1,0xbc,0xa5,0xb8,0}, // forward
104 static const uint8_t keyStrings_euc_kr
[][MAX_KEY_STRING_WITH_NULL
] = {
105 {0xb3,0xaa,0xc0,0xc7,0}, // Signatures - Sent from my ... #1
106 {0xbf,0xa1,0xbc,0xad,0x20,0xba,0xb8,0xb3,0xbf,0}, // Signatures - Sent from my ... #2
107 {0xc0,0xfc,0xb4,0xde,0xb5,0xc8,0x20,0xb8,0xde,0xbd,0xc3,0xc1,0xf6,0}, // forward
110 static const uint8_t keyStrings_big5
[][MAX_KEY_STRING_WITH_NULL
] = {
111 {0xb1,0x71,0xa7,0xda,0xaa,0xba,0}, // Signatures - Sent from my ... #1
112 {0xb6,0xc7,0xb0,0x65,0}, // Signatures - Sent from my ... #2
113 {0xb6,0x7d,0xa9,0x6c,0xc2,0xe0,0xb1,0x48,0xb6,0x6c,0xa5,0xf3,0}, // forward
116 static const uint8_t keyStrings_gb_18030
[][MAX_KEY_STRING_WITH_NULL
] = {
117 {0xb7,0xa2,0xd7,0xd4,0xce,0xd2,0xb5,0xc4,0}, // Signatures - Sent from my iP...
118 {0xd7,0xaa,0xb7,0xa2,0xb5,0xc4,0xd3,0xca,0xbc,0xfe,0}, // forward
123 static int32_t binarySearch(const uint16_t *array
, int32_t len
, uint16_t value
)
125 int32_t start
= 0, end
= len
-1;
126 int32_t mid
= (start
+end
)/2;
128 while(start
<= end
) {
129 if(array
[mid
] == value
) {
133 if(array
[mid
] < value
){
145 #if U_PLATFORM_IS_DARWIN_BASED
146 // If testPrefix is a prefix of base, return its length, else return 0
147 static int32_t isPrefix(const uint8_t *testPrefix
, const uint8_t *base
, const uint8_t *baseLimit
) {
148 const uint8_t *testPrefixStart
= testPrefix
;
149 while (*testPrefix
!= 0 && base
< baseLimit
&& *testPrefix
== *base
) {
153 return (*testPrefix
== 0)? (int32_t)(testPrefix
-testPrefixStart
): 0;
157 IteratedChar::IteratedChar() :
158 charValue(0), index(-1), nextIndex(0), error(FALSE
), done(FALSE
)
160 // nothing else to do.
163 /*void IteratedChar::reset()
172 int32_t IteratedChar::nextByte(InputText
*det
)
174 if (nextIndex
>= det
->fRawLength
) {
180 return det
->fRawInput
[nextIndex
++];
183 CharsetRecog_mbcs::~CharsetRecog_mbcs()
188 #if U_PLATFORM_IS_DARWIN_BASED
189 int32_t CharsetRecog_mbcs::match_mbcs(InputText
*det
, const uint16_t commonChars
[], int32_t commonCharsLen
, const uint8_t (*keyStrings
)[MAX_KEY_STRING_WITH_NULL
] ) const {
191 int32_t CharsetRecog_mbcs::match_mbcs(InputText
*det
, const uint16_t commonChars
[], int32_t commonCharsLen
) const {
193 int32_t singleByteCharCount
= 0;
194 int32_t doubleByteCharCount
= 0;
195 int32_t commonCharCount
= 0;
196 int32_t badCharCount
= 0;
197 int32_t totalCharCount
= 0;
198 int32_t confidence
= 0;
199 #if U_PLATFORM_IS_DARWIN_BASED
200 int32_t confidenceFromKeys
= 0;
204 while (nextChar(&iter
, det
)) {
210 if (iter
.charValue
<= 0xFF) {
211 singleByteCharCount
++;
213 doubleByteCharCount
++;
215 if (commonChars
!= 0) {
216 if (binarySearch(commonChars
, commonCharsLen
, iter
.charValue
) >= 0){
217 commonCharCount
+= 1;
220 #if U_PLATFORM_IS_DARWIN_BASED
221 if (doubleByteCharCount
<= 20) {
223 for ( keyIndex
= 0; keyStrings
[keyIndex
][0] != 0; keyIndex
++ ) {
224 int32_t prefixLen
= isPrefix(keyStrings
[keyIndex
], &det
->fRawInput
[iter
.index
], &det
->fRawInput
[det
->fRawLength
]);
225 confidenceFromKeys
+= prefixLen
*5;
233 if (badCharCount
>= 2 && badCharCount
*5 >= doubleByteCharCount
) {
234 // Bail out early if the byte data is not matching the encoding scheme.
235 // break detectBlock;
240 if (doubleByteCharCount
<= 10 && badCharCount
== 0) {
241 // Not many multi-byte chars.
242 if (doubleByteCharCount
== 0 && totalCharCount
< 10) {
243 // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes.
244 // We don't have enough data to have any confidence.
245 // Statistical analysis of single byte non-ASCII characters would probably help here.
249 // ASCII or ISO file? It's probably not our encoding,
250 // but is not incompatible with our encoding, so don't give it a zero.
251 #if U_PLATFORM_IS_DARWIN_BASED
252 if (confidenceFromKeys
> 90) {
253 confidenceFromKeys
= 90;
254 } else if (confidenceFromKeys
> 0 && confidenceFromKeys
< 70) {
255 confidenceFromKeys
+= 20;
257 confidence
= 10 + confidenceFromKeys
;
267 // No match if there are too many characters that don't fit the encoding scheme.
268 // (should we have zero tolerance for these?)
270 if (doubleByteCharCount
< 20*badCharCount
) {
276 if (commonChars
== 0) {
277 // We have no statistics on frequently occurring characters.
278 // Assess confidence purely on having a reasonable number of
279 // multi-byte characters (the more the better)
280 confidence
= 30 + doubleByteCharCount
- 20*badCharCount
;
281 #if U_PLATFORM_IS_DARWIN_BASED
282 confidence
+= confidenceFromKeys
;
285 if (confidence
> 100) {
290 // Frequency of occurrence statistics exist.
293 double maxVal
= log((double)doubleByteCharCount
/ 4); /*(float)?*/
294 double scaleFactor
= 90.0 / maxVal
;
295 confidence
= (int32_t)(log((double)commonCharCount
+1) * scaleFactor
+ 10.0);
296 #if U_PLATFORM_IS_DARWIN_BASED
297 confidence
+= confidenceFromKeys
;
300 confidence
= min(confidence
, 100);
303 if (confidence
< 0) {
310 CharsetRecog_sjis::~CharsetRecog_sjis()
315 UBool
CharsetRecog_sjis::nextChar(IteratedChar
* it
, InputText
* det
) const {
316 it
->index
= it
->nextIndex
;
319 int32_t firstByte
= it
->charValue
= it
->nextByte(det
);
325 if (firstByte
<= 0x7F || (firstByte
> 0xA0 && firstByte
<= 0xDF)) {
329 int32_t secondByte
= it
->nextByte(det
);
330 if (secondByte
>= 0) {
331 it
->charValue
= (firstByte
<< 8) | secondByte
;
333 // else we'll handle the error later.
335 if (! ((secondByte
>= 0x40 && secondByte
<= 0x7F) || (secondByte
>= 0x80 && secondByte
<= 0xFE))) {
336 // Illegal second byte value.
343 UBool
CharsetRecog_sjis::match(InputText
* det
, CharsetMatch
*results
) const {
344 #if U_PLATFORM_IS_DARWIN_BASED
345 int32_t confidence
= match_mbcs(det
, commonChars_sjis
, UPRV_LENGTHOF(commonChars_sjis
), keyStrings_sjis
);
347 int32_t confidence
= match_mbcs(det
, commonChars_sjis
, UPRV_LENGTHOF(commonChars_sjis
));
349 results
->set(det
, this, confidence
);
350 return (confidence
> 0);
353 const char *CharsetRecog_sjis::getName() const
358 const char *CharsetRecog_sjis::getLanguage() const
363 CharsetRecog_euc::~CharsetRecog_euc()
368 UBool
CharsetRecog_euc::nextChar(IteratedChar
* it
, InputText
* det
) const {
369 int32_t firstByte
= 0;
370 int32_t secondByte
= 0;
371 int32_t thirdByte
= 0;
373 it
->index
= it
->nextIndex
;
375 firstByte
= it
->charValue
= it
->nextByte(det
);
378 // Ran off the end of the input data
382 if (firstByte
<= 0x8D) {
387 secondByte
= it
->nextByte(det
);
388 if (secondByte
>= 0) {
389 it
->charValue
= (it
->charValue
<< 8) | secondByte
;
391 // else we'll handle the error later.
393 if (firstByte
>= 0xA1 && firstByte
<= 0xFE) {
395 if (secondByte
< 0xA1) {
402 if (firstByte
== 0x8E) {
404 // In EUC-JP, total char size is 2 bytes, only one byte of actual char value.
405 // In EUC-TW, total char size is 4 bytes, three bytes contribute to char value.
406 // We don't know which we've got.
407 // Treat it like EUC-JP. If the data really was EUC-TW, the following two
408 // bytes will look like a well formed 2 byte char.
409 if (secondByte
< 0xA1) {
416 if (firstByte
== 0x8F) {
418 // Three byte total char size, two bytes of actual char value.
419 thirdByte
= it
->nextByte(det
);
420 it
->charValue
= (it
->charValue
<< 8) | thirdByte
;
422 if (thirdByte
< 0xa1) {
423 // Bad third byte or ran off the end of the input data with a non-ASCII first byte.
432 CharsetRecog_euc_jp::~CharsetRecog_euc_jp()
437 const char *CharsetRecog_euc_jp::getName() const
442 const char *CharsetRecog_euc_jp::getLanguage() const
447 UBool
CharsetRecog_euc_jp::match(InputText
*det
, CharsetMatch
*results
) const
449 #if U_PLATFORM_IS_DARWIN_BASED
450 int32_t confidence
= match_mbcs(det
, commonChars_euc_jp
, UPRV_LENGTHOF(commonChars_euc_jp
), keyStrings_euc_jp
);
452 int32_t confidence
= match_mbcs(det
, commonChars_euc_jp
, UPRV_LENGTHOF(commonChars_euc_jp
));
454 results
->set(det
, this, confidence
);
455 return (confidence
> 0);
458 CharsetRecog_euc_kr::~CharsetRecog_euc_kr()
463 const char *CharsetRecog_euc_kr::getName() const
468 const char *CharsetRecog_euc_kr::getLanguage() const
473 UBool
CharsetRecog_euc_kr::match(InputText
*det
, CharsetMatch
*results
) const
475 #if U_PLATFORM_IS_DARWIN_BASED
476 int32_t confidence
= match_mbcs(det
, commonChars_euc_kr
, UPRV_LENGTHOF(commonChars_euc_kr
), keyStrings_euc_kr
);
478 int32_t confidence
= match_mbcs(det
, commonChars_euc_kr
, UPRV_LENGTHOF(commonChars_euc_kr
));
480 results
->set(det
, this, confidence
);
481 return (confidence
> 0);
484 CharsetRecog_big5::~CharsetRecog_big5()
489 UBool
CharsetRecog_big5::nextChar(IteratedChar
* it
, InputText
* det
) const
493 it
->index
= it
->nextIndex
;
495 firstByte
= it
->charValue
= it
->nextByte(det
);
501 if (firstByte
<= 0x7F || firstByte
== 0xFF) {
502 // single byte character.
506 int32_t secondByte
= it
->nextByte(det
);
507 if (secondByte
>= 0) {
508 it
->charValue
= (it
->charValue
<< 8) | secondByte
;
510 // else we'll handle the error later.
512 if (secondByte
< 0x40 || secondByte
== 0x7F || secondByte
== 0xFF) {
519 const char *CharsetRecog_big5::getName() const
524 const char *CharsetRecog_big5::getLanguage() const
529 UBool
CharsetRecog_big5::match(InputText
*det
, CharsetMatch
*results
) const
531 #if U_PLATFORM_IS_DARWIN_BASED
532 int32_t confidence
= match_mbcs(det
, commonChars_big5
, UPRV_LENGTHOF(commonChars_big5
), keyStrings_big5
);
534 int32_t confidence
= match_mbcs(det
, commonChars_big5
, UPRV_LENGTHOF(commonChars_big5
));
536 results
->set(det
, this, confidence
);
537 return (confidence
> 0);
540 CharsetRecog_gb_18030::~CharsetRecog_gb_18030()
// Reads the next GB18030 character from det into it.
//   it  - iterator state; index is set to the offset the character starts at
//         and charValue receives the bytes consumed, earlier bytes in the
//         higher-order bits.
//   det - input being scanned; bytes are pulled through it->nextByte(det),
//         whose negative result is treated as "no more data".
// A first byte of 0x80 or less is handled before any further byte is read.
// A lead byte of 0x81-0xFE is followed either by one trail byte (two-byte
// form) or by digit / 0x81-0xFE / digit (four-byte form).
// NOTE(review): this extract omits several source lines of the function (the
// return statements and the error flagging), so the return contract is not
// restated here - confirm against the complete file.
545 UBool
CharsetRecog_gb_18030::nextChar(IteratedChar
* it
, InputText
* det
) const {
546 int32_t firstByte
= 0;
547 int32_t secondByte
= 0;
548 int32_t thirdByte
= 0;
549 int32_t fourthByte
= 0;
551 it
->index
= it
->nextIndex
;
553 firstByte
= it
->charValue
= it
->nextByte(det
);
556 // Ran off the end of the input data
560 if (firstByte
<= 0x80) {
565 secondByte
= it
->nextByte(det
);
566 if (secondByte
>= 0) {
567 it
->charValue
= (it
->charValue
<< 8) | secondByte
;
569 // else we'll handle the error later.
571 if (firstByte
>= 0x81 && firstByte
<= 0xFE) {
// Two-byte form: the trail byte is 0x40-0x7E or 0x80-0xFE. The upper range
// previously started at decimal 80 (0x50), which overlapped the lower range
// and let the invalid trail byte 0x7F through as a well-formed character.
573 if ((secondByte
>= 0x40 && secondByte
<= 0x7E) || (secondByte
>= 0x80 && secondByte
<= 0xFE)) {
// Four-byte form: second and fourth bytes are ASCII digits 0x30-0x39, the
// third byte is 0x81-0xFE.
578 if (secondByte
>= 0x30 && secondByte
<= 0x39) {
579 thirdByte
= it
->nextByte(det
);
581 if (thirdByte
>= 0x81 && thirdByte
<= 0xFE) {
582 fourthByte
= it
->nextByte(det
);
584 if (fourthByte
>= 0x30 && fourthByte
<= 0x39) {
// Append bytes three and four below the two bytes already stored.
585 it
->charValue
= (it
->charValue
<< 16) | (thirdByte
<< 8) | fourthByte
;
592 // Something wasn't valid, or we ran out of data (-1).
599 const char *CharsetRecog_gb_18030::getName() const
604 const char *CharsetRecog_gb_18030::getLanguage() const
609 UBool
CharsetRecog_gb_18030::match(InputText
*det
, CharsetMatch
*results
) const
611 #if U_PLATFORM_IS_DARWIN_BASED
612 int32_t confidence
= match_mbcs(det
, commonChars_gb_18030
, UPRV_LENGTHOF(commonChars_gb_18030
), keyStrings_gb_18030
);
614 int32_t confidence
= match_mbcs(det
, commonChars_gb_18030
, UPRV_LENGTHOF(commonChars_gb_18030
));
616 results
->set(det
, this, confidence
);
617 return (confidence
> 0);