]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csrmbcs.cpp
2 **********************************************************************
3 * Copyright (C) 2005-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_CONVERSION
19 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
21 #define min(x,y) (((x)<(y))?(x):(y))
// Table of two-byte character values handed to match_mbcs() by
// CharsetRecog_sjis::match(). match_mbcs() looks each decoded character up in
// this table with binarySearch(); the entries are listed in ascending order,
// which that search relies on.
23 static const uint16_t commonChars_sjis
[] = {
24 // TODO: This data set was produced by the character frequency-
25 // of-occurrence analysis tool. The data needs to be moved
26 // into a resource and loaded from there.
27 0x8140, 0x8141, 0x8142, 0x8145, 0x815b, 0x8169, 0x816a, 0x8175, 0x8176, 0x82a0,
28 0x82a2, 0x82a4, 0x82a9, 0x82aa, 0x82ab, 0x82ad, 0x82af, 0x82b1, 0x82b3, 0x82b5,
29 0x82b7, 0x82bd, 0x82be, 0x82c1, 0x82c4, 0x82c5, 0x82c6, 0x82c8, 0x82c9, 0x82cc,
30 0x82cd, 0x82dc, 0x82e0, 0x82e7, 0x82e8, 0x82e9, 0x82ea, 0x82f0, 0x82f1, 0x8341,
31 0x8343, 0x834e, 0x834f, 0x8358, 0x835e, 0x8362, 0x8367, 0x8375, 0x8376, 0x8389,
32 0x838a, 0x838b, 0x838d, 0x8393, 0x8e96, 0x93fa, 0x95aa};
// Table of two-byte character values handed to match_mbcs() by
// CharsetRecog_euc_jp::match(). match_mbcs() looks each decoded character up
// in this table with binarySearch(); the entries are listed in ascending
// order, which that search relies on.
34 static const uint16_t commonChars_euc_jp
[] = {
35 // TODO: This data set was produced by the character frequency-
36 // of-occurrence analysis tool. The data needs to be moved
37 // into a resource and loaded from there.
38 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a6, 0xa1bc, 0xa1ca, 0xa1cb, 0xa1d6, 0xa1d7, 0xa4a2,
39 0xa4a4, 0xa4a6, 0xa4a8, 0xa4aa, 0xa4ab, 0xa4ac, 0xa4ad, 0xa4af, 0xa4b1, 0xa4b3,
40 0xa4b5, 0xa4b7, 0xa4b9, 0xa4bb, 0xa4bd, 0xa4bf, 0xa4c0, 0xa4c1, 0xa4c3, 0xa4c4,
41 0xa4c6, 0xa4c7, 0xa4c8, 0xa4c9, 0xa4ca, 0xa4cb, 0xa4ce, 0xa4cf, 0xa4d0, 0xa4de,
42 0xa4df, 0xa4e1, 0xa4e2, 0xa4e4, 0xa4e8, 0xa4e9, 0xa4ea, 0xa4eb, 0xa4ec, 0xa4ef,
43 0xa4f2, 0xa4f3, 0xa5a2, 0xa5a3, 0xa5a4, 0xa5a6, 0xa5a7, 0xa5aa, 0xa5ad, 0xa5af,
44 0xa5b0, 0xa5b3, 0xa5b5, 0xa5b7, 0xa5b8, 0xa5b9, 0xa5bf, 0xa5c3, 0xa5c6, 0xa5c7,
45 0xa5c8, 0xa5c9, 0xa5cb, 0xa5d0, 0xa5d5, 0xa5d6, 0xa5d7, 0xa5de, 0xa5e0, 0xa5e1,
46 0xa5e5, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5f3, 0xb8a9, 0xb9d4, 0xbaee,
47 0xbbc8, 0xbef0, 0xbfb7, 0xc4ea, 0xc6fc, 0xc7bd, 0xcab8, 0xcaf3, 0xcbdc, 0xcdd1};
// Table of two-byte character values handed to match_mbcs() by
// CharsetRecog_euc_kr::match(). match_mbcs() looks each decoded character up
// in this table with binarySearch(); the entries are listed in ascending
// order, which that search relies on.
49 static const uint16_t commonChars_euc_kr
[] = {
50 // TODO: This data set was produced by the character frequency-
51 // of-occurrence analysis tool. The data needs to be moved
52 // into a resource and loaded from there.
53 0xb0a1, 0xb0b3, 0xb0c5, 0xb0cd, 0xb0d4, 0xb0e6, 0xb0ed, 0xb0f8, 0xb0fa, 0xb0fc,
54 0xb1b8, 0xb1b9, 0xb1c7, 0xb1d7, 0xb1e2, 0xb3aa, 0xb3bb, 0xb4c2, 0xb4cf, 0xb4d9,
55 0xb4eb, 0xb5a5, 0xb5b5, 0xb5bf, 0xb5c7, 0xb5e9, 0xb6f3, 0xb7af, 0xb7c2, 0xb7ce,
56 0xb8a6, 0xb8ae, 0xb8b6, 0xb8b8, 0xb8bb, 0xb8e9, 0xb9ab, 0xb9ae, 0xb9cc, 0xb9ce,
57 0xb9fd, 0xbab8, 0xbace, 0xbad0, 0xbaf1, 0xbbe7, 0xbbf3, 0xbbfd, 0xbcad, 0xbcba,
58 0xbcd2, 0xbcf6, 0xbdba, 0xbdc0, 0xbdc3, 0xbdc5, 0xbec6, 0xbec8, 0xbedf, 0xbeee,
59 0xbef8, 0xbefa, 0xbfa1, 0xbfa9, 0xbfc0, 0xbfe4, 0xbfeb, 0xbfec, 0xbff8, 0xc0a7,
60 0xc0af, 0xc0b8, 0xc0ba, 0xc0bb, 0xc0bd, 0xc0c7, 0xc0cc, 0xc0ce, 0xc0cf, 0xc0d6,
61 0xc0da, 0xc0e5, 0xc0fb, 0xc0fc, 0xc1a4, 0xc1a6, 0xc1b6, 0xc1d6, 0xc1df, 0xc1f6,
62 0xc1f8, 0xc4a1, 0xc5cd, 0xc6ae, 0xc7cf, 0xc7d1, 0xc7d2, 0xc7d8, 0xc7e5, 0xc8ad};
// Table of two-byte character values handed to match_mbcs() by
// CharsetRecog_big5::match(). match_mbcs() looks each decoded character up in
// this table with binarySearch(); the entries are listed in ascending order,
// which that search relies on.
64 static const uint16_t commonChars_big5
[] = {
65 // TODO: This data set was produced by the character frequency-
66 // of-occurrence analysis tool. The data needs to be moved
67 // into a resource and loaded from there.
68 0xa140, 0xa141, 0xa142, 0xa143, 0xa147, 0xa149, 0xa175, 0xa176, 0xa440, 0xa446,
69 0xa447, 0xa448, 0xa451, 0xa454, 0xa457, 0xa464, 0xa46a, 0xa46c, 0xa477, 0xa4a3,
70 0xa4a4, 0xa4a7, 0xa4c1, 0xa4ce, 0xa4d1, 0xa4df, 0xa4e8, 0xa4fd, 0xa540, 0xa548,
71 0xa558, 0xa569, 0xa5cd, 0xa5e7, 0xa657, 0xa661, 0xa662, 0xa668, 0xa670, 0xa6a8,
72 0xa6b3, 0xa6b9, 0xa6d3, 0xa6db, 0xa6e6, 0xa6f2, 0xa740, 0xa751, 0xa759, 0xa7da,
73 0xa8a3, 0xa8a5, 0xa8ad, 0xa8d1, 0xa8d3, 0xa8e4, 0xa8fc, 0xa9c0, 0xa9d2, 0xa9f3,
74 0xaa6b, 0xaaba, 0xaabe, 0xaacc, 0xaafc, 0xac47, 0xac4f, 0xacb0, 0xacd2, 0xad59,
75 0xaec9, 0xafe0, 0xb0ea, 0xb16f, 0xb2b3, 0xb2c4, 0xb36f, 0xb44c, 0xb44e, 0xb54c,
76 0xb5a5, 0xb5bd, 0xb5d0, 0xb5d8, 0xb671, 0xb7ed, 0xb867, 0xb944, 0xbad8, 0xbb44,
77 0xbba1, 0xbdd1, 0xc2c4, 0xc3b9, 0xc440, 0xc45f};
// Table of two-byte character values handed to match_mbcs() by
// CharsetRecog_gb_18030::match(). match_mbcs() looks each decoded character up
// in this table with binarySearch(); the entries are listed in ascending
// order, which that search relies on.
79 static const uint16_t commonChars_gb_18030
[] = {
80 // TODO: This data set was produced by the character frequency-
81 // of-occurrence analysis tool. The data needs to be moved
82 // into a resource and loaded from there.
83 0xa1a1, 0xa1a2, 0xa1a3, 0xa1a4, 0xa1b0, 0xa1b1, 0xa1f1, 0xa1f3, 0xa3a1, 0xa3ac,
84 0xa3ba, 0xb1a8, 0xb1b8, 0xb1be, 0xb2bb, 0xb3c9, 0xb3f6, 0xb4f3, 0xb5bd, 0xb5c4,
85 0xb5e3, 0xb6af, 0xb6d4, 0xb6e0, 0xb7a2, 0xb7a8, 0xb7bd, 0xb7d6, 0xb7dd, 0xb8b4,
86 0xb8df, 0xb8f6, 0xb9ab, 0xb9c9, 0xb9d8, 0xb9fa, 0xb9fd, 0xbacd, 0xbba7, 0xbbd6,
87 0xbbe1, 0xbbfa, 0xbcbc, 0xbcdb, 0xbcfe, 0xbdcc, 0xbecd, 0xbedd, 0xbfb4, 0xbfc6,
88 0xbfc9, 0xc0b4, 0xc0ed, 0xc1cb, 0xc2db, 0xc3c7, 0xc4dc, 0xc4ea, 0xc5cc, 0xc6f7,
89 0xc7f8, 0xc8ab, 0xc8cb, 0xc8d5, 0xc8e7, 0xc9cf, 0xc9fa, 0xcab1, 0xcab5, 0xcac7,
90 0xcad0, 0xcad6, 0xcaf5, 0xcafd, 0xccec, 0xcdf8, 0xceaa, 0xcec4, 0xced2, 0xcee5,
91 0xcfb5, 0xcfc2, 0xcfd6, 0xd0c2, 0xd0c5, 0xd0d0, 0xd0d4, 0xd1a7, 0xd2aa, 0xd2b2,
92 0xd2b5, 0xd2bb, 0xd2d4, 0xd3c3, 0xd3d0, 0xd3fd, 0xd4c2, 0xd4da, 0xd5e2, 0xd6d0};
94 #if U_PLATFORM_IS_DARWIN_BASED
95 static const uint8_t keyStrings_sjis
[][MAX_KEY_STRING_WITH_NULL
] = {
96 {0x82,0xa9,0x82,0xe7,0x91,0x97,0x90,0x4d,0}, // Signatures - Sent from my ...
97 {0x93,0x5d,0x91,0x97,0x83,0x81,0x83,0x62,0x83,0x5a,0x81,0x5b,0x83,0x57,0}, // forward
100 static const uint8_t keyStrings_euc_jp
[][MAX_KEY_STRING_WITH_NULL
] = {
101 {0xa4,0xab,0xa4,0xe9,0xc1,0xf7,0xbf,0xae,0}, // Signatures - Sent from my ...
102 {0xc5,0xbe,0xc1,0xf7,0xa5,0xe1,0xa5,0xc3,0xa5,0xbb,0xa1,0xbc,0xa5,0xb8,0}, // forward
105 static const uint8_t keyStrings_euc_kr
[][MAX_KEY_STRING_WITH_NULL
] = {
106 {0xb3,0xaa,0xc0,0xc7,0}, // Signatures - Sent from my ... #1
107 {0xbf,0xa1,0xbc,0xad,0x20,0xba,0xb8,0xb3,0xbf,0}, // Signatures - Sent from my ... #2
108 {0xc0,0xfc,0xb4,0xde,0xb5,0xc8,0x20,0xb8,0xde,0xbd,0xc3,0xc1,0xf6,0}, // forward
111 static const uint8_t keyStrings_big5
[][MAX_KEY_STRING_WITH_NULL
] = {
112 {0xb1,0x71,0xa7,0xda,0xaa,0xba,0}, // Signatures - Sent from my ... #1
113 {0xb6,0xc7,0xb0,0x65,0}, // Signatures - Sent from my ... #2
114 {0xb6,0x7d,0xa9,0x6c,0xc2,0xe0,0xb1,0x48,0xb6,0x6c,0xa5,0xf3,0}, // forward
117 static const uint8_t keyStrings_gb_18030
[][MAX_KEY_STRING_WITH_NULL
] = {
118 {0xb7,0xa2,0xd7,0xd4,0xce,0xd2,0xb5,0xc4,0}, // Signatures - Sent from my iP...
119 {0xd7,0xaa,0xb7,0xa2,0xb5,0xc4,0xd3,0xca,0xbc,0xfe,0}, // forward
124 static int32_t binarySearch(const uint16_t *array
, int32_t len
, uint16_t value
)
126 int32_t start
= 0, end
= len
-1;
127 int32_t mid
= (start
+end
)/2;
129 while(start
<= end
) {
130 if(array
[mid
] == value
) {
134 if(array
[mid
] < value
){
146 #if U_PLATFORM_IS_DARWIN_BASED
147 // If testPrefix is a prefix of base, return its length, else return 0
148 static int32_t isPrefix(const uint8_t *testPrefix
, const uint8_t *base
, const uint8_t *baseLimit
) {
149 const uint8_t *testPrefixStart
= testPrefix
;
150 while (*testPrefix
!= 0 && base
< baseLimit
&& *testPrefix
== *base
) {
154 return (*testPrefix
== 0)? (int32_t)(testPrefix
-testPrefixStart
): 0;
158 IteratedChar::IteratedChar() :
159 charValue(0), index(-1), nextIndex(0), error(FALSE
), done(FALSE
)
161 // nothing else to do.
164 /*void IteratedChar::reset()
173 int32_t IteratedChar::nextByte(InputText
*det
)
175 if (nextIndex
>= det
->fRawLength
) {
181 return det
->fRawInput
[nextIndex
++];
184 CharsetRecog_mbcs::~CharsetRecog_mbcs()
189 #if U_PLATFORM_IS_DARWIN_BASED
190 int32_t CharsetRecog_mbcs::match_mbcs(InputText
*det
, const uint16_t commonChars
[], int32_t commonCharsLen
, const uint8_t (*keyStrings
)[MAX_KEY_STRING_WITH_NULL
] ) const {
192 int32_t CharsetRecog_mbcs::match_mbcs(InputText
*det
, const uint16_t commonChars
[], int32_t commonCharsLen
) const {
194 int32_t singleByteCharCount
= 0;
195 int32_t doubleByteCharCount
= 0;
196 int32_t commonCharCount
= 0;
197 int32_t badCharCount
= 0;
198 int32_t totalCharCount
= 0;
199 int32_t confidence
= 0;
200 #if U_PLATFORM_IS_DARWIN_BASED
201 int32_t confidenceFromKeys
= 0;
205 while (nextChar(&iter
, det
)) {
211 if (iter
.charValue
<= 0xFF) {
212 singleByteCharCount
++;
214 doubleByteCharCount
++;
216 if (commonChars
!= 0) {
217 if (binarySearch(commonChars
, commonCharsLen
, iter
.charValue
) >= 0){
218 commonCharCount
+= 1;
221 #if U_PLATFORM_IS_DARWIN_BASED
222 if (doubleByteCharCount
<= 20) {
224 for ( keyIndex
= 0; keyStrings
[keyIndex
][0] != 0; keyIndex
++ ) {
225 int32_t prefixLen
= isPrefix(keyStrings
[keyIndex
], &det
->fRawInput
[iter
.index
], &det
->fRawInput
[det
->fRawLength
]);
226 confidenceFromKeys
+= prefixLen
*5;
234 if (badCharCount
>= 2 && badCharCount
*5 >= doubleByteCharCount
) {
235 // Bail out early if the byte data is not matching the encoding scheme.
236 // break detectBlock;
241 if (doubleByteCharCount
<= 10 && badCharCount
== 0) {
242 // Not many multi-byte chars.
243 if (doubleByteCharCount
== 0 && totalCharCount
< 10) {
244 // There weren't any multibyte sequences, and there was a low density of non-ASCII single bytes.
245 // We don't have enough data to have any confidence.
246 // Statistical analysis of single byte non-ASCII characters would probably help here.
250 // ASCII or ISO file? It's probably not our encoding,
251 // but is not incompatible with our encoding, so don't give it a zero.
252 #if U_PLATFORM_IS_DARWIN_BASED
253 if (confidenceFromKeys
> 90) {
254 confidenceFromKeys
= 90;
255 } else if (confidenceFromKeys
> 0 && confidenceFromKeys
< 70) {
256 confidenceFromKeys
+= 20;
258 confidence
= 10 + confidenceFromKeys
;
268 // No match if there are too many characters that don't fit the encoding scheme.
269 // (should we have zero tolerance for these?)
271 if (doubleByteCharCount
< 20*badCharCount
) {
277 if (commonChars
== 0) {
278 // We have no statistics on frequently occurring characters.
279 // Assess confidence purely on having a reasonable number of
280 // multi-byte characters (the more the better)
281 confidence
= 30 + doubleByteCharCount
- 20*badCharCount
;
282 #if U_PLATFORM_IS_DARWIN_BASED
283 confidence
+= confidenceFromKeys
;
286 if (confidence
> 100) {
291 // Frequency of occurrence statistics exist.
294 double maxVal
= log((double)doubleByteCharCount
/ 4); /*(float)?*/
295 double scaleFactor
= 90.0 / maxVal
;
296 confidence
= (int32_t)(log((double)commonCharCount
+1) * scaleFactor
+ 10.0);
297 #if U_PLATFORM_IS_DARWIN_BASED
298 confidence
+= confidenceFromKeys
;
301 confidence
= min(confidence
, 100);
304 if (confidence
< 0) {
311 CharsetRecog_sjis::~CharsetRecog_sjis()
316 UBool
CharsetRecog_sjis::nextChar(IteratedChar
* it
, InputText
* det
) const {
317 it
->index
= it
->nextIndex
;
320 int32_t firstByte
= it
->charValue
= it
->nextByte(det
);
326 if (firstByte
<= 0x7F || (firstByte
> 0xA0 && firstByte
<= 0xDF)) {
330 int32_t secondByte
= it
->nextByte(det
);
331 if (secondByte
>= 0) {
332 it
->charValue
= (firstByte
<< 8) | secondByte
;
334 // else we'll handle the error later.
336 if (! ((secondByte
>= 0x40 && secondByte
<= 0x7F) || (secondByte
>= 0x80 && secondByte
<= 0xFE))) {
337 // Illegal second byte value.
344 UBool
CharsetRecog_sjis::match(InputText
* det
, CharsetMatch
*results
) const {
345 #if U_PLATFORM_IS_DARWIN_BASED
346 int32_t confidence
= match_mbcs(det
, commonChars_sjis
, ARRAY_SIZE(commonChars_sjis
), keyStrings_sjis
);
348 int32_t confidence
= match_mbcs(det
, commonChars_sjis
, ARRAY_SIZE(commonChars_sjis
));
350 results
->set(det
, this, confidence
);
351 return (confidence
> 0);
354 const char *CharsetRecog_sjis::getName() const
359 const char *CharsetRecog_sjis::getLanguage() const
364 CharsetRecog_euc::~CharsetRecog_euc()
369 UBool
CharsetRecog_euc::nextChar(IteratedChar
* it
, InputText
* det
) const {
370 int32_t firstByte
= 0;
371 int32_t secondByte
= 0;
372 int32_t thirdByte
= 0;
374 it
->index
= it
->nextIndex
;
376 firstByte
= it
->charValue
= it
->nextByte(det
);
379 // Ran off the end of the input data
383 if (firstByte
<= 0x8D) {
388 secondByte
= it
->nextByte(det
);
389 if (secondByte
>= 0) {
390 it
->charValue
= (it
->charValue
<< 8) | secondByte
;
392 // else we'll handle the error later.
394 if (firstByte
>= 0xA1 && firstByte
<= 0xFE) {
396 if (secondByte
< 0xA1) {
403 if (firstByte
== 0x8E) {
405 // In EUC-JP, total char size is 2 bytes, only one byte of actual char value.
406 // In EUC-TW, total char size is 4 bytes, three bytes contribute to char value.
407 // We don't know which we've got.
408 // Treat it like EUC-JP. If the data really was EUC-TW, the following two
409 // bytes will look like a well formed 2 byte char.
410 if (secondByte
< 0xA1) {
417 if (firstByte
== 0x8F) {
419 // Three byte total char size, two bytes of actual char value.
420 thirdByte
= it
->nextByte(det
);
421 it
->charValue
= (it
->charValue
<< 8) | thirdByte
;
423 if (thirdByte
< 0xa1) {
424 // Bad second byte or ran off the end of the input data with a non-ASCII first byte.
433 CharsetRecog_euc_jp::~CharsetRecog_euc_jp()
438 const char *CharsetRecog_euc_jp::getName() const
443 const char *CharsetRecog_euc_jp::getLanguage() const
448 UBool
CharsetRecog_euc_jp::match(InputText
*det
, CharsetMatch
*results
) const
450 #if U_PLATFORM_IS_DARWIN_BASED
451 int32_t confidence
= match_mbcs(det
, commonChars_euc_jp
, ARRAY_SIZE(commonChars_euc_jp
), keyStrings_euc_jp
);
453 int32_t confidence
= match_mbcs(det
, commonChars_euc_jp
, ARRAY_SIZE(commonChars_euc_jp
));
455 results
->set(det
, this, confidence
);
456 return (confidence
> 0);
459 CharsetRecog_euc_kr::~CharsetRecog_euc_kr()
464 const char *CharsetRecog_euc_kr::getName() const
469 const char *CharsetRecog_euc_kr::getLanguage() const
474 UBool
CharsetRecog_euc_kr::match(InputText
*det
, CharsetMatch
*results
) const
476 #if U_PLATFORM_IS_DARWIN_BASED
477 int32_t confidence
= match_mbcs(det
, commonChars_euc_kr
, ARRAY_SIZE(commonChars_euc_kr
), keyStrings_euc_kr
);
479 int32_t confidence
= match_mbcs(det
, commonChars_euc_kr
, ARRAY_SIZE(commonChars_euc_kr
));
481 results
->set(det
, this, confidence
);
482 return (confidence
> 0);
485 CharsetRecog_big5::~CharsetRecog_big5()
490 UBool
CharsetRecog_big5::nextChar(IteratedChar
* it
, InputText
* det
) const
494 it
->index
= it
->nextIndex
;
496 firstByte
= it
->charValue
= it
->nextByte(det
);
502 if (firstByte
<= 0x7F || firstByte
== 0xFF) {
503 // single byte character.
507 int32_t secondByte
= it
->nextByte(det
);
508 if (secondByte
>= 0) {
509 it
->charValue
= (it
->charValue
<< 8) | secondByte
;
511 // else we'll handle the error later.
513 if (secondByte
< 0x40 || secondByte
== 0x7F || secondByte
== 0xFF) {
520 const char *CharsetRecog_big5::getName() const
525 const char *CharsetRecog_big5::getLanguage() const
530 UBool
CharsetRecog_big5::match(InputText
*det
, CharsetMatch
*results
) const
532 #if U_PLATFORM_IS_DARWIN_BASED
533 int32_t confidence
= match_mbcs(det
, commonChars_big5
, ARRAY_SIZE(commonChars_big5
), keyStrings_big5
);
535 int32_t confidence
= match_mbcs(det
, commonChars_big5
, ARRAY_SIZE(commonChars_big5
));
537 results
->set(det
, this, confidence
);
538 return (confidence
> 0);
541 CharsetRecog_gb_18030::~CharsetRecog_gb_18030()
// Reads the next character of det's input into *it, pulling up to four bytes
// through it->nextByte(det). The forms visible here are:
//   - lead byte 0x81..0xFE plus one trail byte  -> 16-bit charValue
//   - lead, 0x30..0x39, 0x81..0xFE, 0x30..0x39  -> 32-bit charValue
// NOTE(review): several lines of this function (including its return
// statements and error handling) are absent from this view, so the exit paths
// are not described here.
546 UBool
CharsetRecog_gb_18030::nextChar(IteratedChar
* it
, InputText
* det
) const {
547 int32_t firstByte
= 0;
548 int32_t secondByte
= 0;
549 int32_t thirdByte
= 0;
550 int32_t fourthByte
= 0;
// The new character starts where the previous one stopped.
552 it
->index
= it
->nextIndex
;
554 firstByte
= it
->charValue
= it
->nextByte(det
);
557 // Ran off the end of the input data
561 if (firstByte
<= 0x80) {
566 secondByte
= it
->nextByte(det
);
567 if (secondByte
>= 0) {
// Provisional two-byte value: lead byte in the high 8 bits, trail byte low.
568 it
->charValue
= (it
->charValue
<< 8) | secondByte
;
570 // else we'll handle the error later.
572 if (firstByte
>= 0x81 && firstByte
<= 0xFE) {
// NOTE(review): the second range below starts at decimal 80 (= 0x50), while
// every other bound in this function is hexadecimal. Combined with the first
// range (0x40..0x7E) the test accepts every value 0x40..0xFE, i.e. it also
// lets 0x7F through as a trail byte. 0x80 was presumably intended -- confirm
// against the GB 18030 two-byte trail ranges before changing.
574 if ((secondByte
>= 0x40 && secondByte
<= 0x7E) || (secondByte
>=80 && secondByte
<= 0xFE)) {
// A second byte in 0x30..0x39 introduces the four-byte form.
579 if (secondByte
>= 0x30 && secondByte
<= 0x39) {
580 thirdByte
= it
->nextByte(det
);
582 if (thirdByte
>= 0x81 && thirdByte
<= 0xFE) {
583 fourthByte
= it
->nextByte(det
);
585 if (fourthByte
>= 0x30 && fourthByte
<= 0x39) {
// Append bytes three and four below the two-byte value already in charValue.
586 it
->charValue
= (it
->charValue
<< 16) | (thirdByte
<< 8) | fourthByte
;
593 // Something wasn't valid, or we ran out of data (-1).
600 const char *CharsetRecog_gb_18030::getName() const
605 const char *CharsetRecog_gb_18030::getLanguage() const
610 UBool
CharsetRecog_gb_18030::match(InputText
*det
, CharsetMatch
*results
) const
612 #if U_PLATFORM_IS_DARWIN_BASED
613 int32_t confidence
= match_mbcs(det
, commonChars_gb_18030
, ARRAY_SIZE(commonChars_gb_18030
), keyStrings_gb_18030
);
615 int32_t confidence
= match_mbcs(det
, commonChars_gb_18030
, ARRAY_SIZE(commonChars_gb_18030
));
617 results
->set(det
, this, confidence
);
618 return (confidence
> 0);