/*
**********************************************************************
- * Copyright (C) 2005-2012, International Business Machines
+ * Copyright (C) 2005-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#if !UCONFIG_NO_CONVERSION
+#include "cmemory.h"
+#include "csmatch.h"
#include "csrmbcs.h"
#include <math.h>
U_NAMESPACE_BEGIN
-#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
-
// Function-like macro yielding the smaller of x and y via the ternary operator.
// The selected argument appears twice in the expansion, so it is evaluated twice;
// avoid passing expressions with side effects.
#define min(x,y) (((x)<(y))?(x):(y))
static const uint16_t commonChars_sjis [] = {
}
#if U_PLATFORM_IS_DARWIN_BASED
-int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars[], int32_t commonCharsLen, const uint8_t (*keyStrings)[MAX_KEY_STRING_WITH_NULL] ) {
+int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars[], int32_t commonCharsLen, const uint8_t (*keyStrings)[MAX_KEY_STRING_WITH_NULL] ) const {
#else
-int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars[], int32_t commonCharsLen) {
+int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars[], int32_t commonCharsLen) const {
#endif
int32_t singleByteCharCount = 0;
int32_t doubleByteCharCount = 0;
// nothing to do
}
-UBool CharsetRecog_sjis::nextChar(IteratedChar* it, InputText* det) {
+UBool CharsetRecog_sjis::nextChar(IteratedChar* it, InputText* det) const {
it->index = it->nextIndex;
it->error = FALSE;
return TRUE;
}
// Patch hunk for CharsetRecog_sjis::match. The removed (-) version returned the
// match_mbcs() confidence directly as int32_t; the added (+) version is a const
// method that hands the confidence to results->set() and returns a UBool that
// is true only when the confidence is greater than zero.
-int32_t CharsetRecog_sjis::match(InputText* det)
-{
+UBool CharsetRecog_sjis::match(InputText* det, CharsetMatch *results) const {
#if U_PLATFORM_IS_DARWIN_BASED
// Darwin-based builds pass keyStrings_sjis as an additional argument to match_mbcs().
- return match_mbcs(det, commonChars_sjis, ARRAY_SIZE(commonChars_sjis), keyStrings_sjis);
+ int32_t confidence = match_mbcs(det, commonChars_sjis, UPRV_LENGTHOF(commonChars_sjis), keyStrings_sjis);
#else
// Other platforms call match_mbcs() with only the common-character table and its length.
- return match_mbcs(det, commonChars_sjis, ARRAY_SIZE(commonChars_sjis));
+ int32_t confidence = match_mbcs(det, commonChars_sjis, UPRV_LENGTHOF(commonChars_sjis));
#endif
// results->set() is called unconditionally, including when confidence is not positive;
// results is dereferenced without a null check.
+ results->set(det, this, confidence);
+ return (confidence > 0);
}
const char *CharsetRecog_sjis::getName() const
// nothing to do
}
-UBool CharsetRecog_euc::nextChar(IteratedChar* it, InputText* det) {
+UBool CharsetRecog_euc::nextChar(IteratedChar* it, InputText* det) const {
int32_t firstByte = 0;
int32_t secondByte = 0;
int32_t thirdByte = 0;
return "ja";
}
// Patch hunk for CharsetRecog_euc_jp::match. The removed (-) version returned the
// match_mbcs() confidence directly as int32_t; the added (+) version is a const
// method that hands the confidence to results->set() and returns a UBool that
// is true only when the confidence is greater than zero.
-int32_t CharsetRecog_euc_jp::match(InputText *det)
+UBool CharsetRecog_euc_jp::match(InputText *det, CharsetMatch *results) const
{
#if U_PLATFORM_IS_DARWIN_BASED
// Darwin-based builds pass keyStrings_euc_jp as an additional argument to match_mbcs().
- return match_mbcs(det, commonChars_euc_jp, ARRAY_SIZE(commonChars_euc_jp), keyStrings_euc_jp);
+ int32_t confidence = match_mbcs(det, commonChars_euc_jp, UPRV_LENGTHOF(commonChars_euc_jp), keyStrings_euc_jp);
#else
// Other platforms call match_mbcs() with only the common-character table and its length.
- return match_mbcs(det, commonChars_euc_jp, ARRAY_SIZE(commonChars_euc_jp));
+ int32_t confidence = match_mbcs(det, commonChars_euc_jp, UPRV_LENGTHOF(commonChars_euc_jp));
#endif
// results->set() is called unconditionally, including when confidence is not positive;
// results is dereferenced without a null check.
+ results->set(det, this, confidence);
+ return (confidence > 0);
}
CharsetRecog_euc_kr::~CharsetRecog_euc_kr()
return "ko";
}
// Patch hunk for CharsetRecog_euc_kr::match. The removed (-) version returned the
// match_mbcs() confidence directly as int32_t; the added (+) version is a const
// method that hands the confidence to results->set() and returns a UBool that
// is true only when the confidence is greater than zero.
-int32_t CharsetRecog_euc_kr::match(InputText *det)
+UBool CharsetRecog_euc_kr::match(InputText *det, CharsetMatch *results) const
{
#if U_PLATFORM_IS_DARWIN_BASED
// Darwin-based builds pass keyStrings_euc_kr as an additional argument to match_mbcs().
- return match_mbcs(det, commonChars_euc_kr, ARRAY_SIZE(commonChars_euc_kr), keyStrings_euc_kr);
+ int32_t confidence = match_mbcs(det, commonChars_euc_kr, UPRV_LENGTHOF(commonChars_euc_kr), keyStrings_euc_kr);
#else
// Other platforms call match_mbcs() with only the common-character table and its length.
- return match_mbcs(det, commonChars_euc_kr, ARRAY_SIZE(commonChars_euc_kr));
+ int32_t confidence = match_mbcs(det, commonChars_euc_kr, UPRV_LENGTHOF(commonChars_euc_kr));
#endif
// results->set() is called unconditionally, including when confidence is not positive;
// results is dereferenced without a null check.
+ results->set(det, this, confidence);
+ return (confidence > 0);
}
CharsetRecog_big5::~CharsetRecog_big5()
// nothing to do
}
-UBool CharsetRecog_big5::nextChar(IteratedChar* it, InputText* det)
+UBool CharsetRecog_big5::nextChar(IteratedChar* it, InputText* det) const
{
int32_t firstByte;
return "zh";
}
// Patch hunk for CharsetRecog_big5::match. The removed (-) version returned the
// match_mbcs() confidence directly as int32_t; the added (+) version is a const
// method that hands the confidence to results->set() and returns a UBool that
// is true only when the confidence is greater than zero.
-int32_t CharsetRecog_big5::match(InputText *det)
+UBool CharsetRecog_big5::match(InputText *det, CharsetMatch *results) const
{
#if U_PLATFORM_IS_DARWIN_BASED
// Darwin-based builds pass keyStrings_big5 as an additional argument to match_mbcs().
- return match_mbcs(det, commonChars_big5, ARRAY_SIZE(commonChars_big5), keyStrings_big5);
+ int32_t confidence = match_mbcs(det, commonChars_big5, UPRV_LENGTHOF(commonChars_big5), keyStrings_big5);
#else
// Other platforms call match_mbcs() with only the common-character table and its length.
- return match_mbcs(det, commonChars_big5, ARRAY_SIZE(commonChars_big5));
+ int32_t confidence = match_mbcs(det, commonChars_big5, UPRV_LENGTHOF(commonChars_big5));
#endif
// results->set() is called unconditionally, including when confidence is not positive;
// results is dereferenced without a null check.
+ results->set(det, this, confidence);
+ return (confidence > 0);
}
CharsetRecog_gb_18030::~CharsetRecog_gb_18030()
// nothing to do
}
-UBool CharsetRecog_gb_18030::nextChar(IteratedChar* it, InputText* det) {
+UBool CharsetRecog_gb_18030::nextChar(IteratedChar* it, InputText* det) const {
int32_t firstByte = 0;
int32_t secondByte = 0;
int32_t thirdByte = 0;
return "zh";
}
// Patch hunk for CharsetRecog_gb_18030::match. The removed (-) version returned the
// match_mbcs() confidence directly as int32_t; the added (+) version is a const
// method that hands the confidence to results->set() and returns a UBool that
// is true only when the confidence is greater than zero.
-int32_t CharsetRecog_gb_18030::match(InputText *det)
+UBool CharsetRecog_gb_18030::match(InputText *det, CharsetMatch *results) const
{
#if U_PLATFORM_IS_DARWIN_BASED
// Darwin-based builds pass keyStrings_gb_18030 as an additional argument to match_mbcs().
- return match_mbcs(det, commonChars_gb_18030, ARRAY_SIZE(commonChars_gb_18030), keyStrings_gb_18030);
+ int32_t confidence = match_mbcs(det, commonChars_gb_18030, UPRV_LENGTHOF(commonChars_gb_18030), keyStrings_gb_18030);
#else
// Other platforms call match_mbcs() with only the common-character table and its length.
- return match_mbcs(det, commonChars_gb_18030, ARRAY_SIZE(commonChars_gb_18030));
+ int32_t confidence = match_mbcs(det, commonChars_gb_18030, UPRV_LENGTHOF(commonChars_gb_18030));
#endif
// results->set() is called unconditionally, including when confidence is not positive;
// results is dereferenced without a null check.
+ results->set(det, this, confidence);
+ return (confidence > 0);
}
U_NAMESPACE_END