/*
**********************************************************************
- * Copyright (C) 2005-2006, International Business Machines
+ * Copyright (C) 2005-2012, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
class IteratedChar : public UMemory
{
public:
- int32_t charValue; // 1-4 bytes from the raw input data
- int32_t index;
- int32_t nextIndex;
- UBool error;
- UBool done;
+ uint32_t charValue; // 1-4 bytes from the raw input data (type changed from int32_t to uint32_t in this patch)
+ int32_t index; // NOTE(review): presumably the input offset where the current character starts -- confirm against nextChar()
+ int32_t nextIndex; // NOTE(review): presumably the input offset of the next unread byte -- confirm against nextByte()
+ UBool error; // NOTE(review): presumably set when the bytes read are not valid for the encoding -- confirm
+ UBool done; // NOTE(review): presumably set once the input is exhausted -- confirm
public:
IteratedChar();
- void reset();
+ //void reset(); // declaration commented out by this patch
int32_t nextByte(InputText* det);
};
+#if U_PLATFORM_IS_DARWIN_BASED
+#define MAX_KEY_STRING_WITH_NULL 16 // row width of the keyStrings table taken by the Darwin-only match_mbcs overload; NOTE(review): name suggests the count includes the terminating NUL -- confirm
+#endif
class CharsetRecog_mbcs : public CharsetRecognizer {
* <br/>
* bits 8-15: The match reason, an enum-like value.
*/
- int32_t match_mbcs(InputText* det, const int32_t commonChars[], int32_t commonCharsLen);
+#if U_PLATFORM_IS_DARWIN_BASED
+ int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen, const uint8_t (*keyStrings)[MAX_KEY_STRING_WITH_NULL] ) const;
+#else
+ int32_t match_mbcs(InputText* det, const uint16_t commonChars[], int32_t commonCharsLen) const;
+#endif
public:
const char *getName() const = 0;
const char *getLanguage() const = 0;
- int32_t match(InputText* det) = 0;
+ UBool match(InputText* input, CharsetMatch *results) const = 0;
/**
* Get the next character (however many bytes it is) from the input data
* being iterated over.
* @return True if a character was returned, false at end of input.
*/
- virtual UBool nextChar(IteratedChar *it, InputText *textIn) = 0;
+ virtual UBool nextChar(IteratedChar *it, InputText *textIn) const = 0;
};
public:
virtual ~CharsetRecog_sjis();
- UBool nextChar(IteratedChar *it, InputText *det);
+ UBool nextChar(IteratedChar *it, InputText *det) const;
- int32_t match(InputText *det);
+ UBool match(InputText* input, CharsetMatch *results) const;
const char *getName() const;
const char *getLanguage() const;
const char *getName() const = 0;
const char *getLanguage() const = 0;
- int32_t match(InputText* det) = 0;
+ UBool match(InputText* input, CharsetMatch *results) const = 0;
/*
* (non-Javadoc)
* Get the next character value for EUC based encodings.
* Character "value" is simply the raw bytes that make up the character
* packed into an int.
*/
- UBool nextChar(IteratedChar *it, InputText *det);
+ UBool nextChar(IteratedChar *it, InputText *det) const;
};
/**
const char *getName() const;
const char *getLanguage() const;
- int32_t match(InputText *det);
+ UBool match(InputText* input, CharsetMatch *results) const;
};
/**
const char *getName() const;
const char *getLanguage() const;
- int32_t match(InputText *det);
+ UBool match(InputText* input, CharsetMatch *results) const;
};
/**
public:
virtual ~CharsetRecog_big5();
- UBool nextChar(IteratedChar* it, InputText* det);
+ UBool nextChar(IteratedChar* it, InputText* det) const;
const char *getName() const;
const char *getLanguage() const;
- int32_t match(InputText *det);
+ UBool match(InputText* input, CharsetMatch *results) const;
};
public:
virtual ~CharsetRecog_gb_18030();
- UBool nextChar(IteratedChar* it, InputText* det);
+ UBool nextChar(IteratedChar* it, InputText* det) const;
const char *getName() const;
const char *getLanguage() const;
- int32_t match(InputText *det);
+ UBool match(InputText* input, CharsetMatch *results) const;
};
U_NAMESPACE_END