X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..b331163bffd790ced0e88b73f44f86d49ccc48a5:/icuSources/i18n/csrsbcs.h diff --git a/icuSources/i18n/csrsbcs.h b/icuSources/i18n/csrsbcs.h index 456fc4df..935a3e7e 100644 --- a/icuSources/i18n/csrsbcs.h +++ b/icuSources/i18n/csrsbcs.h @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2006, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -19,17 +19,21 @@ U_NAMESPACE_BEGIN class NGramParser : public UMemory { private: - int32_t byteIndex; int32_t ngram; - - const int32_t *ngramList; - const uint8_t *charMap; + const int32_t *ngramList; int32_t ngramCount; int32_t hitCount; +protected: + int32_t byteIndex; + const uint8_t *charMap; + + void addByte(int32_t b); + public: NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap); + virtual ~NGramParser(); private: /* @@ -38,52 +42,60 @@ private: int32_t search(const int32_t *table, int32_t value); void lookup(int32_t thisNgram); - void addByte(int32_t b); - int32_t nextByte(InputText *det); + + virtual int32_t nextByte(InputText *det); + virtual void parseCharacters(InputText *det); public: int32_t parse(InputText *det); }; -class CharsetRecog_sbcs : public CharsetRecognizer +#if !UCONFIG_ONLY_HTML_CONVERSION +class NGramParser_IBM420 : public NGramParser { -protected: - UBool haveC1Bytes; +private: + int32_t alef; + int32_t isLamAlef(int32_t b); + int32_t nextByte(InputText *det); + void parseCharacters(InputText *det); public: - CharsetRecog_sbcs(); + NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); +}; +#endif - virtual ~CharsetRecog_sbcs(); +class CharsetRecog_sbcs : public CharsetRecognizer +{ +public: + CharsetRecog_sbcs(); + virtual ~CharsetRecog_sbcs(); virtual const char *getName() const = 0; - - virtual int32_t match(InputText *det) = 0; - - int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]); + virtual UBool match(InputText *det, CharsetMatch *results) const = 0; + virtual int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; }; class CharsetRecog_8859_1 : public CharsetRecog_sbcs { public: virtual ~CharsetRecog_8859_1(); - const char *getName() const; + virtual UBool match(InputText *det, CharsetMatch *results) const; }; class CharsetRecog_8859_2 : public CharsetRecog_sbcs { public: virtual ~CharsetRecog_8859_2(); - const char *getName() const; + virtual UBool match(InputText *det, CharsetMatch *results) const; }; class CharsetRecog_8859_5 : public CharsetRecog_sbcs { public: virtual ~CharsetRecog_8859_5(); - const char *getName() const; }; @@ -119,246 +131,162 @@ public: const char *getName() const; }; -class CharsetRecog_8859_1_en : public CharsetRecog_8859_1 -{ -public: - virtual ~CharsetRecog_8859_1_en(); - - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; - -class CharsetRecog_8859_1_da : public CharsetRecog_8859_1 -{ -public: - virtual ~CharsetRecog_8859_1_da(); - - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; - -class CharsetRecog_8859_1_de : public CharsetRecog_8859_1 -{ -public: - virtual ~CharsetRecog_8859_1_de(); - const char *getLanguage() const; - int32_t match(InputText *textIn); -}; - -class CharsetRecog_8859_1_es : public CharsetRecog_8859_1 +class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 { public: - virtual ~CharsetRecog_8859_1_es(); + virtual ~CharsetRecog_8859_5_ru(); const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_1_fr : public CharsetRecog_8859_1 +class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 { public: - virtual ~CharsetRecog_8859_1_fr(); + virtual ~CharsetRecog_8859_6_ar(); const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_1_it : public CharsetRecog_8859_1 +class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 { public: - virtual ~CharsetRecog_8859_1_it(); + virtual ~CharsetRecog_8859_7_el(); const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_1_nl : public CharsetRecog_8859_1 +class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 { public: - virtual ~CharsetRecog_8859_1_nl(); + virtual ~CharsetRecog_8859_8_I_he(); + + const char *getName() const; const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_1_no : public CharsetRecog_8859_1 +class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 { public: - virtual ~CharsetRecog_8859_1_no(); + virtual ~CharsetRecog_8859_8_he (); const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_1_pt : public CharsetRecog_8859_1 +class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 { public: - virtual ~CharsetRecog_8859_1_pt(); + virtual ~CharsetRecog_8859_9_tr (); const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_1_sv : public CharsetRecog_8859_1 +class CharsetRecog_windows_1256 : public CharsetRecog_sbcs { public: - virtual ~CharsetRecog_8859_1_sv(); - - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; + virtual ~CharsetRecog_windows_1256(); -class CharsetRecog_8859_2_cs : public CharsetRecog_8859_2 -{ -public: - virtual ~CharsetRecog_8859_2_cs(); + const char *getName() const; const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_2_hu : public CharsetRecog_8859_2 +class CharsetRecog_windows_1251 : public CharsetRecog_sbcs { public: - virtual ~CharsetRecog_8859_2_hu(); - - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; + virtual ~CharsetRecog_windows_1251(); -class CharsetRecog_8859_2_pl : public CharsetRecog_8859_2 -{ -public: - virtual ~CharsetRecog_8859_2_pl(); + const char *getName() const; const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_2_ro : public CharsetRecog_8859_2 -{ -public: - virtual ~CharsetRecog_8859_2_ro(); - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; - -class CharsetRecog_8859_5_ru : public CharsetRecog_8859_5 +class CharsetRecog_KOI8_R : public CharsetRecog_sbcs { public: - virtual ~CharsetRecog_8859_5_ru(); - - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; + virtual ~CharsetRecog_KOI8_R(); -class CharsetRecog_8859_6_ar : public CharsetRecog_8859_6 -{ -public: - virtual ~CharsetRecog_8859_6_ar(); + const char *getName() const; const char *getLanguage() const; - int32_t match(InputText *textIn); + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_7_el : public CharsetRecog_8859_7 +#if !UCONFIG_ONLY_HTML_CONVERSION +class CharsetRecog_IBM424_he : public CharsetRecog_sbcs { public: - virtual ~CharsetRecog_8859_7_el(); + virtual ~CharsetRecog_IBM424_he(); const char *getLanguage() const; - - int32_t match(InputText *textIn); }; -class CharsetRecog_8859_8_I_he : public CharsetRecog_8859_8 -{ +class CharsetRecog_IBM424_he_rtl : public CharsetRecog_IBM424_he { public: - virtual ~CharsetRecog_8859_8_I_he(); - + virtual ~CharsetRecog_IBM424_he_rtl(); + const char *getName() const; - - const char *getLanguage() const; - - int32_t match(InputText *textIn); + + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_8_he : public CharsetRecog_8859_8 -{ -public: - virtual ~CharsetRecog_8859_8_he (); - - const char *getLanguage() const; - - int32_t match(InputText *textIn); +class CharsetRecog_IBM424_he_ltr : public CharsetRecog_IBM424_he { + virtual ~CharsetRecog_IBM424_he_ltr(); + + const char *getName() const; + + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_8859_9_tr : public CharsetRecog_8859_9 +class CharsetRecog_IBM420_ar : public CharsetRecog_sbcs { public: - virtual ~CharsetRecog_8859_9_tr (); + virtual ~CharsetRecog_IBM420_ar(); const char *getLanguage() const; - - int32_t match(InputText *textIn); + int32_t match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t charMap[]) const; + }; -class CharsetRecog_windows_1256 : public CharsetRecog_sbcs -{ +class CharsetRecog_IBM420_ar_rtl : public CharsetRecog_IBM420_ar { public: - virtual ~CharsetRecog_windows_1256(); - + virtual ~CharsetRecog_IBM420_ar_rtl(); + const char *getName() const; - - const char *getLanguage() const; - - int32_t match(InputText *textIn); + + virtual UBool match(InputText *det, CharsetMatch *results) const; }; -class CharsetRecog_windows_1251 : public CharsetRecog_sbcs -{ -public: - virtual ~CharsetRecog_windows_1251(); - +class CharsetRecog_IBM420_ar_ltr : public CharsetRecog_IBM420_ar { + virtual ~CharsetRecog_IBM420_ar_ltr(); + const char *getName() const; - - const char *getLanguage() const; - - int32_t match(InputText *textIn); -}; - - -class CharsetRecog_KOI8_R : public CharsetRecog_sbcs -{ -public: - virtual ~CharsetRecog_KOI8_R(); - - const char *getName() const; - - const char *getLanguage() const; - - int32_t match(InputText *textIn); + + virtual UBool match(InputText *det, CharsetMatch *results) const; }; +#endif U_NAMESPACE_END -#endif +#endif /* !UCONFIG_NO_CONVERSION */ #endif /* __CSRSBCS_H */