]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csr2022.h
2 **********************************************************************
3 * Copyright (C) 2005-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_CONVERSION
22 * class CharsetRecog_2022 part of the ICU charset detection implementation.
23 * This is a superclass for the individual detectors for
24 * each of the detectable members of the ISO 2022 family
27 * The separate classes are nested within this class.
31 class CharsetRecog_2022
: public CharsetRecognizer
// The destructor is declared pure virtual (= 0), which makes this class
// abstract: it cannot be instantiated directly, only through a derived class.
35 virtual ~CharsetRecog_2022() = 0;
40 * Matching function shared among the 2022 detectors JP, CN and KR
41 * Counts up the number of legal and unrecognized escape sequences in
42 * the sample of text, and computes a score based on the total number &
43 * the proportion that fit the encoding.
46 * @param text the byte buffer containing text to analyse
47 * @param textLen the size of the text in bytes.
48 * @param escapeSequences the byte escape sequences to test for.
49 * @return match quality, in the range of 0-100.
/*
 * Declaration of the shared matching helper. It is a const member function,
 * so it does not modify the recognizer object it is called on.
 *
 * escapeSequences is a table whose rows are fixed at 5 bytes each ([][5]);
 * escapeSequences_length is presumably the number of rows in that table --
 * TODO confirm against the implementation.
 *
 * NOTE(review): the parameter documentation above describes a textLen
 * parameter, but no textLen parameter is visible in this declaration as
 * captured here (the embedded numbering jumps from 51 to 53, so a line
 * appears to be missing) -- confirm against the upstream header.
 */
51 int32_t match_2022(const uint8_t *text
,
53 const uint8_t escapeSequences
[][5],
54 int32_t escapeSequences_length
) const;
/*
 * Recognizer class for the "JP" member of the ISO 2022 family
 * (presumably ISO-2022-JP -- confirm against getName() in the implementation).
 * Publicly derives from CharsetRecog_2022.
 *
 * Members declared here:
 *   - a virtual destructor;
 *   - getName(): returns a C string (const char *) and does not modify the object;
 *   - match(): takes an InputText pointer (textIn) and a CharsetMatch pointer
 *     (results) and returns a UBool. Presumably textIn is the sample to examine
 *     and results receives the outcome -- TODO confirm against the implementation.
 */
58 class CharsetRecog_2022JP
:public CharsetRecog_2022
61 virtual ~CharsetRecog_2022JP();
63 const char *getName() const;
65 UBool
match(InputText
*textIn
, CharsetMatch
*results
) const;
/*
 * Recognizer class for the "KR" member of the ISO 2022 family
 * (presumably ISO-2022-KR -- confirm against getName() in the implementation).
 * Publicly derives from CharsetRecog_2022.
 *
 * Members declared here:
 *   - a virtual destructor;
 *   - getName(): returns a C string (const char *) and does not modify the object;
 *   - match(): takes an InputText pointer (textIn) and a CharsetMatch pointer
 *     (results) and returns a UBool. Presumably textIn is the sample to examine
 *     and results receives the outcome -- TODO confirm against the implementation.
 */
68 class CharsetRecog_2022KR
:public CharsetRecog_2022
{
70 virtual ~CharsetRecog_2022KR();
72 const char *getName() const;
74 UBool
match(InputText
*textIn
, CharsetMatch
*results
) const;
/*
 * Recognizer class for the "CN" member of the ISO 2022 family
 * (presumably ISO-2022-CN -- confirm against getName() in the implementation).
 * Publicly derives from CharsetRecog_2022.
 *
 * Members declared here:
 *   - a virtual destructor;
 *   - getName(): returns a C string (const char *) and does not modify the object;
 *   - match(): takes an InputText pointer (textIn) and a CharsetMatch pointer
 *     (results) and returns a UBool. Presumably textIn is the sample to examine
 *     and results receives the outcome -- TODO confirm against the implementation.
 */
78 class CharsetRecog_2022CN
:public CharsetRecog_2022
81 virtual ~CharsetRecog_2022CN();
83 const char* getName() const;
85 UBool
match(InputText
*textIn
, CharsetMatch
*results
) const;
91 #endif /* __CSR2022_H */