]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/csr2022.h
2 **********************************************************************
3 * Copyright (C) 2005-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_CONVERSION
20 * class CharsetRecog_2022 part of the ICU charset detection implementation.
21 * This is a superclass for the individual detectors for
22 * each of the detectable members of the ISO 2022 family
25 * The separate classes are nested within this class.
29 class CharsetRecog_2022
: public CharsetRecognizer
33 virtual ~CharsetRecog_2022() = 0;
38 * Matching function shared among the 2022 detectors JP, CN and KR
39 * Counts up the number of legal and unrecognized escape sequences in
40 * the sample of text, and computes a score based on the total number &
41 * the proportion that fit the encoding.
44 * @param text the byte buffer containing text to analyse
45 * @param textLen the size of the text in bytes.
46 * @param escapeSequences the byte escape sequences to test for.
47 * @return match quality, in the range of 0-100.
49 int32_t match_2022(const uint8_t *text
, int32_t textLen
, const uint8_t escapeSequences
[][5], int32_t escapeSequences_length
);
53 class CharsetRecog_2022JP
:public CharsetRecog_2022
56 virtual ~CharsetRecog_2022JP() {}
58 const char *getName() const;
60 int32_t match(InputText
*textIn
);
63 class CharsetRecog_2022KR
:public CharsetRecog_2022
{
65 virtual ~CharsetRecog_2022KR() {}
67 const char *getName() const;
69 int32_t match(InputText
*textIn
);
73 class CharsetRecog_2022CN
:public CharsetRecog_2022
76 virtual ~CharsetRecog_2022CN() {}
78 const char* getName() const;
80 int32_t match(InputText
*textIn
);
86 #endif /* __CSR2022_H */