]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/csr2022.h
ICU-62123.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / csr2022.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
73c04bcf
A
3/*
4 **********************************************************************
b331163b 5 * Copyright (C) 2005-2015, International Business Machines
73c04bcf
A
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 */
9
10#ifndef __CSR2022_H
11#define __CSR2022_H
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_CONVERSION
16
17#include "csrecog.h"
18
19U_NAMESPACE_BEGIN
20
51004dcb
A
21class CharsetMatch;
22
73c04bcf
A
23/**
24 * class CharsetRecog_2022 part of the ICU charset detection implementation.
25 * This is a superclass for the individual detectors for
26 * each of the detectable members of the ISO 2022 family
27 * of encodings.
28 *
29 * The separate detector classes are declared below as subclasses of this class.
30 *
31 * @internal
32 */
33class CharsetRecog_2022 : public CharsetRecognizer
34{
35
36public:
/* Pure virtual destructor: this class is abstract and is only used through its subclasses. */
37 virtual ~CharsetRecog_2022() = 0;
38
39protected:
40
41 /**
42 * Matching function shared among the 2022 detectors JP, CN and KR.
43 * Counts up the number of legal and unrecognized escape sequences in
44 * the sample of text, and computes a score based on the total number &
45 * the proportion that fit the encoding.
46 *
47 *
48 * @param text the byte buffer containing text to analyse
49 * @param textLen the size of the text in bytes.
50 * @param escapeSequences the byte escape sequences to test for
   *        (each row is a 5-byte array).
   * @param escapeSequences_length presumably the number of rows in
   *        escapeSequences -- TODO confirm against the implementation.
51 * @return match quality, in the range of 0-100.
52 */
51004dcb
A
53 int32_t match_2022(const uint8_t *text,
54 int32_t textLen,
55 const uint8_t escapeSequences[][5],
56 int32_t escapeSequences_length) const;
73c04bcf
A
57
58};
59
/*
 * Detector for the JP member of the ISO 2022 family. Derives from
 * CharsetRecog_2022, which declares the shared (protected) match_2022() helper.
 */
60class CharsetRecog_2022JP :public CharsetRecog_2022
61{
4388f060
A
62public:
63 virtual ~CharsetRecog_2022JP();
73c04bcf
A
64
/* NOTE(review): presumably returns the charset name reported by this detector; the definition is not in this header. */
65 const char *getName() const;
66
/* NOTE(review): presumably scores textIn and records the outcome in results; the meaning of the UBool return is not visible here -- confirm in the implementation. */
51004dcb 67 UBool match(InputText *textIn, CharsetMatch *results) const;
73c04bcf
A
68};
69
b331163b 70#if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * Detector for the KR member of the ISO 2022 family. Derives from
 * CharsetRecog_2022. Only declared when UCONFIG_ONLY_HTML_CONVERSION is not set
 * (see the enclosing #if).
 */
73c04bcf 71class CharsetRecog_2022KR :public CharsetRecog_2022 {
4388f060
A
72public:
73 virtual ~CharsetRecog_2022KR();
73c04bcf
A
74
/* NOTE(review): presumably returns the charset name reported by this detector; the definition is not in this header. */
75 const char *getName() const;
76
/* NOTE(review): presumably scores textIn and records the outcome in results; the meaning of the UBool return is not visible here -- confirm in the implementation. */
51004dcb 77 UBool match(InputText *textIn, CharsetMatch *results) const;
73c04bcf
A
78
79};
80
/*
 * Detector for the CN member of the ISO 2022 family. Derives from
 * CharsetRecog_2022. Only declared when UCONFIG_ONLY_HTML_CONVERSION is not set
 * (see the enclosing #if).
 */
81class CharsetRecog_2022CN :public CharsetRecog_2022
82{
4388f060
A
83public:
84 virtual ~CharsetRecog_2022CN();
73c04bcf
A
85
/* NOTE(review): presumably returns the charset name reported by this detector; the definition is not in this header. */
86 const char* getName() const;
87
/* NOTE(review): presumably scores textIn and records the outcome in results; the meaning of the UBool return is not visible here -- confirm in the implementation. */
51004dcb 88 UBool match(InputText *textIn, CharsetMatch *results) const;
73c04bcf 89};
b331163b 90#endif
73c04bcf
A
91
92U_NAMESPACE_END
93
94#endif
95#endif /* __CSR2022_H */