]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ********************************************************************** | |
5 | * Copyright (C) 2005-2015, International Business Machines | |
6 | * Corporation and others. All Rights Reserved. | |
7 | ********************************************************************** | |
8 | */ | |
9 | ||
10 | #ifndef __CSR2022_H | |
11 | #define __CSR2022_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | ||
15 | #if !UCONFIG_NO_CONVERSION | |
16 | ||
17 | #include "csrecog.h" | |
18 | ||
19 | U_NAMESPACE_BEGIN | |
20 | ||
21 | class CharsetMatch; | |
22 | ||
23 | /** | |
24 | * class CharsetRecog_2022 part of the ICU charset detection implementation. | |
25 | * This is a superclass for the individual detectors for | |
26 | * each of the detectable members of the ISO 2022 family | |
27 | * of encodings. | |
28 | * | |
29 | * The separate classes are nested within this class. | |
30 | * | |
31 | * @internal | |
32 | */ | |
33 | class CharsetRecog_2022 : public CharsetRecognizer | |
34 | { | |
35 | /* The destructor below is pure virtual, so this class is abstract and only its subclasses can be instantiated. */ ||
36 | public: | |
37 | virtual ~CharsetRecog_2022() = 0; | |
38 | ||
39 | protected: | |
40 | ||
41 | /** | |
42 | * Matching function shared among the 2022 detectors JP, CN and KR. | |
43 | * Counts up the number of legal and unrecognized escape sequences in | |
44 | * the sample of text, and computes a score based on the total number & | |
45 | * the proportion that fit the encoding. | |
46 | * | |
47 | * @param text the byte buffer containing text to analyse | |
48 | * @param textLen the size of the text, in bytes. | |
49 | * @param escapeSequences the byte escape sequences to test for; each row holds up to 5 bytes. | |
50 | * @param escapeSequences_length presumably the number of rows in escapeSequences (confirm in the implementation). | |
51 | * @return match quality, in the range of 0-100. | |
52 | */ | |
53 | int32_t match_2022(const uint8_t *text, | |
54 | int32_t textLen, | |
55 | const uint8_t escapeSequences[][5], | |
56 | int32_t escapeSequences_length) const; | |
57 | ||
58 | }; | |
59 | ||
60 | class CharsetRecog_2022JP :public CharsetRecog_2022 /* Detector for the JP member of the ISO 2022 family. */ | |
61 | { | |
62 | public: | |
63 | virtual ~CharsetRecog_2022JP(); | |
64 | /* Name reported by this recognizer; the actual string is defined in the implementation, not in this header. */ ||
65 | const char *getName() const; | |
66 | /* Checks textIn against this encoding, reporting through results. NOTE(review): the meaning of the UBool return and how results is filled are set by the implementation - confirm there. */ ||
67 | UBool match(InputText *textIn, CharsetMatch *results) const; | |
68 | }; | |
69 | ||
70 | #if !UCONFIG_ONLY_HTML_CONVERSION | |
71 | class CharsetRecog_2022KR :public CharsetRecog_2022 { /* Detector for the KR member of the ISO 2022 family; only declared when UCONFIG_ONLY_HTML_CONVERSION is off. */ | |
72 | public: | |
73 | virtual ~CharsetRecog_2022KR(); | |
74 | /* Name reported by this recognizer; the actual string is defined in the implementation, not in this header. */ ||
75 | const char *getName() const; | |
76 | /* Checks textIn against this encoding, reporting through results. NOTE(review): the meaning of the UBool return and how results is filled are set by the implementation - confirm there. */ ||
77 | UBool match(InputText *textIn, CharsetMatch *results) const; | |
78 | ||
79 | }; | |
80 | ||
81 | class CharsetRecog_2022CN :public CharsetRecog_2022 /* Detector for the CN member of the ISO 2022 family; only declared when UCONFIG_ONLY_HTML_CONVERSION is off. */ | |
82 | { | |
83 | public: | |
84 | virtual ~CharsetRecog_2022CN(); | |
85 | /* Name reported by this recognizer; the actual string is defined in the implementation, not in this header. */ ||
86 | const char* getName() const; | |
87 | /* Checks textIn against this encoding, reporting through results. NOTE(review): the meaning of the UBool return and how results is filled are set by the implementation - confirm there. */ ||
88 | UBool match(InputText *textIn, CharsetMatch *results) const; | |
89 | }; | |
90 | #endif | |
91 | ||
92 | U_NAMESPACE_END | |
93 | ||
94 | #endif | |
95 | #endif /* __CSR2022_H */ |