]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /* |
2 | ********************************************************************** | |
4388f060 | 3 | * Copyright (C) 2005-2011, International Business Machines |
73c04bcf A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | */ | |
7 | ||
8 | #ifndef __CSR2022_H | |
9 | #define __CSR2022_H | |
10 | ||
11 | #include "unicode/utypes.h" | |
12 | ||
13 | #if !UCONFIG_NO_CONVERSION | |
14 | ||
15 | #include "csrecog.h" | |
16 | ||
17 | U_NAMESPACE_BEGIN | |
18 | ||
19 | /** | |
20 | * class CharsetRecog_2022 part of the ICU charset detection implementation. | |
21 | * This is a superclass for the individual detectors for | |
22 | * each of the detectable members of the ISO 2022 family | |
23 | * of encodings. | |
24 | * | |
25 | * The separate classes are nested within this class. | |
26 | * | |
27 | * @internal | |
28 | */ | |
29 | class CharsetRecog_2022 : public CharsetRecognizer | |
30 | { | |
31 | ||
32 | public: | |
33 | virtual ~CharsetRecog_2022() = 0; | |
34 | ||
35 | protected: | |
36 | ||
37 | /** | |
38 | * Matching function shared among the 2022 detectors JP, CN and KR. | |
39 | * Counts up the number of legal and unrecognized escape sequences in | |
40 | * the sample of text, and computes a score based on the total number & | |
41 | * the proportion that fit the encoding. | |
42 | * | |
43 | * | |
44 | * @param text the byte buffer containing text to analyse | |
45 | * @param textLen the size of the text in bytes. | |
46 | * @param escapeSequences the byte escape sequences to test for. | |
47 | * @return match quality, in the range of 0-100. | |
48 | */ | |
49 | int32_t match_2022(const uint8_t *text, int32_t textLen, const uint8_t escapeSequences[][5], int32_t escapeSequences_length); | |
50 | ||
51 | }; | |
52 | ||
53 | class CharsetRecog_2022JP :public CharsetRecog_2022 | |
54 | { | |
4388f060 A |
55 | public: |
56 | virtual ~CharsetRecog_2022JP(); | |
73c04bcf A |
57 | |
58 | const char *getName() const; | |
59 | ||
60 | int32_t match(InputText *textIn); | |
61 | }; | |
62 | ||
63 | class CharsetRecog_2022KR :public CharsetRecog_2022 { | |
4388f060 A |
64 | public: |
65 | virtual ~CharsetRecog_2022KR(); | |
73c04bcf A |
66 | |
67 | const char *getName() const; | |
68 | ||
69 | int32_t match(InputText *textIn); | |
70 | ||
71 | }; | |
72 | ||
73 | class CharsetRecog_2022CN :public CharsetRecog_2022 | |
74 | { | |
4388f060 A |
75 | public: |
76 | virtual ~CharsetRecog_2022CN(); | |
73c04bcf A |
77 | |
78 | const char* getName() const; | |
79 | ||
80 | int32_t match(InputText *textIn); | |
81 | }; | |
82 | ||
83 | U_NAMESPACE_END | |
84 | ||
85 | #endif | |
86 | #endif /* __CSR2022_H */ |