]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2013-2014, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * collationsets.h | |
7 | * | |
8 | * created on: 2013feb09 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #ifndef __COLLATIONSETS_H__ | |
13 | #define __COLLATIONSETS_H__ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_COLLATION | |
18 | ||
19 | #include "unicode/uniset.h" | |
20 | #include "collation.h" | |
21 | ||
22 | U_NAMESPACE_BEGIN | |
23 | ||
24 | struct CollationData; | |
25 | ||
26 | /** | |
27 | * Finds the set of characters and strings that sort differently in the tailoring | |
28 | * from the base data. | |
29 | * | |
30 | * Every mapping in the tailoring needs to be compared to the base, | |
31 | * because some mappings are copied for optimization, and | |
32 | * all contractions for a character are copied if any contractions for that character | |
33 | * are added, modified or removed. | |
34 | * | |
35 | * It might be simpler to re-parse the rule string, but: | |
36 | * - That would require duplicating some of the from-rules builder code. | |
37 | * - That would make the runtime code depend on the builder. | |
38 | * - That would only work if we have the rule string, and we allow users to | |
39 | * omit the rule string from data files. | |
40 | */ | |
41 | class TailoredSet : public UMemory { | |
42 | public: | |
43 | TailoredSet(UnicodeSet *t) | |
44 | : data(NULL), baseData(NULL), | |
45 | tailored(t), | |
46 | suffix(NULL), | |
47 | errorCode(U_ZERO_ERROR) {} | |
48 | ||
49 | void forData(const CollationData *d, UErrorCode &errorCode); | |
50 | ||
51 | /** | |
52 | * @return U_SUCCESS(errorCode) in C++, void in Java | |
53 | * @internal only public for access by callback | |
54 | */ | |
55 | UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); | |
56 | ||
57 | private: | |
58 | void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); | |
59 | void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); | |
60 | void compareContractions(UChar32 c, const UChar *p, const UChar *q); | |
61 | ||
62 | void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); | |
63 | void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); | |
64 | void addContractions(UChar32 c, const UChar *p); | |
65 | void addSuffix(UChar32 c, const UnicodeString &sfx); | |
66 | void add(UChar32 c); | |
67 | ||
68 | /** Prefixes are reversed in the data structure. */ | |
69 | void setPrefix(const UnicodeString &pfx) { | |
70 | unreversedPrefix = pfx; | |
71 | unreversedPrefix.reverse(); | |
72 | } | |
73 | void resetPrefix() { | |
74 | unreversedPrefix.remove(); | |
75 | } | |
76 | ||
77 | const CollationData *data; | |
78 | const CollationData *baseData; | |
79 | UnicodeSet *tailored; | |
80 | UnicodeString unreversedPrefix; | |
81 | const UnicodeString *suffix; | |
82 | UErrorCode errorCode; | |
83 | }; | |
84 | ||
85 | class ContractionsAndExpansions : public UMemory { | |
86 | public: | |
87 | class CESink : public UMemory { | |
88 | public: | |
89 | virtual ~CESink(); | |
90 | virtual void handleCE(int64_t ce) = 0; | |
91 | virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; | |
92 | }; | |
93 | ||
94 | ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) | |
95 | : data(NULL), | |
96 | contractions(con), expansions(exp), | |
97 | sink(s), | |
98 | addPrefixes(prefixes), | |
99 | checkTailored(0), | |
100 | suffix(NULL), | |
101 | errorCode(U_ZERO_ERROR) {} | |
102 | ||
103 | void forData(const CollationData *d, UErrorCode &errorCode); | |
104 | void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); | |
105 | ||
106 | // all following: @internal, only public for access by callback | |
107 | ||
108 | void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); | |
109 | ||
110 | void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); | |
111 | void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); | |
112 | ||
113 | void addExpansions(UChar32 start, UChar32 end); | |
114 | void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); | |
115 | ||
116 | /** Prefixes are reversed in the data structure. */ | |
117 | void setPrefix(const UnicodeString &pfx) { | |
118 | unreversedPrefix = pfx; | |
119 | unreversedPrefix.reverse(); | |
120 | } | |
121 | void resetPrefix() { | |
122 | unreversedPrefix.remove(); | |
123 | } | |
124 | ||
125 | const CollationData *data; | |
126 | UnicodeSet *contractions; | |
127 | UnicodeSet *expansions; | |
128 | CESink *sink; | |
129 | UBool addPrefixes; | |
130 | int8_t checkTailored; // -1: collected tailored +1: exclude tailored | |
131 | UnicodeSet tailored; | |
132 | UnicodeSet ranges; | |
133 | UnicodeString unreversedPrefix; | |
134 | const UnicodeString *suffix; | |
135 | int64_t ces[Collation::MAX_EXPANSION_LENGTH]; | |
136 | UErrorCode errorCode; | |
137 | }; | |
138 | ||
139 | U_NAMESPACE_END | |
140 | ||
141 | #endif // !UCONFIG_NO_COLLATION | |
142 | #endif // __COLLATIONSETS_H__ |