]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
57a6839d A |
3 | /* |
4 | ******************************************************************************* | |
5 | * Copyright (C) 2013-2014, International Business Machines | |
6 | * Corporation and others. All Rights Reserved. | |
7 | ******************************************************************************* | |
8 | * collationsets.h | |
9 | * | |
10 | * created on: 2013feb09 | |
11 | * created by: Markus W. Scherer | |
12 | */ | |
13 | ||
14 | #ifndef __COLLATIONSETS_H__ | |
15 | #define __COLLATIONSETS_H__ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_COLLATION | |
20 | ||
21 | #include "unicode/uniset.h" | |
22 | #include "collation.h" | |
23 | ||
24 | U_NAMESPACE_BEGIN | |
25 | ||
26 | struct CollationData; | |
27 | ||
28 | /** | |
29 | * Finds the set of characters and strings that sort differently in the tailoring | |
30 | * from the base data. | |
31 | * | |
32 | * Every mapping in the tailoring needs to be compared to the base, | |
33 | * because some mappings are copied for optimization, and | |
34 | * all contractions for a character are copied if any contractions for that character | |
35 | * are added, modified or removed. | |
36 | * | |
37 | * It might be simpler to re-parse the rule string, but: | |
38 | * - That would require duplicating some of the from-rules builder code. | |
39 | * - That would make the runtime code depend on the builder. | |
40 | * - That would only work if we have the rule string, and we allow users to | |
41 | * omit the rule string from data files. | |
42 | */ | |
43 | class TailoredSet : public UMemory { | |
44 | public: | |
45 | TailoredSet(UnicodeSet *t) | |
46 | : data(NULL), baseData(NULL), | |
47 | tailored(t), | |
48 | suffix(NULL), | |
49 | errorCode(U_ZERO_ERROR) {} | |
50 | ||
51 | void forData(const CollationData *d, UErrorCode &errorCode); | |
52 | ||
53 | /** | |
54 | * @return U_SUCCESS(errorCode) in C++, void in Java | |
55 | * @internal only public for access by callback | |
56 | */ | |
57 | UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); | |
58 | ||
59 | private: | |
60 | void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); | |
61 | void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); | |
62 | void compareContractions(UChar32 c, const UChar *p, const UChar *q); | |
63 | ||
64 | void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); | |
65 | void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32); | |
66 | void addContractions(UChar32 c, const UChar *p); | |
67 | void addSuffix(UChar32 c, const UnicodeString &sfx); | |
68 | void add(UChar32 c); | |
69 | ||
70 | /** Prefixes are reversed in the data structure. */ | |
71 | void setPrefix(const UnicodeString &pfx) { | |
72 | unreversedPrefix = pfx; | |
73 | unreversedPrefix.reverse(); | |
74 | } | |
75 | void resetPrefix() { | |
76 | unreversedPrefix.remove(); | |
77 | } | |
78 | ||
79 | const CollationData *data; | |
80 | const CollationData *baseData; | |
81 | UnicodeSet *tailored; | |
82 | UnicodeString unreversedPrefix; | |
83 | const UnicodeString *suffix; | |
84 | UErrorCode errorCode; | |
85 | }; | |
86 | ||
87 | class ContractionsAndExpansions : public UMemory { | |
88 | public: | |
89 | class CESink : public UMemory { | |
90 | public: | |
91 | virtual ~CESink(); | |
92 | virtual void handleCE(int64_t ce) = 0; | |
93 | virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; | |
94 | }; | |
95 | ||
96 | ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes) | |
97 | : data(NULL), | |
98 | contractions(con), expansions(exp), | |
99 | sink(s), | |
100 | addPrefixes(prefixes), | |
101 | checkTailored(0), | |
102 | suffix(NULL), | |
103 | errorCode(U_ZERO_ERROR) {} | |
104 | ||
105 | void forData(const CollationData *d, UErrorCode &errorCode); | |
106 | void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); | |
107 | ||
108 | // all following: @internal, only public for access by callback | |
109 | ||
110 | void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); | |
111 | ||
112 | void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); | |
113 | void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); | |
114 | ||
115 | void addExpansions(UChar32 start, UChar32 end); | |
116 | void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); | |
117 | ||
118 | /** Prefixes are reversed in the data structure. */ | |
119 | void setPrefix(const UnicodeString &pfx) { | |
120 | unreversedPrefix = pfx; | |
121 | unreversedPrefix.reverse(); | |
122 | } | |
123 | void resetPrefix() { | |
124 | unreversedPrefix.remove(); | |
125 | } | |
126 | ||
127 | const CollationData *data; | |
128 | UnicodeSet *contractions; | |
129 | UnicodeSet *expansions; | |
130 | CESink *sink; | |
131 | UBool addPrefixes; | |
132 | int8_t checkTailored; // -1: collected tailored +1: exclude tailored | |
133 | UnicodeSet tailored; | |
134 | UnicodeSet ranges; | |
135 | UnicodeString unreversedPrefix; | |
136 | const UnicodeString *suffix; | |
137 | int64_t ces[Collation::MAX_EXPANSION_LENGTH]; | |
138 | UErrorCode errorCode; | |
139 | }; | |
140 | ||
141 | U_NAMESPACE_END | |
142 | ||
143 | #endif // !UCONFIG_NO_COLLATION | |
144 | #endif // __COLLATIONSETS_H__ |