]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/collationsets.h
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / collationsets.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2013-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * collationsets.h
9 *
10 * created on: 2013feb09
11 * created by: Markus W. Scherer
12 */
13
14 #ifndef __COLLATIONSETS_H__
15 #define __COLLATIONSETS_H__
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_COLLATION
20
21 #include "unicode/uniset.h"
22 #include "collation.h"
23
24 U_NAMESPACE_BEGIN
25
26 struct CollationData;
27
28 /**
29 * Finds the set of characters and strings that sort differently in the tailoring
30 * from the base data.
31 *
32 * Every mapping in the tailoring needs to be compared to the base,
33 * because some mappings are copied for optimization, and
34 * all contractions for a character are copied if any contractions for that character
35 * are added, modified or removed.
36 *
37 * It might be simpler to re-parse the rule string, but:
38 * - That would require duplicating some of the from-rules builder code.
39 * - That would make the runtime code depend on the builder.
40 * - That would only work if we have the rule string, and we allow users to
41 * omit the rule string from data files.
42 */
43 class TailoredSet : public UMemory {
44 public:
45 TailoredSet(UnicodeSet *t)
46 : data(NULL), baseData(NULL),
47 tailored(t),
48 suffix(NULL),
49 errorCode(U_ZERO_ERROR) {}
50
51 void forData(const CollationData *d, UErrorCode &errorCode);
52
53 /**
54 * @return U_SUCCESS(errorCode) in C++, void in Java
55 * @internal only public for access by callback
56 */
57 UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
58
59 private:
60 void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
61 void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
62 void compareContractions(UChar32 c, const UChar *p, const UChar *q);
63
64 void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
65 void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
66 void addContractions(UChar32 c, const UChar *p);
67 void addSuffix(UChar32 c, const UnicodeString &sfx);
68 void add(UChar32 c);
69
70 /** Prefixes are reversed in the data structure. */
71 void setPrefix(const UnicodeString &pfx) {
72 unreversedPrefix = pfx;
73 unreversedPrefix.reverse();
74 }
75 void resetPrefix() {
76 unreversedPrefix.remove();
77 }
78
79 const CollationData *data;
80 const CollationData *baseData;
81 UnicodeSet *tailored;
82 UnicodeString unreversedPrefix;
83 const UnicodeString *suffix;
84 UErrorCode errorCode;
85 };
86
87 class ContractionsAndExpansions : public UMemory {
88 public:
89 class CESink : public UMemory {
90 public:
91 virtual ~CESink();
92 virtual void handleCE(int64_t ce) = 0;
93 virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
94 };
95
96 ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
97 : data(NULL),
98 contractions(con), expansions(exp),
99 sink(s),
100 addPrefixes(prefixes),
101 checkTailored(0),
102 suffix(NULL),
103 errorCode(U_ZERO_ERROR) {}
104
105 void forData(const CollationData *d, UErrorCode &errorCode);
106 void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
107
108 // all following: @internal, only public for access by callback
109
110 void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
111
112 void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
113 void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
114
115 void addExpansions(UChar32 start, UChar32 end);
116 void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
117
118 /** Prefixes are reversed in the data structure. */
119 void setPrefix(const UnicodeString &pfx) {
120 unreversedPrefix = pfx;
121 unreversedPrefix.reverse();
122 }
123 void resetPrefix() {
124 unreversedPrefix.remove();
125 }
126
127 const CollationData *data;
128 UnicodeSet *contractions;
129 UnicodeSet *expansions;
130 CESink *sink;
131 UBool addPrefixes;
132 int8_t checkTailored; // -1: collected tailored +1: exclude tailored
133 UnicodeSet tailored;
134 UnicodeSet ranges;
135 UnicodeString unreversedPrefix;
136 const UnicodeString *suffix;
137 int64_t ces[Collation::MAX_EXPANSION_LENGTH];
138 UErrorCode errorCode;
139 };
140
141 U_NAMESPACE_END
142
143 #endif // !UCONFIG_NO_COLLATION
144 #endif // __COLLATIONSETS_H__