]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/collationsets.h
ICU-531.31.tar.gz
[apple/icu.git] / icuSources / i18n / collationsets.h
1 /*
2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationsets.h
7 *
8 * created on: 2013feb09
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __COLLATIONSETS_H__
13 #define __COLLATIONSETS_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_COLLATION
18
19 #include "unicode/uniset.h"
20 #include "collation.h"
21
22 U_NAMESPACE_BEGIN
23
24 struct CollationData;
25
26 /**
27 * Finds the set of characters and strings that sort differently in the tailoring
28 * from the base data.
29 *
30 * Every mapping in the tailoring needs to be compared to the base,
31 * because some mappings are copied for optimization, and
32 * all contractions for a character are copied if any contractions for that character
33 * are added, modified or removed.
34 *
35 * It might be simpler to re-parse the rule string, but:
36 * - That would require duplicating some of the from-rules builder code.
37 * - That would make the runtime code depend on the builder.
38 * - That would only work if we have the rule string, and we allow users to
39 * omit the rule string from data files.
40 */
41 class TailoredSet : public UMemory {
42 public:
43 TailoredSet(UnicodeSet *t)
44 : data(NULL), baseData(NULL),
45 tailored(t),
46 suffix(NULL),
47 errorCode(U_ZERO_ERROR) {}
48
49 void forData(const CollationData *d, UErrorCode &errorCode);
50
51 /**
52 * @return U_SUCCESS(errorCode) in C++, void in Java
53 * @internal only public for access by callback
54 */
55 UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
56
57 private:
58 void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
59 void comparePrefixes(UChar32 c, const UChar *p, const UChar *q);
60 void compareContractions(UChar32 c, const UChar *p, const UChar *q);
61
62 void addPrefixes(const CollationData *d, UChar32 c, const UChar *p);
63 void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
64 void addContractions(UChar32 c, const UChar *p);
65 void addSuffix(UChar32 c, const UnicodeString &sfx);
66 void add(UChar32 c);
67
68 /** Prefixes are reversed in the data structure. */
69 void setPrefix(const UnicodeString &pfx) {
70 unreversedPrefix = pfx;
71 unreversedPrefix.reverse();
72 }
73 void resetPrefix() {
74 unreversedPrefix.remove();
75 }
76
77 const CollationData *data;
78 const CollationData *baseData;
79 UnicodeSet *tailored;
80 UnicodeString unreversedPrefix;
81 const UnicodeString *suffix;
82 UErrorCode errorCode;
83 };
84
85 class ContractionsAndExpansions : public UMemory {
86 public:
87 class CESink : public UMemory {
88 public:
89 virtual ~CESink();
90 virtual void handleCE(int64_t ce) = 0;
91 virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
92 };
93
94 ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
95 : data(NULL),
96 contractions(con), expansions(exp),
97 sink(s),
98 addPrefixes(prefixes),
99 checkTailored(0),
100 suffix(NULL),
101 errorCode(U_ZERO_ERROR) {}
102
103 void forData(const CollationData *d, UErrorCode &errorCode);
104 void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
105
106 // all following: @internal, only public for access by callback
107
108 void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
109
110 void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
111 void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
112
113 void addExpansions(UChar32 start, UChar32 end);
114 void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
115
116 /** Prefixes are reversed in the data structure. */
117 void setPrefix(const UnicodeString &pfx) {
118 unreversedPrefix = pfx;
119 unreversedPrefix.reverse();
120 }
121 void resetPrefix() {
122 unreversedPrefix.remove();
123 }
124
125 const CollationData *data;
126 UnicodeSet *contractions;
127 UnicodeSet *expansions;
128 CESink *sink;
129 UBool addPrefixes;
130 int8_t checkTailored; // -1: collected tailored +1: exclude tailored
131 UnicodeSet tailored;
132 UnicodeSet ranges;
133 UnicodeString unreversedPrefix;
134 const UnicodeString *suffix;
135 int64_t ces[Collation::MAX_EXPANSION_LENGTH];
136 UErrorCode errorCode;
137 };
138
139 U_NAMESPACE_END
140
141 #endif // !UCONFIG_NO_COLLATION
142 #endif // __COLLATIONSETS_H__