// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationsets.h
*
* created on: 2013feb09
* created by: Markus W. Scherer
*/
14 #ifndef __COLLATIONSETS_H__
15 #define __COLLATIONSETS_H__
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION
21 #include "unicode/uniset.h"
22 #include "collation.h"
29 * Finds the set of characters and strings that sort differently in the tailoring
32 * Every mapping in the tailoring needs to be compared to the base,
33 * because some mappings are copied for optimization, and
34 * all contractions for a character are copied if any contractions for that character
35 * are added, modified or removed.
37 * It might be simpler to re-parse the rule string, but:
38 * - That would require duplicating some of the from-rules builder code.
39 * - That would make the runtime code depend on the builder.
40 * - That would only work if we have the rule string, and we allow users to
41 * omit the rule string from data files.
43 class TailoredSet
: public UMemory
{
45 TailoredSet(UnicodeSet
*t
)
46 : data(NULL
), baseData(NULL
),
49 errorCode(U_ZERO_ERROR
) {}
51 void forData(const CollationData
*d
, UErrorCode
&errorCode
);
54 * @return U_SUCCESS(errorCode) in C++, void in Java
55 * @internal only public for access by callback
57 UBool
handleCE32(UChar32 start
, UChar32 end
, uint32_t ce32
);
60 void compare(UChar32 c
, uint32_t ce32
, uint32_t baseCE32
);
61 void comparePrefixes(UChar32 c
, const UChar
*p
, const UChar
*q
);
62 void compareContractions(UChar32 c
, const UChar
*p
, const UChar
*q
);
64 void addPrefixes(const CollationData
*d
, UChar32 c
, const UChar
*p
);
65 void addPrefix(const CollationData
*d
, const UnicodeString
&pfx
, UChar32 c
, uint32_t ce32
);
66 void addContractions(UChar32 c
, const UChar
*p
);
67 void addSuffix(UChar32 c
, const UnicodeString
&sfx
);
70 /** Prefixes are reversed in the data structure. */
71 void setPrefix(const UnicodeString
&pfx
) {
72 unreversedPrefix
= pfx
;
73 unreversedPrefix
.reverse();
76 unreversedPrefix
.remove();
79 const CollationData
*data
;
80 const CollationData
*baseData
;
82 UnicodeString unreversedPrefix
;
83 const UnicodeString
*suffix
;
87 class ContractionsAndExpansions
: public UMemory
{
89 class CESink
: public UMemory
{
92 virtual void handleCE(int64_t ce
) = 0;
93 virtual void handleExpansion(const int64_t ces
[], int32_t length
) = 0;
96 ContractionsAndExpansions(UnicodeSet
*con
, UnicodeSet
*exp
, CESink
*s
, UBool prefixes
)
98 contractions(con
), expansions(exp
),
100 addPrefixes(prefixes
),
103 errorCode(U_ZERO_ERROR
) {}
105 void forData(const CollationData
*d
, UErrorCode
&errorCode
);
106 void forCodePoint(const CollationData
*d
, UChar32 c
, UErrorCode
&ec
);
108 // all following: @internal, only public for access by callback
110 void handleCE32(UChar32 start
, UChar32 end
, uint32_t ce32
);
112 void handlePrefixes(UChar32 start
, UChar32 end
, uint32_t ce32
);
113 void handleContractions(UChar32 start
, UChar32 end
, uint32_t ce32
);
115 void addExpansions(UChar32 start
, UChar32 end
);
116 void addStrings(UChar32 start
, UChar32 end
, UnicodeSet
*set
);
118 /** Prefixes are reversed in the data structure. */
119 void setPrefix(const UnicodeString
&pfx
) {
120 unreversedPrefix
= pfx
;
121 unreversedPrefix
.reverse();
124 unreversedPrefix
.remove();
127 const CollationData
*data
;
128 UnicodeSet
*contractions
;
129 UnicodeSet
*expansions
;
132 int8_t checkTailored
; // -1: collected tailored +1: exclude tailored
135 UnicodeString unreversedPrefix
;
136 const UnicodeString
*suffix
;
137 int64_t ces
[Collation::MAX_EXPANSION_LENGTH
];
138 UErrorCode errorCode
;
143 #endif // !UCONFIG_NO_COLLATION
144 #endif // __COLLATIONSETS_H__