2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * created on: 2013feb09
9 * created by: Markus W. Scherer
12 #ifndef __COLLATIONSETS_H__
13 #define __COLLATIONSETS_H__
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_COLLATION
19 #include "unicode/uniset.h"
20 #include "collation.h"
27 * Finds the set of characters and strings that sort differently in the tailoring
30 * Every mapping in the tailoring needs to be compared to the base,
31 * because some mappings are copied for optimization, and
32 * all contractions for a character are copied if any contractions for that character
33 * are added, modified or removed.
35 * It might be simpler to re-parse the rule string, but:
36 * - That would require duplicating some of the from-rules builder code.
37 * - That would make the runtime code depend on the builder.
38 * - That would only work if we have the rule string, and we allow users to
39 * omit the rule string from data files.
41 class TailoredSet
: public UMemory
{
43 TailoredSet(UnicodeSet
*t
)
44 : data(NULL
), baseData(NULL
),
47 errorCode(U_ZERO_ERROR
) {}
49 void forData(const CollationData
*d
, UErrorCode
&errorCode
);
52 * @return U_SUCCESS(errorCode) in C++, void in Java
53 * @internal only public for access by callback
55 UBool
handleCE32(UChar32 start
, UChar32 end
, uint32_t ce32
);
58 void compare(UChar32 c
, uint32_t ce32
, uint32_t baseCE32
);
59 void comparePrefixes(UChar32 c
, const UChar
*p
, const UChar
*q
);
60 void compareContractions(UChar32 c
, const UChar
*p
, const UChar
*q
);
62 void addPrefixes(const CollationData
*d
, UChar32 c
, const UChar
*p
);
63 void addPrefix(const CollationData
*d
, const UnicodeString
&pfx
, UChar32 c
, uint32_t ce32
);
64 void addContractions(UChar32 c
, const UChar
*p
);
65 void addSuffix(UChar32 c
, const UnicodeString
&sfx
);
68 /** Prefixes are reversed in the data structure. */
69 void setPrefix(const UnicodeString
&pfx
) {
70 unreversedPrefix
= pfx
;
71 unreversedPrefix
.reverse();
74 unreversedPrefix
.remove();
77 const CollationData
*data
;
78 const CollationData
*baseData
;
80 UnicodeString unreversedPrefix
;
81 const UnicodeString
*suffix
;
85 class ContractionsAndExpansions
: public UMemory
{
87 class CESink
: public UMemory
{
90 virtual void handleCE(int64_t ce
) = 0;
91 virtual void handleExpansion(const int64_t ces
[], int32_t length
) = 0;
94 ContractionsAndExpansions(UnicodeSet
*con
, UnicodeSet
*exp
, CESink
*s
, UBool prefixes
)
96 contractions(con
), expansions(exp
),
98 addPrefixes(prefixes
),
101 errorCode(U_ZERO_ERROR
) {}
103 void forData(const CollationData
*d
, UErrorCode
&errorCode
);
104 void forCodePoint(const CollationData
*d
, UChar32 c
, UErrorCode
&ec
);
106 // all following: @internal, only public for access by callback
108 void handleCE32(UChar32 start
, UChar32 end
, uint32_t ce32
);
110 void handlePrefixes(UChar32 start
, UChar32 end
, uint32_t ce32
);
111 void handleContractions(UChar32 start
, UChar32 end
, uint32_t ce32
);
113 void addExpansions(UChar32 start
, UChar32 end
);
114 void addStrings(UChar32 start
, UChar32 end
, UnicodeSet
*set
);
116 /** Prefixes are reversed in the data structure. */
117 void setPrefix(const UnicodeString
&pfx
) {
118 unreversedPrefix
= pfx
;
119 unreversedPrefix
.reverse();
122 unreversedPrefix
.remove();
125 const CollationData
*data
;
126 UnicodeSet
*contractions
;
127 UnicodeSet
*expansions
;
130 int8_t checkTailored
; // -1: collected tailored +1: exclude tailored
133 UnicodeString unreversedPrefix
;
134 const UnicodeString
*suffix
;
135 int64_t ces
[Collation::MAX_EXPANSION_LENGTH
];
136 UErrorCode errorCode
;
141 #endif // !UCONFIG_NO_COLLATION
142 #endif // __COLLATIONSETS_H__