]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/collationsettings.h
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / i18n / collationsettings.h
CommitLineData
57a6839d
A
1/*
2*******************************************************************************
3* Copyright (C) 2013-2014, International Business Machines
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* collationsettings.h
7*
8* created on: 2013feb07
9* created by: Markus W. Scherer
10*/
11
12#ifndef __COLLATIONSETTINGS_H__
13#define __COLLATIONSETTINGS_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "unicode/ucol.h"
20#include "collation.h"
21#include "sharedobject.h"
22#include "umutex.h"
23
24U_NAMESPACE_BEGIN
25
26/**
27 * Collation settings/options/attributes.
28 * These are the values that can be changed via API.
29 */
30struct U_I18N_API CollationSettings : public SharedObject {
31 /**
32 * Options bit 0: Perform the FCD check on the input text and deliver normalized text.
33 */
34 static const int32_t CHECK_FCD = 1;
35 /**
36 * Options bit 1: Numeric collation.
37 * Also known as CODAN = COllate Digits As Numbers.
38 *
39 * Treat digit sequences as numbers with CE sequences in numeric order,
40 * rather than returning a normal CE for each digit.
41 */
42 static const int32_t NUMERIC = 2;
43 /**
44 * "Shifted" alternate handling, see ALTERNATE_MASK.
45 */
46 static const int32_t SHIFTED = 4;
47 /**
48 * Options bits 3..2: Alternate-handling mask. 0 for non-ignorable.
49 * Reserve values 8 and 0xc for shift-trimmed and blanked.
50 */
51 static const int32_t ALTERNATE_MASK = 0xc;
52 /**
53 * Options bits 6..4: The 3-bit maxVariable value bit field is shifted by this value.
54 */
55 static const int32_t MAX_VARIABLE_SHIFT = 4;
56 /** maxVariable options bit mask before shifting. */
57 static const int32_t MAX_VARIABLE_MASK = 0x70;
58 /** Options bit 7: Reserved/unused/0. */
59 /**
60 * Options bit 8: Sort uppercase first if caseLevel or caseFirst is on.
61 */
62 static const int32_t UPPER_FIRST = 0x100;
63 /**
64 * Options bit 9: Keep the case bits in the tertiary weight (they trump other tertiary values)
65 * unless case level is on (when they are *moved* into the separate case level).
66 * By default, the case bits are removed from the tertiary weight (ignored).
67 *
68 * When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
69 * the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs. UCOL_UPPER_FIRST.
70 */
71 static const int32_t CASE_FIRST = 0x200;
72 /**
73 * Options bit mask for caseFirst and upperFirst, before shifting.
74 * Same value as caseFirst==upperFirst.
75 */
76 static const int32_t CASE_FIRST_AND_UPPER_MASK = CASE_FIRST | UPPER_FIRST;
77 /**
78 * Options bit 10: Insert the case level between the secondary and tertiary levels.
79 */
80 static const int32_t CASE_LEVEL = 0x400;
81 /**
82 * Options bit 11: Compare secondary weights backwards. ("French secondary")
83 */
84 static const int32_t BACKWARD_SECONDARY = 0x800;
85 /**
86 * Options bits 15..12: The 4-bit strength value bit field is shifted by this value.
87 * It is the top used bit field in the options. (No need to mask after shifting.)
88 */
89 static const int32_t STRENGTH_SHIFT = 12;
90 /** Strength options bit mask before shifting. */
91 static const int32_t STRENGTH_MASK = 0xf000;
92
93 /** maxVariable values */
94 enum MaxVariable {
95 MAX_VAR_SPACE,
96 MAX_VAR_PUNCT,
97 MAX_VAR_SYMBOL,
98 MAX_VAR_CURRENCY
99 };
100
101 CollationSettings()
102 : options((UCOL_DEFAULT_STRENGTH << STRENGTH_SHIFT) |
103 (MAX_VAR_PUNCT << MAX_VARIABLE_SHIFT)),
104 variableTop(0),
105 reorderTable(NULL),
106 reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
107 fastLatinOptions(-1) {}
108
109 CollationSettings(const CollationSettings &other);
110 virtual ~CollationSettings();
111
112 UBool operator==(const CollationSettings &other) const;
113
114 inline UBool operator!=(const CollationSettings &other) const {
115 return !operator==(other);
116 }
117
118 int32_t hashCode() const;
119
120 void resetReordering();
121 void aliasReordering(const int32_t *codes, int32_t length, const uint8_t *table);
122 UBool setReordering(const int32_t *codes, int32_t length, const uint8_t table[256]);
123
124 void setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
125
126 static int32_t getStrength(int32_t options) {
127 return options >> STRENGTH_SHIFT;
128 }
129
130 int32_t getStrength() const {
131 return getStrength(options);
132 }
133
134 /** Sets the options bit for an on/off attribute. */
135 void setFlag(int32_t bit, UColAttributeValue value,
136 int32_t defaultOptions, UErrorCode &errorCode);
137
138 UColAttributeValue getFlag(int32_t bit) const {
139 return ((options & bit) != 0) ? UCOL_ON : UCOL_OFF;
140 }
141
142 void setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode);
143
144 UColAttributeValue getCaseFirst() const {
145 int32_t option = options & CASE_FIRST_AND_UPPER_MASK;
146 return (option == 0) ? UCOL_OFF :
147 (option == CASE_FIRST) ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST;
148 }
149
150 void setAlternateHandling(UColAttributeValue value,
151 int32_t defaultOptions, UErrorCode &errorCode);
152
153 UColAttributeValue getAlternateHandling() const {
154 return ((options & ALTERNATE_MASK) == 0) ? UCOL_NON_IGNORABLE : UCOL_SHIFTED;
155 }
156
157 void setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode);
158
159 MaxVariable getMaxVariable() const {
160 return (MaxVariable)((options & MAX_VARIABLE_MASK) >> MAX_VARIABLE_SHIFT);
161 }
162
163 /**
164 * Include case bits in the tertiary level if caseLevel=off and caseFirst!=off.
165 */
166 static inline UBool isTertiaryWithCaseBits(int32_t options) {
167 return (options & (CASE_LEVEL | CASE_FIRST)) == CASE_FIRST;
168 }
169 static uint32_t getTertiaryMask(int32_t options) {
170 // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
171 return isTertiaryWithCaseBits(options) ?
172 Collation::CASE_AND_TERTIARY_MASK : Collation::ONLY_TERTIARY_MASK;
173 }
174
175 static UBool sortsTertiaryUpperCaseFirst(int32_t options) {
176 // On tertiary level, consider case bits and sort uppercase first
177 // if caseLevel is off and caseFirst==upperFirst.
178 return (options & (CASE_LEVEL | CASE_FIRST_AND_UPPER_MASK)) == CASE_FIRST_AND_UPPER_MASK;
179 }
180
181 inline UBool dontCheckFCD() const {
182 return (options & CHECK_FCD) == 0;
183 }
184
185 inline UBool hasBackwardSecondary() const {
186 return (options & BACKWARD_SECONDARY) != 0;
187 }
188
189 inline UBool isNumeric() const {
190 return (options & NUMERIC) != 0;
191 }
192
193 /** CHECK_FCD etc. */
194 int32_t options;
195 /** Variable-top primary weight. */
196 uint32_t variableTop;
197 /** 256-byte table for reordering permutation of primary lead bytes; NULL if no reordering. */
198 const uint8_t *reorderTable;
199 /** Array of reorder codes; ignored if reorderCodesLength == 0. */
200 const int32_t *reorderCodes;
201 /** Number of reorder codes; 0 if no reordering. */
202 int32_t reorderCodesLength;
203 /**
204 * Capacity of reorderCodes.
205 * If 0, then the table and codes are aliases.
206 * Otherwise, this object owns the memory via the reorderCodes pointer;
207 * the table and the codes are in the same memory block, with the codes first.
208 */
209 int32_t reorderCodesCapacity;
210
211 /** Options for CollationFastLatin. Negative if disabled. */
212 int32_t fastLatinOptions;
213 uint16_t fastLatinPrimaries[0x180];
214};
215
216U_NAMESPACE_END
217
218#endif // !UCONFIG_NO_COLLATION
219#endif // __COLLATIONSETTINGS_H__