]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/collationkeys.h
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / collationkeys.h
1 /*
2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationkeys.h
7 *
8 * created on: 2012sep02
9 * created by: Markus W. Scherer
10 */
11
12 #ifndef __COLLATIONKEYS_H__
13 #define __COLLATIONKEYS_H__
14
15 #include "unicode/utypes.h"
16
17 #if !UCONFIG_NO_COLLATION
18
19 #include "unicode/bytestream.h"
20 #include "unicode/ucol.h"
21 #include "charstr.h"
22 #include "collation.h"
23
24 U_NAMESPACE_BEGIN
25
26 class CollationIterator;
27 struct CollationDataReader;
28 struct CollationSettings;
29
30 class SortKeyByteSink : public ByteSink {
31 public:
32 SortKeyByteSink(char *dest, int32_t destCapacity)
33 : buffer_(dest), capacity_(destCapacity),
34 appended_(0), ignore_(0) {}
35 virtual ~SortKeyByteSink();
36
37 void IgnoreBytes(int32_t numIgnore) { ignore_ = numIgnore; }
38
39 virtual void Append(const char *bytes, int32_t n);
40 void Append(uint32_t b) {
41 if (ignore_ > 0) {
42 --ignore_;
43 } else {
44 if (appended_ < capacity_ || Resize(1, appended_)) {
45 buffer_[appended_] = (char)b;
46 }
47 ++appended_;
48 }
49 }
50 virtual char *GetAppendBuffer(int32_t min_capacity,
51 int32_t desired_capacity_hint,
52 char *scratch, int32_t scratch_capacity,
53 int32_t *result_capacity);
54 int32_t NumberOfBytesAppended() const { return appended_; }
55
56 /**
57 * @return how many bytes can be appended (including ignored ones)
58 * without reallocation
59 */
60 int32_t GetRemainingCapacity() const {
61 // Either ignore_ or appended_ should be 0.
62 return ignore_ + capacity_ - appended_;
63 }
64
65 UBool Overflowed() const { return appended_ > capacity_; }
66 /** @return FALSE if memory allocation failed */
67 UBool IsOk() const { return buffer_ != NULL; }
68
69 protected:
70 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) = 0;
71 virtual UBool Resize(int32_t appendCapacity, int32_t length) = 0;
72
73 void SetNotOk() {
74 buffer_ = NULL;
75 capacity_ = 0;
76 }
77
78 char *buffer_;
79 int32_t capacity_;
80 int32_t appended_;
81 int32_t ignore_;
82
83 private:
84 SortKeyByteSink(const SortKeyByteSink &); // copy constructor not implemented
85 SortKeyByteSink &operator=(const SortKeyByteSink &); // assignment operator not implemented
86 };
87
88 class U_I18N_API CollationKeys /* not : public UObject because all methods are static */ {
89 public:
90 class LevelCallback : public UMemory {
91 public:
92 virtual ~LevelCallback();
93 /**
94 * @param level The next level about to be written to the ByteSink.
95 * @return TRUE if the level is to be written
96 * (the base class implementation always returns TRUE)
97 */
98 virtual UBool needToWrite(Collation::Level level);
99 };
100
101 /**
102 * Writes the sort key bytes for minLevel up to the iterator data's strength.
103 * Optionally writes the case level.
104 * Stops writing levels when callback.needToWrite(level) returns FALSE.
105 * Separates levels with the LEVEL_SEPARATOR_BYTE
106 * but does not write a TERMINATOR_BYTE.
107 */
108 static void writeSortKeyUpToQuaternary(CollationIterator &iter,
109 const UBool *compressibleBytes,
110 const CollationSettings &settings,
111 SortKeyByteSink &sink,
112 Collation::Level minLevel, LevelCallback &callback,
113 UBool preflight, UErrorCode &errorCode);
114 private:
115 friend struct CollationDataReader;
116
117 CollationKeys(); // no instantiation
118
119 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
120 static const uint32_t SEC_COMMON_LOW = Collation::COMMON_BYTE;
121 static const uint32_t SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
122 static const uint32_t SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40;
123 static const int32_t SEC_COMMON_MAX_COUNT = 0x21;
124
125 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
126 static const uint32_t CASE_LOWER_FIRST_COMMON_LOW = 1;
127 static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
128 static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH = 13;
129 static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
130
131 // Case level, upperFirst: Compress up to 13 common weights as 3..15.
132 static const uint32_t CASE_UPPER_FIRST_COMMON_LOW = 3;
133 static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH = 15;
134 static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
135
136 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
137 static const uint32_t TER_ONLY_COMMON_LOW = Collation::COMMON_BYTE;
138 static const uint32_t TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
139 static const uint32_t TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
140 static const int32_t TER_ONLY_COMMON_MAX_COUNT = 0x61;
141
142 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
143 static const uint32_t TER_LOWER_FIRST_COMMON_LOW = Collation::COMMON_BYTE;
144 static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
145 static const uint32_t TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
146 static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
147
148 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
149 static const uint32_t TER_UPPER_FIRST_COMMON_LOW = Collation::COMMON_BYTE + 0x80;
150 static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
151 static const uint32_t TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
152 static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
153
154 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
155 static const uint32_t QUAT_COMMON_LOW = 0x1c;
156 static const uint32_t QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
157 static const uint32_t QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
158 static const int32_t QUAT_COMMON_MAX_COUNT = 0x71;
159 // Primary weights shifted to quaternary level must be encoded with
160 // a lead byte below the common-weight compression range.
161 static const uint32_t QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
162 };
163
164 U_NAMESPACE_END
165
166 #endif // !UCONFIG_NO_COLLATION
167 #endif // __COLLATIONKEYS_H__