2 *******************************************************************************
3 * Copyright (C) 2012-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * created on: 2012sep02
9 * created by: Markus W. Scherer
12 #ifndef __COLLATIONKEYS_H__
13 #define __COLLATIONKEYS_H__
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_COLLATION
19 #include "unicode/bytestream.h"
20 #include "unicode/ucol.h"
22 #include "collation.h"
// Forward declarations. In the visible part of this header these types are
// only named by reference (CollationIterator, CollationSettings) or in a
// friend declaration (CollationDataReader), so their full definitions are
// not required here.
26 class CollationIterator
;
27 struct CollationDataReader
;
28 struct CollationSettings
;
// SortKeyByteSink: a ByteSink subclass that writes into a caller-supplied
// buffer. It tracks the buffer pointer (buffer_), its capacity (capacity_),
// a running count of appended bytes (appended_) and a count of bytes to
// ignore (ignore_).
// NOTE(review): this view of the class appears to be missing lines (access
// specifiers, data member declarations, part of the inline Append body and
// the closing brace) — confirm against the complete header before editing.
30 class SortKeyByteSink
: public ByteSink
{
// Constructor: adopts dest as the output buffer with room for destCapacity
// bytes; starts with nothing appended and nothing to ignore.
32 SortKeyByteSink(char *dest
, int32_t destCapacity
)
33 : buffer_(dest
), capacity_(destCapacity
),
34 appended_(0), ignore_(0) {}
// Virtual destructor; declared only, the definition is not in this view.
35 virtual ~SortKeyByteSink();
// Sets the ignore counter to numIgnore (overwrites any previous value).
// Presumably this is the number of leading output bytes to drop — the code
// that consumes ignore_ is not visible here; TODO confirm.
37 void IgnoreBytes(int32_t numIgnore
) { ignore_
= numIgnore
; }
// Multi-byte append taking a pointer and a count n. Declared only; the
// definition is not in this view.
39 virtual void Append(const char *bytes
, int32_t n
);
// Single-byte append. The visible part stores (char)b at buffer_[appended_]
// when there is still room (appended_ < capacity_) or when Resize(1, appended_)
// returns true.
// NOTE(review): the remainder of this inline body — including any handling
// of ignore_ and the update of appended_ — is not visible in this view.
40 void Append(uint32_t b
) {
44 if (appended_
< capacity_
|| Resize(1, appended_
)) {
45 buffer_
[appended_
] = (char)b
;
// Buffer-request entry point; declared only. The parameter list looks like
// the ByteSink::GetAppendBuffer contract from unicode/bytestream.h —
// presumably an override; confirm against that header.
50 virtual char *GetAppendBuffer(int32_t min_capacity
,
51 int32_t desired_capacity_hint
,
52 char *scratch
, int32_t scratch_capacity
,
53 int32_t *result_capacity
);
// Returns the current value of appended_.
54 int32_t NumberOfBytesAppended() const { return appended_
; }
// Computed as ignore_ + capacity_ - appended_.
57 * @return how many bytes can be appended (including ignored ones)
58 * without reallocation
60 int32_t GetRemainingCapacity() const {
61 // Either ignore_ or appended_ should be 0.
62 return ignore_
+ capacity_
- appended_
;
// True when appended_ exceeds capacity_, i.e. appended_ is allowed to count
// past the end of the buffer.
65 UBool
Overflowed() const { return appended_
> capacity_
; }
// Implemented as a null check on buffer_.
66 /** @return FALSE if memory allocation failed */
67 UBool
IsOk() const { return buffer_
!= NULL
; }
// Pure virtual hooks that concrete sinks must implement.
// Resize is the call used by Append(uint32_t) above, as Resize(1, appended_),
// when the buffer is full; a true result is treated there as permission to
// write at buffer_[appended_]. The exact meaning of the parameters is not
// shown in this view — presumably "extra capacity needed" and "bytes
// currently in use"; TODO confirm against the implementations.
70 virtual void AppendBeyondCapacity(const char *bytes
, int32_t n
, int32_t length
) = 0;
71 virtual UBool
Resize(int32_t appendCapacity
, int32_t length
) = 0;
// Copy construction and copy assignment are declared without an
// implementation (see the inline notes), i.e. the sink is not meant to be copied.
84 SortKeyByteSink(const SortKeyByteSink
&); // copy constructor not implemented
85 SortKeyByteSink
&operator=(const SortKeyByteSink
&); // assignment operator not implemented
// CollationKeys: holder for the static sort-key writing entry point and the
// byte constants used for common-weight compression on each level.
// NOTE(review): this view appears to be missing lines (access specifiers,
// comment delimiters, closing braces) — confirm against the complete header.
88 class U_I18N_API CollationKeys
/* not : public UObject because all methods are static */ {
// Callback consulted before each level is written; subclasses can override
// needToWrite() to stop output early.
90 class LevelCallback
: public UMemory
{
92 virtual ~LevelCallback();
94 * @param level The next level about to be written to the ByteSink.
95 * @return TRUE if the level is to be written
96 * (the base class implementation always returns TRUE)
98 virtual UBool
needToWrite(Collation::Level level
);
102 * Writes the sort key bytes for minLevel up to the iterator data's strength.
103 * Optionally writes the case level.
104 * Stops writing levels when callback.needToWrite(level) returns FALSE.
105 * Separates levels with the LEVEL_SEPARATOR_BYTE
106 * but does not write a TERMINATOR_BYTE.
// Parameters, as far as this declaration shows:
//   iter              - collation iterator, passed by non-const reference
//   compressibleBytes - pointer to read-only UBool flags (presumably one per
//                       primary lead byte; TODO confirm)
//   settings          - read-only collation settings
//   sink              - SortKeyByteSink that receives the output bytes
//   minLevel          - first level to write (see the description above)
//   callback          - queried via needToWrite(level) (see the description above)
//   preflight         - meaning not shown in this view; TODO confirm
//   errorCode         - UErrorCode passed by reference (presumably the usual
//                       ICU in/out error code; confirm against the definition)
108 static void writeSortKeyUpToQuaternary(CollationIterator
&iter
,
109 const UBool
*compressibleBytes
,
110 const CollationSettings
&settings
,
111 SortKeyByteSink
&sink
,
112 Collation::Level minLevel
, LevelCallback
&callback
,
113 UBool preflight
, UErrorCode
&errorCode
);
// Grants CollationDataReader access to the non-public members of this class.
115 friend struct CollationDataReader
;
117 CollationKeys(); // no instantiation
// Each group below defines a LOW byte, optionally a MIDDLE byte, a HIGH byte
// and the maximum run length (MAX_COUNT) of common weights that one
// compressed byte can stand for.
119 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
120 static const uint32_t SEC_COMMON_LOW
= Collation::COMMON_BYTE
;
121 static const uint32_t SEC_COMMON_MIDDLE
= SEC_COMMON_LOW
+ 0x20;
122 static const uint32_t SEC_COMMON_HIGH
= SEC_COMMON_LOW
+ 0x40;
// 0x21 == 33
123 static const int32_t SEC_COMMON_MAX_COUNT
= 0x21;
125 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
126 static const uint32_t CASE_LOWER_FIRST_COMMON_LOW
= 1;
127 static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE
= 7;
128 static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH
= 13;
129 static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT
= 7;
// Note: the upperFirst case-level group has no MIDDLE constant.
131 // Case level, upperFirst: Compress up to 13 common weights as 3..15.
132 static const uint32_t CASE_UPPER_FIRST_COMMON_LOW
= 3;
133 static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH
= 15;
134 static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT
= 13;
136 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
137 static const uint32_t TER_ONLY_COMMON_LOW
= Collation::COMMON_BYTE
;
138 static const uint32_t TER_ONLY_COMMON_MIDDLE
= TER_ONLY_COMMON_LOW
+ 0x60;
139 static const uint32_t TER_ONLY_COMMON_HIGH
= TER_ONLY_COMMON_LOW
+ 0xc0;
// 0x61 == 97
140 static const int32_t TER_ONLY_COMMON_MAX_COUNT
= 0x61;
142 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
143 static const uint32_t TER_LOWER_FIRST_COMMON_LOW
= Collation::COMMON_BYTE
;
144 static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE
= TER_LOWER_FIRST_COMMON_LOW
+ 0x20;
145 static const uint32_t TER_LOWER_FIRST_COMMON_HIGH
= TER_LOWER_FIRST_COMMON_LOW
+ 0x40;
// 0x21 == 33
146 static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT
= 0x21;
148 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
149 static const uint32_t TER_UPPER_FIRST_COMMON_LOW
= Collation::COMMON_BYTE
+ 0x80;
150 static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE
= TER_UPPER_FIRST_COMMON_LOW
+ 0x20;
151 static const uint32_t TER_UPPER_FIRST_COMMON_HIGH
= TER_UPPER_FIRST_COMMON_LOW
+ 0x40;
// 0x21 == 33
152 static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT
= 0x21;
154 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
155 static const uint32_t QUAT_COMMON_LOW
= 0x1c;
// 0x1c + 0x70 == 0x8c
156 static const uint32_t QUAT_COMMON_MIDDLE
= QUAT_COMMON_LOW
+ 0x70;
// 0x1c + 0xE0 == 0xfc
157 static const uint32_t QUAT_COMMON_HIGH
= QUAT_COMMON_LOW
+ 0xE0;
// 0x71 == 113
158 static const int32_t QUAT_COMMON_MAX_COUNT
= 0x71;
159 // Primary weights shifted to quaternary level must be encoded with
160 // a lead byte below the common-weight compression range.
161 static const uint32_t QUAT_SHIFTED_LIMIT_BYTE
= QUAT_COMMON_LOW
- 1; // 0x1b
166 #endif // !UCONFIG_NO_COLLATION
167 #endif // __COLLATIONKEYS_H__