// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
* created on: 2012sep02
* created by: Markus W. Scherer
*/
14 #ifndef __COLLATIONKEYS_H__
15 #define __COLLATIONKEYS_H__
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION
21 #include "unicode/bytestream.h"
22 #include "unicode/ucol.h"
24 #include "collation.h"
// Forward declarations: these types are only used by reference or in a
// friend declaration below, so their full definitions are not needed here.
class CollationIterator;
struct CollationDataReader;
struct CollationSettings;
32 class SortKeyByteSink
: public ByteSink
{
34 SortKeyByteSink(char *dest
, int32_t destCapacity
)
35 : buffer_(dest
), capacity_(destCapacity
),
36 appended_(0), ignore_(0) {}
37 virtual ~SortKeyByteSink();
39 void IgnoreBytes(int32_t numIgnore
) { ignore_
= numIgnore
; }
41 virtual void Append(const char *bytes
, int32_t n
);
42 void Append(uint32_t b
) {
46 if (appended_
< capacity_
|| Resize(1, appended_
)) {
47 buffer_
[appended_
] = (char)b
;
52 virtual char *GetAppendBuffer(int32_t min_capacity
,
53 int32_t desired_capacity_hint
,
54 char *scratch
, int32_t scratch_capacity
,
55 int32_t *result_capacity
);
56 int32_t NumberOfBytesAppended() const { return appended_
; }
59 * @return how many bytes can be appended (including ignored ones)
60 * without reallocation
62 int32_t GetRemainingCapacity() const {
63 // Either ignore_ or appended_ should be 0.
64 return ignore_
+ capacity_
- appended_
;
67 UBool
Overflowed() const { return appended_
> capacity_
; }
68 /** @return FALSE if memory allocation failed */
69 UBool
IsOk() const { return buffer_
!= NULL
; }
72 virtual void AppendBeyondCapacity(const char *bytes
, int32_t n
, int32_t length
) = 0;
73 virtual UBool
Resize(int32_t appendCapacity
, int32_t length
) = 0;
86 SortKeyByteSink(const SortKeyByteSink
&); // copy constructor not implemented
87 SortKeyByteSink
&operator=(const SortKeyByteSink
&); // assignment operator not implemented
90 class U_I18N_API CollationKeys
/* not : public UObject because all methods are static */ {
92 class LevelCallback
: public UMemory
{
94 virtual ~LevelCallback();
96 * @param level The next level about to be written to the ByteSink.
97 * @return TRUE if the level is to be written
98 * (the base class implementation always returns TRUE)
100 virtual UBool
needToWrite(Collation::Level level
);
104 * Writes the sort key bytes for minLevel up to the iterator data's strength.
105 * Optionally writes the case level.
106 * Stops writing levels when callback.needToWrite(level) returns FALSE.
107 * Separates levels with the LEVEL_SEPARATOR_BYTE
108 * but does not write a TERMINATOR_BYTE.
110 static void writeSortKeyUpToQuaternary(CollationIterator
&iter
,
111 const UBool
*compressibleBytes
,
112 const CollationSettings
&settings
,
113 SortKeyByteSink
&sink
,
114 Collation::Level minLevel
, LevelCallback
&callback
,
115 UBool preflight
, UErrorCode
&errorCode
);
117 friend struct CollationDataReader
;
119 CollationKeys(); // no instantiation
121 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
122 static const uint32_t SEC_COMMON_LOW
= Collation::COMMON_BYTE
;
123 static const uint32_t SEC_COMMON_MIDDLE
= SEC_COMMON_LOW
+ 0x20;
124 static const uint32_t SEC_COMMON_HIGH
= SEC_COMMON_LOW
+ 0x40;
125 static const int32_t SEC_COMMON_MAX_COUNT
= 0x21;
127 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
128 static const uint32_t CASE_LOWER_FIRST_COMMON_LOW
= 1;
129 static const uint32_t CASE_LOWER_FIRST_COMMON_MIDDLE
= 7;
130 static const uint32_t CASE_LOWER_FIRST_COMMON_HIGH
= 13;
131 static const int32_t CASE_LOWER_FIRST_COMMON_MAX_COUNT
= 7;
133 // Case level, upperFirst: Compress up to 13 common weights as 3..15.
134 static const uint32_t CASE_UPPER_FIRST_COMMON_LOW
= 3;
135 static const uint32_t CASE_UPPER_FIRST_COMMON_HIGH
= 15;
136 static const int32_t CASE_UPPER_FIRST_COMMON_MAX_COUNT
= 13;
138 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
139 static const uint32_t TER_ONLY_COMMON_LOW
= Collation::COMMON_BYTE
;
140 static const uint32_t TER_ONLY_COMMON_MIDDLE
= TER_ONLY_COMMON_LOW
+ 0x60;
141 static const uint32_t TER_ONLY_COMMON_HIGH
= TER_ONLY_COMMON_LOW
+ 0xc0;
142 static const int32_t TER_ONLY_COMMON_MAX_COUNT
= 0x61;
144 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
145 static const uint32_t TER_LOWER_FIRST_COMMON_LOW
= Collation::COMMON_BYTE
;
146 static const uint32_t TER_LOWER_FIRST_COMMON_MIDDLE
= TER_LOWER_FIRST_COMMON_LOW
+ 0x20;
147 static const uint32_t TER_LOWER_FIRST_COMMON_HIGH
= TER_LOWER_FIRST_COMMON_LOW
+ 0x40;
148 static const int32_t TER_LOWER_FIRST_COMMON_MAX_COUNT
= 0x21;
150 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
151 static const uint32_t TER_UPPER_FIRST_COMMON_LOW
= Collation::COMMON_BYTE
+ 0x80;
152 static const uint32_t TER_UPPER_FIRST_COMMON_MIDDLE
= TER_UPPER_FIRST_COMMON_LOW
+ 0x20;
153 static const uint32_t TER_UPPER_FIRST_COMMON_HIGH
= TER_UPPER_FIRST_COMMON_LOW
+ 0x40;
154 static const int32_t TER_UPPER_FIRST_COMMON_MAX_COUNT
= 0x21;
156 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
157 static const uint32_t QUAT_COMMON_LOW
= 0x1c;
158 static const uint32_t QUAT_COMMON_MIDDLE
= QUAT_COMMON_LOW
+ 0x70;
159 static const uint32_t QUAT_COMMON_HIGH
= QUAT_COMMON_LOW
+ 0xE0;
160 static const int32_t QUAT_COMMON_MAX_COUNT
= 0x71;
161 // Primary weights shifted to quaternary level must be encoded with
162 // a lead byte below the common-weight compression range.
163 static const uint32_t QUAT_SHIFTED_LIMIT_BYTE
= QUAT_COMMON_LOW
- 1; // 0x1b
168 #endif // !UCONFIG_NO_COLLATION
169 #endif // __COLLATIONKEYS_H__