]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
57a6839d A |
3 | /* |
4 | ******************************************************************************* | |
2ca993e8 | 5 | * Copyright (C) 2013-2016, International Business Machines |
57a6839d A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * collationfastlatinbuilder.h | |
9 | * | |
10 | * created on: 2013aug09 | |
11 | * created by: Markus W. Scherer | |
12 | */ | |
13 | ||
14 | #ifndef __COLLATIONFASTLATINBUILDER_H__ | |
15 | #define __COLLATIONFASTLATINBUILDER_H__ | |
16 | ||
17 | #include "unicode/utypes.h" | |
18 | ||
19 | #if !UCONFIG_NO_COLLATION | |
20 | ||
2ca993e8 | 21 | #include "unicode/ucol.h" |
57a6839d A |
22 | #include "unicode/unistr.h" |
23 | #include "unicode/uobject.h" | |
24 | #include "collation.h" | |
25 | #include "collationfastlatin.h" | |
26 | #include "uvectr64.h" | |
27 | ||
28 | U_NAMESPACE_BEGIN | |
29 | ||
30 | struct CollationData; | |
31 | ||
32 | class U_I18N_API CollationFastLatinBuilder : public UObject { | |
33 | public: | |
34 | CollationFastLatinBuilder(UErrorCode &errorCode); | |
35 | ~CollationFastLatinBuilder(); | |
36 | ||
37 | UBool forData(const CollationData &data, UErrorCode &errorCode); | |
38 | ||
39 | const uint16_t *getTable() const { | |
40 | return reinterpret_cast<const uint16_t *>(result.getBuffer()); | |
41 | } | |
42 | int32_t lengthOfTable() const { return result.length(); } | |
43 | ||
44 | private: | |
b331163b A |
45 | // space, punct, symbol, currency (not digit) |
46 | enum { NUM_SPECIAL_GROUPS = UCOL_REORDER_CODE_CURRENCY - UCOL_REORDER_CODE_FIRST + 1 }; | |
47 | ||
57a6839d A |
48 | UBool loadGroups(const CollationData &data, UErrorCode &errorCode); |
49 | UBool inSameGroup(uint32_t p, uint32_t q) const; | |
50 | ||
51 | void resetCEs(); | |
52 | void getCEs(const CollationData &data, UErrorCode &errorCode); | |
53 | UBool getCEsFromCE32(const CollationData &data, UChar32 c, uint32_t ce32, | |
54 | UErrorCode &errorCode); | |
55 | UBool getCEsFromContractionCE32(const CollationData &data, uint32_t ce32, | |
56 | UErrorCode &errorCode); | |
57 | void addContractionEntry(int32_t x, int64_t cce0, int64_t cce1, UErrorCode &errorCode); | |
58 | void addUniqueCE(int64_t ce, UErrorCode &errorCode); | |
59 | uint32_t getMiniCE(int64_t ce) const; | |
60 | UBool encodeUniqueCEs(UErrorCode &errorCode); | |
61 | UBool encodeCharCEs(UErrorCode &errorCode); | |
62 | UBool encodeContractions(UErrorCode &errorCode); | |
63 | uint32_t encodeTwoCEs(int64_t first, int64_t second) const; | |
64 | ||
65 | static UBool isContractionCharCE(int64_t ce) { | |
66 | return (uint32_t)(ce >> 32) == Collation::NO_CE_PRIMARY && ce != Collation::NO_CE; | |
67 | } | |
68 | ||
69 | static const uint32_t CONTRACTION_FLAG = 0x80000000; | |
70 | ||
71 | // temporary "buffer" | |
72 | int64_t ce0, ce1; | |
73 | ||
74 | int64_t charCEs[CollationFastLatin::NUM_FAST_CHARS][2]; | |
75 | ||
76 | UVector64 contractionCEs; | |
77 | UVector64 uniqueCEs; | |
78 | ||
79 | /** One 16-bit mini CE per unique CE. */ | |
80 | uint16_t *miniCEs; | |
81 | ||
b331163b A |
82 | // These are constant for a given root collator. |
83 | uint32_t lastSpecialPrimaries[NUM_SPECIAL_GROUPS]; | |
57a6839d A |
84 | uint32_t firstDigitPrimary; |
85 | uint32_t firstLatinPrimary; | |
86 | uint32_t lastLatinPrimary; | |
87 | // This determines the first normal primary weight which is mapped to | |
88 | // a short mini primary. It must be >=firstDigitPrimary. | |
89 | uint32_t firstShortPrimary; | |
90 | ||
91 | UBool shortPrimaryOverflow; | |
92 | ||
93 | UnicodeString result; | |
94 | int32_t headerLength; | |
95 | }; | |
96 | ||
97 | U_NAMESPACE_END | |
98 | ||
99 | #endif // !UCONFIG_NO_COLLATION | |
100 | #endif // __COLLATIONFASTLATINBUILDER_H__ |