]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
2ca993e8 | 3 | * Copyright (C) 2013-2016, International Business Machines |
57a6839d A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * collationfastlatinbuilder.h | |
7 | * | |
8 | * created on: 2013aug09 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #ifndef __COLLATIONFASTLATINBUILDER_H__ | |
13 | #define __COLLATIONFASTLATINBUILDER_H__ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_COLLATION | |
18 | ||
2ca993e8 | 19 | #include "unicode/ucol.h" |
57a6839d A |
20 | #include "unicode/unistr.h" |
21 | #include "unicode/uobject.h" | |
22 | #include "collation.h" | |
23 | #include "collationfastlatin.h" | |
24 | #include "uvectr64.h" | |
25 | ||
26 | U_NAMESPACE_BEGIN | |
27 | ||
28 | struct CollationData; | |
29 | ||
30 | class U_I18N_API CollationFastLatinBuilder : public UObject { | |
31 | public: | |
32 | CollationFastLatinBuilder(UErrorCode &errorCode); | |
33 | ~CollationFastLatinBuilder(); | |
34 | ||
35 | UBool forData(const CollationData &data, UErrorCode &errorCode); | |
36 | ||
37 | const uint16_t *getTable() const { | |
38 | return reinterpret_cast<const uint16_t *>(result.getBuffer()); | |
39 | } | |
40 | int32_t lengthOfTable() const { return result.length(); } | |
41 | ||
42 | private: | |
b331163b A |
43 | // space, punct, symbol, currency (not digit) |
44 | enum { NUM_SPECIAL_GROUPS = UCOL_REORDER_CODE_CURRENCY - UCOL_REORDER_CODE_FIRST + 1 }; | |
45 | ||
57a6839d A |
46 | UBool loadGroups(const CollationData &data, UErrorCode &errorCode); |
47 | UBool inSameGroup(uint32_t p, uint32_t q) const; | |
48 | ||
49 | void resetCEs(); | |
50 | void getCEs(const CollationData &data, UErrorCode &errorCode); | |
51 | UBool getCEsFromCE32(const CollationData &data, UChar32 c, uint32_t ce32, | |
52 | UErrorCode &errorCode); | |
53 | UBool getCEsFromContractionCE32(const CollationData &data, uint32_t ce32, | |
54 | UErrorCode &errorCode); | |
55 | void addContractionEntry(int32_t x, int64_t cce0, int64_t cce1, UErrorCode &errorCode); | |
56 | void addUniqueCE(int64_t ce, UErrorCode &errorCode); | |
57 | uint32_t getMiniCE(int64_t ce) const; | |
58 | UBool encodeUniqueCEs(UErrorCode &errorCode); | |
59 | UBool encodeCharCEs(UErrorCode &errorCode); | |
60 | UBool encodeContractions(UErrorCode &errorCode); | |
61 | uint32_t encodeTwoCEs(int64_t first, int64_t second) const; | |
62 | ||
63 | static UBool isContractionCharCE(int64_t ce) { | |
64 | return (uint32_t)(ce >> 32) == Collation::NO_CE_PRIMARY && ce != Collation::NO_CE; | |
65 | } | |
66 | ||
67 | static const uint32_t CONTRACTION_FLAG = 0x80000000; | |
68 | ||
69 | // temporary "buffer" | |
70 | int64_t ce0, ce1; | |
71 | ||
72 | int64_t charCEs[CollationFastLatin::NUM_FAST_CHARS][2]; | |
73 | ||
74 | UVector64 contractionCEs; | |
75 | UVector64 uniqueCEs; | |
76 | ||
77 | /** One 16-bit mini CE per unique CE. */ | |
78 | uint16_t *miniCEs; | |
79 | ||
b331163b A |
80 | // These are constant for a given root collator. |
81 | uint32_t lastSpecialPrimaries[NUM_SPECIAL_GROUPS]; | |
57a6839d A |
82 | uint32_t firstDigitPrimary; |
83 | uint32_t firstLatinPrimary; | |
84 | uint32_t lastLatinPrimary; | |
85 | // This determines the first normal primary weight which is mapped to | |
86 | // a short mini primary. It must be >=firstDigitPrimary. | |
87 | uint32_t firstShortPrimary; | |
88 | ||
89 | UBool shortPrimaryOverflow; | |
90 | ||
91 | UnicodeString result; | |
92 | int32_t headerLength; | |
93 | }; | |
94 | ||
95 | U_NAMESPACE_END | |
96 | ||
97 | #endif // !UCONFIG_NO_COLLATION | |
98 | #endif // __COLLATIONFASTLATINBUILDER_H__ |