]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
b331163b | 3 | * Copyright (C) 2013-2015, International Business Machines |
57a6839d A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * collationfastlatinbuilder.h | |
7 | * | |
8 | * created on: 2013aug09 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #ifndef __COLLATIONFASTLATINBUILDER_H__ | |
13 | #define __COLLATIONFASTLATINBUILDER_H__ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_COLLATION | |
18 | ||
19 | #include "unicode/unistr.h" | |
20 | #include "unicode/uobject.h" | |
21 | #include "collation.h" | |
22 | #include "collationfastlatin.h" | |
23 | #include "uvectr64.h" | |
24 | ||
25 | U_NAMESPACE_BEGIN | |
26 | ||
27 | struct CollationData; | |
28 | ||
// CollationFastLatinBuilder: helper class that accumulates collation elements (CEs)
// in the members below and exposes a table of 16-bit units stored in `result`
// via getTable()/lengthOfTable().
// NOTE(review): only declarations are visible in this header; descriptions of the
// out-of-line members are inferred from their names/signatures and should be
// confirmed against the implementation file.
29 | class U_I18N_API CollationFastLatinBuilder : public UObject { | |
30 | public: | |
// Constructor; takes an ICU-style in/out error code.
// NOTE(review): single-argument constructor is not `explicit` - confirm that is intended.
31 | CollationFastLatinBuilder(UErrorCode &errorCode); | |
32 | ~CollationFastLatinBuilder(); | |
33 | ||
// Presumably builds the table for the given collation data; the meaning of the
// UBool return value is not visible here - TODO confirm in the .cpp.
34 | UBool forData(const CollationData &data, UErrorCode &errorCode); | |
35 | ||
// Returns the buffer of `result` reinterpreted as 16-bit unsigned units.
// The pointer refers to storage held by `result`, so it does not outlive this
// object (and presumably not a later modification of `result` either).
36 | const uint16_t *getTable() const { | |
37 | return reinterpret_cast<const uint16_t *>(result.getBuffer()); | |
38 | } | |
// Returns result.length(), i.e. the length of the string that backs getTable().
39 | int32_t lengthOfTable() const { return result.length(); } | |
40 | ||
41 | private: | |
b331163b A |
42 | // space, punct, symbol, currency (not digit) |
// Count of reorder codes from UCOL_REORDER_CODE_FIRST through
// UCOL_REORDER_CODE_CURRENCY, inclusive; sizes lastSpecialPrimaries[] below.
43 | enum { NUM_SPECIAL_GROUPS = UCOL_REORDER_CODE_CURRENCY - UCOL_REORDER_CODE_FIRST + 1 }; | |
44 | ||
57a6839d A |
// Out-of-line helpers; bodies are not part of this header.
45 | UBool loadGroups(const CollationData &data, UErrorCode &errorCode); |
46 | UBool inSameGroup(uint32_t p, uint32_t q) const; | |
47 | ||
48 | void resetCEs(); | |
49 | void getCEs(const CollationData &data, UErrorCode &errorCode); | |
50 | UBool getCEsFromCE32(const CollationData &data, UChar32 c, uint32_t ce32, | |
51 | UErrorCode &errorCode); | |
52 | UBool getCEsFromContractionCE32(const CollationData &data, uint32_t ce32, | |
53 | UErrorCode &errorCode); | |
54 | void addContractionEntry(int32_t x, int64_t cce0, int64_t cce1, UErrorCode &errorCode); | |
55 | void addUniqueCE(int64_t ce, UErrorCode &errorCode); | |
56 | uint32_t getMiniCE(int64_t ce) const; | |
57 | UBool encodeUniqueCEs(UErrorCode &errorCode); | |
58 | UBool encodeCharCEs(UErrorCode &errorCode); | |
59 | UBool encodeContractions(UErrorCode &errorCode); | |
60 | uint32_t encodeTwoCEs(int64_t first, int64_t second) const; | |
61 | ||
// True when the upper 32 bits of `ce` equal Collation::NO_CE_PRIMARY while `ce`
// as a whole is not Collation::NO_CE. Judging by the name, such a value marks a
// character that has contractions rather than holding a real CE - TODO confirm.
62 | static UBool isContractionCharCE(int64_t ce) { | |
63 | return (uint32_t)(ce >> 32) == Collation::NO_CE_PRIMARY && ce != Collation::NO_CE; | |
64 | } | |
65 | ||
// Top bit of a 32-bit value. NOTE(review): where this flag is set/tested is not
// visible in this header.
66 | static const uint32_t CONTRACTION_FLAG = 0x80000000; | |
67 | ||
68 | // temporary "buffer" | |
69 | int64_t ce0, ce1; | |
70 | ||
// Two 64-bit slots per fast character (CollationFastLatin::NUM_FAST_CHARS rows).
71 | int64_t charCEs[CollationFastLatin::NUM_FAST_CHARS][2]; | |
72 | ||
73 | UVector64 contractionCEs; | |
74 | UVector64 uniqueCEs; | |
75 | ||
76 | /** One 16-bit mini CE per unique CE. */ | |
// NOTE(review): raw pointer; allocation and release are not visible here
// (presumably handled by the constructor/destructor in the .cpp). The class
// declares a destructor but no copy constructor/assignment in this header, so
// copying an instance would duplicate this pointer - confirm instances are never copied.
77 | uint16_t *miniCEs; | |
78 | ||
b331163b A |
79 | // These are constant for a given root collator. |
80 | uint32_t lastSpecialPrimaries[NUM_SPECIAL_GROUPS]; | |
57a6839d A |
81 | uint32_t firstDigitPrimary; |
82 | uint32_t firstLatinPrimary; | |
83 | uint32_t lastLatinPrimary; | |
84 | // This determines the first normal primary weight which is mapped to | |
85 | // a short mini primary. It must be >=firstDigitPrimary. | |
86 | uint32_t firstShortPrimary; | |
87 | ||
88 | UBool shortPrimaryOverflow; | |
89 | ||
// Backing storage for the table returned by getTable()/lengthOfTable().
90 | UnicodeString result; | |
// NOTE(review): presumably the number of leading header units in `result` - confirm.
91 | int32_t headerLength; | |
92 | }; | |
93 | ||
94 | U_NAMESPACE_END | |
95 | ||
96 | #endif // !UCONFIG_NO_COLLATION | |
97 | #endif // __COLLATIONFASTLATINBUILDER_H__ |