]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2013-2014, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * collationfastlatinbuilder.h | |
7 | * | |
8 | * created on: 2013aug09 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #ifndef __COLLATIONFASTLATINBUILDER_H__ | |
13 | #define __COLLATIONFASTLATINBUILDER_H__ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_COLLATION | |
18 | ||
19 | #include "unicode/unistr.h" | |
20 | #include "unicode/uobject.h" | |
21 | #include "collation.h" | |
22 | #include "collationfastlatin.h" | |
23 | #include "uvectr64.h" | |
24 | ||
25 | U_NAMESPACE_BEGIN | |
26 | ||
27 | struct CollationData; /* Forward declaration: this header only uses CollationData through const references. */ | |
28 | ||
29 | class U_I18N_API CollationFastLatinBuilder : public UObject { /* Owns the CE lists, the primary-weight boundaries and the output string from which getTable() serves 16-bit units. Presumably builds the CollationFastLatin lookup table -- confirm in the .cpp. */ | |
30 | public: | |
31 | CollationFastLatinBuilder(UErrorCode &errorCode); /* errorCode: in/out ICU status; which failures it reports is not visible in this header. */ | |
32 | ~CollationFastLatinBuilder(); /* NOTE(review): miniCEs is a raw pointer -- presumably released here; confirm in the .cpp. */ | |
33 | /* forData(): takes the collation data to process. The UBool result presumably signals success or failure -- confirm in the .cpp. */ ||
34 | UBool forData(const CollationData &data, UErrorCode &errorCode); | |
35 | /* getTable(): exposes the buffer of result as const 16-bit units; the cast itself makes no copy. */ ||
36 | const uint16_t *getTable() const { | |
37 | return reinterpret_cast<const uint16_t *>(result.getBuffer()); | |
38 | } | |
39 | int32_t lengthOfTable() const { return result.length(); } /* Length exactly as reported by result.length(). */ | |
40 | ||
41 | private: | |
42 | UBool loadGroups(const CollationData &data, UErrorCode &errorCode); | |
43 | UBool inSameGroup(uint32_t p, uint32_t q) const; | |
44 | /* The helpers below are only declared here. They presumably work on the ce0/ce1 scratch pair and the CE lists further down -- confirm in the .cpp. */ ||
45 | void resetCEs(); | |
46 | void getCEs(const CollationData &data, UErrorCode &errorCode); | |
47 | UBool getCEsFromCE32(const CollationData &data, UChar32 c, uint32_t ce32, | |
48 | UErrorCode &errorCode); | |
49 | UBool getCEsFromContractionCE32(const CollationData &data, uint32_t ce32, | |
50 | UErrorCode &errorCode); | |
51 | void addContractionEntry(int32_t x, int64_t cce0, int64_t cce1, UErrorCode &errorCode); | |
52 | void addUniqueCE(int64_t ce, UErrorCode &errorCode); | |
53 | uint32_t getMiniCE(int64_t ce) const; | |
54 | UBool encodeUniqueCEs(UErrorCode &errorCode); | |
55 | UBool encodeCharCEs(UErrorCode &errorCode); | |
56 | UBool encodeContractions(UErrorCode &errorCode); | |
57 | uint32_t encodeTwoCEs(int64_t first, int64_t second) const; | |
58 | /* isContractionCharCE(): true when the upper 32 bits of ce equal Collation::NO_CE_PRIMARY while ce as a whole differs from Collation::NO_CE. */ ||
59 | static UBool isContractionCharCE(int64_t ce) { | |
60 | return (uint32_t)(ce >> 32) == Collation::NO_CE_PRIMARY && ce != Collation::NO_CE; | |
61 | } | |
62 | /* CONTRACTION_FLAG: bit 31 of a 32-bit value; the code that sets or tests it is not visible in this header. */ ||
63 | static const uint32_t CONTRACTION_FLAG = 0x80000000; | |
64 | ||
65 | // temporary "buffer" | |
66 | int64_t ce0, ce1; | |
67 | /* charCEs: two 64-bit slots for each index below CollationFastLatin::NUM_FAST_CHARS. */ ||
68 | int64_t charCEs[CollationFastLatin::NUM_FAST_CHARS][2]; | |
69 | /* 64-bit vectors; presumably filled by addContractionEntry() and addUniqueCE() respectively -- confirm in the .cpp. */ ||
70 | UVector64 contractionCEs; | |
71 | UVector64 uniqueCEs; | |
72 | ||
73 | /** One 16-bit mini CE per unique CE. */ | |
74 | uint16_t *miniCEs; | |
75 | ||
76 | // These are constant for a given list of CollationData.scripts. | |
77 | uint32_t firstDigitPrimary; | |
78 | uint32_t firstLatinPrimary; | |
79 | uint32_t lastLatinPrimary; | |
80 | // This determines the first normal primary weight which is mapped to | |
81 | // a short mini primary. It must be >=firstDigitPrimary. | |
82 | uint32_t firstShortPrimary; | |
83 | ||
84 | UBool shortPrimaryOverflow; | |
85 | /* result: backing store read by getTable() and lengthOfTable(). headerLength: NOTE(review) presumably the count of leading header units in result -- confirm in the .cpp. */ ||
86 | UnicodeString result; | |
87 | int32_t headerLength; | |
88 | }; | |
89 | ||
90 | U_NAMESPACE_END | |
91 | ||
92 | #endif // !UCONFIG_NO_COLLATION | |
93 | #endif // __COLLATIONFASTLATINBUILDER_H__ |