]>
Commit | Line | Data |
---|---|---|
57a6839d A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2012-2014, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * collationbasedatabuilder.h | |
7 | * | |
8 | * created on: 2012aug11 | |
9 | * created by: Markus W. Scherer | |
10 | */ | |
11 | ||
12 | #ifndef __COLLATIONBASEDATABUILDER_H__ | |
13 | #define __COLLATIONBASEDATABUILDER_H__ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | ||
17 | #if !UCONFIG_NO_COLLATION | |
18 | ||
19 | #include "unicode/uniset.h" | |
20 | #include "unicode/unistr.h" | |
21 | #include "collation.h" | |
22 | #include "collationdata.h" | |
23 | #include "collationdatabuilder.h" | |
24 | #include "normalizer2impl.h" | |
25 | #include "utrie2.h" | |
26 | #include "uvectr32.h" | |
27 | #include "uvectr64.h" | |
28 | #include "uvector.h" | |
29 | ||
30 | U_NAMESPACE_BEGIN | |
31 | ||
32 | /** | |
33 | * Low-level base CollationData builder. | |
34 | */ | |
35 | class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { | |
36 | public: | |
37 | CollationBaseDataBuilder(UErrorCode &errorCode); | |
38 | ||
39 | virtual ~CollationBaseDataBuilder(); | |
40 | ||
41 | void init(UErrorCode &errorCode); | |
42 | ||
43 | /** | |
44 | * Sets the Han ranges as ranges of offset CE32s. | |
45 | * Note: Unihan extension A sorts after the other BMP ranges. | |
46 | * See http://www.unicode.org/reports/tr10/#Implicit_Weights | |
47 | * | |
48 | * @param ranges array of ranges of [:Unified_Ideograph:] in collation order, | |
49 | * as (start, end) code point pairs | |
50 | * @param length number of code points (not pairs) | |
51 | * @param errorCode in/out error code | |
52 | */ | |
53 | void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &errorCode); | |
54 | ||
55 | void setNumericPrimary(uint32_t np) { numericPrimary = np; } | |
56 | ||
57 | virtual UBool isCompressibleLeadByte(uint32_t b) const; | |
58 | ||
59 | void setCompressibleLeadByte(uint32_t b); | |
60 | ||
61 | static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); | |
62 | static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompressible); | |
63 | ||
64 | virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); | |
65 | ||
66 | void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &errorCode); | |
67 | void addRootElement(int64_t ce, UErrorCode &errorCode); | |
68 | ||
69 | void addReorderingGroup(uint32_t firstByte, uint32_t lastByte, | |
70 | const UnicodeString &groupScripts, | |
71 | UErrorCode &errorCode); | |
72 | ||
73 | virtual void build(CollationData &data, UErrorCode &errorCode); | |
74 | ||
75 | void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); | |
76 | ||
77 | private: | |
78 | int32_t writeRootElementsRange( | |
79 | uint32_t prevPrimary, uint32_t p, int32_t i, | |
80 | UVector32 &table, UErrorCode &errorCode); | |
81 | ||
82 | // Flags for which primary-weight lead bytes are compressible. | |
83 | UBool compressibleBytes[256]; | |
84 | uint32_t numericPrimary; | |
85 | uint32_t firstHanPrimary; | |
86 | uint32_t lastHanPrimary; | |
87 | int32_t hanStep; | |
88 | UVector64 rootElements; | |
89 | UnicodeString scripts; | |
90 | }; | |
91 | ||
92 | U_NAMESPACE_END | |
93 | ||
94 | #endif // !UCONFIG_NO_COLLATION | |
95 | #endif // __COLLATIONBASEDATABUILDER_H__ |