2 *******************************************************************************
3 * Copyright (C) 2013-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationfastlatinbuilder.h
8 * created on: 2013aug09
9 * created by: Markus W. Scherer
12 #ifndef __COLLATIONFASTLATINBUILDER_H__
13 #define __COLLATIONFASTLATINBUILDER_H__
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_COLLATION
19 #include "unicode/ucol.h"
20 #include "unicode/unistr.h"
21 #include "unicode/uobject.h"
22 #include "collation.h"
23 #include "collationfastlatin.h"
30 class U_I18N_API CollationFastLatinBuilder
: public UObject
{
// Constructor (defined out of line).
// errorCode: UErrorCode passed by non-const reference.
// NOTE(review): presumably set on failure per the usual ICU convention — confirm in the .cpp.
32 CollationFastLatinBuilder(UErrorCode
&errorCode
);
// Destructor (defined out of line).
// NOTE(review): what it releases is not visible in this header — check the .cpp.
33 ~CollationFastLatinBuilder();
// Entry point taking read-only collation data and an error code by reference;
// returns a UBool.
// NOTE(review): presumably builds the fast-Latin table from `data` and reports
// success via the return value — confirm against the implementation.
35 UBool
forData(const CollationData
&data
, UErrorCode
&errorCode
);
// Returns the buffer of `result` (result.getBuffer()) reinterpreted as a
// pointer to const uint16_t. The matching element count is what
// lengthOfTable() returns (result.length()).
// NOTE(review): the pointer refers to storage owned by `result`; presumably it
// is only valid while `result` is unchanged — confirm.
37 const uint16_t *getTable() const {
38 return reinterpret_cast<const uint16_t *>(result
.getBuffer());
// Returns result.length() as an int32_t, i.e. the length of the same object
// whose buffer getTable() exposes.
40 int32_t lengthOfTable() const { return result
.length(); }
43 // space, punct, symbol, currency (not digit)
// NUM_SPECIAL_GROUPS is the count of reorder codes in the inclusive range
// UCOL_REORDER_CODE_FIRST..UCOL_REORDER_CODE_CURRENCY; the "+ 1" makes the
// range inclusive. It is used below as the size of lastSpecialPrimaries.
44 enum { NUM_SPECIAL_GROUPS
= UCOL_REORDER_CODE_CURRENCY
- UCOL_REORDER_CODE_FIRST
+ 1 };
// Takes read-only collation data and an error code by reference; returns a UBool.
// NOTE(review): presumably fills the group-boundary primaries
// (lastSpecialPrimaries, firstDigitPrimary, ...) from `data` — confirm in the .cpp.
46 UBool
loadGroups(const CollationData
&data
, UErrorCode
&errorCode
);
// Const predicate over two 32-bit values p and q.
// NOTE(review): presumably p and q are primary weights and the result says
// whether both fall into the same group — confirm in the .cpp.
47 UBool
inSameGroup(uint32_t p
, uint32_t q
) const;
// Takes read-only collation data and an error code by reference; returns nothing,
// so any failure can only be reported through errorCode.
// NOTE(review): presumably populates charCEs / contractionCEs for the fast
// Latin characters — confirm in the .cpp.
50 void getCEs(const CollationData
&data
, UErrorCode
&errorCode
);
// Parameters: read-only collation data, a code point c, a 32-bit ce32 value,
// and an error code by reference. Returns a UBool.
// NOTE(review): presumably resolves ce32 (the CE32 for c) into full CEs and
// returns whether they are usable for the fast path — confirm in the .cpp.
51 UBool
getCEsFromCE32(const CollationData
&data
, UChar32 c
, uint32_t ce32
,
52 UErrorCode
&errorCode
);
// Parameters: read-only collation data, a 32-bit ce32 value, and an error code
// by reference. Returns a UBool.
// NOTE(review): presumably handles the case where ce32 denotes a contraction —
// confirm in the .cpp.
53 UBool
getCEsFromContractionCE32(const CollationData
&data
, uint32_t ce32
,
54 UErrorCode
&errorCode
);
// Parameters: a 32-bit integer x, two 64-bit values cce0 and cce1, and an error
// code by reference. Returns nothing; failure can only be reported via errorCode.
// NOTE(review): presumably appends an (x, cce0, cce1) record to contractionCEs —
// confirm in the .cpp.
55 void addContractionEntry(int32_t x
, int64_t cce0
, int64_t cce1
, UErrorCode
&errorCode
);
// Parameters: a 64-bit ce and an error code by reference. Returns nothing.
// NOTE(review): presumably records ce in a set of distinct CEs if not already
// present — the container is not visible in this view; confirm in the .cpp.
56 void addUniqueCE(int64_t ce
, UErrorCode
&errorCode
);
// Const lookup from a 64-bit ce to a 32-bit unsigned result.
// NOTE(review): presumably returns the mini CE assigned to ce — confirm in the .cpp.
57 uint32_t getMiniCE(int64_t ce
) const;
// Takes an error code by reference; returns a UBool.
// NOTE(review): presumably assigns mini CEs to the collected unique CEs —
// confirm in the .cpp.
58 UBool
encodeUniqueCEs(UErrorCode
&errorCode
);
// Takes an error code by reference; returns a UBool.
// NOTE(review): presumably writes the per-character entries derived from
// charCEs into the output table — confirm in the .cpp.
59 UBool
encodeCharCEs(UErrorCode
&errorCode
);
// Takes an error code by reference; returns a UBool.
// NOTE(review): presumably writes the contraction entries derived from
// contractionCEs into the output table — confirm in the .cpp.
60 UBool
encodeContractions(UErrorCode
&errorCode
);
// Const function combining two 64-bit values (first, second) into one 32-bit
// unsigned result.
// NOTE(review): presumably packs the mini CEs of both inputs into a single
// value — confirm the bit layout in the .cpp.
61 uint32_t encodeTwoCEs(int64_t first
, int64_t second
) const;
// Static predicate on a 64-bit ce: true when the upper 32 bits of ce equal
// Collation::NO_CE_PRIMARY and ce as a whole is not Collation::NO_CE.
// NOTE(review): presumably this identifies charCEs entries that stand in for a
// contraction rather than holding a real CE — confirm in the .cpp.
63 static UBool
isContractionCharCE(int64_t ce
) {
64 return (uint32_t)(ce
>> 32) == Collation::NO_CE_PRIMARY
&& ce
!= Collation::NO_CE
;
// 32-bit constant with only the most significant bit (bit 31) set.
// NOTE(review): presumably OR-ed into an encoded value to mark it as a
// contraction — confirm where it is used in the .cpp.
67 static const uint32_t CONTRACTION_FLAG
= 0x80000000;
// Two 64-bit slots for each of CollationFastLatin::NUM_FAST_CHARS entries.
// NOTE(review): presumably up to two CEs per fast Latin character — confirm
// the indexing and the meaning of an unused slot in the .cpp.
72 int64_t charCEs
[CollationFastLatin::NUM_FAST_CHARS
][2];
// Vector of 64-bit values.
// NOTE(review): presumably the flattened contraction records added by
// addContractionEntry() — confirm the record layout in the .cpp.
74 UVector64 contractionCEs
;
77 /** One 16-bit mini CE per unique CE. */
80 // These are constant for a given root collator.
// lastSpecialPrimaries has one entry per special group (NUM_SPECIAL_GROUPS).
// NOTE(review): presumably each entry is the last primary weight of that group,
// and the three scalars below are the boundary primaries their names suggest —
// confirm against loadGroups() in the .cpp.
81 uint32_t lastSpecialPrimaries
[NUM_SPECIAL_GROUPS
];
82 uint32_t firstDigitPrimary
;
83 uint32_t firstLatinPrimary
;
84 uint32_t lastLatinPrimary
;
85 // This determines the first normal primary weight which is mapped to
86 // a short mini primary. It must be >=firstDigitPrimary.
// NOTE(review): where this value is chosen and where the >= invariant is
// enforced is not visible in this header — check the .cpp.
87 uint32_t firstShortPrimary
;
// Boolean flag.
// NOTE(review): presumably set when more short mini primaries are needed than
// are available — confirm where it is set and read in the .cpp.
89 UBool shortPrimaryOverflow
;
97 #endif // !UCONFIG_NO_COLLATION
98 #endif // __COLLATIONFASTLATINBUILDER_H__