/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationfastlatinbuilder.h
*
* created on: 2013aug09
* created by: Markus W. Scherer
*/

#ifndef __COLLATIONFASTLATINBUILDER_H__
#define __COLLATIONFASTLATINBUILDER_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/ucol.h"
#include "unicode/unistr.h"
#include "unicode/uobject.h"
#include "collation.h"
#include "collationfastlatin.h"
#include "uvectr64.h"

29 class U_I18N_API CollationFastLatinBuilder
: public UObject
{
31 CollationFastLatinBuilder(UErrorCode
&errorCode
);
32 ~CollationFastLatinBuilder();
34 UBool
forData(const CollationData
&data
, UErrorCode
&errorCode
);
36 const uint16_t *getTable() const {
37 return reinterpret_cast<const uint16_t *>(result
.getBuffer());
39 int32_t lengthOfTable() const { return result
.length(); }
42 // space, punct, symbol, currency (not digit)
43 enum { NUM_SPECIAL_GROUPS
= UCOL_REORDER_CODE_CURRENCY
- UCOL_REORDER_CODE_FIRST
+ 1 };
45 UBool
loadGroups(const CollationData
&data
, UErrorCode
&errorCode
);
46 UBool
inSameGroup(uint32_t p
, uint32_t q
) const;
49 void getCEs(const CollationData
&data
, UErrorCode
&errorCode
);
50 UBool
getCEsFromCE32(const CollationData
&data
, UChar32 c
, uint32_t ce32
,
51 UErrorCode
&errorCode
);
52 UBool
getCEsFromContractionCE32(const CollationData
&data
, uint32_t ce32
,
53 UErrorCode
&errorCode
);
54 void addContractionEntry(int32_t x
, int64_t cce0
, int64_t cce1
, UErrorCode
&errorCode
);
55 void addUniqueCE(int64_t ce
, UErrorCode
&errorCode
);
56 uint32_t getMiniCE(int64_t ce
) const;
57 UBool
encodeUniqueCEs(UErrorCode
&errorCode
);
58 UBool
encodeCharCEs(UErrorCode
&errorCode
);
59 UBool
encodeContractions(UErrorCode
&errorCode
);
60 uint32_t encodeTwoCEs(int64_t first
, int64_t second
) const;
62 static UBool
isContractionCharCE(int64_t ce
) {
63 return (uint32_t)(ce
>> 32) == Collation::NO_CE_PRIMARY
&& ce
!= Collation::NO_CE
;
66 static const uint32_t CONTRACTION_FLAG
= 0x80000000;
71 int64_t charCEs
[CollationFastLatin::NUM_FAST_CHARS
][2];
73 UVector64 contractionCEs
;
76 /** One 16-bit mini CE per unique CE. */
79 // These are constant for a given root collator.
80 uint32_t lastSpecialPrimaries
[NUM_SPECIAL_GROUPS
];
81 uint32_t firstDigitPrimary
;
82 uint32_t firstLatinPrimary
;
83 uint32_t lastLatinPrimary
;
84 // This determines the first normal primary weight which is mapped to
85 // a short mini primary. It must be >=firstDigitPrimary.
86 uint32_t firstShortPrimary
;
88 UBool shortPrimaryOverflow
;
96 #endif // !UCONFIG_NO_COLLATION
97 #endif // __COLLATIONFASTLATINBUILDER_H__