// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* collationfastlatinbuilder.h
*
* created on: 2013aug09
* created by: Markus W. Scherer
*/
14 #ifndef __COLLATIONFASTLATINBUILDER_H__
15 #define __COLLATIONFASTLATINBUILDER_H__
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_COLLATION
21 #include "unicode/ucol.h"
22 #include "unicode/unistr.h"
23 #include "unicode/uobject.h"
24 #include "collation.h"
25 #include "collationfastlatin.h"
32 class U_I18N_API CollationFastLatinBuilder
: public UObject
{
34 CollationFastLatinBuilder(UErrorCode
&errorCode
);
35 ~CollationFastLatinBuilder();
37 UBool
forData(const CollationData
&data
, UErrorCode
&errorCode
);
39 const uint16_t *getTable() const {
40 return reinterpret_cast<const uint16_t *>(result
.getBuffer());
42 int32_t lengthOfTable() const { return result
.length(); }
45 // space, punct, symbol, currency (not digit)
46 enum { NUM_SPECIAL_GROUPS
= UCOL_REORDER_CODE_CURRENCY
- UCOL_REORDER_CODE_FIRST
+ 1 };
48 UBool
loadGroups(const CollationData
&data
, UErrorCode
&errorCode
);
49 UBool
inSameGroup(uint32_t p
, uint32_t q
) const;
52 void getCEs(const CollationData
&data
, UErrorCode
&errorCode
);
53 UBool
getCEsFromCE32(const CollationData
&data
, UChar32 c
, uint32_t ce32
,
54 UErrorCode
&errorCode
);
55 UBool
getCEsFromContractionCE32(const CollationData
&data
, uint32_t ce32
,
56 UErrorCode
&errorCode
);
57 void addContractionEntry(int32_t x
, int64_t cce0
, int64_t cce1
, UErrorCode
&errorCode
);
58 void addUniqueCE(int64_t ce
, UErrorCode
&errorCode
);
59 uint32_t getMiniCE(int64_t ce
) const;
60 UBool
encodeUniqueCEs(UErrorCode
&errorCode
);
61 UBool
encodeCharCEs(UErrorCode
&errorCode
);
62 UBool
encodeContractions(UErrorCode
&errorCode
);
63 uint32_t encodeTwoCEs(int64_t first
, int64_t second
) const;
65 static UBool
isContractionCharCE(int64_t ce
) {
66 return (uint32_t)(ce
>> 32) == Collation::NO_CE_PRIMARY
&& ce
!= Collation::NO_CE
;
69 static const uint32_t CONTRACTION_FLAG
= 0x80000000;
74 int64_t charCEs
[CollationFastLatin::NUM_FAST_CHARS
][2];
76 UVector64 contractionCEs
;
79 /** One 16-bit mini CE per unique CE. */
82 // These are constant for a given root collator.
83 uint32_t lastSpecialPrimaries
[NUM_SPECIAL_GROUPS
];
84 uint32_t firstDigitPrimary
;
85 uint32_t firstLatinPrimary
;
86 uint32_t lastLatinPrimary
;
87 // This determines the first normal primary weight which is mapped to
88 // a short mini primary. It must be >=firstDigitPrimary.
89 uint32_t firstShortPrimary
;
91 UBool shortPrimaryOverflow
;
99 #endif // !UCONFIG_NO_COLLATION
100 #endif // __COLLATIONFASTLATINBUILDER_H__