/*
*******************************************************************************
* Copyright (C) 2012-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationbasedatabuilder.h
*
* created on: 2012aug11
* created by: Markus W. Scherer
*/
12 #ifndef __COLLATIONBASEDATABUILDER_H__
13 #define __COLLATIONBASEDATABUILDER_H__
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_COLLATION
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
21 #include "collation.h"
22 #include "collationdata.h"
23 #include "collationdatabuilder.h"
24 #include "normalizer2impl.h"
33 * Low-level base CollationData builder.
35 class U_I18N_API CollationBaseDataBuilder
: public CollationDataBuilder
{
37 CollationBaseDataBuilder(UErrorCode
&errorCode
);
39 virtual ~CollationBaseDataBuilder();
41 void init(UErrorCode
&errorCode
);
44 * Sets the Han ranges as ranges of offset CE32s.
45 * Note: Unihan extension A sorts after the other BMP ranges.
46 * See http://www.unicode.org/reports/tr10/#Implicit_Weights
48 * @param ranges array of ranges of [:Unified_Ideograph:] in collation order,
49 * as (start, end) code point pairs
50 * @param length number of code points (not pairs)
51 * @param errorCode in/out error code
53 void initHanRanges(const UChar32 ranges
[], int32_t length
, UErrorCode
&errorCode
);
55 void setNumericPrimary(uint32_t np
) { numericPrimary
= np
; }
57 virtual UBool
isCompressibleLeadByte(uint32_t b
) const;
59 void setCompressibleLeadByte(uint32_t b
);
61 static int32_t diffTwoBytePrimaries(uint32_t p1
, uint32_t p2
, UBool isCompressible
);
62 static int32_t diffThreeBytePrimaries(uint32_t p1
, uint32_t p2
, UBool isCompressible
);
64 virtual uint32_t encodeCEs(const int64_t ces
[], int32_t cesLength
, UErrorCode
&errorCode
);
66 void addRootElements(const int64_t ces
[], int32_t cesLength
, UErrorCode
&errorCode
);
67 void addRootElement(int64_t ce
, UErrorCode
&errorCode
);
69 void addReorderingGroup(uint32_t firstByte
, uint32_t lastByte
,
70 const UnicodeString
&groupScripts
,
71 UErrorCode
&errorCode
);
73 virtual void build(CollationData
&data
, UErrorCode
&errorCode
);
75 void buildRootElementsTable(UVector32
&table
, UErrorCode
&errorCode
);
78 int32_t writeRootElementsRange(
79 uint32_t prevPrimary
, uint32_t p
, int32_t i
,
80 UVector32
&table
, UErrorCode
&errorCode
);
82 // Flags for which primary-weight lead bytes are compressible.
83 UBool compressibleBytes
[256];
84 uint32_t numericPrimary
;
85 uint32_t firstHanPrimary
;
86 uint32_t lastHanPrimary
;
88 UVector64 rootElements
;
89 UnicodeString scripts
;
94 #endif // !UCONFIG_NO_COLLATION
95 #endif // __COLLATIONBASEDATABUILDER_H__