2 *******************************************************************************
3 * Copyright (C) 2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * created on: 2012may31
9 * created by: Markus W. Scherer & Maxime Serrano
12 #include "dictionarydata.h"
13 #include "unicode/ucharstrie.h"
14 #include "unicode/bytestrie.h"
15 #include "unicode/udata.h"
18 #if !UCONFIG_NO_BREAK_ITERATION
// Namespace-scope definitions (no initializer) of the DictionaryData constants
// TRIE_TYPE_BYTES and TRIE_TYPE_UCHARS.
// They are wrapped in #ifndef CYGWINMSVC: per the original note on that line,
// defining them on Cygwin/MSVC leads to a symbol-redefinition error.
// NOTE(review): the matching #endif is not visible in this listing - confirm
// against the complete file.
22 #ifndef CYGWINMSVC /* On Cygwin/MSVC, the error redefinition of symbols occurs.*/
23 const int32_t DictionaryData::TRIE_TYPE_BYTES
;
24 const int32_t DictionaryData::TRIE_TYPE_UCHARS
;
// Out-of-line definition of the DictionaryMatcher destructor.
// NOTE(review): the body and closing brace are not visible in this listing.
27 DictionaryMatcher::~DictionaryMatcher() {
// Out-of-line definition of the UCharsDictionaryMatcher destructor.
// NOTE(review): the body and closing brace are not visible in this listing, so
// whatever cleanup it performs cannot be documented from here.
30 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
// Returns the trie type of this matcher, DictionaryData::TRIE_TYPE_UCHARS.
// NOTE(review): the closing brace is not visible in this listing.
34 int32_t UCharsDictionaryMatcher::getType() const {
35 return DictionaryData::TRIE_TYPE_UCHARS
;
// Looks up the text in a UCharsTrie constructed from `characters`.
// Code points are read with utext_next32() and fed to the trie. When the trie
// result carries a value, that value is stored in values[count], numChars is
// stored in lengths[count], and count is incremented.
//   text      - input text, read through utext_next32()
//   maxLength - bound that numChars is compared against
//   lengths   - output array receiving numChars for each recorded entry
//   count     - passed by reference; index of the slot written next, incremented
//               after each recorded entry
//   limit     - not referenced in the visible lines
//   values    - output array receiving the trie value for each recorded entry
// NOTE(review): this listing is missing lines (the embedded line numbers skip).
// The declaration and update of numChars, any enclosing loop, the bodies of the
// FINAL_VALUE / NO_MATCH / maxLength branches, the call that feeds later code
// points to the trie, and the return statement are not visible here.
38 int32_t UCharsDictionaryMatcher::matches(UText
*text
, int32_t maxLength
, int32_t *lengths
, int32_t &count
, int32_t limit
, int32_t *values
) const {
// Trie reader over `characters`.
39 UCharsTrie
uct(characters
);
// First code point of the text.
40 UChar32 c
= utext_next32(text
);
// Start the trie traversal with that first code point.
44 UStringTrieResult result
= uct
.first(c
);
// The input consumed so far has a value in the trie: record the value and the
// current numChars in the same slot, then advance count.
48 if (USTRINGTRIE_HAS_VALUE(result
)) {
51 values
[count
] = uct
.getValue();
53 lengths
[count
++] = numChars
;
// Final value reported by the trie; the action taken is not visible here.
55 if (result
== USTRINGTRIE_FINAL_VALUE
) {
// The trie reports no match for the input so far; the action taken is not
// visible here.
59 else if (result
== USTRINGTRIE_NO_MATCH
) {
63 // TODO: why do we have a text limit if the UText knows its length?
// Check numChars against the caller-supplied maxLength.
64 if (numChars
>= maxLength
) {
// Read the next code point from the text.
68 c
= utext_next32(text
);
// Out-of-line definition of the BytesDictionaryMatcher destructor.
// NOTE(review): the body and closing brace are not visible in this listing, so
// whatever cleanup it performs cannot be documented from here.
78 BytesDictionaryMatcher::~BytesDictionaryMatcher() {
// Maps code point c to the value that the matches() function passes to the
// bytes trie.
// When the transform-type bits of transformConstant equal
// DictionaryData::TRANSFORM_TYPE_OFFSET, the offset bits of transformConstant
// (TRANSFORM_OFFSET_MASK) are subtracted from c. The difference is range-checked
// against 0..0xFD and, on the visible return path, returned as a UChar32.
// U+200C is tested as a special case before the subtraction.
// NOTE(review): this listing is missing lines. The first branch of the special
// case chain, the values returned for the special cases and for an out-of-range
// delta, and the result for other transform types are not visible here.
82 UChar32
BytesDictionaryMatcher::transform(UChar32 c
) const {
// Select the offset transform by the type bits of transformConstant.
83 if ((transformConstant
& DictionaryData::TRANSFORM_TYPE_MASK
) == DictionaryData::TRANSFORM_TYPE_OFFSET
) {
// Special case for U+200C (ZERO WIDTH NON-JOINER); the returned value is not
// visible in this listing.
86 } else if (c
== 0x200C) {
// Distance of c from the offset stored in the low bits of transformConstant.
89 int32_t delta
= c
- (transformConstant
& DictionaryData::TRANSFORM_OFFSET_MASK
);
// Reject code points below the offset or more than 0xFD above it.
90 if (delta
< 0 || 0xFD < delta
) {
93 return (UChar32
)delta
;
// Returns the trie type of this matcher, DictionaryData::TRIE_TYPE_BYTES.
// NOTE(review): the closing brace is not visible in this listing.
98 int32_t BytesDictionaryMatcher::getType() const {
99 return DictionaryData::TRIE_TYPE_BYTES
;
// Looks up the text in a BytesTrie constructed from `characters`.
// Code points are read with utext_next32(), mapped through transform(), and fed
// to the trie. When the trie result carries a value, numChars is stored in
// lengths[count] and count is incremented; the trie value is stored in
// values[count] only when values is not NULL.
//   text      - input text, read through utext_next32()
//   maxLength - bound that numChars is compared against
//   lengths   - output array receiving numChars for each recorded entry
//   count     - passed by reference; index of the slot written next, incremented
//               after each recorded entry
//   limit     - not referenced in the visible lines
//   values    - optional output array (may be NULL) receiving trie values
// NOTE(review): this listing is missing lines (the embedded line numbers skip).
// Any enclosing loop, the update of numChars, the bodies of the FINAL_VALUE /
// NO_MATCH / maxLength branches, and the return statement are not visible here.
102 int32_t BytesDictionaryMatcher::matches(UText
*text
, int32_t maxLength
, int32_t *lengths
, int32_t &count
, int32_t limit
, int32_t *values
) const {
// Trie reader over `characters`.
103 BytesTrie
bt(characters
);
// First code point of the text.
104 UChar32 c
= utext_next32(text
);
// Start the trie traversal with the transformed first code point.
108 UStringTrieResult result
= bt
.first(transform(c
));
// numChars starts at 1, matching the one code point read so far.
109 int32_t numChars
= 1;
// The input consumed so far has a value in the trie.
112 if (USTRINGTRIE_HAS_VALUE(result
)) {
// Values are optional: store one only if the caller supplied an array.
114 if (values
!= NULL
) {
115 values
[count
] = bt
.getValue();
// Record the current numChars and advance count.
117 lengths
[count
++] = numChars
;
// Final value reported by the trie; the action taken is not visible here.
119 if (result
== USTRINGTRIE_FINAL_VALUE
) {
// The trie reports no match for the input so far; the action taken is not
// visible here.
123 else if (result
== USTRINGTRIE_NO_MATCH
) {
127 // TODO: why do we have a text limit if the UText knows its length?
// Check numChars against the caller-supplied maxLength.
128 if (numChars
>= maxLength
) {
// Read the next code point from the text.
132 c
= utext_next32(text
);
// Continue the trie traversal with the transformed code point.
137 result
= bt
.next(transform(c
));
// Swaps a dictionary data item using the UDataSwapper ds.
// Visible steps: swap the data header with udata_swapDataHeader(), verify that
// the item is dictionary data (dataFormat bytes 0x44 0x69 0x63 0x74, ASCII
// "Dict", format version 1), read the int32_t index array that follows the
// header, copy the payload to the output when input and output differ, swap the
// index array as 32-bit units and, for a UChars trie, swap the trie as 16-bit
// units.
//   ds         - swapper providing swapArray16/swapArray32; also passed to the
//                udata_* helpers
//   inData     - input data, starting with the data header
//   length     - input length in bytes; reduced by headerSize before the size
//                checks
//   outData    - output buffer; the output payload pointer is outData + headerSize
//   pErrorCode - ICU error code; set to U_UNSUPPORTED_ERROR for an unrecognized
//                format or trie type and to U_INDEX_OUTOFBOUNDS_ERROR when the
//                input is too short
// Returns 0 if pErrorCode is NULL or already indicates failure after the header
// swap; the final visible return is headerSize + size.
// NOTE(review): this listing is missing lines (the embedded line numbers skip).
// The declarations of headerSize and outBytes, the return statements after each
// error report, the conditions guarding the length checks and the swapping
// section, and all closing braces are not visible here.
147 U_CAPI
int32_t U_EXPORT2
148 udict_swap(const UDataSwapper
*ds
, const void *inData
, int32_t length
,
149 void *outData
, UErrorCode
*pErrorCode
) {
150 const UDataInfo
*pInfo
;
152 const uint8_t *inBytes
;
154 const int32_t *inIndexes
;
// Local copy of the index array, filled through udata_readInt32() below.
155 int32_t indexes
[DictionaryData::IX_COUNT
];
156 int32_t i
, offset
, size
;
// Swap the data header; the result is used below as the offset of the payload.
158 headerSize
= udata_swapDataHeader(ds
, inData
, length
, outData
, pErrorCode
);
159 if (pErrorCode
== NULL
|| U_FAILURE(*pErrorCode
)) return 0;
// The UDataInfo is read at byte offset 4 of the input.
160 pInfo
= (const UDataInfo
*)((const char *)inData
+ 4);
// Accept only dataFormat "Dict" (0x44 0x69 0x63 0x74) with format version 1.
161 if (!(pInfo
->dataFormat
[0] == 0x44 &&
162 pInfo
->dataFormat
[1] == 0x69 &&
163 pInfo
->dataFormat
[2] == 0x63 &&
164 pInfo
->dataFormat
[3] == 0x74 &&
165 pInfo
->formatVersion
[0] == 1)) {
166 udata_printError(ds
, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
167 pInfo
->dataFormat
[0], pInfo
->dataFormat
[1], pInfo
->dataFormat
[2], pInfo
->dataFormat
[3], pInfo
->formatVersion
[0]);
168 *pErrorCode
= U_UNSUPPORTED_ERROR
;
// Payload pointers: input and output just past the header.
172 inBytes
= (const uint8_t *)inData
+ headerSize
;
173 outBytes
= (uint8_t *)outData
+ headerSize
;
// The payload is addressed as an array of int32_t indexes.
175 inIndexes
= (const int32_t *)inBytes
;
// From here on, length counts only the bytes after the header.
177 length
-= headerSize
;
// The input must hold at least the complete index array.
178 if (length
< (int32_t)(sizeof(indexes
))) {
179 udata_printError(ds
, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length
);
180 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
// Read every index through udata_readInt32() into the local copy.
185 for (i
= 0; i
< DictionaryData::IX_COUNT
; i
++) {
186 indexes
[i
] = udata_readInt32(ds
, inIndexes
[i
]);
// Total payload size as recorded in the data's own index array.
189 size
= indexes
[DictionaryData::IX_TOTAL_SIZE
];
// Error report for an input shorter than the whole payload.
// NOTE(review): the guarding condition is not visible in this listing.
193 udata_printError(ds
, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length
);
194 *pErrorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
// Input and output are distinct buffers: copy `size` bytes of payload to the
// output before the individual sections are swapped.
198 if (inBytes
!= outBytes
) {
199 uprv_memcpy(outBytes
, inBytes
, size
);
// Swap the index array itself as 32-bit units.
203 ds
->swapArray32(ds
, inBytes
, sizeof(indexes
), outBytes
, pErrorCode
);
// The next section starts right after the index array.
204 offset
= (int32_t)sizeof(indexes
);
// Trie type is taken from the masked IX_TRIE_TYPE index; the section swapped
// below ends at IX_RESERVED1_OFFSET.
205 int32_t trieType
= indexes
[DictionaryData::IX_TRIE_TYPE
] & DictionaryData::TRIE_TYPE_MASK
;
206 int32_t nextOffset
= indexes
[DictionaryData::IX_RESERVED1_OFFSET
];
// A UChars trie is swapped as 16-bit units over [offset, nextOffset).
208 if (trieType
== DictionaryData::TRIE_TYPE_UCHARS
) {
209 ds
->swapArray16(ds
, inBytes
+ offset
, nextOffset
- offset
, outBytes
+ offset
, pErrorCode
);
// NOTE(review): the body of the TRIE_TYPE_BYTES branch is not visible here;
// presumably byte data needs no swapping - confirm.
210 } else if (trieType
== DictionaryData::TRIE_TYPE_BYTES
) {
// Any other trie type is reported as unsupported.
213 udata_printError(ds
, "udict_swap(): unknown trie type!\n");
214 *pErrorCode
= U_UNSUPPORTED_ERROR
;
218 // these next two sections are empty in the current format,
219 // but may be used later.
// nextOffset is stepped to the end of each reserved section; nothing is
// swapped for them in the visible lines.
221 nextOffset
= indexes
[DictionaryData::IX_RESERVED2_OFFSET
];
223 nextOffset
= indexes
[DictionaryData::IX_TOTAL_SIZE
];
// Header size plus the payload size from the index array.
226 return headerSize
+ size
;