]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/dictionarydata.cpp
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / common / dictionarydata.cpp
CommitLineData
51004dcb
A
1/*
2*******************************************************************************
3* Copyright (C) 2012, International Business Machines
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* dictionarydata.cpp
7*
8* created on: 2012may31
9* created by: Markus W. Scherer & Maxime Serrano
10*/
11
12#include "dictionarydata.h"
13#include "unicode/ucharstrie.h"
14#include "unicode/bytestrie.h"
15#include "unicode/udata.h"
16#include "cmemory.h"
17
18#if !UCONFIG_NO_BREAK_ITERATION
19
20U_NAMESPACE_BEGIN
21
// Namespace-scope definitions of DictionaryData's static constants. No
// initializer is given here, so the values come from the in-class declarations.
22#ifndef CYGWINMSVC /* Skipped on Cygwin/MSVC, where these definitions cause a symbol-redefinition error. */
23const int32_t DictionaryData::TRIE_TYPE_BYTES;
24const int32_t DictionaryData::TRIE_TYPE_UCHARS;
25#endif
26
27DictionaryMatcher::~DictionaryMatcher() {
28}
29
30UCharsDictionaryMatcher::~UCharsDictionaryMatcher() {
31 udata_close(file);
32}
33
34int32_t UCharsDictionaryMatcher::getType() const {
35 return DictionaryData::TRIE_TYPE_UCHARS;
36}
37
38int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
39 UCharsTrie uct(characters);
40 UChar32 c = utext_next32(text);
41 if (c < 0) {
42 return 0;
43 }
44 UStringTrieResult result = uct.first(c);
45 int32_t numChars = 1;
46 count = 0;
47 for (;;) {
48 if (USTRINGTRIE_HAS_VALUE(result)) {
49 if (count < limit) {
50 if (values != NULL) {
51 values[count] = uct.getValue();
52 }
53 lengths[count++] = numChars;
54 }
55 if (result == USTRINGTRIE_FINAL_VALUE) {
56 break;
57 }
58 }
59 else if (result == USTRINGTRIE_NO_MATCH) {
60 break;
61 }
62
63 // TODO: why do we have a text limit if the UText knows its length?
64 if (numChars >= maxLength) {
65 break;
66 }
67
68 c = utext_next32(text);
69 if (c < 0) {
70 break;
71 }
72 ++numChars;
73 result = uct.next(c);
74 }
75 return numChars;
76}
77
78BytesDictionaryMatcher::~BytesDictionaryMatcher() {
79 udata_close(file);
80}
81
82UChar32 BytesDictionaryMatcher::transform(UChar32 c) const {
83 if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) {
84 if (c == 0x200D) {
85 return 0xFF;
86 } else if (c == 0x200C) {
87 return 0xFE;
88 }
89 int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK);
90 if (delta < 0 || 0xFD < delta) {
91 return U_SENTINEL;
92 }
93 return (UChar32)delta;
94 }
95 return c;
96}
97
98int32_t BytesDictionaryMatcher::getType() const {
99 return DictionaryData::TRIE_TYPE_BYTES;
100}
101
102int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
103 BytesTrie bt(characters);
104 UChar32 c = utext_next32(text);
105 if (c < 0) {
106 return 0;
107 }
108 UStringTrieResult result = bt.first(transform(c));
109 int32_t numChars = 1;
110 count = 0;
111 for (;;) {
112 if (USTRINGTRIE_HAS_VALUE(result)) {
113 if (count < limit) {
114 if (values != NULL) {
115 values[count] = bt.getValue();
116 }
117 lengths[count++] = numChars;
118 }
119 if (result == USTRINGTRIE_FINAL_VALUE) {
120 break;
121 }
122 }
123 else if (result == USTRINGTRIE_NO_MATCH) {
124 break;
125 }
126
127 // TODO: why do we have a text limit if the UText knows its length?
128 if (numChars >= maxLength) {
129 break;
130 }
131
132 c = utext_next32(text);
133 if (c < 0) {
134 break;
135 }
136 ++numChars;
137 result = bt.next(transform(c));
138 }
139 return numChars;
140}
141
142
143U_NAMESPACE_END
144
145U_NAMESPACE_USE
146
147U_CAPI int32_t U_EXPORT2
148udict_swap(const UDataSwapper *ds, const void *inData, int32_t length,
149 void *outData, UErrorCode *pErrorCode) {
150 const UDataInfo *pInfo;
151 int32_t headerSize;
152 const uint8_t *inBytes;
153 uint8_t *outBytes;
154 const int32_t *inIndexes;
155 int32_t indexes[DictionaryData::IX_COUNT];
156 int32_t i, offset, size;
157
158 headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
159 if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0;
160 pInfo = (const UDataInfo *)((const char *)inData + 4);
161 if (!(pInfo->dataFormat[0] == 0x44 &&
162 pInfo->dataFormat[1] == 0x69 &&
163 pInfo->dataFormat[2] == 0x63 &&
164 pInfo->dataFormat[3] == 0x74 &&
165 pInfo->formatVersion[0] == 1)) {
166 udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n",
167 pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]);
168 *pErrorCode = U_UNSUPPORTED_ERROR;
169 return 0;
170 }
171
172 inBytes = (const uint8_t *)inData + headerSize;
173 outBytes = (uint8_t *)outData + headerSize;
174
175 inIndexes = (const int32_t *)inBytes;
176 if (length >= 0) {
177 length -= headerSize;
178 if (length < (int32_t)(sizeof(indexes))) {
179 udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length);
180 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
181 return 0;
182 }
183 }
184
185 for (i = 0; i < DictionaryData::IX_COUNT; i++) {
186 indexes[i] = udata_readInt32(ds, inIndexes[i]);
187 }
188
189 size = indexes[DictionaryData::IX_TOTAL_SIZE];
190
191 if (length >= 0) {
192 if (length < size) {
193 udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length);
194 *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
195 return 0;
196 }
197
198 if (inBytes != outBytes) {
199 uprv_memcpy(outBytes, inBytes, size);
200 }
201
202 offset = 0;
203 ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode);
204 offset = (int32_t)sizeof(indexes);
205 int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
206 int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET];
207
208 if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
209 ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode);
210 } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
211 // nothing to do
212 } else {
213 udata_printError(ds, "udict_swap(): unknown trie type!\n");
214 *pErrorCode = U_UNSUPPORTED_ERROR;
215 return 0;
216 }
217
218 // these next two sections are empty in the current format,
219 // but may be used later.
220 offset = nextOffset;
221 nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET];
222 offset = nextOffset;
223 nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE];
224 offset = nextOffset;
225 }
226 return headerSize + size;
227}
228#endif