]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | *************************************************************************** | |
3 | * Copyright (C) 1999-2003 International Business Machines Corporation * | |
4 | * and others. All rights reserved. * | |
5 | *************************************************************************** | |
6 | */ | |
7 | ||
8 | #include "unicode/utypes.h" | |
9 | ||
10 | #if !UCONFIG_NO_BREAK_ITERATION | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | #include "rbbidata.h" | |
14 | #include "rbbirb.h" | |
15 | #include "utrie.h" | |
16 | #include "udatamem.h" | |
17 | #include "cmemory.h" | |
18 | #include "cstring.h" | |
19 | #include "umutex.h" | |
20 | ||
21 | #include "uassert.h" | |
22 | ||
23 | ||
24 | //----------------------------------------------------------------------------------- | |
25 | // | |
26 | // Trie access folding function. Copied as-is from properties code in uchar.c | |
27 | // | |
28 | //----------------------------------------------------------------------------------- | |
29 | U_CDECL_BEGIN | |
/*
 * Folding-offset callback installed on the break-iterator trie.
 *
 * data    a trie result value.
 * returns bits 14..0 of data when bit 15 is set, otherwise 0.
 */
static int32_t U_CALLCONV
getFoldingOffset(uint32_t data) {
    const uint32_t hasOffsetBit = 0x8000;   /* bit 15: a folding offset is present */
    const uint32_t offsetBits   = 0x7fff;   /* bits 14..0: the offset itself       */

    return ((data & hasOffsetBit) != 0) ? (int32_t)(data & offsetBits) : 0;
}
39 | U_CDECL_END | |
40 | ||
41 | U_NAMESPACE_BEGIN | |
42 | ||
43 | //----------------------------------------------------------------------------- | |
44 | // | |
45 | // Constructors. | |
46 | // | |
47 | //----------------------------------------------------------------------------- | |
48 | RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) { | |
49 | init(data, status); | |
50 | } | |
51 | ||
52 | RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { | |
53 | const RBBIDataHeader *d = (const RBBIDataHeader *) | |
54 | ((char *)&(udm->pHeader->info) + udm->pHeader->info.size); | |
55 | init(d, status); | |
56 | fUDataMem = udm; | |
57 | } | |
58 | ||
59 | //----------------------------------------------------------------------------- | |
60 | // | |
61 | // init(). Does most of the work of construction, shared between the | |
62 | // constructors. | |
63 | // | |
64 | //----------------------------------------------------------------------------- | |
65 | void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { | |
66 | if (U_FAILURE(status)) { | |
67 | return; | |
68 | } | |
69 | fHeader = data; | |
70 | if (fHeader->fMagic != 0xb1a0) { | |
71 | status = U_BRK_INTERNAL_ERROR; | |
72 | return; | |
73 | } | |
74 | ||
75 | fUDataMem = NULL; | |
76 | fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable); | |
77 | fReverseTable = NULL; | |
78 | if (data->fRTableLen != 0) { | |
79 | fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable); | |
80 | } | |
81 | ||
82 | ||
83 | utrie_unserialize(&fTrie, | |
84 | (uint8_t *)data + fHeader->fTrie, | |
85 | fHeader->fTrieLen, | |
86 | &status); | |
87 | if (U_FAILURE(status)) { | |
88 | return; | |
89 | } | |
90 | fTrie.getFoldingOffset=getFoldingOffset; | |
91 | ||
92 | ||
93 | fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource); | |
94 | fRuleString.setTo(TRUE, fRuleSource, -1); | |
95 | ||
96 | fRefCount = 1; | |
97 | ||
98 | #ifdef RBBI_DEBUG | |
99 | char *debugEnv = getenv("U_RBBIDEBUG"); | |
100 | if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();} | |
101 | #endif | |
102 | } | |
103 | ||
104 | ||
105 | //----------------------------------------------------------------------------- | |
106 | // | |
107 | // Destructor. Don't call this - use removeReference() instead. | |
108 | // | |
109 | //----------------------------------------------------------------------------- | |
110 | RBBIDataWrapper::~RBBIDataWrapper() { | |
111 | U_ASSERT(fRefCount == 0); | |
112 | if (fUDataMem) { | |
113 | udata_close(fUDataMem); | |
114 | } else { | |
115 | uprv_free((void *)fHeader); | |
116 | } | |
117 | } | |
118 | ||
119 | ||
120 | ||
121 | //----------------------------------------------------------------------------- | |
122 | // | |
123 | // Operator == Consider two RBBIDataWrappers to be equal if they | |
124 | // refer to the same underlying data. Although | |
125 | // the data wrappers are normally shared between | |
126 | // iterator instances, it's possible to independently | |
127 | // open the same data twice, and get two instances, which | |
128 | // should still be ==. | |
129 | // | |
130 | //----------------------------------------------------------------------------- | |
131 | UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const { | |
132 | if (fHeader == other.fHeader) { | |
133 | return TRUE; | |
134 | } | |
135 | if (fHeader->fLength != other.fHeader->fLength) { | |
136 | return FALSE; | |
137 | } | |
138 | if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) { | |
139 | return TRUE; | |
140 | } | |
141 | return FALSE; | |
142 | } | |
143 | ||
144 | int32_t RBBIDataWrapper::hashCode() { | |
145 | return fHeader->fFTableLen; | |
146 | } | |
147 | ||
148 | ||
149 | ||
150 | //----------------------------------------------------------------------------- | |
151 | // | |
152 | // Reference Counting. A single RBBIDataWrapper object is shared among | |
153 | // however many RulesBasedBreakIterator instances are | |
154 | // referencing the same data. | |
155 | // | |
156 | //----------------------------------------------------------------------------- | |
157 | void RBBIDataWrapper::removeReference() { | |
158 | if (umtx_atomic_dec(&fRefCount) == 0) { | |
159 | delete this; | |
160 | } | |
161 | } | |
162 | ||
163 | ||
164 | RBBIDataWrapper *RBBIDataWrapper::addReference() { | |
165 | umtx_atomic_inc(&fRefCount); | |
166 | return this; | |
167 | } | |
168 | ||
169 | ||
170 | ||
171 | //----------------------------------------------------------------------------- | |
172 | // | |
173 | // getRuleSourceString | |
174 | // | |
175 | //----------------------------------------------------------------------------- | |
176 | const UnicodeString &RBBIDataWrapper::getRuleSourceString() { | |
177 | return fRuleString; | |
178 | } | |
179 | ||
180 | ||
181 | //----------------------------------------------------------------------------- | |
182 | // | |
183 | // print - debugging function to dump the runtime data tables. | |
184 | // | |
185 | //----------------------------------------------------------------------------- | |
186 | void RBBIDataWrapper::printData() { | |
187 | #ifdef RBBI_DEBUG | |
188 | uint32_t c, s; | |
189 | ||
190 | RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); | |
191 | RBBIDebugPrintf(" Version = %d\n", fHeader->fVersion); | |
192 | RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength); | |
193 | RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount); | |
194 | ||
195 | RBBIDebugPrintf(" Forward State Transition Table\n"); | |
196 | RBBIDebugPrintf("State | Acc LA Tag"); | |
197 | for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);} | |
198 | RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) {RBBIDebugPrintf("----");} | |
199 | RBBIDebugPrintf("\n"); | |
200 | ||
201 | for (s=0; s<fForwardTable->fNumStates; s++) { | |
202 | RBBIStateTableRow *row = (RBBIStateTableRow *) | |
203 | (fForwardTable->fTableData + (fForwardTable->fRowLen * s)); | |
204 | RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->fAccepting, row->fLookAhead, row->fTag); | |
205 | for (c=0; c<fHeader->fCatCount; c++) { | |
206 | RBBIDebugPrintf("%3d ", row->fNextState[c]); | |
207 | } | |
208 | RBBIDebugPrintf("\n"); | |
209 | } | |
210 | ||
211 | RBBIDebugPrintf("\nOrignal Rules source:\n"); | |
212 | c = 0; | |
213 | for (;;) { | |
214 | if (fRuleSource[c] == 0) | |
215 | break; | |
216 | RBBIDebugPrintf("%c", fRuleSource[c]); | |
217 | c++; | |
218 | } | |
219 | RBBIDebugPrintf("\n\n"); | |
220 | #endif | |
221 | } | |
222 | ||
223 | ||
224 | ||
225 | U_NAMESPACE_END | |
226 | ||
227 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |