]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /* |
2 | ***************************************************************************************** | |
46f4442e | 3 | * Copyright (C) 2006-2008 Apple Inc. All Rights Reserved. |
73c04bcf A |
4 | ***************************************************************************************** |
5 | */ | |
6 | ||
7 | #include "unicode/utypes.h" | |
8 | ||
9 | #if !UCONFIG_NO_BREAK_ITERATION | |
10 | ||
11 | #include "unicode/urbtok.h" | |
12 | ||
13 | #include "rbtok.h" | |
14 | #include "unicode/ustring.h" | |
15 | #include "rbbidata.h" | |
16 | #include "cmemory.h" | |
17 | #include "ucmndata.h" | |
18 | ||
19 | U_NAMESPACE_USE | |
20 | ||
21 | U_CAPI UBreakIterator* U_EXPORT2 | |
22 | urbtok_openRules(const UChar *rules, | |
23 | int32_t rulesLength, | |
24 | UParseError *parseErr, | |
25 | UErrorCode *status) | |
26 | { | |
27 | if (status == NULL || U_FAILURE(*status)){ | |
28 | return 0; | |
29 | } | |
30 | ||
31 | BreakIterator *result = 0; | |
32 | UnicodeString ruleString(rules, rulesLength); | |
33 | result = new RuleBasedTokenizer(ruleString, *parseErr, *status); | |
34 | if(U_FAILURE(*status)) { | |
35 | return 0; | |
36 | } | |
37 | ||
38 | UBreakIterator *uBI = (UBreakIterator *)result; | |
39 | return uBI; | |
40 | } | |
41 | ||
42 | U_CAPI UBreakIterator* U_EXPORT2 | |
43 | urbtok_openBinaryRules(const uint8_t *rules, | |
44 | UErrorCode *status) | |
45 | { | |
46 | if (status == NULL || U_FAILURE(*status)){ | |
47 | return 0; | |
48 | } | |
49 | ||
50 | uint32_t length = ((const RBBIDataHeader *)rules)->fLength; | |
51 | uint8_t *ruleCopy = (uint8_t *) uprv_malloc(length); | |
52 | if (ruleCopy == 0) | |
53 | { | |
54 | *status = U_MEMORY_ALLOCATION_ERROR; | |
55 | return 0; | |
56 | } | |
57 | // Copy the rules so they can be adopted by the tokenizer | |
58 | uprv_memcpy(ruleCopy, rules, length); | |
59 | BreakIterator *result = 0; | |
60 | result = new RuleBasedTokenizer(ruleCopy, *status); | |
61 | if(U_FAILURE(*status)) { | |
62 | return 0; | |
63 | } | |
64 | ||
65 | UBreakIterator *uBI = (UBreakIterator *)result; | |
66 | return uBI; | |
67 | } | |
68 | ||
46f4442e A |
69 | U_CAPI UBreakIterator* U_EXPORT2 |
70 | urbtok_openBinaryRulesNoCopy(const uint8_t *rules, | |
71 | UErrorCode *status) | |
72 | { | |
73 | if (status == NULL || U_FAILURE(*status)){ | |
74 | return 0; | |
75 | } | |
76 | ||
77 | BreakIterator *result = 0; | |
78 | result = new RuleBasedTokenizer(rules, RuleBasedTokenizer::kDontAdopt, *status); | |
79 | if(U_FAILURE(*status)) { | |
80 | return 0; | |
81 | } | |
82 | ||
83 | UBreakIterator *uBI = (UBreakIterator *)result; | |
84 | return uBI; | |
85 | } | |
86 | ||
73c04bcf A |
87 | U_CAPI uint32_t U_EXPORT2 |
88 | urbtok_getBinaryRules(UBreakIterator *bi, | |
89 | uint8_t *buffer, | |
90 | uint32_t buffSize, | |
91 | UErrorCode *status) | |
92 | { | |
93 | if (status == NULL || U_FAILURE(*status)){ | |
94 | return 0; | |
95 | } | |
96 | ||
97 | uint32_t length; | |
98 | const uint8_t *rules = ((RuleBasedBreakIterator *)bi)->getBinaryRules(length); | |
99 | if (buffer != 0) | |
100 | { | |
101 | if (length > buffSize) | |
102 | { | |
103 | *status = U_BUFFER_OVERFLOW_ERROR; | |
104 | } | |
105 | else | |
106 | { | |
107 | uprv_memcpy(buffer, rules, length); | |
108 | } | |
109 | } | |
110 | return length; | |
111 | } | |
112 | ||
113 | U_CAPI int32_t U_EXPORT2 | |
114 | urbtok_tokenize(UBreakIterator *bi, | |
115 | int32_t maxTokens, | |
116 | RuleBasedTokenRange *outTokens, | |
117 | unsigned long *outTokenFlags) | |
118 | { | |
119 | return ((RuleBasedTokenizer *)bi)->tokenize(maxTokens, outTokens, outTokenFlags); | |
120 | } | |
121 | ||
122 | U_CAPI void U_EXPORT2 | |
123 | urbtok_swapBinaryRules(const uint8_t *rules, | |
124 | uint8_t *buffer, | |
125 | UBool inIsBigEndian, | |
126 | UBool outIsBigEndian, | |
127 | UErrorCode *status) | |
128 | { | |
46f4442e A |
129 | DataHeader *outH = NULL; |
130 | int32_t outLength = 0; | |
73c04bcf A |
131 | UDataSwapper *ds = udata_openSwapper(inIsBigEndian, U_CHARSET_FAMILY, outIsBigEndian, U_CHARSET_FAMILY, status); |
132 | ||
133 | if (status == NULL || U_FAILURE(*status)){ | |
134 | return; | |
135 | } | |
136 | ||
137 | uint32_t length = ds->readUInt32(((const RBBIDataHeader *)rules)->fLength); | |
138 | uint32_t totalLength = sizeof(DataHeader) + length; | |
139 | ||
140 | DataHeader *dh = (DataHeader *)uprv_malloc(totalLength); | |
141 | if (dh == 0) | |
142 | { | |
143 | *status = U_MEMORY_ALLOCATION_ERROR; | |
144 | goto closeSwapper; | |
145 | } | |
46f4442e | 146 | outH = (DataHeader *)uprv_malloc(totalLength); |
73c04bcf A |
147 | if (outH == 0) |
148 | { | |
149 | *status = U_MEMORY_ALLOCATION_ERROR; | |
150 | uprv_free(dh); | |
151 | goto closeSwapper; | |
152 | } | |
153 | dh->dataHeader.headerSize = ds->readUInt16(sizeof(DataHeader)); | |
154 | dh->dataHeader.magic1 = 0xda; | |
155 | dh->dataHeader.magic2 = 0x27; | |
156 | dh->info.size = ds->readUInt16(sizeof(UDataInfo)); | |
157 | dh->info.reservedWord = 0; | |
158 | dh->info.isBigEndian = inIsBigEndian; | |
159 | dh->info.charsetFamily = U_CHARSET_FAMILY; | |
160 | dh->info.sizeofUChar = U_SIZEOF_UCHAR; | |
161 | dh->info.reservedByte = 0; | |
162 | uprv_memcpy(dh->info.dataFormat, "Brk ", sizeof(dh->info.dataFormat)); | |
163 | uprv_memcpy(dh->info.formatVersion, ((const RBBIDataHeader *)rules)->fFormatVersion, sizeof(dh->info.formatVersion)); | |
164 | dh->info.dataVersion[0] = 4; // Unicode version | |
165 | dh->info.dataVersion[1] = 1; | |
166 | dh->info.dataVersion[2] = 0; | |
167 | dh->info.dataVersion[3] = 0; | |
168 | uprv_memcpy(((uint8_t*)dh) + sizeof(DataHeader), rules, length); | |
169 | ||
46f4442e | 170 | outLength = ubrk_swap(ds, dh, totalLength, outH, status); |
73c04bcf A |
171 | if (U_SUCCESS(*status) && outLength != totalLength) // something went horribly wrong |
172 | { | |
173 | *status = U_INVALID_FORMAT_ERROR; | |
174 | } | |
175 | ||
176 | if (U_SUCCESS(*status)) | |
177 | { | |
178 | uprv_memcpy(buffer, ((uint8_t *)outH) + sizeof(DataHeader), length); | |
179 | } | |
180 | uprv_free(outH); | |
181 | uprv_free(dh); | |
182 | ||
183 | closeSwapper: | |
184 | udata_closeSwapper(ds); | |
185 | } | |
186 | ||
187 | ||
188 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |