]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/urbtok.cpp
ICU-531.30.tar.gz
[apple/icu.git] / icuSources / common / urbtok.cpp
CommitLineData
73c04bcf
A
1/*
2*****************************************************************************************
46f4442e 3* Copyright (C) 2006-2008 Apple Inc. All Rights Reserved.
73c04bcf
A
4*****************************************************************************************
5*/
6
7#include "unicode/utypes.h"
8
9#if !UCONFIG_NO_BREAK_ITERATION
10
11#include "unicode/urbtok.h"
12
13#include "rbtok.h"
14#include "unicode/ustring.h"
15#include "rbbidata.h"
16#include "cmemory.h"
17#include "ucmndata.h"
18
19U_NAMESPACE_USE
20
21U_CAPI UBreakIterator* U_EXPORT2
22urbtok_openRules(const UChar *rules,
23 int32_t rulesLength,
24 UParseError *parseErr,
25 UErrorCode *status)
26{
27 if (status == NULL || U_FAILURE(*status)){
28 return 0;
29 }
30
31 BreakIterator *result = 0;
32 UnicodeString ruleString(rules, rulesLength);
33 result = new RuleBasedTokenizer(ruleString, *parseErr, *status);
34 if(U_FAILURE(*status)) {
35 return 0;
36 }
37
38 UBreakIterator *uBI = (UBreakIterator *)result;
39 return uBI;
40}
41
42U_CAPI UBreakIterator* U_EXPORT2
43urbtok_openBinaryRules(const uint8_t *rules,
44 UErrorCode *status)
45{
46 if (status == NULL || U_FAILURE(*status)){
47 return 0;
48 }
49
50 uint32_t length = ((const RBBIDataHeader *)rules)->fLength;
51 uint8_t *ruleCopy = (uint8_t *) uprv_malloc(length);
52 if (ruleCopy == 0)
53 {
54 *status = U_MEMORY_ALLOCATION_ERROR;
55 return 0;
56 }
57 // Copy the rules so they can be adopted by the tokenizer
58 uprv_memcpy(ruleCopy, rules, length);
59 BreakIterator *result = 0;
60 result = new RuleBasedTokenizer(ruleCopy, *status);
61 if(U_FAILURE(*status)) {
62 return 0;
63 }
64
65 UBreakIterator *uBI = (UBreakIterator *)result;
66 return uBI;
67}
68
46f4442e
A
69U_CAPI UBreakIterator* U_EXPORT2
70urbtok_openBinaryRulesNoCopy(const uint8_t *rules,
71 UErrorCode *status)
72{
73 if (status == NULL || U_FAILURE(*status)){
74 return 0;
75 }
76
77 BreakIterator *result = 0;
78 result = new RuleBasedTokenizer(rules, RuleBasedTokenizer::kDontAdopt, *status);
79 if(U_FAILURE(*status)) {
80 return 0;
81 }
82
83 UBreakIterator *uBI = (UBreakIterator *)result;
84 return uBI;
85}
86
73c04bcf
A
87U_CAPI uint32_t U_EXPORT2
88urbtok_getBinaryRules(UBreakIterator *bi,
89 uint8_t *buffer,
90 uint32_t buffSize,
91 UErrorCode *status)
92{
93 if (status == NULL || U_FAILURE(*status)){
94 return 0;
95 }
96
97 uint32_t length;
98 const uint8_t *rules = ((RuleBasedBreakIterator *)bi)->getBinaryRules(length);
99 if (buffer != 0)
100 {
101 if (length > buffSize)
102 {
103 *status = U_BUFFER_OVERFLOW_ERROR;
104 }
105 else
106 {
107 uprv_memcpy(buffer, rules, length);
108 }
109 }
110 return length;
111}
112
113U_CAPI int32_t U_EXPORT2
114urbtok_tokenize(UBreakIterator *bi,
115 int32_t maxTokens,
116 RuleBasedTokenRange *outTokens,
117 unsigned long *outTokenFlags)
118{
119 return ((RuleBasedTokenizer *)bi)->tokenize(maxTokens, outTokens, outTokenFlags);
120}
121
122U_CAPI void U_EXPORT2
123urbtok_swapBinaryRules(const uint8_t *rules,
124 uint8_t *buffer,
125 UBool inIsBigEndian,
126 UBool outIsBigEndian,
127 UErrorCode *status)
128{
46f4442e
A
129 DataHeader *outH = NULL;
130 int32_t outLength = 0;
73c04bcf
A
131 UDataSwapper *ds = udata_openSwapper(inIsBigEndian, U_CHARSET_FAMILY, outIsBigEndian, U_CHARSET_FAMILY, status);
132
133 if (status == NULL || U_FAILURE(*status)){
134 return;
135 }
136
137 uint32_t length = ds->readUInt32(((const RBBIDataHeader *)rules)->fLength);
138 uint32_t totalLength = sizeof(DataHeader) + length;
139
140 DataHeader *dh = (DataHeader *)uprv_malloc(totalLength);
141 if (dh == 0)
142 {
143 *status = U_MEMORY_ALLOCATION_ERROR;
144 goto closeSwapper;
145 }
46f4442e 146 outH = (DataHeader *)uprv_malloc(totalLength);
73c04bcf
A
147 if (outH == 0)
148 {
149 *status = U_MEMORY_ALLOCATION_ERROR;
150 uprv_free(dh);
151 goto closeSwapper;
152 }
153 dh->dataHeader.headerSize = ds->readUInt16(sizeof(DataHeader));
154 dh->dataHeader.magic1 = 0xda;
155 dh->dataHeader.magic2 = 0x27;
156 dh->info.size = ds->readUInt16(sizeof(UDataInfo));
157 dh->info.reservedWord = 0;
158 dh->info.isBigEndian = inIsBigEndian;
159 dh->info.charsetFamily = U_CHARSET_FAMILY;
160 dh->info.sizeofUChar = U_SIZEOF_UCHAR;
161 dh->info.reservedByte = 0;
162 uprv_memcpy(dh->info.dataFormat, "Brk ", sizeof(dh->info.dataFormat));
163 uprv_memcpy(dh->info.formatVersion, ((const RBBIDataHeader *)rules)->fFormatVersion, sizeof(dh->info.formatVersion));
164 dh->info.dataVersion[0] = 4; // Unicode version
165 dh->info.dataVersion[1] = 1;
166 dh->info.dataVersion[2] = 0;
167 dh->info.dataVersion[3] = 0;
168 uprv_memcpy(((uint8_t*)dh) + sizeof(DataHeader), rules, length);
169
46f4442e 170 outLength = ubrk_swap(ds, dh, totalLength, outH, status);
73c04bcf
A
171 if (U_SUCCESS(*status) && outLength != totalLength) // something went horribly wrong
172 {
173 *status = U_INVALID_FORMAT_ERROR;
174 }
175
176 if (U_SUCCESS(*status))
177 {
178 uprv_memcpy(buffer, ((uint8_t *)outH) + sizeof(DataHeader), length);
179 }
180 uprv_free(outH);
181 uprv_free(dh);
182
183closeSwapper:
184 udata_closeSwapper(ds);
185}
186
187
188#endif /* #if !UCONFIG_NO_BREAK_ITERATION */