2 *****************************************************************************************
3 * Copyright (C) 2006-2008 Apple Inc. All Rights Reserved.
4 *****************************************************************************************
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_BREAK_ITERATION
11 #include "unicode/urbtok.h"
14 #include "unicode/ustring.h"
// urbtok_openRules: C entry point that builds a RuleBasedTokenizer from rule
// source text and exposes it through an opaque UBreakIterator pointer.
// NOTE(review): this listing carries embedded line numbers that jump
// (22 -> 24 -> 27 ...), so the remaining parameters (rulesLength and status
// are used below), the braces and the return statements are not visible here.
21 U_CAPI UBreakIterator
* U_EXPORT2
22 urbtok_openRules(const UChar
*rules
,
24 UParseError
*parseErr
,
// Guard: bail out when status is NULL or already records a failure.
// The body of this branch is not visible in this listing.
27 if (status
== NULL
|| U_FAILURE(*status
)){
31 BreakIterator
*result
= 0;
// Wrap the caller's UChar buffer and its length in a UnicodeString.
32 UnicodeString
ruleString(rules
, rulesLength
);
// NOTE(review): parseErr is dereferenced unconditionally here; no NULL check
// on it is visible — confirm callers may never pass NULL.
33 result
= new RuleBasedTokenizer(ruleString
, *parseErr
, *status
);
// Construction reports problems through *status.
// NOTE(review): the failure branch body is not visible — confirm it releases
// `result` rather than leaking the partially built tokenizer.
34 if(U_FAILURE(*status
)) {
// Reinterpret the C++ object pointer as the opaque C handle type.
38 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
// urbtok_openBinaryRules: builds a RuleBasedTokenizer from pre-compiled binary
// rules, working on a private heap copy of the caller's data.
// NOTE(review): the embedded line numbers in this listing jump, so the
// remaining parameters (status is used below), the braces and the return
// statements are not visible here.
42 U_CAPI UBreakIterator
* U_EXPORT2
43 urbtok_openBinaryRules(const uint8_t *rules
,
// Guard: bail out when status is NULL or already records a failure.
46 if (status
== NULL
|| U_FAILURE(*status
)){
// The byte count is taken from the fLength field of the rules blob, viewed as
// an RBBIDataHeader.
// NOTE(review): no validation of rules or of fLength is visible — confirm
// the caller guarantees a well-formed, host-endian blob.
50 uint32_t length
= ((const RBBIDataHeader
*)rules
)->fLength
;
51 uint8_t *ruleCopy
= (uint8_t *) uprv_malloc(length
);
// The condition guarding this assignment is not visible in this listing;
// presumably it runs only when uprv_malloc returned NULL.
54 *status
= U_MEMORY_ALLOCATION_ERROR
;
57 // Copy the rules so they can be adopted by the tokenizer
58 uprv_memcpy(ruleCopy
, rules
, length
);
59 BreakIterator
*result
= 0;
60 result
= new RuleBasedTokenizer(ruleCopy
, *status
);
// NOTE(review): the failure branch body is not visible — confirm who frees
// ruleCopy and `result` when construction fails.
61 if(U_FAILURE(*status
)) {
// Reinterpret the C++ object pointer as the opaque C handle type.
65 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
// urbtok_openBinaryRulesNoCopy: builds a RuleBasedTokenizer directly over the
// caller's binary rules, without the heap copy made by urbtok_openBinaryRules.
// NOTE(review): the embedded line numbers in this listing jump, so the
// remaining parameters (status is used below), the braces and the return
// statements are not visible here.
69 U_CAPI UBreakIterator
* U_EXPORT2
70 urbtok_openBinaryRulesNoCopy(const uint8_t *rules
,
// Guard: bail out when status is NULL or already records a failure.
73 if (status
== NULL
|| U_FAILURE(*status
)){
77 BreakIterator
*result
= 0;
// NOTE(review): kDontAdopt presumably means the rules buffer stays owned by
// the caller and must outlive the returned iterator — confirm against
// RuleBasedTokenizer.
78 result
= new RuleBasedTokenizer(rules
, RuleBasedTokenizer::kDontAdopt
, *status
);
// NOTE(review): the failure branch body is not visible — confirm it releases
// `result`.
79 if(U_FAILURE(*status
)) {
// Reinterpret the C++ object pointer as the opaque C handle type.
83 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
// urbtok_getBinaryRules: copies the binary rules held by a break iterator
// into a caller-supplied buffer.
// NOTE(review): the embedded line numbers in this listing jump, so the
// remaining parameters (buffer, buffSize and status are used below), the
// declaration of `length`, the braces and the returned uint32_t value are
// not visible here.
87 U_CAPI
uint32_t U_EXPORT2
88 urbtok_getBinaryRules(UBreakIterator
*bi
,
// Guard: bail out when status is NULL or already records a failure.
93 if (status
== NULL
|| U_FAILURE(*status
)){
// bi is cast to RuleBasedBreakIterator without any check; getBinaryRules()
// yields the rules pointer and is passed `length`, which is then used as the
// byte count below.
98 const uint8_t *rules
= ((RuleBasedBreakIterator
*)bi
)->getBinaryRules(length
);
// A buffer smaller than the rules is reported as U_BUFFER_OVERFLOW_ERROR.
101 if (length
> buffSize
)
103 *status
= U_BUFFER_OVERFLOW_ERROR
;
// NOTE(review): the braces/else around this copy are not visible — confirm
// the copy is skipped when the overflow error was set above.
107 uprv_memcpy(buffer
, rules
, length
);
// urbtok_tokenize: C wrapper that forwards to RuleBasedTokenizer::tokenize
// and returns its int32_t result unchanged.
// NOTE(review): the embedded line numbers in this listing jump, so the
// maxTokens parameter (used below) and the braces are not visible here.
113 U_CAPI
int32_t U_EXPORT2
114 urbtok_tokenize(UBreakIterator
*bi
,
116 RuleBasedTokenRange
*outTokens
,
117 unsigned long *outTokenFlags
)
// bi is cast to RuleBasedTokenizer without any check, so the handle must have
// been created as a RuleBasedTokenizer (e.g. by the urbtok_open* functions).
119 return ((RuleBasedTokenizer
*)bi
)->tokenize(maxTokens
, outTokens
, outTokenFlags
);
// urbtok_swapBinaryRules: byte-swaps a binary rules blob between endiannesses.
// It wraps the rules in a temporary DataHeader, runs ubrk_swap over the whole
// item, and copies the swapped payload (without the header) into `buffer`.
// NOTE(review): the embedded line numbers in this listing jump, so the
// remaining parameters (buffer, inIsBigEndian and status are used below), the
// braces, the allocation-failure conditions and any cleanup/early-exit code
// are not visible here.
122 U_CAPI
void U_EXPORT2
123 urbtok_swapBinaryRules(const uint8_t *rules
,
126 UBool outIsBigEndian
,
129 DataHeader
*outH
= NULL
;
130 int32_t outLength
= 0;
// Both charset families are U_CHARSET_FAMILY, so only the endianness differs
// between input and output.
// NOTE(review): status is handed to udata_openSwapper before the
// `status == NULL` check that follows — confirm the swapper tolerates NULL,
// or move the check first.
131 UDataSwapper
*ds
= udata_openSwapper(inIsBigEndian
, U_CHARSET_FAMILY
, outIsBigEndian
, U_CHARSET_FAMILY
, status
);
// NOTE(review): the body of this branch is not visible — confirm `ds` is
// closed if it was opened before an early exit.
133 if (status
== NULL
|| U_FAILURE(*status
)){
// fLength is stored in the input byte order, so it is read through the swapper.
137 uint32_t length
= ds
->readUInt32(((const RBBIDataHeader
*)rules
)->fLength
);
// Size of the temporary item: synthetic header followed by the rules payload.
138 uint32_t totalLength
= sizeof(DataHeader
) + length
;
// Input staging buffer (header + rules).
140 DataHeader
*dh
= (DataHeader
*)uprv_malloc(totalLength
);
// The condition guarding this assignment is not visible; presumably it runs
// only when the allocation of dh failed.
143 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Output buffer that receives the swapped item.
146 outH
= (DataHeader
*)uprv_malloc(totalLength
);
// The condition guarding this assignment is not visible; presumably it runs
// only when the allocation of outH failed (confirm dh is freed on that path).
149 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Fill in the synthetic header. The 16-bit size fields are passed through
// ds->readUInt16, presumably so they end up expressed in the input byte order
// that the isBigEndian field below declares.
153 dh
->dataHeader
.headerSize
= ds
->readUInt16(sizeof(DataHeader
));
154 dh
->dataHeader
.magic1
= 0xda;
155 dh
->dataHeader
.magic2
= 0x27;
156 dh
->info
.size
= ds
->readUInt16(sizeof(UDataInfo
));
157 dh
->info
.reservedWord
= 0;
158 dh
->info
.isBigEndian
= inIsBigEndian
;
159 dh
->info
.charsetFamily
= U_CHARSET_FAMILY
;
160 dh
->info
.sizeofUChar
= U_SIZEOF_UCHAR
;
161 dh
->info
.reservedByte
= 0;
// Data format tag "Brk "; the format version is taken from the rules' own
// fFormatVersion field.
162 uprv_memcpy(dh
->info
.dataFormat
, "Brk ", sizeof(dh
->info
.dataFormat
));
163 uprv_memcpy(dh
->info
.formatVersion
, ((const RBBIDataHeader
*)rules
)->fFormatVersion
, sizeof(dh
->info
.formatVersion
));
// Hard-coded data version 4.1.0.0.
164 dh
->info
.dataVersion
[0] = 4; // Unicode version
165 dh
->info
.dataVersion
[1] = 1;
166 dh
->info
.dataVersion
[2] = 0;
167 dh
->info
.dataVersion
[3] = 0;
// Place the unswapped rules immediately after the synthetic header.
168 uprv_memcpy(((uint8_t*)dh
) + sizeof(DataHeader
), rules
, length
);
// Swap the whole item (header + rules) from dh into outH.
170 outLength
= ubrk_swap(ds
, dh
, totalLength
, outH
, status
);
// NOTE(review): outLength is int32_t while totalLength is uint32_t, so this
// is a signed/unsigned comparison.
171 if (U_SUCCESS(*status
) && outLength
!= totalLength
) // something went horribly wrong
173 *status
= U_INVALID_FORMAT_ERROR
;
// On success, hand back only the swapped payload, skipping the header.
// NOTE(review): `length` bytes are written to buffer and no capacity check is
// visible — confirm the caller sizes buffer to at least the rules' fLength.
176 if (U_SUCCESS(*status
))
178 uprv_memcpy(buffer
, ((uint8_t *)outH
) + sizeof(DataHeader
), length
);
// NOTE(review): the release of dh and outH is not visible in this listing —
// confirm both are freed before the swapper is closed.
184 udata_closeSwapper(ds
);
188 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */