2 *****************************************************************************************
3 * Copyright (C) 2006, Apple Computer, Inc.
5 *****************************************************************************************
8 #include "unicode/utypes.h"
10 #if !UCONFIG_NO_BREAK_ITERATION
12 #include "unicode/urbtok.h"
15 #include "unicode/ustring.h"
/*
 * urbtok_openRules
 *
 * Builds a RuleBasedTokenizer from rule source text and casts it to the
 * opaque UBreakIterator pointer type used by the C API.
 *
 *   rules, rulesLength  wrapped in a UnicodeString that is handed to the
 *                       RuleBasedTokenizer constructor.
 *   parseErr            dereferenced and passed to the constructor.
 *   status              tested for NULL / an existing failure first, then
 *                       dereferenced and passed to the constructor.
 *
 * NOTE(review): this listing appears to have lost lines - the remaining
 * parameter declarations, the bodies of both if-blocks and the final return
 * are not visible. Confirm the notes below against the complete file.
 */
22 U_CAPI UBreakIterator
* U_EXPORT2
23 urbtok_openRules(const UChar
*rules
,
25 UParseError
*parseErr
,
// Guard: taken when status is NULL or already holds a failure code
// (the branch body is not visible in this listing).
28 if (status
== NULL
|| U_FAILURE(*status
)){
32 BreakIterator
*result
= 0;
33 UnicodeString
ruleString(rules
, rulesLength
);
// NOTE(review): *parseErr is dereferenced with no visible NULL check on parseErr.
34 result
= new RuleBasedTokenizer(ruleString
, *parseErr
, *status
);
// NOTE(review): no NULL test on the pointer returned by new and no delete of
// result is visible for this failure path - confirm the object is not leaked.
35 if(U_FAILURE(*status
)) {
// Reinterpret the C++ object pointer as the C API's opaque handle type.
39 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
/*
 * urbtok_openBinaryRules
 *
 * Builds a RuleBasedTokenizer from rule data that is already in binary form
 * and casts it to the opaque UBreakIterator pointer type.
 *
 *   rules   read as an RBBIDataHeader; its fLength field is used as the number
 *           of bytes to allocate and copy.
 *   status  tested for NULL / an existing failure first; set to
 *           U_MEMORY_ALLOCATION_ERROR in the allocation-failure path and
 *           passed to the RuleBasedTokenizer constructor.
 *
 * NOTE(review): this listing appears to have lost lines - the status parameter
 * declaration, the allocation-failure test, the if-block bodies and the final
 * return are not visible. Confirm the notes below against the complete file.
 */
43 U_CAPI UBreakIterator
* U_EXPORT2
44 urbtok_openBinaryRules(const uint8_t *rules
,
// Guard: taken when status is NULL or already holds a failure code
// (the branch body is not visible in this listing).
47 if (status
== NULL
|| U_FAILURE(*status
)){
// NOTE(review): fLength comes straight from the caller's data and is used
// unchecked as the allocation and copy size; no NULL check on rules is visible.
51 uint32_t length
= ((const RBBIDataHeader
*)rules
)->fLength
;
52 uint8_t *ruleCopy
= (uint8_t *) uprv_malloc(length
);
// The condition guarding this assignment is not visible here; presumably it is
// the "uprv_malloc returned NULL" case - confirm.
55 *status
= U_MEMORY_ALLOCATION_ERROR
;
58 // Copy the rules so they can be adopted by the tokenizer
59 uprv_memcpy(ruleCopy
, rules
, length
);
60 BreakIterator
*result
= 0;
61 result
= new RuleBasedTokenizer(ruleCopy
, *status
);
// NOTE(review): no NULL test on the pointer returned by new and no release of
// result or ruleCopy is visible for this failure path - confirm ownership.
62 if(U_FAILURE(*status
)) {
// Reinterpret the C++ object pointer as the C API's opaque handle type.
66 UBreakIterator
*uBI
= (UBreakIterator
*)result
;
/*
 * urbtok_getBinaryRules
 *
 * Fetches the binary rule data of a break iterator and copies it into a
 * caller-supplied buffer. Declared to return uint32_t.
 *
 *   bi        cast to RuleBasedBreakIterator* and asked for its binary rules.
 *   buffer    destination of the uprv_memcpy.
 *   buffSize  compared against the rule length; a larger rule length sets
 *             *status to U_BUFFER_OVERFLOW_ERROR.
 *   status    tested for NULL / an existing failure first.
 *
 * NOTE(review): this listing appears to have lost lines - the buffer, buffSize
 * and status declarations, the declaration of length, the branch structure
 * around the overflow check / copy and the return statement are not visible.
 * Confirm against the complete file.
 */
70 U_CAPI
uint32_t U_EXPORT2
71 urbtok_getBinaryRules(UBreakIterator
*bi
,
// Guard: taken when status is NULL or already holds a failure code
// (the branch body is not visible in this listing).
76 if (status
== NULL
|| U_FAILURE(*status
)){
// length is passed to getBinaryRules and then used as the byte count below;
// presumably the call fills it in - confirm against the class declaration.
81 const uint8_t *rules
= ((RuleBasedBreakIterator
*)bi
)->getBinaryRules(length
);
84 if (length
> buffSize
)
86 *status
= U_BUFFER_OVERFLOW_ERROR
;
// NOTE(review): the copy must only run when length fits in buffSize; the
// else/brace lines that would show this are not visible here - confirm.
90 uprv_memcpy(buffer
, rules
, length
);
/*
 * urbtok_tokenize
 *
 * C wrapper that casts bi to RuleBasedTokenizer* and forwards maxTokens,
 * outTokens and outTokenFlags to its tokenize() method, returning that
 * method's result as int32_t.
 *
 * No validation of bi or of the output pointers is performed in the visible
 * code; what tokenize() writes to outTokens / outTokenFlags is defined by
 * RuleBasedTokenizer, not here.
 *
 * NOTE(review): the maxTokens parameter declaration is not visible in this
 * listing.
 */
96 U_CAPI
int32_t U_EXPORT2
97 urbtok_tokenize(UBreakIterator
*bi
,
99 RuleBasedTokenRange
*outTokens
,
100 unsigned long *outTokenFlags
)
102 return ((RuleBasedTokenizer
*)bi
)->tokenize(maxTokens
, outTokens
, outTokenFlags
);
/*
 * urbtok_swapBinaryRules
 *
 * Converts binary rule data between byte orders by wrapping it in a temporary
 * DataHeader, running ubrk_swap over the wrapped copy, and copying the swapped
 * payload (everything after the DataHeader) into buffer.
 *
 *   rules           input rule data, read as an RBBIDataHeader.
 *   buffer          receives length bytes of swapped rule data on success.
 *   inIsBigEndian   byte order passed to udata_openSwapper for the input and
 *                   stored in the temporary header's info.isBigEndian.
 *   outIsBigEndian  byte order passed to udata_openSwapper for the output.
 *   status          passed to udata_openSwapper and ubrk_swap; set to
 *                   U_MEMORY_ALLOCATION_ERROR or U_INVALID_FORMAT_ERROR by
 *                   this function in the paths marked below.
 *
 * Both sides of the swapper use U_CHARSET_FAMILY, so only the byte order can
 * differ between input and output.
 *
 * NOTE(review): this listing appears to have lost lines - the buffer,
 * inIsBigEndian and status declarations, the allocation-failure tests, every
 * early-exit path and any uprv_free calls are not visible. Confirm against the
 * complete file that dh, outH and ds are released on every path.
 */
105 U_CAPI
void U_EXPORT2
106 urbtok_swapBinaryRules(const uint8_t *rules
,
109 UBool outIsBigEndian
,
// NOTE(review): the swapper is opened before status is checked for NULL below.
112 UDataSwapper
*ds
= udata_openSwapper(inIsBigEndian
, U_CHARSET_FAMILY
, outIsBigEndian
, U_CHARSET_FAMILY
, status
);
// Guard: taken when status is NULL or holds a failure code, including one set
// by udata_openSwapper above (the branch body is not visible in this listing).
114 if (status
== NULL
|| U_FAILURE(*status
)){
// fLength is read through the swapper's readUInt32 rather than directly,
// presumably because rules is in the input byte order - confirm.
118 uint32_t length
= ds
->readUInt32(((const RBBIDataHeader
*)rules
)->fLength
);
// NOTE(review): length comes from caller-supplied data; no visible check that
// sizeof(DataHeader) + length does not wrap.
119 uint32_t totalLength
= sizeof(DataHeader
) + length
;
// dh: scratch copy of the input with a DataHeader placed in front of it.
121 DataHeader
*dh
= (DataHeader
*)uprv_malloc(totalLength
);
// The condition guarding this assignment is not visible; presumably dh == NULL.
124 *status
= U_MEMORY_ALLOCATION_ERROR
;
// outH: destination for ubrk_swap, same size as dh.
127 DataHeader
*outH
= (DataHeader
*)uprv_malloc(totalLength
);
// The condition guarding this assignment is not visible; presumably outH == NULL.
130 *status
= U_MEMORY_ALLOCATION_ERROR
;
// Fill in the temporary header. The two size fields are native values passed
// through the swapper's readUInt16 - presumably so they end up in the same
// byte order as the input data; confirm.
134 dh
->dataHeader
.headerSize
= ds
->readUInt16(sizeof(DataHeader
));
135 dh
->dataHeader
.magic1
= 0xda;
136 dh
->dataHeader
.magic2
= 0x27;
137 dh
->info
.size
= ds
->readUInt16(sizeof(UDataInfo
));
138 dh
->info
.reservedWord
= 0;
139 dh
->info
.isBigEndian
= inIsBigEndian
;
140 dh
->info
.charsetFamily
= U_CHARSET_FAMILY
;
141 dh
->info
.sizeofUChar
= U_SIZEOF_UCHAR
;
142 dh
->info
.reservedByte
= 0;
143 uprv_memcpy(dh
->info
.dataFormat
, "Brk ", sizeof(dh
->info
.dataFormat
));
// The format version is taken from the input rules' own header.
144 uprv_memcpy(dh
->info
.formatVersion
, ((const RBBIDataHeader
*)rules
)->fFormatVersion
, sizeof(dh
->info
.formatVersion
));
145 dh
->info
.dataVersion
[0] = 4; // Unicode version
146 dh
->info
.dataVersion
[1] = 1;
147 dh
->info
.dataVersion
[2] = 0;
148 dh
->info
.dataVersion
[3] = 0;
// Place the caller's rule data immediately after the temporary header.
149 uprv_memcpy(((uint8_t*)dh
) + sizeof(DataHeader
), rules
, length
);
151 int32_t outLength
= ubrk_swap(ds
, dh
, totalLength
, outH
, status
);
// NOTE(review): outLength is int32_t and totalLength is uint32_t, so this
// comparison mixes signed and unsigned operands.
152 if (U_SUCCESS(*status
) && outLength
!= totalLength
) // something went horribly wrong
154 *status
= U_INVALID_FORMAT_ERROR
;
// On success, copy only the swapped payload (past the DataHeader) to buffer.
157 if (U_SUCCESS(*status
))
159 uprv_memcpy(buffer
, ((uint8_t *)outH
) + sizeof(DataHeader
), length
);
165 udata_closeSwapper(ds
);
169 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */