]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/urbtok.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / urbtok.cpp
1 /*
2 *****************************************************************************************
3 * Copyright (C) 2006, Apple Computer, Inc.
4 * All Rights Reserved.
5 *****************************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_BREAK_ITERATION
11
12 #include "unicode/urbtok.h"
13
14 #include "rbtok.h"
15 #include "unicode/ustring.h"
16 #include "rbbidata.h"
17 #include "cmemory.h"
18 #include "ucmndata.h"
19
20 U_NAMESPACE_USE
21
22 U_CAPI UBreakIterator* U_EXPORT2
23 urbtok_openRules(const UChar *rules,
24 int32_t rulesLength,
25 UParseError *parseErr,
26 UErrorCode *status)
27 {
28 if (status == NULL || U_FAILURE(*status)){
29 return 0;
30 }
31
32 BreakIterator *result = 0;
33 UnicodeString ruleString(rules, rulesLength);
34 result = new RuleBasedTokenizer(ruleString, *parseErr, *status);
35 if(U_FAILURE(*status)) {
36 return 0;
37 }
38
39 UBreakIterator *uBI = (UBreakIterator *)result;
40 return uBI;
41 }
42
43 U_CAPI UBreakIterator* U_EXPORT2
44 urbtok_openBinaryRules(const uint8_t *rules,
45 UErrorCode *status)
46 {
47 if (status == NULL || U_FAILURE(*status)){
48 return 0;
49 }
50
51 uint32_t length = ((const RBBIDataHeader *)rules)->fLength;
52 uint8_t *ruleCopy = (uint8_t *) uprv_malloc(length);
53 if (ruleCopy == 0)
54 {
55 *status = U_MEMORY_ALLOCATION_ERROR;
56 return 0;
57 }
58 // Copy the rules so they can be adopted by the tokenizer
59 uprv_memcpy(ruleCopy, rules, length);
60 BreakIterator *result = 0;
61 result = new RuleBasedTokenizer(ruleCopy, *status);
62 if(U_FAILURE(*status)) {
63 return 0;
64 }
65
66 UBreakIterator *uBI = (UBreakIterator *)result;
67 return uBI;
68 }
69
70 U_CAPI uint32_t U_EXPORT2
71 urbtok_getBinaryRules(UBreakIterator *bi,
72 uint8_t *buffer,
73 uint32_t buffSize,
74 UErrorCode *status)
75 {
76 if (status == NULL || U_FAILURE(*status)){
77 return 0;
78 }
79
80 uint32_t length;
81 const uint8_t *rules = ((RuleBasedBreakIterator *)bi)->getBinaryRules(length);
82 if (buffer != 0)
83 {
84 if (length > buffSize)
85 {
86 *status = U_BUFFER_OVERFLOW_ERROR;
87 }
88 else
89 {
90 uprv_memcpy(buffer, rules, length);
91 }
92 }
93 return length;
94 }
95
96 U_CAPI int32_t U_EXPORT2
97 urbtok_tokenize(UBreakIterator *bi,
98 int32_t maxTokens,
99 RuleBasedTokenRange *outTokens,
100 unsigned long *outTokenFlags)
101 {
102 return ((RuleBasedTokenizer *)bi)->tokenize(maxTokens, outTokens, outTokenFlags);
103 }
104
105 U_CAPI void U_EXPORT2
106 urbtok_swapBinaryRules(const uint8_t *rules,
107 uint8_t *buffer,
108 UBool inIsBigEndian,
109 UBool outIsBigEndian,
110 UErrorCode *status)
111 {
112 UDataSwapper *ds = udata_openSwapper(inIsBigEndian, U_CHARSET_FAMILY, outIsBigEndian, U_CHARSET_FAMILY, status);
113
114 if (status == NULL || U_FAILURE(*status)){
115 return;
116 }
117
118 uint32_t length = ds->readUInt32(((const RBBIDataHeader *)rules)->fLength);
119 uint32_t totalLength = sizeof(DataHeader) + length;
120
121 DataHeader *dh = (DataHeader *)uprv_malloc(totalLength);
122 if (dh == 0)
123 {
124 *status = U_MEMORY_ALLOCATION_ERROR;
125 goto closeSwapper;
126 }
127 DataHeader *outH = (DataHeader *)uprv_malloc(totalLength);
128 if (outH == 0)
129 {
130 *status = U_MEMORY_ALLOCATION_ERROR;
131 uprv_free(dh);
132 goto closeSwapper;
133 }
134 dh->dataHeader.headerSize = ds->readUInt16(sizeof(DataHeader));
135 dh->dataHeader.magic1 = 0xda;
136 dh->dataHeader.magic2 = 0x27;
137 dh->info.size = ds->readUInt16(sizeof(UDataInfo));
138 dh->info.reservedWord = 0;
139 dh->info.isBigEndian = inIsBigEndian;
140 dh->info.charsetFamily = U_CHARSET_FAMILY;
141 dh->info.sizeofUChar = U_SIZEOF_UCHAR;
142 dh->info.reservedByte = 0;
143 uprv_memcpy(dh->info.dataFormat, "Brk ", sizeof(dh->info.dataFormat));
144 uprv_memcpy(dh->info.formatVersion, ((const RBBIDataHeader *)rules)->fFormatVersion, sizeof(dh->info.formatVersion));
145 dh->info.dataVersion[0] = 4; // Unicode version
146 dh->info.dataVersion[1] = 1;
147 dh->info.dataVersion[2] = 0;
148 dh->info.dataVersion[3] = 0;
149 uprv_memcpy(((uint8_t*)dh) + sizeof(DataHeader), rules, length);
150
151 int32_t outLength = ubrk_swap(ds, dh, totalLength, outH, status);
152 if (U_SUCCESS(*status) && outLength != totalLength) // something went horribly wrong
153 {
154 *status = U_INVALID_FORMAT_ERROR;
155 }
156
157 if (U_SUCCESS(*status))
158 {
159 uprv_memcpy(buffer, ((uint8_t *)outH) + sizeof(DataHeader), length);
160 }
161 uprv_free(outH);
162 uprv_free(dh);
163
164 closeSwapper:
165 udata_closeSwapper(ds);
166 }
167
168
169 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */