]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/rle.c
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / tools / genrb / rle.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2000-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File writejava.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 01/11/02 Ram Creation.
15 *******************************************************************************
16 */
17 #include "rle.h"
18 /**
19 * The ESCAPE character is used during run-length encoding. It signals
20 * a run of identical chars.
21 */
22 static const uint16_t ESCAPE = 0xA5A5;
23
24 /**
25 * The ESCAPE_BYTE character is used during run-length encoding. It signals
26 * a run of identical bytes.
27 */
28 static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
29
30 /**
31 * Append a byte to the given StringBuffer, packing two bytes into each
32 * character. The state parameter maintains intermediary data between
33 * calls.
34 * @param state A two-element array, with state[0] == 0 if this is the
35 * first byte of a pair, or state[0] != 0 if this is the second byte
36 * of a pair, in which case state[1] is the first byte.
37 */
38 static uint16_t*
39 appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
40 if(!status || U_FAILURE(*status)){
41 return NULL;
42 }
43 if (state[0] != 0) {
44 uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
45 if(buffer < buffLimit){
46 *buffer++ = c;
47 }else{
48 *status = U_BUFFER_OVERFLOW_ERROR;
49 }
50 state[0] = 0;
51 return buffer;
52 }
53 else {
54 state[0] = 1;
55 state[1] = value;
56 return buffer;
57 }
58 }
59 /**
60 * Encode a run, possibly a degenerate run (of < 4 values).
61 * @param length The length of the run; must be > 0 && <= 0xFF.
62 */
63 static uint16_t*
64 encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
65 if(!status || U_FAILURE(*status)){
66 return NULL;
67 }
68 if (length < 4) {
69 int32_t j=0;
70 for (; j<length; ++j) {
71 if (value == ESCAPE_BYTE) {
72 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
73 }
74 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
75 }
76 }
77 else {
78 if (length == ESCAPE_BYTE) {
79 if (value == ESCAPE_BYTE){
80 buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
81 }
82 buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
83 --length;
84 }
85 buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
86 buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
87 buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
88 }
89 return buffer;
90 }
91
92 #define APPEND( buffer, bufLimit, value, num, status){ \
93 if(buffer<bufLimit){ \
94 *buffer++=(value); \
95 }else{ \
96 *status = U_BUFFER_OVERFLOW_ERROR; \
97 } \
98 num++; \
99 }
100
101 /**
102 * Encode a run, possibly a degenerate run (of < 4 values).
103 * @param length The length of the run; must be > 0 && <= 0xFFFF.
104 */
105 static uint16_t*
106 encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
107 int32_t num=0;
108 if (length < 4) {
109 int j=0;
110 for (; j<length; ++j) {
111 if (value == (int32_t) ESCAPE){
112 APPEND(buffer,bufLimit,ESCAPE, num, status);
113
114 }
115 APPEND(buffer,bufLimit,value,num, status);
116 }
117 }
118 else {
119 if (length == (int32_t) ESCAPE) {
120 if (value == (int32_t) ESCAPE){
121 APPEND(buffer,bufLimit,ESCAPE,num,status);
122
123 }
124 APPEND(buffer,bufLimit,value,num,status);
125 --length;
126 }
127 APPEND(buffer,bufLimit,ESCAPE,num,status);
128 APPEND(buffer,bufLimit,(uint16_t) length, num,status);
129 APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
130 }
131 return buffer;
132 }
133
134 /**
135 * Construct a string representing a char array. Use run-length encoding.
136 * A character represents itself, unless it is the ESCAPE character. Then
137 * the following notations are possible:
138 * ESCAPE ESCAPE ESCAPE literal
139 * ESCAPE n c n instances of character c
140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
141 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
142 * If we encounter a run where n == ESCAPE, we represent this as:
143 * c ESCAPE n-1 c
144 * The ESCAPE value is chosen so as not to collide with commonly
145 * seen values.
146 */
147 int32_t
148 usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
149 uint16_t* bufLimit = buffer+bufLen;
150 uint16_t* saveBuffer = buffer;
151 if(buffer < bufLimit){
152 *buffer++ = (uint16_t)(srcLen>>16);
153 if(buffer<bufLimit){
154 uint16_t runValue = src[0];
155 int32_t runLength = 1;
156 int i=1;
157 *buffer++ = (uint16_t) srcLen;
158
159 for (; i<srcLen; ++i) {
160 uint16_t s = src[i];
161 if (s == runValue && runLength < 0xFFFF){
162 ++runLength;
163 }else {
164 buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
165 runValue = s;
166 runLength = 1;
167 }
168 }
169 buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
170 }else{
171 *status = U_BUFFER_OVERFLOW_ERROR;
172 }
173 }else{
174 *status = U_BUFFER_OVERFLOW_ERROR;
175 }
176 return (int32_t)(buffer - saveBuffer);
177 }
178
179 /**
180 * Construct a string representing a byte array. Use run-length encoding.
181 * Two bytes are packed into a single char, with a single extra zero byte at
182 * the end if needed. A byte represents itself, unless it is the
183 * ESCAPE_BYTE. Then the following notations are possible:
184 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
185 * ESCAPE_BYTE n b n instances of byte b
186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
187 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
189 * b ESCAPE_BYTE n-1 b
190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
191 * seen values.
192 */
193 int32_t
194 byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
195 const uint16_t* saveBuf = buffer;
196 uint16_t* bufLimit = buffer+bufLen;
197 if(buffer < bufLimit){
198 *buffer++ = ((uint16_t) (srcLen >> 16));
199
200 if(buffer<bufLimit){
201 uint8_t runValue = src[0];
202 int runLength = 1;
203 uint8_t state[2]= {0};
204 int i=1;
205 *buffer++=((uint16_t) srcLen);
206 for (; i<srcLen; ++i) {
207 uint8_t b = src[i];
208 if (b == runValue && runLength < 0xFF){
209 ++runLength;
210 }
211 else {
212 buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
213 runValue = b;
214 runLength = 1;
215 }
216 }
217 buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
218
219 /* We must save the final byte, if there is one, by padding
220 * an extra zero.
221 */
222 if (state[0] != 0) {
223 buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
224 }
225 }else{
226 *status = U_BUFFER_OVERFLOW_ERROR;
227 }
228 }else{
229 *status = U_BUFFER_OVERFLOW_ERROR;
230 }
231 return (int32_t) (buffer - saveBuf);
232 }
233
234
235 /**
236 * Construct an array of shorts from a run-length encoded string.
237 */
238 int32_t
239 rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
240 int32_t length = 0;
241 int32_t ai = 0;
242 int i=2;
243
244 if(!status || U_FAILURE(*status)){
245 return 0;
246 }
247 /* the source is null terminated */
248 if(srcLen == -1){
249 srcLen = u_strlen(src);
250 }
251 if(srcLen <= 2){
252 return 2;
253 }
254 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
255
256 if(target == NULL){
257 return length;
258 }
259 if(tgtLen < length){
260 *status = U_BUFFER_OVERFLOW_ERROR;
261 return length;
262 }
263
264 for (; i<srcLen; ++i) {
265 uint16_t c = src[i];
266 if (c == ESCAPE) {
267 c = src[++i];
268 if (c == ESCAPE) {
269 target[ai++] = c;
270 } else {
271 int32_t runLength = (int32_t) c;
272 uint16_t runValue = src[++i];
273 int j=0;
274 for (; j<runLength; ++j) {
275 target[ai++] = runValue;
276 }
277 }
278 }
279 else {
280 target[ai++] = c;
281 }
282 }
283
284 if (ai != length){
285 *status = U_INTERNAL_PROGRAM_ERROR;
286 }
287
288 return length;
289 }
290
291 /**
292 * Construct an array of bytes from a run-length encoded string.
293 */
294 int32_t
295 rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
296
297 int32_t length = 0;
298 UBool nextChar = TRUE;
299 uint16_t c = 0;
300 int32_t node = 0;
301 int32_t runLength = 0;
302 int32_t i = 2;
303 int32_t ai=0;
304
305 if(!status || U_FAILURE(*status)){
306 return 0;
307 }
308 /* the source is null terminated */
309 if(srcLen == -1){
310 srcLen = u_strlen(src);
311 }
312 if(srcLen <= 2){
313 return 2;
314 }
315 length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
316
317 if(target == NULL){
318 return length;
319 }
320 if(tgtLen < length){
321 *status = U_BUFFER_OVERFLOW_ERROR;
322 return length;
323 }
324
325 for (; ai<tgtLen; ) {
326 /* This part of the loop places the next byte into the local
327 * variable 'b' each time through the loop. It keeps the
328 * current character in 'c' and uses the boolean 'nextChar'
329 * to see if we've taken both bytes out of 'c' yet.
330 */
331 uint8_t b;
332 if (nextChar) {
333 c = src[i++];
334 b = (uint8_t) (c >> 8);
335 nextChar = FALSE;
336 }
337 else {
338 b = (uint8_t) (c & 0xFF);
339 nextChar = TRUE;
340 }
341
342 /* This part of the loop is a tiny state machine which handles
343 * the parsing of the run-length encoding. This would be simpler
344 * if we could look ahead, but we can't, so we use 'node' to
345 * move between three nodes in the state machine.
346 */
347 switch (node) {
348 case 0:
349 /* Normal idle node */
350 if (b == ESCAPE_BYTE) {
351 node = 1;
352 }
353 else {
354 target[ai++] = b;
355 }
356 break;
357 case 1:
358 /* We have seen one ESCAPE_BYTE; we expect either a second
359 * one, or a run length and value.
360 */
361 if (b == ESCAPE_BYTE) {
362 target[ai++] = ESCAPE_BYTE;
363 node = 0;
364 }
365 else {
366 runLength = b;
367 node = 2;
368 }
369 break;
370 case 2:
371 {
372 int j=0;
373 /* We have seen an ESCAPE_BYTE and length byte. We interpret
374 * the next byte as the value to be repeated.
375 */
376 for (; j<runLength; ++j){
377 if(ai<tgtLen){
378 target[ai++] = b;
379 }else{
380 *status = U_BUFFER_OVERFLOW_ERROR;
381 return ai;
382 }
383 }
384 node = 0;
385 break;
386 }
387 }
388 }
389
390 if (node != 0){
391 *status = U_INTERNAL_PROGRAM_ERROR;
392 /*("Bad run-length encoded byte array")*/
393 return 0;
394 }
395
396
397 if (i != srcLen){
398 /*("Excess data in RLE byte array string");*/
399 *status = U_INTERNAL_PROGRAM_ERROR;
400 return ai;
401 }
402
403 return ai;
404 }
405