]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/rle.c
2 *******************************************************************************
4 * Copyright (C) 2000-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 01/11/02 Ram Creation.
15 *******************************************************************************
19 * The ESCAPE character is used during run-length encoding. It signals
20 * a run of identical chars.
22 static const uint16_t ESCAPE
= 0xA5A5;
25 * The ESCAPE_BYTE character is used during run-length encoding. It signals
26 * a run of identical bytes.
28 static const uint8_t ESCAPE_BYTE
= (uint8_t)0xA5;
31 * Append a byte to the given buffer, packing two bytes into each
32 * character. The state parameter maintains intermediary data between calls.
34 * @param state A two-element array, with state[0] == 0 if this is the
35 * first byte of a pair, or state[0] != 0 if this is the second byte
36 * of a pair, in which case state[1] is the first byte.
39 appendEncodedByte(uint16_t* buffer
, uint16_t* buffLimit
, uint8_t value
, uint8_t state
[],UErrorCode
* status
) {
40 if(!status
|| U_FAILURE(*status
)){
44 uint16_t c
= (uint16_t) ((state
[1] << 8) | (((int32_t) value
) & 0xFF));
45 if(buffer
< buffLimit
){
48 *status
= U_BUFFER_OVERFLOW_ERROR
;
60 * Encode a run, possibly a degenerate run (of < 4 values).
61 * @param length The length of the run; must be > 0 && <= 0xFF.
64 encodeRunByte(uint16_t* buffer
,uint16_t* bufLimit
, uint8_t value
, int32_t length
, uint8_t state
[], UErrorCode
* status
) {
65 if(!status
|| U_FAILURE(*status
)){
70 for (; j
<length
; ++j
) {
71 if (value
== ESCAPE_BYTE
) {
72 buffer
= appendEncodedByte(buffer
,bufLimit
, ESCAPE_BYTE
, state
,status
);
74 buffer
= appendEncodedByte(buffer
,bufLimit
, value
, state
, status
);
78 if (length
== ESCAPE_BYTE
) {
79 if (value
== ESCAPE_BYTE
){
80 buffer
= appendEncodedByte(buffer
, bufLimit
,ESCAPE_BYTE
, state
,status
);
82 buffer
= appendEncodedByte(buffer
,bufLimit
, value
, state
, status
);
85 buffer
= appendEncodedByte(buffer
,bufLimit
, ESCAPE_BYTE
, state
,status
);
86 buffer
= appendEncodedByte(buffer
,bufLimit
, (char)length
, state
, status
);
87 buffer
= appendEncodedByte(buffer
,bufLimit
, value
, state
, status
); /* Don't need to escape this value*/
92 #define APPEND( buffer, bufLimit, value, num, status){ \
93 if(buffer<bufLimit){ \
96 *status = U_BUFFER_OVERFLOW_ERROR; \
102 * Encode a run, possibly a degenerate run (of < 4 values).
103 * @param length The length of the run; must be > 0 && <= 0xFFFF.
106 encodeRunShort(uint16_t* buffer
,uint16_t* bufLimit
, uint16_t value
, int32_t length
,UErrorCode
* status
) {
110 for (; j
<length
; ++j
) {
111 if (value
== (int32_t) ESCAPE
){
112 APPEND(buffer
,bufLimit
,ESCAPE
, num
, status
);
115 APPEND(buffer
,bufLimit
,value
,num
, status
);
119 if (length
== (int32_t) ESCAPE
) {
120 if (value
== (int32_t) ESCAPE
){
121 APPEND(buffer
,bufLimit
,ESCAPE
,num
,status
);
124 APPEND(buffer
,bufLimit
,value
,num
,status
);
127 APPEND(buffer
,bufLimit
,ESCAPE
,num
,status
);
128 APPEND(buffer
,bufLimit
,(uint16_t) length
, num
,status
);
129 APPEND(buffer
,bufLimit
,(uint16_t)value
, num
, status
); /* Don't need to escape this value */
135 * Construct a string representing a char array. Use run-length encoding.
136 * A character represents itself, unless it is the ESCAPE character. Then
137 * the following notations are possible:
138 * ESCAPE ESCAPE ESCAPE literal
139 * ESCAPE n c n instances of character c
140 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
141 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
142 * If we encounter a run where n == ESCAPE, we represent this as:
143 * c ESCAPE n-1 c
144 * The ESCAPE value is chosen so as not to collide with commonly seen values.
148 usArrayToRLEString(const uint16_t* src
,int32_t srcLen
,uint16_t* buffer
, int32_t bufLen
,UErrorCode
* status
) {
149 uint16_t* bufLimit
= buffer
+bufLen
;
150 uint16_t* saveBuffer
= buffer
;
151 if(buffer
< bufLimit
){
152 *buffer
++ = (uint16_t)(srcLen
>>16);
154 uint16_t runValue
= src
[0];
155 int32_t runLength
= 1;
157 *buffer
++ = (uint16_t) srcLen
;
159 for (; i
<srcLen
; ++i
) {
161 if (s
== runValue
&& runLength
< 0xFFFF){
164 buffer
= encodeRunShort(buffer
,bufLimit
, (uint16_t)runValue
, runLength
,status
);
169 buffer
= encodeRunShort(buffer
,bufLimit
,(uint16_t)runValue
, runLength
,status
);
171 *status
= U_BUFFER_OVERFLOW_ERROR
;
174 *status
= U_BUFFER_OVERFLOW_ERROR
;
176 return (buffer
- saveBuffer
);
180 * Construct a string representing a byte array. Use run-length encoding.
181 * Two bytes are packed into a single char, with a single extra zero byte at
182 * the end if needed. A byte represents itself, unless it is the
183 * ESCAPE_BYTE. Then the following notations are possible:
184 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
185 * ESCAPE_BYTE n b n instances of byte b
186 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
187 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
188 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
189 * b ESCAPE_BYTE n-1 b
190 * The ESCAPE_BYTE value is chosen so as not to collide with commonly seen values.
194 byteArrayToRLEString(const uint8_t* src
,int32_t srcLen
, uint16_t* buffer
,int32_t bufLen
, UErrorCode
* status
) {
195 const uint16_t* saveBuf
= buffer
;
196 uint16_t* bufLimit
= buffer
+bufLen
;
197 if(buffer
< bufLimit
){
198 *buffer
++ = ((uint16_t) (srcLen
>> 16));
201 uint8_t runValue
= src
[0];
203 uint8_t state
[2]= {0};
205 *buffer
++=((uint16_t) srcLen
);
206 for (; i
<srcLen
; ++i
) {
208 if (b
== runValue
&& runLength
< 0xFF){
212 buffer
= encodeRunByte(buffer
, bufLimit
,runValue
, runLength
, state
,status
);
217 buffer
= encodeRunByte(buffer
,bufLimit
, runValue
, runLength
, state
, status
);
219 /* We must save the final byte, if there is one, by padding
223 buffer
= appendEncodedByte(buffer
,bufLimit
, 0, state
,status
);
226 *status
= U_BUFFER_OVERFLOW_ERROR
;
229 *status
= U_BUFFER_OVERFLOW_ERROR
;
231 return (int32_t) (buffer
- saveBuf
);
236 * Construct an array of shorts from a run-length encoded string.
239 rleStringToUCharArray(uint16_t* src
, int32_t srcLen
, uint16_t* target
, int32_t tgtLen
, UErrorCode
* status
) {
244 if(!status
|| U_FAILURE(*status
)){
247 /* the source is null terminated */
249 srcLen
= u_strlen(src
);
254 length
= (((int32_t) src
[0]) << 16) | ((int32_t) src
[1]);
260 *status
= U_BUFFER_OVERFLOW_ERROR
;
264 for (; i
<srcLen
; ++i
) {
271 int32_t runLength
= (int32_t) c
;
272 uint16_t runValue
= src
[++i
];
274 for (; j
<runLength
; ++j
) {
275 target
[ai
++] = runValue
;
285 *status
= U_INTERNAL_PROGRAM_ERROR
;
292 * Construct an array of bytes from a run-length encoded string.
295 rleStringToByteArray(uint16_t* src
, int32_t srcLen
, uint8_t* target
, int32_t tgtLen
, UErrorCode
* status
) {
298 UBool nextChar
= TRUE
;
301 int32_t runLength
= 0;
305 if(!status
|| U_FAILURE(*status
)){
308 /* the source is null terminated */
310 srcLen
= u_strlen(src
);
315 length
= (((int32_t) src
[0]) << 16) | ((int32_t) src
[1]);
321 *status
= U_BUFFER_OVERFLOW_ERROR
;
325 for (; ai
<tgtLen
; ) {
326 /* This part of the loop places the next byte into the local
327 * variable 'b' each time through the loop. It keeps the
328 * current character in 'c' and uses the boolean 'nextChar'
329 * to see if we've taken both bytes out of 'c' yet.
334 b
= (uint8_t) (c
>> 8);
338 b
= (uint8_t) (c
& 0xFF);
342 /* This part of the loop is a tiny state machine which handles
343 * the parsing of the run-length encoding. This would be simpler
344 * if we could look ahead, but we can't, so we use 'node' to
345 * move between three nodes in the state machine.
349 /* Normal idle node */
350 if (b
== ESCAPE_BYTE
) {
358 /* We have seen one ESCAPE_BYTE; we expect either a second
359 * one, or a run length and value.
361 if (b
== ESCAPE_BYTE
) {
362 target
[ai
++] = ESCAPE_BYTE
;
373 /* We have seen an ESCAPE_BYTE and length byte. We interpret
374 * the next byte as the value to be repeated.
376 for (; j
<runLength
; ++j
){
380 *status
= U_BUFFER_OVERFLOW_ERROR
;
391 *status
= U_INTERNAL_PROGRAM_ERROR
;
392 /*("Bad run-length encoded byte array")*/
398 /*("Excess data in RLE byte array string");*/
399 *status
= U_INTERNAL_PROGRAM_ERROR
;