]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/rle.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2000-2003, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 01/11/02 Ram Creation.
17 *******************************************************************************
21 * The ESCAPE character is used during run-length encoding. It signals
22 * a run of identical chars.
24 static const uint16_t ESCAPE
= 0xA5A5;
27 * The ESCAPE_BYTE character is used during run-length encoding. It signals
28 * a run of identical bytes.
30 static const uint8_t ESCAPE_BYTE
= (uint8_t)0xA5;
33 * Append a byte to the given StringBuffer, packing two bytes into each
34 * character. The state parameter maintains intermediary data between
35 * calls.
36 * @param state A two-element array, with state[0] == 0 if this is the
37 * first byte of a pair, or state[0] != 0 if this is the second byte
38 * of a pair, in which case state[1] is the first byte.
41 appendEncodedByte(uint16_t* buffer
, uint16_t* buffLimit
, uint8_t value
, uint8_t state
[],UErrorCode
* status
) {
42 if(!status
|| U_FAILURE(*status
)){
46 uint16_t c
= (uint16_t) ((state
[1] << 8) | (((int32_t) value
) & 0xFF));
47 if(buffer
< buffLimit
){
50 *status
= U_BUFFER_OVERFLOW_ERROR
;
62 * Encode a run, possibly a degenerate run (of < 4 values).
63 * @param length The length of the run; must be > 0 && <= 0xFF.
66 encodeRunByte(uint16_t* buffer
,uint16_t* bufLimit
, uint8_t value
, int32_t length
, uint8_t state
[], UErrorCode
* status
) {
67 if(!status
|| U_FAILURE(*status
)){
72 for (; j
<length
; ++j
) {
73 if (value
== ESCAPE_BYTE
) {
74 buffer
= appendEncodedByte(buffer
,bufLimit
, ESCAPE_BYTE
, state
,status
);
76 buffer
= appendEncodedByte(buffer
,bufLimit
, value
, state
, status
);
80 if (length
== ESCAPE_BYTE
) {
81 if (value
== ESCAPE_BYTE
){
82 buffer
= appendEncodedByte(buffer
, bufLimit
,ESCAPE_BYTE
, state
,status
);
84 buffer
= appendEncodedByte(buffer
,bufLimit
, value
, state
, status
);
87 buffer
= appendEncodedByte(buffer
,bufLimit
, ESCAPE_BYTE
, state
,status
);
88 buffer
= appendEncodedByte(buffer
,bufLimit
, (char)length
, state
, status
);
89 buffer
= appendEncodedByte(buffer
,bufLimit
, value
, state
, status
); /* Don't need to escape this value*/
94 #define APPEND( buffer, bufLimit, value, num, status){ \
95 if(buffer<bufLimit){ \
98 *status = U_BUFFER_OVERFLOW_ERROR; \
104 * Encode a run, possibly a degenerate run (of < 4 values).
105 * @param length The length of the run; must be > 0 && <= 0xFFFF.
108 encodeRunShort(uint16_t* buffer
,uint16_t* bufLimit
, uint16_t value
, int32_t length
,UErrorCode
* status
) {
112 for (; j
<length
; ++j
) {
113 if (value
== (int32_t) ESCAPE
){
114 APPEND(buffer
,bufLimit
,ESCAPE
, num
, status
);
117 APPEND(buffer
,bufLimit
,value
,num
, status
);
121 if (length
== (int32_t) ESCAPE
) {
122 if (value
== (int32_t) ESCAPE
){
123 APPEND(buffer
,bufLimit
,ESCAPE
,num
,status
);
126 APPEND(buffer
,bufLimit
,value
,num
,status
);
129 APPEND(buffer
,bufLimit
,ESCAPE
,num
,status
);
130 APPEND(buffer
,bufLimit
,(uint16_t) length
, num
,status
);
131 APPEND(buffer
,bufLimit
,(uint16_t)value
, num
, status
); /* Don't need to escape this value */
137 * Construct a string representing a char array. Use run-length encoding.
138 * A character represents itself, unless it is the ESCAPE character. Then
139 * the following notations are possible:
140 * ESCAPE ESCAPE ESCAPE literal
141 * ESCAPE n c n instances of character c
142 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
143 * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
144 * If we encounter a run where n == ESCAPE, we represent this as:
145 * c ESCAPE n-1 c
146 * The ESCAPE value is chosen so as not to collide with commonly
147 * seen values.
150 usArrayToRLEString(const uint16_t* src
,int32_t srcLen
,uint16_t* buffer
, int32_t bufLen
,UErrorCode
* status
) {
151 uint16_t* bufLimit
= buffer
+bufLen
;
152 uint16_t* saveBuffer
= buffer
;
153 if(buffer
< bufLimit
){
154 *buffer
++ = (uint16_t)(srcLen
>>16);
156 uint16_t runValue
= src
[0];
157 int32_t runLength
= 1;
159 *buffer
++ = (uint16_t) srcLen
;
161 for (; i
<srcLen
; ++i
) {
163 if (s
== runValue
&& runLength
< 0xFFFF){
166 buffer
= encodeRunShort(buffer
,bufLimit
, (uint16_t)runValue
, runLength
,status
);
171 buffer
= encodeRunShort(buffer
,bufLimit
,(uint16_t)runValue
, runLength
,status
);
173 *status
= U_BUFFER_OVERFLOW_ERROR
;
176 *status
= U_BUFFER_OVERFLOW_ERROR
;
178 return (int32_t)(buffer
- saveBuffer
);
182 * Construct a string representing a byte array. Use run-length encoding.
183 * Two bytes are packed into a single char, with a single extra zero byte at
184 * the end if needed. A byte represents itself, unless it is the
185 * ESCAPE_BYTE. Then the following notations are possible:
186 * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
187 * ESCAPE_BYTE n b n instances of byte b
188 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
189 * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
190 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
191 * b ESCAPE_BYTE n-1 b
192 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
193 * seen values.
196 byteArrayToRLEString(const uint8_t* src
,int32_t srcLen
, uint16_t* buffer
,int32_t bufLen
, UErrorCode
* status
) {
197 const uint16_t* saveBuf
= buffer
;
198 uint16_t* bufLimit
= buffer
+bufLen
;
199 if(buffer
< bufLimit
){
200 *buffer
++ = ((uint16_t) (srcLen
>> 16));
203 uint8_t runValue
= src
[0];
205 uint8_t state
[2]= {0};
207 *buffer
++=((uint16_t) srcLen
);
208 for (; i
<srcLen
; ++i
) {
210 if (b
== runValue
&& runLength
< 0xFF){
214 buffer
= encodeRunByte(buffer
, bufLimit
,runValue
, runLength
, state
,status
);
219 buffer
= encodeRunByte(buffer
,bufLimit
, runValue
, runLength
, state
, status
);
221 /* We must save the final byte, if there is one, by padding
222 * it with a single extra zero byte. */
225 buffer
= appendEncodedByte(buffer
,bufLimit
, 0, state
,status
);
228 *status
= U_BUFFER_OVERFLOW_ERROR
;
231 *status
= U_BUFFER_OVERFLOW_ERROR
;
233 return (int32_t) (buffer
- saveBuf
);
238 * Construct an array of shorts from a run-length encoded string.
241 rleStringToUCharArray(uint16_t* src
, int32_t srcLen
, uint16_t* target
, int32_t tgtLen
, UErrorCode
* status
) {
246 if(!status
|| U_FAILURE(*status
)){
249 /* the source is null terminated */
251 srcLen
= u_strlen(src
);
256 length
= (((int32_t) src
[0]) << 16) | ((int32_t) src
[1]);
262 *status
= U_BUFFER_OVERFLOW_ERROR
;
266 for (; i
<srcLen
; ++i
) {
273 int32_t runLength
= (int32_t) c
;
274 uint16_t runValue
= src
[++i
];
276 for (; j
<runLength
; ++j
) {
277 target
[ai
++] = runValue
;
287 *status
= U_INTERNAL_PROGRAM_ERROR
;
294 * Construct an array of bytes from a run-length encoded string.
297 rleStringToByteArray(uint16_t* src
, int32_t srcLen
, uint8_t* target
, int32_t tgtLen
, UErrorCode
* status
) {
300 UBool nextChar
= TRUE
;
303 int32_t runLength
= 0;
307 if(!status
|| U_FAILURE(*status
)){
310 /* the source is null terminated */
312 srcLen
= u_strlen(src
);
317 length
= (((int32_t) src
[0]) << 16) | ((int32_t) src
[1]);
323 *status
= U_BUFFER_OVERFLOW_ERROR
;
327 for (; ai
<tgtLen
; ) {
328 /* This part of the loop places the next byte into the local
329 * variable 'b' each time through the loop. It keeps the
330 * current character in 'c' and uses the boolean 'nextChar'
331 * to see if we've taken both bytes out of 'c' yet.
336 b
= (uint8_t) (c
>> 8);
340 b
= (uint8_t) (c
& 0xFF);
344 /* This part of the loop is a tiny state machine which handles
345 * the parsing of the run-length encoding. This would be simpler
346 * if we could look ahead, but we can't, so we use 'node' to
347 * move between three nodes in the state machine.
351 /* Normal idle node */
352 if (b
== ESCAPE_BYTE
) {
360 /* We have seen one ESCAPE_BYTE; we expect either a second
361 * one, or a run length and value.
363 if (b
== ESCAPE_BYTE
) {
364 target
[ai
++] = ESCAPE_BYTE
;
375 /* We have seen an ESCAPE_BYTE and length byte. We interpret
376 * the next byte as the value to be repeated.
378 for (; j
<runLength
; ++j
){
382 *status
= U_BUFFER_OVERFLOW_ERROR
;
393 *status
= U_INTERNAL_PROGRAM_ERROR
;
394 /*("Bad run-length encoded byte array")*/
400 /*("Excess data in RLE byte array string");*/
401 *status
= U_INTERNAL_PROGRAM_ERROR
;