]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * | |
6 | * Copyright (C) 2000-2003, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ******************************************************************************* | |
10 | * | |
11 | * File writejava.c | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 01/11/02 Ram Creation. | |
17 | ******************************************************************************* | |
18 | */ | |
19 | #include "rle.h" | |
20 | /** | |
21 | * The ESCAPE character is used during run-length encoding. It signals | |
22 | * a run of identical chars. | |
23 | */ | |
24 | static const uint16_t ESCAPE = 0xA5A5; | |
25 | ||
26 | /** | |
27 | * The ESCAPE_BYTE character is used during run-length encoding. It signals | |
28 | * a run of identical bytes. | |
29 | */ | |
30 | static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; | |
31 | ||
32 | /** | |
33 | * Append a byte to the given StringBuffer, packing two bytes into each | |
34 | * character. The state parameter maintains intermediary data between | |
35 | * calls. | |
36 | * @param state A two-element array, with state[0] == 0 if this is the | |
37 | * first byte of a pair, or state[0] != 0 if this is the second byte | |
38 | * of a pair, in which case state[1] is the first byte. | |
39 | */ | |
40 | static uint16_t* | |
41 | appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { | |
42 | if(!status || U_FAILURE(*status)){ | |
43 | return NULL; | |
44 | } | |
45 | if (state[0] != 0) { | |
46 | uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); | |
47 | if(buffer < buffLimit){ | |
48 | *buffer++ = c; | |
49 | }else{ | |
50 | *status = U_BUFFER_OVERFLOW_ERROR; | |
51 | } | |
52 | state[0] = 0; | |
53 | return buffer; | |
54 | } | |
55 | else { | |
56 | state[0] = 1; | |
57 | state[1] = value; | |
58 | return buffer; | |
59 | } | |
60 | } | |
61 | /** | |
62 | * Encode a run, possibly a degenerate run (of < 4 values). | |
63 | * @param length The length of the run; must be > 0 && <= 0xFF. | |
64 | */ | |
65 | static uint16_t* | |
66 | encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { | |
67 | if(!status || U_FAILURE(*status)){ | |
68 | return NULL; | |
69 | } | |
70 | if (length < 4) { | |
71 | int32_t j=0; | |
72 | for (; j<length; ++j) { | |
73 | if (value == ESCAPE_BYTE) { | |
74 | buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); | |
75 | } | |
76 | buffer = appendEncodedByte(buffer,bufLimit, value, state, status); | |
77 | } | |
78 | } | |
79 | else { | |
80 | if (length == ESCAPE_BYTE) { | |
81 | if (value == ESCAPE_BYTE){ | |
82 | buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); | |
83 | } | |
84 | buffer = appendEncodedByte(buffer,bufLimit, value, state, status); | |
85 | --length; | |
86 | } | |
87 | buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); | |
88 | buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); | |
89 | buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ | |
90 | } | |
91 | return buffer; | |
92 | } | |
93 | ||
94 | #define APPEND( buffer, bufLimit, value, num, status) UPRV_BLOCK_MACRO_BEGIN { \ | |
95 | if(buffer<bufLimit){ \ | |
96 | *buffer++=(value); \ | |
97 | }else{ \ | |
98 | *status = U_BUFFER_OVERFLOW_ERROR; \ | |
99 | } \ | |
100 | num++; \ | |
101 | } UPRV_BLOCK_MACRO_END | |
102 | ||
103 | /** | |
104 | * Encode a run, possibly a degenerate run (of < 4 values). | |
105 | * @param length The length of the run; must be > 0 && <= 0xFFFF. | |
106 | */ | |
107 | static uint16_t* | |
108 | encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { | |
109 | int32_t num=0; | |
110 | if (length < 4) { | |
111 | int j=0; | |
112 | for (; j<length; ++j) { | |
113 | if (value == (int32_t) ESCAPE){ | |
114 | APPEND(buffer,bufLimit,ESCAPE, num, status); | |
115 | ||
116 | } | |
117 | APPEND(buffer,bufLimit,value,num, status); | |
118 | } | |
119 | } | |
120 | else { | |
121 | if (length == (int32_t) ESCAPE) { | |
122 | if (value == (int32_t) ESCAPE){ | |
123 | APPEND(buffer,bufLimit,ESCAPE,num,status); | |
124 | ||
125 | } | |
126 | APPEND(buffer,bufLimit,value,num,status); | |
127 | --length; | |
128 | } | |
129 | APPEND(buffer,bufLimit,ESCAPE,num,status); | |
130 | APPEND(buffer,bufLimit,(uint16_t) length, num,status); | |
131 | APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ | |
132 | } | |
133 | return buffer; | |
134 | } | |
135 | ||
136 | /** | |
137 | * Construct a string representing a char array. Use run-length encoding. | |
138 | * A character represents itself, unless it is the ESCAPE character. Then | |
139 | * the following notations are possible: | |
140 | * ESCAPE ESCAPE ESCAPE literal | |
141 | * ESCAPE n c n instances of character c | |
142 | * Since an encoded run occupies 3 characters, we only encode runs of 4 or | |
143 | * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. | |
144 | * If we encounter a run where n == ESCAPE, we represent this as: | |
145 | * c ESCAPE n-1 c | |
146 | * The ESCAPE value is chosen so as not to collide with commonly | |
147 | * seen values. | |
148 | */ | |
149 | int32_t | |
150 | usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { | |
151 | uint16_t* bufLimit = buffer+bufLen; | |
152 | uint16_t* saveBuffer = buffer; | |
153 | if(buffer < bufLimit){ | |
154 | *buffer++ = (uint16_t)(srcLen>>16); | |
155 | if(buffer<bufLimit){ | |
156 | uint16_t runValue = src[0]; | |
157 | int32_t runLength = 1; | |
158 | int i=1; | |
159 | *buffer++ = (uint16_t) srcLen; | |
160 | ||
161 | for (; i<srcLen; ++i) { | |
162 | uint16_t s = src[i]; | |
163 | if (s == runValue && runLength < 0xFFFF){ | |
164 | ++runLength; | |
165 | }else { | |
166 | buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); | |
167 | runValue = s; | |
168 | runLength = 1; | |
169 | } | |
170 | } | |
171 | buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); | |
172 | }else{ | |
173 | *status = U_BUFFER_OVERFLOW_ERROR; | |
174 | } | |
175 | }else{ | |
176 | *status = U_BUFFER_OVERFLOW_ERROR; | |
177 | } | |
178 | return (int32_t)(buffer - saveBuffer); | |
179 | } | |
180 | ||
181 | /** | |
182 | * Construct a string representing a byte array. Use run-length encoding. | |
183 | * Two bytes are packed into a single char, with a single extra zero byte at | |
184 | * the end if needed. A byte represents itself, unless it is the | |
185 | * ESCAPE_BYTE. Then the following notations are possible: | |
186 | * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal | |
187 | * ESCAPE_BYTE n b n instances of byte b | |
188 | * Since an encoded run occupies 3 bytes, we only encode runs of 4 or | |
189 | * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. | |
190 | * If we encounter a run where n == ESCAPE_BYTE, we represent this as: | |
191 | * b ESCAPE_BYTE n-1 b | |
192 | * The ESCAPE_BYTE value is chosen so as not to collide with commonly | |
193 | * seen values. | |
194 | */ | |
195 | int32_t | |
196 | byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { | |
197 | const uint16_t* saveBuf = buffer; | |
198 | uint16_t* bufLimit = buffer+bufLen; | |
199 | if(buffer < bufLimit){ | |
200 | *buffer++ = ((uint16_t) (srcLen >> 16)); | |
201 | ||
202 | if(buffer<bufLimit){ | |
203 | uint8_t runValue = src[0]; | |
204 | int runLength = 1; | |
205 | uint8_t state[2]= {0}; | |
206 | int i=1; | |
207 | *buffer++=((uint16_t) srcLen); | |
208 | for (; i<srcLen; ++i) { | |
209 | uint8_t b = src[i]; | |
210 | if (b == runValue && runLength < 0xFF){ | |
211 | ++runLength; | |
212 | } | |
213 | else { | |
214 | buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); | |
215 | runValue = b; | |
216 | runLength = 1; | |
217 | } | |
218 | } | |
219 | buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); | |
220 | ||
221 | /* We must save the final byte, if there is one, by padding | |
222 | * an extra zero. | |
223 | */ | |
224 | if (state[0] != 0) { | |
225 | buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); | |
226 | } | |
227 | }else{ | |
228 | *status = U_BUFFER_OVERFLOW_ERROR; | |
229 | } | |
230 | }else{ | |
231 | *status = U_BUFFER_OVERFLOW_ERROR; | |
232 | } | |
233 | return (int32_t) (buffer - saveBuf); | |
234 | } | |
235 | ||
236 | ||
237 | /** | |
238 | * Construct an array of shorts from a run-length encoded string. | |
239 | */ | |
240 | int32_t | |
241 | rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { | |
242 | int32_t length = 0; | |
243 | int32_t ai = 0; | |
244 | int i=2; | |
245 | ||
246 | if(!status || U_FAILURE(*status)){ | |
247 | return 0; | |
248 | } | |
249 | /* the source is null terminated */ | |
250 | if(srcLen == -1){ | |
251 | srcLen = u_strlen(src); | |
252 | } | |
253 | if(srcLen <= 2){ | |
254 | return 2; | |
255 | } | |
256 | length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); | |
257 | ||
258 | if(target == NULL){ | |
259 | return length; | |
260 | } | |
261 | if(tgtLen < length){ | |
262 | *status = U_BUFFER_OVERFLOW_ERROR; | |
263 | return length; | |
264 | } | |
265 | ||
266 | for (; i<srcLen; ++i) { | |
267 | uint16_t c = src[i]; | |
268 | if (c == ESCAPE) { | |
269 | c = src[++i]; | |
270 | if (c == ESCAPE) { | |
271 | target[ai++] = c; | |
272 | } else { | |
273 | int32_t runLength = (int32_t) c; | |
274 | uint16_t runValue = src[++i]; | |
275 | int j=0; | |
276 | for (; j<runLength; ++j) { | |
277 | target[ai++] = runValue; | |
278 | } | |
279 | } | |
280 | } | |
281 | else { | |
282 | target[ai++] = c; | |
283 | } | |
284 | } | |
285 | ||
286 | if (ai != length){ | |
287 | *status = U_INTERNAL_PROGRAM_ERROR; | |
288 | } | |
289 | ||
290 | return length; | |
291 | } | |
292 | ||
293 | /** | |
294 | * Construct an array of bytes from a run-length encoded string. | |
295 | */ | |
296 | int32_t | |
297 | rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { | |
298 | ||
299 | int32_t length = 0; | |
300 | UBool nextChar = TRUE; | |
301 | uint16_t c = 0; | |
302 | int32_t node = 0; | |
303 | int32_t runLength = 0; | |
304 | int32_t i = 2; | |
305 | int32_t ai=0; | |
306 | ||
307 | if(!status || U_FAILURE(*status)){ | |
308 | return 0; | |
309 | } | |
310 | /* the source is null terminated */ | |
311 | if(srcLen == -1){ | |
312 | srcLen = u_strlen(src); | |
313 | } | |
314 | if(srcLen <= 2){ | |
315 | return 2; | |
316 | } | |
317 | length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); | |
318 | ||
319 | if(target == NULL){ | |
320 | return length; | |
321 | } | |
322 | if(tgtLen < length){ | |
323 | *status = U_BUFFER_OVERFLOW_ERROR; | |
324 | return length; | |
325 | } | |
326 | ||
327 | for (; ai<tgtLen; ) { | |
328 | /* This part of the loop places the next byte into the local | |
329 | * variable 'b' each time through the loop. It keeps the | |
330 | * current character in 'c' and uses the boolean 'nextChar' | |
331 | * to see if we've taken both bytes out of 'c' yet. | |
332 | */ | |
333 | uint8_t b; | |
334 | if (nextChar) { | |
335 | c = src[i++]; | |
336 | b = (uint8_t) (c >> 8); | |
337 | nextChar = FALSE; | |
338 | } | |
339 | else { | |
340 | b = (uint8_t) (c & 0xFF); | |
341 | nextChar = TRUE; | |
342 | } | |
343 | ||
344 | /* This part of the loop is a tiny state machine which handles | |
345 | * the parsing of the run-length encoding. This would be simpler | |
346 | * if we could look ahead, but we can't, so we use 'node' to | |
347 | * move between three nodes in the state machine. | |
348 | */ | |
349 | switch (node) { | |
350 | case 0: | |
351 | /* Normal idle node */ | |
352 | if (b == ESCAPE_BYTE) { | |
353 | node = 1; | |
354 | } | |
355 | else { | |
356 | target[ai++] = b; | |
357 | } | |
358 | break; | |
359 | case 1: | |
360 | /* We have seen one ESCAPE_BYTE; we expect either a second | |
361 | * one, or a run length and value. | |
362 | */ | |
363 | if (b == ESCAPE_BYTE) { | |
364 | target[ai++] = ESCAPE_BYTE; | |
365 | node = 0; | |
366 | } | |
367 | else { | |
368 | runLength = b; | |
369 | node = 2; | |
370 | } | |
371 | break; | |
372 | case 2: | |
373 | { | |
374 | int j=0; | |
375 | /* We have seen an ESCAPE_BYTE and length byte. We interpret | |
376 | * the next byte as the value to be repeated. | |
377 | */ | |
378 | for (; j<runLength; ++j){ | |
379 | if(ai<tgtLen){ | |
380 | target[ai++] = b; | |
381 | }else{ | |
382 | *status = U_BUFFER_OVERFLOW_ERROR; | |
383 | return ai; | |
384 | } | |
385 | } | |
386 | node = 0; | |
387 | break; | |
388 | } | |
389 | } | |
390 | } | |
391 | ||
392 | if (node != 0){ | |
393 | *status = U_INTERNAL_PROGRAM_ERROR; | |
394 | /*("Bad run-length encoded byte array")*/ | |
395 | return 0; | |
396 | } | |
397 | ||
398 | ||
399 | if (i != srcLen){ | |
400 | /*("Excess data in RLE byte array string");*/ | |
401 | *status = U_INTERNAL_PROGRAM_ERROR; | |
402 | return ai; | |
403 | } | |
404 | ||
405 | return ai; | |
406 | } | |
407 |