]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2000-2003, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * | |
9 | * File writejava.c | |
10 | * | |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 01/11/02 Ram Creation. | |
15 | ******************************************************************************* | |
16 | */ | |
17 | #include "rle.h" | |
18 | /** | |
19 | * The ESCAPE character is used during run-length encoding. It signals | |
20 | * a run of identical chars. | |
21 | */ | |
22 | static const uint16_t ESCAPE = 0xA5A5; | |
23 | ||
24 | /** | |
25 | * The ESCAPE_BYTE character is used during run-length encoding. It signals | |
26 | * a run of identical bytes. | |
27 | */ | |
28 | static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5; | |
29 | ||
30 | /** | |
31 | * Append a byte to the given StringBuffer, packing two bytes into each | |
32 | * character. The state parameter maintains intermediary data between | |
33 | * calls. | |
34 | * @param state A two-element array, with state[0] == 0 if this is the | |
35 | * first byte of a pair, or state[0] != 0 if this is the second byte | |
36 | * of a pair, in which case state[1] is the first byte. | |
37 | */ | |
38 | static uint16_t* | |
39 | appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) { | |
40 | if(!status || U_FAILURE(*status)){ | |
41 | return NULL; | |
42 | } | |
43 | if (state[0] != 0) { | |
44 | uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF)); | |
45 | if(buffer < buffLimit){ | |
46 | *buffer++ = c; | |
47 | }else{ | |
48 | *status = U_BUFFER_OVERFLOW_ERROR; | |
49 | } | |
50 | state[0] = 0; | |
51 | return buffer; | |
52 | } | |
53 | else { | |
54 | state[0] = 1; | |
55 | state[1] = value; | |
56 | return buffer; | |
57 | } | |
58 | } | |
59 | /** | |
60 | * Encode a run, possibly a degenerate run (of < 4 values). | |
61 | * @param length The length of the run; must be > 0 && <= 0xFF. | |
62 | */ | |
63 | static uint16_t* | |
64 | encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) { | |
65 | if(!status || U_FAILURE(*status)){ | |
66 | return NULL; | |
67 | } | |
68 | if (length < 4) { | |
69 | int32_t j=0; | |
70 | for (; j<length; ++j) { | |
71 | if (value == ESCAPE_BYTE) { | |
72 | buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); | |
73 | } | |
74 | buffer = appendEncodedByte(buffer,bufLimit, value, state, status); | |
75 | } | |
76 | } | |
77 | else { | |
78 | if (length == ESCAPE_BYTE) { | |
79 | if (value == ESCAPE_BYTE){ | |
80 | buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status); | |
81 | } | |
82 | buffer = appendEncodedByte(buffer,bufLimit, value, state, status); | |
83 | --length; | |
84 | } | |
85 | buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status); | |
86 | buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status); | |
87 | buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/ | |
88 | } | |
89 | return buffer; | |
90 | } | |
91 | ||
92 | #define APPEND( buffer, bufLimit, value, num, status){ \ | |
374ca955 A |
93 | if(buffer<bufLimit){ \ |
94 | *buffer++=(value); \ | |
95 | }else{ \ | |
96 | *status = U_BUFFER_OVERFLOW_ERROR; \ | |
97 | } \ | |
98 | num++; \ | |
b75a7d8f A |
99 | } |
100 | ||
101 | /** | |
102 | * Encode a run, possibly a degenerate run (of < 4 values). | |
103 | * @param length The length of the run; must be > 0 && <= 0xFFFF. | |
104 | */ | |
374ca955 | 105 | static uint16_t* |
b75a7d8f A |
106 | encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) { |
107 | int32_t num=0; | |
374ca955 | 108 | if (length < 4) { |
b75a7d8f A |
109 | int j=0; |
110 | for (; j<length; ++j) { | |
111 | if (value == (int32_t) ESCAPE){ | |
112 | APPEND(buffer,bufLimit,ESCAPE, num, status); | |
113 | ||
114 | } | |
115 | APPEND(buffer,bufLimit,value,num, status); | |
116 | } | |
117 | } | |
118 | else { | |
119 | if (length == (int32_t) ESCAPE) { | |
120 | if (value == (int32_t) ESCAPE){ | |
121 | APPEND(buffer,bufLimit,ESCAPE,num,status); | |
122 | ||
123 | } | |
124 | APPEND(buffer,bufLimit,value,num,status); | |
125 | --length; | |
126 | } | |
127 | APPEND(buffer,bufLimit,ESCAPE,num,status); | |
128 | APPEND(buffer,bufLimit,(uint16_t) length, num,status); | |
129 | APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */ | |
130 | } | |
131 | return buffer; | |
132 | } | |
133 | ||
134 | /** | |
135 | * Construct a string representing a char array. Use run-length encoding. | |
136 | * A character represents itself, unless it is the ESCAPE character. Then | |
137 | * the following notations are possible: | |
138 | * ESCAPE ESCAPE ESCAPE literal | |
139 | * ESCAPE n c n instances of character c | |
140 | * Since an encoded run occupies 3 characters, we only encode runs of 4 or | |
141 | * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF. | |
142 | * If we encounter a run where n == ESCAPE, we represent this as: | |
143 | * c ESCAPE n-1 c | |
144 | * The ESCAPE value is chosen so as not to collide with commonly | |
145 | * seen values. | |
146 | */ | |
374ca955 | 147 | int32_t |
b75a7d8f A |
148 | usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) { |
149 | uint16_t* bufLimit = buffer+bufLen; | |
374ca955 | 150 | uint16_t* saveBuffer = buffer; |
b75a7d8f A |
151 | if(buffer < bufLimit){ |
152 | *buffer++ = (uint16_t)(srcLen>>16); | |
153 | if(buffer<bufLimit){ | |
154 | uint16_t runValue = src[0]; | |
155 | int32_t runLength = 1; | |
156 | int i=1; | |
157 | *buffer++ = (uint16_t) srcLen; | |
158 | ||
159 | for (; i<srcLen; ++i) { | |
160 | uint16_t s = src[i]; | |
161 | if (s == runValue && runLength < 0xFFFF){ | |
162 | ++runLength; | |
163 | }else { | |
164 | buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status); | |
165 | runValue = s; | |
166 | runLength = 1; | |
167 | } | |
168 | } | |
169 | buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status); | |
170 | }else{ | |
171 | *status = U_BUFFER_OVERFLOW_ERROR; | |
172 | } | |
173 | }else{ | |
174 | *status = U_BUFFER_OVERFLOW_ERROR; | |
175 | } | |
374ca955 | 176 | return (int32_t)(buffer - saveBuffer); |
b75a7d8f A |
177 | } |
178 | ||
179 | /** | |
180 | * Construct a string representing a byte array. Use run-length encoding. | |
181 | * Two bytes are packed into a single char, with a single extra zero byte at | |
182 | * the end if needed. A byte represents itself, unless it is the | |
183 | * ESCAPE_BYTE. Then the following notations are possible: | |
184 | * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal | |
185 | * ESCAPE_BYTE n b n instances of byte b | |
186 | * Since an encoded run occupies 3 bytes, we only encode runs of 4 or | |
187 | * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF. | |
188 | * If we encounter a run where n == ESCAPE_BYTE, we represent this as: | |
189 | * b ESCAPE_BYTE n-1 b | |
190 | * The ESCAPE_BYTE value is chosen so as not to collide with commonly | |
191 | * seen values. | |
192 | */ | |
193 | int32_t | |
194 | byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) { | |
195 | const uint16_t* saveBuf = buffer; | |
196 | uint16_t* bufLimit = buffer+bufLen; | |
197 | if(buffer < bufLimit){ | |
198 | *buffer++ = ((uint16_t) (srcLen >> 16)); | |
199 | ||
200 | if(buffer<bufLimit){ | |
201 | uint8_t runValue = src[0]; | |
202 | int runLength = 1; | |
203 | uint8_t state[2]= {0}; | |
204 | int i=1; | |
205 | *buffer++=((uint16_t) srcLen); | |
206 | for (; i<srcLen; ++i) { | |
207 | uint8_t b = src[i]; | |
208 | if (b == runValue && runLength < 0xFF){ | |
209 | ++runLength; | |
210 | } | |
211 | else { | |
212 | buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status); | |
213 | runValue = b; | |
214 | runLength = 1; | |
215 | } | |
216 | } | |
217 | buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status); | |
218 | ||
219 | /* We must save the final byte, if there is one, by padding | |
220 | * an extra zero. | |
221 | */ | |
222 | if (state[0] != 0) { | |
223 | buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status); | |
224 | } | |
225 | }else{ | |
226 | *status = U_BUFFER_OVERFLOW_ERROR; | |
227 | } | |
228 | }else{ | |
229 | *status = U_BUFFER_OVERFLOW_ERROR; | |
230 | } | |
231 | return (int32_t) (buffer - saveBuf); | |
232 | } | |
233 | ||
234 | ||
235 | /** | |
236 | * Construct an array of shorts from a run-length encoded string. | |
237 | */ | |
238 | int32_t | |
239 | rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) { | |
240 | int32_t length = 0; | |
241 | int32_t ai = 0; | |
242 | int i=2; | |
243 | ||
244 | if(!status || U_FAILURE(*status)){ | |
245 | return 0; | |
246 | } | |
247 | /* the source is null terminated */ | |
248 | if(srcLen == -1){ | |
249 | srcLen = u_strlen(src); | |
250 | } | |
251 | if(srcLen <= 2){ | |
252 | return 2; | |
253 | } | |
254 | length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); | |
374ca955 | 255 | |
b75a7d8f A |
256 | if(target == NULL){ |
257 | return length; | |
258 | } | |
259 | if(tgtLen < length){ | |
260 | *status = U_BUFFER_OVERFLOW_ERROR; | |
261 | return length; | |
262 | } | |
263 | ||
264 | for (; i<srcLen; ++i) { | |
265 | uint16_t c = src[i]; | |
266 | if (c == ESCAPE) { | |
267 | c = src[++i]; | |
268 | if (c == ESCAPE) { | |
269 | target[ai++] = c; | |
270 | } else { | |
271 | int32_t runLength = (int32_t) c; | |
272 | uint16_t runValue = src[++i]; | |
273 | int j=0; | |
274 | for (; j<runLength; ++j) { | |
275 | target[ai++] = runValue; | |
276 | } | |
277 | } | |
278 | } | |
279 | else { | |
280 | target[ai++] = c; | |
281 | } | |
282 | } | |
283 | ||
284 | if (ai != length){ | |
285 | *status = U_INTERNAL_PROGRAM_ERROR; | |
286 | } | |
374ca955 | 287 | |
b75a7d8f A |
288 | return length; |
289 | } | |
290 | ||
291 | /** | |
292 | * Construct an array of bytes from a run-length encoded string. | |
293 | */ | |
294 | int32_t | |
295 | rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) { | |
296 | ||
297 | int32_t length = 0; | |
298 | UBool nextChar = TRUE; | |
299 | uint16_t c = 0; | |
300 | int32_t node = 0; | |
301 | int32_t runLength = 0; | |
302 | int32_t i = 2; | |
303 | int32_t ai=0; | |
304 | ||
305 | if(!status || U_FAILURE(*status)){ | |
306 | return 0; | |
307 | } | |
308 | /* the source is null terminated */ | |
309 | if(srcLen == -1){ | |
310 | srcLen = u_strlen(src); | |
311 | } | |
312 | if(srcLen <= 2){ | |
313 | return 2; | |
314 | } | |
315 | length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]); | |
374ca955 | 316 | |
b75a7d8f A |
317 | if(target == NULL){ |
318 | return length; | |
319 | } | |
320 | if(tgtLen < length){ | |
321 | *status = U_BUFFER_OVERFLOW_ERROR; | |
322 | return length; | |
323 | } | |
374ca955 | 324 | |
b75a7d8f A |
325 | for (; ai<tgtLen; ) { |
326 | /* This part of the loop places the next byte into the local | |
327 | * variable 'b' each time through the loop. It keeps the | |
328 | * current character in 'c' and uses the boolean 'nextChar' | |
329 | * to see if we've taken both bytes out of 'c' yet. | |
330 | */ | |
331 | uint8_t b; | |
332 | if (nextChar) { | |
333 | c = src[i++]; | |
334 | b = (uint8_t) (c >> 8); | |
335 | nextChar = FALSE; | |
336 | } | |
337 | else { | |
338 | b = (uint8_t) (c & 0xFF); | |
339 | nextChar = TRUE; | |
340 | } | |
341 | ||
342 | /* This part of the loop is a tiny state machine which handles | |
343 | * the parsing of the run-length encoding. This would be simpler | |
344 | * if we could look ahead, but we can't, so we use 'node' to | |
345 | * move between three nodes in the state machine. | |
346 | */ | |
347 | switch (node) { | |
348 | case 0: | |
349 | /* Normal idle node */ | |
350 | if (b == ESCAPE_BYTE) { | |
351 | node = 1; | |
352 | } | |
353 | else { | |
354 | target[ai++] = b; | |
355 | } | |
356 | break; | |
357 | case 1: | |
358 | /* We have seen one ESCAPE_BYTE; we expect either a second | |
359 | * one, or a run length and value. | |
360 | */ | |
361 | if (b == ESCAPE_BYTE) { | |
362 | target[ai++] = ESCAPE_BYTE; | |
363 | node = 0; | |
364 | } | |
365 | else { | |
366 | runLength = b; | |
367 | node = 2; | |
368 | } | |
369 | break; | |
370 | case 2: | |
371 | { | |
372 | int j=0; | |
373 | /* We have seen an ESCAPE_BYTE and length byte. We interpret | |
374 | * the next byte as the value to be repeated. | |
375 | */ | |
376 | for (; j<runLength; ++j){ | |
377 | if(ai<tgtLen){ | |
378 | target[ai++] = b; | |
379 | }else{ | |
380 | *status = U_BUFFER_OVERFLOW_ERROR; | |
381 | return ai; | |
382 | } | |
383 | } | |
384 | node = 0; | |
385 | break; | |
386 | } | |
387 | } | |
388 | } | |
389 | ||
390 | if (node != 0){ | |
391 | *status = U_INTERNAL_PROGRAM_ERROR; | |
392 | /*("Bad run-length encoded byte array")*/ | |
393 | return 0; | |
394 | } | |
395 | ||
374ca955 | 396 | |
b75a7d8f A |
397 | if (i != srcLen){ |
398 | /*("Excess data in RLE byte array string");*/ | |
399 | *status = U_INTERNAL_PROGRAM_ERROR; | |
400 | return ai; | |
401 | } | |
402 | ||
403 | return ai; | |
404 | } | |
405 |