git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	*
	4	* Copyright (C) 2000-2003, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	*
	9	* File writejava.c
	10	*
	11	* Modification History:
	12	*
	13	* Date Name Description
	14	* 01/11/02 Ram Creation.
	15	*******************************************************************************
	16	*/
	17	#include "rle.h"
	18	/**
	19	* ESCAPE is the marker unit of the 16-bit run-length encoding. It introduces
	20	* either an escaped literal ESCAPE or a run of identical 16-bit values.
	21	*/
	22	static const uint16_t ESCAPE = 0xA5A5;
	23
	24	/**
	25	* ESCAPE_BYTE is the marker byte of the byte-wise run-length encoding. It
	26	* introduces either an escaped literal ESCAPE_BYTE or a run of identical bytes.
	27	*/
	28	static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
	29
	30	/**
	31	* Append a byte to the given StringBuffer, packing two bytes into each
	32	* character. The state parameter maintains intermediary data between
	33	* calls.
	34	* @param state A two-element array, with state[0] == 0 if this is the
	35	* first byte of a pair, or state[0] != 0 if this is the second byte
	36	* of a pair, in which case state[1] is the first byte.
	37	*/
	38	static uint16_t*
	39	appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
	40	if(!status \|\| U_FAILURE(*status)){
	41	return NULL;
	42	}
	43	if (state[0] != 0) {
	44	uint16_t c = (uint16_t) ((state[1] << 8) \| (((int32_t) value) & 0xFF));
	45	if(buffer < buffLimit){
	46	*buffer++ = c;
	47	}else{
	48	*status = U_BUFFER_OVERFLOW_ERROR;
	49	}
	50	state[0] = 0;
	51	return buffer;
	52	}
	53	else {
	54	state[0] = 1;
	55	state[1] = value;
	56	return buffer;
	57	}
	58	}
	59	/**
	60	* Encode a run, possibly a degenerate run (of < 4 values).
	61	* @param length The length of the run; must be > 0 && <= 0xFF.
	62	*/
	63	static uint16_t*
	64	encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
	65	if(!status \|\| U_FAILURE(*status)){
	66	return NULL;
	67	}
	68	if (length < 4) {
	69	int32_t j=0;
	70	for (; j<length; ++j) {
	71	if (value == ESCAPE_BYTE) {
	72	buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
	73	}
	74	buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
	75	}
	76	}
	77	else {
	78	if (length == ESCAPE_BYTE) {
	79	if (value == ESCAPE_BYTE){
	80	buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
	81	}
	82	buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
	83	--length;
	84	}
	85	buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
	86	buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
	87	buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
	88	}
	89	return buffer;
	90	}
	91
	/*
	 * Store `value` at the write cursor `buffer` and advance the cursor, or set
	 * *status to U_BUFFER_OVERFLOW_ERROR when the cursor has reached `bufLimit`.
	 * `num` is incremented either way.
	 *
	 * The body is wrapped in do { } while (0) so that an invocation followed by a
	 * semicolon is exactly one statement, including in an unbraced if/else.
	 * `buffer` and `num` must be modifiable lvalues; `buffer` is evaluated more
	 * than once, so do not pass an expression with side effects.
	 */
	#define APPEND(buffer, bufLimit, value, num, status) \
		do { \
			if ((buffer) < (bufLimit)) { \
				*(buffer)++ = (value); \
			} else { \
				*(status) = U_BUFFER_OVERFLOW_ERROR; \
			} \
			(num)++; \
		} while (0)
	100
	101	/**
	102	* Encode a run, possibly a degenerate run (of < 4 values).
	103	* @param length The length of the run; must be > 0 && <= 0xFFFF.
	104	*/
	105	static uint16_t*
	106	encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
	107	int32_t num=0;
	108	if (length < 4) {
	109	int j=0;
	110	for (; j<length; ++j) {
	111	if (value == (int32_t) ESCAPE){
	112	APPEND(buffer,bufLimit,ESCAPE, num, status);
	113
	114	}
	115	APPEND(buffer,bufLimit,value,num, status);
	116	}
	117	}
	118	else {
	119	if (length == (int32_t) ESCAPE) {
	120	if (value == (int32_t) ESCAPE){
	121	APPEND(buffer,bufLimit,ESCAPE,num,status);
	122
	123	}
	124	APPEND(buffer,bufLimit,value,num,status);
	125	--length;
	126	}
	127	APPEND(buffer,bufLimit,ESCAPE,num,status);
	128	APPEND(buffer,bufLimit,(uint16_t) length, num,status);
	129	APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
	130	}
	131	return buffer;
	132	}
	133
	134	/**
	135	* Construct a string representing a char array. Use run-length encoding.
	136	* A character represents itself, unless it is the ESCAPE character. Then
	137	* the following notations are possible:
	138	* ESCAPE ESCAPE ESCAPE literal
	139	* ESCAPE n c n instances of character c
	140	* Since an encoded run occupies 3 characters, we only encode runs of 4 or
	141	* more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
	142	* If we encounter a run where n == ESCAPE, we represent this as:
	143	* c ESCAPE n-1 c
	144	* The ESCAPE value is chosen so as not to collide with commonly
	145	* seen values.
	146	*/
	147	int32_t
	148	usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
	149	uint16_t* bufLimit = buffer+bufLen;
	150	uint16_t* saveBuffer = buffer;
	151	if(buffer < bufLimit){
	152	*buffer++ = (uint16_t)(srcLen>>16);
	153	if(buffer<bufLimit){
	154	uint16_t runValue = src[0];
	155	int32_t runLength = 1;
	156	int i=1;
	157	*buffer++ = (uint16_t) srcLen;
	158
	159	for (; i<srcLen; ++i) {
	160	uint16_t s = src[i];
	161	if (s == runValue && runLength < 0xFFFF){
	162	++runLength;
	163	}else {
	164	buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
	165	runValue = s;
	166	runLength = 1;
	167	}
	168	}
	169	buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
	170	}else{
	171	*status = U_BUFFER_OVERFLOW_ERROR;
	172	}
	173	}else{
	174	*status = U_BUFFER_OVERFLOW_ERROR;
	175	}
	176	return (int32_t)(buffer - saveBuffer);
	177	}
	178
	179	/**
	180	* Construct a string representing a byte array. Use run-length encoding.
	181	* Two bytes are packed into a single char, with a single extra zero byte at
	182	* the end if needed. A byte represents itself, unless it is the
	183	* ESCAPE_BYTE. Then the following notations are possible:
	184	* ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
	185	* ESCAPE_BYTE n b n instances of byte b
	186	* Since an encoded run occupies 3 bytes, we only encode runs of 4 or
	187	* more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
	188	* If we encounter a run where n == ESCAPE_BYTE, we represent this as:
	189	* b ESCAPE_BYTE n-1 b
	190	* The ESCAPE_BYTE value is chosen so as not to collide with commonly
	191	* seen values.
	192	*/
	193	int32_t
	194	byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
	195	const uint16_t* saveBuf = buffer;
	196	uint16_t* bufLimit = buffer+bufLen;
	197	if(buffer < bufLimit){
	198	*buffer++ = ((uint16_t) (srcLen >> 16));
	199
	200	if(buffer<bufLimit){
	201	uint8_t runValue = src[0];
	202	int runLength = 1;
	203	uint8_t state[2]= {0};
	204	int i=1;
	205	*buffer++=((uint16_t) srcLen);
	206	for (; i<srcLen; ++i) {
	207	uint8_t b = src[i];
	208	if (b == runValue && runLength < 0xFF){
	209	++runLength;
	210	}
	211	else {
	212	buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
	213	runValue = b;
	214	runLength = 1;
	215	}
	216	}
	217	buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
	218
	219	/* We must save the final byte, if there is one, by padding
	220	* an extra zero.
	221	*/
	222	if (state[0] != 0) {
	223	buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
	224	}
	225	}else{
	226	*status = U_BUFFER_OVERFLOW_ERROR;
	227	}
	228	}else{
	229	*status = U_BUFFER_OVERFLOW_ERROR;
	230	}
	231	return (int32_t) (buffer - saveBuf);
	232	}
	233
	234
	235	/**
	236	* Construct an array of shorts from a run-length encoded string.
	237	*/
	238	int32_t
	239	rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
	240	int32_t length = 0;
	241	int32_t ai = 0;
	242	int i=2;
	243
	244	if(!status \|\| U_FAILURE(*status)){
	245	return 0;
	246	}
	247	/* the source is null terminated */
	248	if(srcLen == -1){
	249	srcLen = u_strlen(src);
	250	}
	251	if(srcLen <= 2){
	252	return 2;
	253	}
	254	length = (((int32_t) src[0]) << 16) \| ((int32_t) src[1]);
	255
	256	if(target == NULL){
	257	return length;
	258	}
	259	if(tgtLen < length){
	260	*status = U_BUFFER_OVERFLOW_ERROR;
	261	return length;
	262	}
	263
	264	for (; i<srcLen; ++i) {
	265	uint16_t c = src[i];
	266	if (c == ESCAPE) {
	267	c = src[++i];
	268	if (c == ESCAPE) {
	269	target[ai++] = c;
	270	} else {
	271	int32_t runLength = (int32_t) c;
	272	uint16_t runValue = src[++i];
	273	int j=0;
	274	for (; j<runLength; ++j) {
	275	target[ai++] = runValue;
	276	}
	277	}
	278	}
	279	else {
	280	target[ai++] = c;
	281	}
	282	}
	283
	284	if (ai != length){
	285	*status = U_INTERNAL_PROGRAM_ERROR;
	286	}
	287
	288	return length;
	289	}
	290
	291	/**
	292	* Construct an array of bytes from a run-length encoded string.
	293	*/
	294	int32_t
	295	rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
	296
	297	int32_t length = 0;
	298	UBool nextChar = TRUE;
	299	uint16_t c = 0;
	300	int32_t node = 0;
	301	int32_t runLength = 0;
	302	int32_t i = 2;
	303	int32_t ai=0;
	304
	305	if(!status \|\| U_FAILURE(*status)){
	306	return 0;
	307	}
	308	/* the source is null terminated */
	309	if(srcLen == -1){
	310	srcLen = u_strlen(src);
	311	}
	312	if(srcLen <= 2){
	313	return 2;
	314	}
	315	length = (((int32_t) src[0]) << 16) \| ((int32_t) src[1]);
	316
	317	if(target == NULL){
	318	return length;
	319	}
	320	if(tgtLen < length){
	321	*status = U_BUFFER_OVERFLOW_ERROR;
	322	return length;
	323	}
	324
	325	for (; ai<tgtLen; ) {
	326	/* This part of the loop places the next byte into the local
	327	* variable 'b' each time through the loop. It keeps the
	328	* current character in 'c' and uses the boolean 'nextChar'
	329	* to see if we've taken both bytes out of 'c' yet.
	330	*/
	331	uint8_t b;
	332	if (nextChar) {
	333	c = src[i++];
	334	b = (uint8_t) (c >> 8);
	335	nextChar = FALSE;
	336	}
	337	else {
	338	b = (uint8_t) (c & 0xFF);
	339	nextChar = TRUE;
	340	}
	341
	342	/* This part of the loop is a tiny state machine which handles
	343	* the parsing of the run-length encoding. This would be simpler
	344	* if we could look ahead, but we can't, so we use 'node' to
	345	* move between three nodes in the state machine.
	346	*/
	347	switch (node) {
	348	case 0:
	349	/* Normal idle node */
	350	if (b == ESCAPE_BYTE) {
	351	node = 1;
	352	}
	353	else {
	354	target[ai++] = b;
	355	}
	356	break;
	357	case 1:
	358	/* We have seen one ESCAPE_BYTE; we expect either a second
	359	* one, or a run length and value.
	360	*/
	361	if (b == ESCAPE_BYTE) {
	362	target[ai++] = ESCAPE_BYTE;
	363	node = 0;
	364	}
	365	else {
	366	runLength = b;
	367	node = 2;
	368	}
	369	break;
	370	case 2:
	371	{
	372	int j=0;
	373	/* We have seen an ESCAPE_BYTE and length byte. We interpret
	374	* the next byte as the value to be repeated.
	375	*/
	376	for (; j<runLength; ++j){
	377	if(ai<tgtLen){
	378	target[ai++] = b;
	379	}else{
	380	*status = U_BUFFER_OVERFLOW_ERROR;
	381	return ai;
	382	}
	383	}
	384	node = 0;
	385	break;
	386	}
	387	}
	388	}
	389
	390	if (node != 0){
	391	*status = U_INTERNAL_PROGRAM_ERROR;
	392	/("Bad run-length encoded byte array")/
	393	return 0;
	394	}
	395
	396
	397	if (i != srcLen){
	398	/("Excess data in RLE byte array string");/
	399	*status = U_INTERNAL_PROGRAM_ERROR;
	400	return ai;
	401	}
	402
	403	return ai;
	404	}
	405