git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	*******************************************************************************
	5	*
	6	* Copyright (C) 2000-2003, International Business Machines
	7	* Corporation and others. All Rights Reserved.
	8	*
	9	*******************************************************************************
	10	*
	11	* File writejava.c
	12	*
	13	* Modification History:
	14	*
	15	* Date Name Description
	16	* 01/11/02 Ram Creation.
	17	*******************************************************************************
	18	*/
	19	#include "rle.h"
	20	/**
	21	* The ESCAPE character is used during run-length encoding. It signals
	22	* a run of identical chars.
	23	*/
	24	static const uint16_t ESCAPE = 0xA5A5;
	25
	26	/**
	27	* The ESCAPE_BYTE character is used during run-length encoding. It signals
	28	* a run of identical bytes.
	29	*/
	30	static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
	31
	32	/**
	33	* Append a byte to the given StringBuffer, packing two bytes into each
	34	* character. The state parameter maintains intermediary data between
	35	* calls.
	36	* @param state A two-element array, with state[0] == 0 if this is the
	37	* first byte of a pair, or state[0] != 0 if this is the second byte
	38	* of a pair, in which case state[1] is the first byte.
	39	*/
	40	static uint16_t*
	41	appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
	42	if(!status \|\| U_FAILURE(*status)){
	43	return NULL;
	44	}
	45	if (state[0] != 0) {
	46	uint16_t c = (uint16_t) ((state[1] << 8) \| (((int32_t) value) & 0xFF));
	47	if(buffer < buffLimit){
	48	*buffer++ = c;
	49	}else{
	50	*status = U_BUFFER_OVERFLOW_ERROR;
	51	}
	52	state[0] = 0;
	53	return buffer;
	54	}
	55	else {
	56	state[0] = 1;
	57	state[1] = value;
	58	return buffer;
	59	}
	60	}
	61	/**
	62	* Encode a run, possibly a degenerate run (of < 4 values).
	63	* @param length The length of the run; must be > 0 && <= 0xFF.
	64	*/
	65	static uint16_t*
	66	encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
	67	if(!status \|\| U_FAILURE(*status)){
	68	return NULL;
	69	}
	70	if (length < 4) {
	71	int32_t j=0;
	72	for (; j<length; ++j) {
	73	if (value == ESCAPE_BYTE) {
	74	buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
	75	}
	76	buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
	77	}
	78	}
	79	else {
	80	if (length == ESCAPE_BYTE) {
	81	if (value == ESCAPE_BYTE){
	82	buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
	83	}
	84	buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
	85	--length;
	86	}
	87	buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
	88	buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
	89	buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
	90	}
	91	return buffer;
	92	}
	93
	94	#define APPEND( buffer, bufLimit, value, num, status) UPRV_BLOCK_MACRO_BEGIN { \
	95	if(buffer<bufLimit){ \
	96	*buffer++=(value); \
	97	}else{ \
	98	*status = U_BUFFER_OVERFLOW_ERROR; \
	99	} \
	100	num++; \
	101	} UPRV_BLOCK_MACRO_END
	102
	103	/**
	104	* Encode a run, possibly a degenerate run (of < 4 values).
	105	* @param length The length of the run; must be > 0 && <= 0xFFFF.
	106	*/
	107	static uint16_t*
	108	encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
	109	int32_t num=0;
	110	if (length < 4) {
	111	int j=0;
	112	for (; j<length; ++j) {
	113	if (value == (int32_t) ESCAPE){
	114	APPEND(buffer,bufLimit,ESCAPE, num, status);
	115
	116	}
	117	APPEND(buffer,bufLimit,value,num, status);
	118	}
	119	}
	120	else {
	121	if (length == (int32_t) ESCAPE) {
	122	if (value == (int32_t) ESCAPE){
	123	APPEND(buffer,bufLimit,ESCAPE,num,status);
	124
	125	}
	126	APPEND(buffer,bufLimit,value,num,status);
	127	--length;
	128	}
	129	APPEND(buffer,bufLimit,ESCAPE,num,status);
	130	APPEND(buffer,bufLimit,(uint16_t) length, num,status);
	131	APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
	132	}
	133	return buffer;
	134	}
	135
	136	/**
	137	* Construct a string representing a char array. Use run-length encoding.
	138	* A character represents itself, unless it is the ESCAPE character. Then
	139	* the following notations are possible:
	140	* ESCAPE ESCAPE ESCAPE literal
	141	* ESCAPE n c n instances of character c
	142	* Since an encoded run occupies 3 characters, we only encode runs of 4 or
	143	* more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
	144	* If we encounter a run where n == ESCAPE, we represent this as:
	145	* c ESCAPE n-1 c
	146	* The ESCAPE value is chosen so as not to collide with commonly
	147	* seen values.
	148	*/
	149	int32_t
	150	usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
	151	uint16_t* bufLimit = buffer+bufLen;
	152	uint16_t* saveBuffer = buffer;
	153	if(buffer < bufLimit){
	154	*buffer++ = (uint16_t)(srcLen>>16);
	155	if(buffer<bufLimit){
	156	uint16_t runValue = src[0];
	157	int32_t runLength = 1;
	158	int i=1;
	159	*buffer++ = (uint16_t) srcLen;
	160
	161	for (; i<srcLen; ++i) {
	162	uint16_t s = src[i];
	163	if (s == runValue && runLength < 0xFFFF){
	164	++runLength;
	165	}else {
	166	buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
	167	runValue = s;
	168	runLength = 1;
	169	}
	170	}
	171	buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
	172	}else{
	173	*status = U_BUFFER_OVERFLOW_ERROR;
	174	}
	175	}else{
	176	*status = U_BUFFER_OVERFLOW_ERROR;
	177	}
	178	return (int32_t)(buffer - saveBuffer);
	179	}
	180
	181	/**
	182	* Construct a string representing a byte array. Use run-length encoding.
	183	* Two bytes are packed into a single char, with a single extra zero byte at
	184	* the end if needed. A byte represents itself, unless it is the
	185	* ESCAPE_BYTE. Then the following notations are possible:
	186	* ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
	187	* ESCAPE_BYTE n b n instances of byte b
	188	* Since an encoded run occupies 3 bytes, we only encode runs of 4 or
	189	* more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
	190	* If we encounter a run where n == ESCAPE_BYTE, we represent this as:
	191	* b ESCAPE_BYTE n-1 b
	192	* The ESCAPE_BYTE value is chosen so as not to collide with commonly
	193	* seen values.
	194	*/
	195	int32_t
	196	byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
	197	const uint16_t* saveBuf = buffer;
	198	uint16_t* bufLimit = buffer+bufLen;
	199	if(buffer < bufLimit){
	200	*buffer++ = ((uint16_t) (srcLen >> 16));
	201
	202	if(buffer<bufLimit){
	203	uint8_t runValue = src[0];
	204	int runLength = 1;
	205	uint8_t state[2]= {0};
	206	int i=1;
	207	*buffer++=((uint16_t) srcLen);
	208	for (; i<srcLen; ++i) {
	209	uint8_t b = src[i];
	210	if (b == runValue && runLength < 0xFF){
	211	++runLength;
	212	}
	213	else {
	214	buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
	215	runValue = b;
	216	runLength = 1;
	217	}
	218	}
	219	buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
	220
	221	/* We must save the final byte, if there is one, by padding
	222	* an extra zero.
	223	*/
	224	if (state[0] != 0) {
	225	buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
	226	}
	227	}else{
	228	*status = U_BUFFER_OVERFLOW_ERROR;
	229	}
	230	}else{
	231	*status = U_BUFFER_OVERFLOW_ERROR;
	232	}
	233	return (int32_t) (buffer - saveBuf);
	234	}
	235
	236
	237	/**
	238	* Construct an array of shorts from a run-length encoded string.
	239	*/
	240	int32_t
	241	rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
	242	int32_t length = 0;
	243	int32_t ai = 0;
	244	int i=2;
	245
	246	if(!status \|\| U_FAILURE(*status)){
	247	return 0;
	248	}
	249	/* the source is null terminated */
	250	if(srcLen == -1){
	251	srcLen = u_strlen(src);
	252	}
	253	if(srcLen <= 2){
	254	return 2;
	255	}
	256	length = (((int32_t) src[0]) << 16) \| ((int32_t) src[1]);
	257
	258	if(target == NULL){
	259	return length;
	260	}
	261	if(tgtLen < length){
	262	*status = U_BUFFER_OVERFLOW_ERROR;
	263	return length;
	264	}
	265
	266	for (; i<srcLen; ++i) {
	267	uint16_t c = src[i];
	268	if (c == ESCAPE) {
	269	c = src[++i];
	270	if (c == ESCAPE) {
	271	target[ai++] = c;
	272	} else {
	273	int32_t runLength = (int32_t) c;
	274	uint16_t runValue = src[++i];
	275	int j=0;
	276	for (; j<runLength; ++j) {
	277	target[ai++] = runValue;
	278	}
	279	}
	280	}
	281	else {
	282	target[ai++] = c;
	283	}
	284	}
	285
	286	if (ai != length){
	287	*status = U_INTERNAL_PROGRAM_ERROR;
	288	}
	289
	290	return length;
	291	}
	292
	293	/**
	294	* Construct an array of bytes from a run-length encoded string.
	295	*/
	296	int32_t
	297	rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
	298
	299	int32_t length = 0;
	300	UBool nextChar = TRUE;
	301	uint16_t c = 0;
	302	int32_t node = 0;
	303	int32_t runLength = 0;
	304	int32_t i = 2;
	305	int32_t ai=0;
	306
	307	if(!status \|\| U_FAILURE(*status)){
	308	return 0;
	309	}
	310	/* the source is null terminated */
	311	if(srcLen == -1){
	312	srcLen = u_strlen(src);
	313	}
	314	if(srcLen <= 2){
	315	return 2;
	316	}
	317	length = (((int32_t) src[0]) << 16) \| ((int32_t) src[1]);
	318
	319	if(target == NULL){
	320	return length;
	321	}
	322	if(tgtLen < length){
	323	*status = U_BUFFER_OVERFLOW_ERROR;
	324	return length;
	325	}
	326
	327	for (; ai<tgtLen; ) {
	328	/* This part of the loop places the next byte into the local
	329	* variable 'b' each time through the loop. It keeps the
	330	* current character in 'c' and uses the boolean 'nextChar'
	331	* to see if we've taken both bytes out of 'c' yet.
	332	*/
	333	uint8_t b;
	334	if (nextChar) {
	335	c = src[i++];
	336	b = (uint8_t) (c >> 8);
	337	nextChar = FALSE;
	338	}
	339	else {
	340	b = (uint8_t) (c & 0xFF);
	341	nextChar = TRUE;
	342	}
	343
	344	/* This part of the loop is a tiny state machine which handles
	345	* the parsing of the run-length encoding. This would be simpler
	346	* if we could look ahead, but we can't, so we use 'node' to
	347	* move between three nodes in the state machine.
	348	*/
	349	switch (node) {
	350	case 0:
	351	/* Normal idle node */
	352	if (b == ESCAPE_BYTE) {
	353	node = 1;
	354	}
	355	else {
	356	target[ai++] = b;
	357	}
	358	break;
	359	case 1:
	360	/* We have seen one ESCAPE_BYTE; we expect either a second
	361	* one, or a run length and value.
	362	*/
	363	if (b == ESCAPE_BYTE) {
	364	target[ai++] = ESCAPE_BYTE;
	365	node = 0;
	366	}
	367	else {
	368	runLength = b;
	369	node = 2;
	370	}
	371	break;
	372	case 2:
	373	{
	374	int j=0;
	375	/* We have seen an ESCAPE_BYTE and length byte. We interpret
	376	* the next byte as the value to be repeated.
	377	*/
	378	for (; j<runLength; ++j){
	379	if(ai<tgtLen){
	380	target[ai++] = b;
	381	}else{
	382	*status = U_BUFFER_OVERFLOW_ERROR;
	383	return ai;
	384	}
	385	}
	386	node = 0;
	387	break;
	388	}
	389	}
	390	}
	391
	392	if (node != 0){
	393	*status = U_INTERNAL_PROGRAM_ERROR;
	394	/("Bad run-length encoded byte array")/
	395	return 0;
	396	}
	397
	398
	399	if (i != srcLen){
	400	/("Excess data in RLE byte array string");/
	401	*status = U_INTERNAL_PROGRAM_ERROR;
	402	return ai;
	403	}
	404
	405	return ai;
	406	}
	407