git.saurik.com Git - apple/javascriptcore.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	*
	4	* Copyright (C) 1999-2009, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: utf8.h
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 1999sep13
	14	* created by: Markus W. Scherer
	15	*/
	16
	17	/**
	18	* \file
	19	* \brief C API: 8-bit Unicode handling macros
	20	*
	21	* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
	22	* utf8.h is included by utf.h after unicode/umachine.h
	23	* and some common definitions.
	24	*
	25	* For more information see utf.h and the ICU User Guide Strings chapter
	26	* (http://icu-project.org/userguide/strings.html).
	27	*
	28	* <em>Usage:</em>
	29	* ICU coding guidelines for if() statements should be followed when using these macros.
	30	* Compound statements (curly braces {}) must be used for if-else-while...
	31	* bodies and all macro statements should be terminated with semicolon.
	32	*/
	33
	34	#ifndef __UTF8_H__
	35	#define __UTF8_H__
	36
	37	/* utf.h must be included first. */
	38	#ifndef __UTF_H__
	39	# include "unicode/utf.h"
	40	#endif
	41
	42	/* internal definitions ----------------------------------------------------- */
	43
	44	/**
	45	* \var utf8_countTrailBytes
	46	* Internal array with numbers of trail bytes for any given byte used in
	47	* lead byte position.
	48	*
	49	* This is internal since it is not meant to be called directly by external clients;
	50	* however it is called by public macros in this file and thus must remain stable,
	51	* and should not be hidden when other internal functions are hidden (otherwise
	52	* public macros would fail to compile).
	53	* @internal
	54	*/
	55	#ifdef U_UTF8_IMPL
	56	U_EXPORT const uint8_t
	57	#elif defined(U_STATIC_IMPLEMENTATION) \|\| defined(U_COMMON_IMPLEMENTATION)
	58	U_CFUNC const uint8_t
	59	#else
	60	U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? / /U_IMPORT*/
	61	#endif
	62	utf8_countTrailBytes[256];
	63
	/**
	 * Count the trail bytes for a UTF-8 lead byte.
	 *
	 * Looks the byte up in utf8_countTrailBytes[]. The argument is parenthesized
	 * before it is narrowed to uint8_t, so an expression argument such as
	 * (x >> 8) is evaluated in full first and only the result is truncated.
	 *
	 * This is internal since it is not meant to be called directly by external clients;
	 * however it is called by public macros in this file and thus must remain stable.
	 * @param leadByte byte value (or expression) in lead byte position
	 * @return the table entry for the low 8 bits of leadByte
	 * @internal
	 */
	#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
	72
	/**
	 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
	 *
	 * Modifies leadByte in place (it must be an lvalue): it is ANDed with a mask
	 * of (6 - countTrailBytes) low bits, e.g. 0x1f for one trail byte, 0x0f for
	 * two and 0x07 for three.
	 *
	 * This is internal since it is not meant to be called directly by external clients;
	 * however it is called by public macros in this file and thus must remain stable.
	 * @param leadByte        lvalue holding the lead byte; overwritten with the masked value
	 * @param countTrailBytes number of trail bytes that follow the lead byte
	 * @internal
	 */
	#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
	81
	82	/**
	83	* Function for handling "next code point" with error-checking.
	84	*
	85	* This is internal since it is not meant to be called directly by external clients;
	86	* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
	87	* file and thus must remain stable, and should not be hidden when other internal
	88	* functions are hidden (otherwise public macros would fail to compile).
	89	* @internal
	90	*/
	91	U_STABLE UChar32 U_EXPORT2
	92	utf8_nextCharSafeBody(const uint8_t s, int32_t pi, int32_t length, UChar32 c, UBool strict);
	93
	94	/**
	95	* Function for handling "append code point" with error-checking.
	96	*
	97	* This is internal since it is not meant to be called directly by external clients;
	98	* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
	99	* file and thus must remain stable, and should not be hidden when other internal
	100	* functions are hidden (otherwise public macros would fail to compile).
	101	* @internal
	102	*/
	103	U_STABLE int32_t U_EXPORT2
	104	utf8_appendCharSafeBody(uint8_t s, int32_t i, int32_t length, UChar32 c, UBool pIsError);
	105
	106	/**
	107	* Function for handling "previous code point" with error-checking.
	108	*
	109	* This is internal since it is not meant to be called directly by external clients;
	110	* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
	111	* file and thus must remain stable, and should not be hidden when other internal
	112	* functions are hidden (otherwise public macros would fail to compile).
	113	* @internal
	114	*/
	115	U_STABLE UChar32 U_EXPORT2
	116	utf8_prevCharSafeBody(const uint8_t s, int32_t start, int32_t pi, UChar32 c, UBool strict);
	117
	118	/**
	119	* Function for handling "skip backward one code point" with error-checking.
	120	*
	121	* This is internal since it is not meant to be called directly by external clients;
	122	* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
	123	* file and thus must remain stable, and should not be hidden when other internal
	124	* functions are hidden (otherwise public macros would fail to compile).
	125	* @internal
	126	*/
	127	U_STABLE int32_t U_EXPORT2
	128	utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
	129
	130	/* single-code point definitions -------------------------------------------- */
	131
	/**
	 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
	 * True exactly when bit 7 of c is clear.
	 * @param c 8-bit code unit (byte)
	 * @return TRUE or FALSE
	 * @stable ICU 2.4
	 */
	#define U8_IS_SINGLE(c) (((c)&0x80)==0)
	139
	/**
	 * Is this code unit (byte) a UTF-8 lead byte?
	 * True for byte values 0xc0..0xfd inclusive: the byte is shifted down by
	 * 0xc0 and truncated to 8 bits, so a single unsigned comparison covers the
	 * whole range.
	 * @param c 8-bit code unit (byte)
	 * @return TRUE or FALSE
	 * @stable ICU 2.4
	 */
	#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
	147
	/**
	 * Is this code unit (byte) a UTF-8 trail byte?
	 * True exactly when the top two bits of c are 10, i.e. for 0x80..0xbf.
	 * @param c 8-bit code unit (byte)
	 * @return TRUE or FALSE
	 * @stable ICU 2.4
	 */
	#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
	155
	/**
	 * How many code units (bytes) are used for the UTF-8 encoding
	 * of this Unicode code point?
	 * The argument is compared as uint32_t, so negative values fall into the
	 * "not a Unicode code point" case. Note that c is evaluated several times.
	 * @param c 32-bit code point
	 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
	 * @stable ICU 2.4
	 */
	#define U8_LENGTH(c) \
	    ((uint32_t)(c)<=0x7f ? 1 : \
	        ((uint32_t)(c)<=0x7ff ? 2 : \
	            ((uint32_t)(c)<=0xd7ff ? 3 : \
	                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
	                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
	                ) \
	            ) \
	        ) \
	    )
	173
	/**
	 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
	 * @return 4
	 * @stable ICU 2.4
	 */
	#define U8_MAX_LENGTH 4
	180
	/**
	 * Get a code point from a string at a random-access offset,
	 * without changing the offset.
	 * The offset may point to either the lead byte or one of the trail bytes
	 * for a code point, in which case the macro will read all of the bytes
	 * for the code point.
	 * The result is undefined if the offset points to an illegal UTF-8
	 * byte sequence.
	 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
	 *
	 * The macro works on a private int32_t copy of i: the copy is first moved
	 * back to the start of the code point and then used to decode it, so the
	 * caller's i is never written.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param c output UChar32 variable
	 * @see U8_GET
	 * @stable ICU 2.4
	 */
	#define U8_GET_UNSAFE(s, i, c) { \
	    int32_t _u8_get_unsafe_index=(int32_t)(i); \
	    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
	    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
	}
	202
	/**
	 * Get a code point from a string at a random-access offset,
	 * without changing the offset.
	 * The offset may point to either the lead byte or one of the trail bytes
	 * for a code point, in which case the macro will read all of the bytes
	 * for the code point.
	 * If the offset points to an illegal UTF-8 byte sequence, then
	 * c is set to a negative value.
	 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
	 *
	 * The macro works on a private int32_t copy of i (adjusted with
	 * U8_SET_CP_START, then consumed by U8_NEXT), so the caller's i is never
	 * written.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset
	 * @param i string offset, must be start<=i<length
	 * @param length string length
	 * @param c output UChar32 variable, set to <0 in case of an error
	 * @see U8_GET_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_GET(s, start, i, length, c) { \
	    int32_t _u8_get_index=(int32_t)(i); \
	    U8_SET_CP_START(s, start, _u8_get_index); \
	    U8_NEXT(s, _u8_get_index, length, c); \
	}
	226
	227	/* definitions with forward iteration --------------------------------------- */
	228
	/**
	 * Get a code point from a string at a code point boundary offset,
	 * and advance the offset to the next code point boundary.
	 * (Post-incrementing forward iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset may point to the lead byte of a multi-byte sequence,
	 * in which case the macro will read the whole sequence.
	 * The result is undefined if the offset points to a trail byte
	 * or an illegal UTF-8 sequence.
	 *
	 * Only bytes 0xc0..0xf4 are treated as lead bytes; any other byte is
	 * returned unchanged as c and the offset advances by one. No string
	 * length is checked.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param c output UChar32 variable
	 * @see U8_NEXT
	 * @stable ICU 2.4
	 */
	#define U8_NEXT_UNSAFE(s, i, c) { \
	    (c)=(uint8_t)(s)[(i)++]; \
	    if((uint8_t)((c)-0xc0)<0x35) { \
	        uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
	        U8_MASK_LEAD_BYTE(c, __count); \
	        switch(__count) { \
	        /* each following branch falls through to the next one */ \
	        case 3: \
	            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
	        case 2: \
	            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
	        case 1: \
	            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
	        /* no other branches to optimize switch() */ \
	            break; \
	        } \
	    } \
	}
	264
	/**
	 * Get a code point from a string at a code point boundary offset,
	 * and advance the offset to the next code point boundary.
	 * (Post-incrementing forward iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The offset may point to the lead byte of a multi-byte sequence,
	 * in which case the macro will read the whole sequence.
	 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
	 * c is set to a negative value.
	 *
	 * ASCII bytes are returned directly. Three-byte sequences with lead bytes
	 * 0xe1..0xec and two-byte sequences with lead bytes 0xc2..0xdf are decoded
	 * inline when all their trail bytes are present and valid. Every other lead
	 * byte goes through utf8_nextCharSafeBody(), and any remaining byte
	 * yields U_SENTINEL.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset, must be i<length
	 * @param length string length
	 * @param c output UChar32 variable, set to <0 in case of an error
	 * @see U8_NEXT_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_NEXT(s, i, length, c) { \
	    (c)=(uint8_t)(s)[(i)++]; \
	    if((c)>=0x80) { \
	        uint8_t __t1, __t2; \
	        if( /* handle U+1000..U+CFFF inline */ \
	            (0xe0<(c) && (c)<=0xec) && \
	            (((i)+1)<(length)) && \
	            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
	            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
	        ) { \
	            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
	            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
	            (i)+=2; \
	        } else if( /* handle U+0080..U+07FF inline */ \
	            ((c)<0xe0 && (c)>=0xc2) && \
	            ((i)<(length)) && \
	            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
	        ) { \
	            (c)=(UChar)((((c)&0x1f)<<6)|__t1); \
	            ++(i); \
	        } else if(U8_IS_LEAD(c)) { \
	            /* function call for "complicated" and error cases */ \
	            (c)=utf8_nextCharSafeBody((const uint8_t *)(s), &(i), (int32_t)(length), (c), -1); \
	        } else { \
	            (c)=U_SENTINEL; \
	        } \
	    } \
	}
	311
	/**
	 * Append a code point to a string, overwriting 1 to 4 bytes.
	 * The offset points to the current end of the string contents
	 * and is advanced (post-increment).
	 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
	 * Otherwise, the result is undefined.
	 *
	 * The lead byte is chosen by the size of c; the shared trail-byte stores
	 * sit after the nested else branches so that longer encodings fall into
	 * the same final writes as shorter ones.
	 *
	 * @param s uint8_t * string buffer (written to, so it must not be const)
	 * @param i string offset
	 * @param c code point to append
	 * @see U8_APPEND
	 * @stable ICU 2.4
	 */
	#define U8_APPEND_UNSAFE(s, i, c) { \
	    if((uint32_t)(c)<=0x7f) { \
	        (s)[(i)++]=(uint8_t)(c); \
	    } else { \
	        if((uint32_t)(c)<=0x7ff) { \
	            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
	        } else { \
	            if((uint32_t)(c)<=0xffff) { \
	                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
	            } else { \
	                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
	                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
	            } \
	            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
	        } \
	        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
	    } \
	}
	343
	/**
	 * Append a code point to a string, overwriting 1 to 4 bytes.
	 * The offset points to the current end of the string contents
	 * and is advanced (post-increment).
	 * "Safe" macro, checks for a valid code point.
	 * If a non-ASCII code point is written, checks for sufficient space in the string.
	 * If the code point is not valid or trail bytes do not fit,
	 * then isError is set to TRUE.
	 *
	 * Code points up to U+007F, up to U+07FF and up to U+D7FF are encoded
	 * inline; the ASCII case writes without consulting capacity, which is why
	 * i<capacity is a precondition. Everything else, including the cases where
	 * the inline capacity checks fail, is delegated to
	 * utf8_appendCharSafeBody(), whose return value becomes the new offset.
	 *
	 * @param s uint8_t * string buffer (written to, so it must not be const)
	 * @param i string offset, must be i<capacity
	 * @param capacity size of the string buffer
	 * @param c code point to append
	 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
	 * @see U8_APPEND_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_APPEND(s, i, capacity, c, isError) { \
	    if((uint32_t)(c)<=0x7f) { \
	        (s)[(i)++]=(uint8_t)(c); \
	    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
	        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
	        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
	    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
	        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
	        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
	        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
	    } else { \
	        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
	    } \
	}
	375
	/**
	 * Advance the string offset from one code point boundary to the next.
	 * (Post-incrementing iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset moves by one plus the trail byte count that
	 * U8_COUNT_TRAIL_BYTES reports for the byte at i; the trail bytes
	 * themselves are not inspected.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_FWD_1
	 * @stable ICU 2.4
	 */
	#define U8_FWD_1_UNSAFE(s, i) { \
	    (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \
	}
	389
	/**
	 * Advance the string offset from one code point boundary to the next.
	 * (Post-incrementing iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The byte at i is always consumed. If it is a lead byte, up to the
	 * expected number of trail bytes are skipped as well; the expected count is
	 * first clamped to what is left before length, and skipping stops at the
	 * first byte that is not a trail byte.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset, must be i<length
	 * @param length string length
	 * @see U8_FWD_1_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_FWD_1(s, i, length) { \
	    uint8_t __b=(uint8_t)(s)[(i)++]; \
	    if(U8_IS_LEAD(__b)) { \
	        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
	        if((i)+__count>(length)) { \
	            __count=(uint8_t)((length)-(i)); \
	        } \
	        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
	            ++(i); \
	            --__count; \
	        } \
	    } \
	}
	414
	/**
	 * Advance the string offset from one code point boundary to the n-th next one,
	 * i.e., move forward by n code points.
	 * (Post-incrementing iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * n is copied into a local int32_t counter once; a count of zero or less
	 * leaves the offset unchanged. No string length is checked.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param n number of code points to skip
	 * @see U8_FWD_N
	 * @stable ICU 2.4
	 */
	#define U8_FWD_N_UNSAFE(s, i, n) { \
	    int32_t __N=(n); \
	    while(__N>0) { \
	        U8_FWD_1_UNSAFE(s, i); \
	        --__N; \
	    } \
	}
	434
	/**
	 * Advance the string offset from one code point boundary to the n-th next one,
	 * i.e., move forward by n code points.
	 * (Post-incrementing iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * n is copied into a local int32_t counter once. The loop stops early when
	 * the offset reaches length, so fewer than n code points may be skipped.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset, must be i<length
	 * @param length string length
	 * @param n number of code points to skip
	 * @see U8_FWD_N_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_FWD_N(s, i, length, n) { \
	    int32_t __N=(n); \
	    while(__N>0 && (i)<(length)) { \
	        U8_FWD_1(s, i, length); \
	        --__N; \
	    } \
	}
	455
	/**
	 * Adjust a random-access offset to a code point boundary
	 * at the start of a code point.
	 * If the offset points to a UTF-8 trail byte,
	 * then the offset is moved backward to the corresponding lead byte.
	 * Otherwise, it is not modified.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset is decremented for as long as the byte at i is a trail byte;
	 * there is no lower bound check.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_SET_CP_START
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_START_UNSAFE(s, i) { \
	    while(U8_IS_TRAIL((s)[i])) { --(i); } \
	}
	472
	/**
	 * Adjust a random-access offset to a code point boundary
	 * at the start of a code point.
	 * If the offset points to a UTF-8 trail byte,
	 * then the offset is moved backward to the corresponding lead byte.
	 * Otherwise, it is not modified.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * Only a trail byte at i triggers any work; the new offset is whatever
	 * utf8_back1SafeBody() returns for (s, start, i).
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<=i
	 * @see U8_SET_CP_START_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_START(s, start, i) { \
	    if(U8_IS_TRAIL((s)[(i)])) { \
	        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
	    } \
	}
	492
	493	/* definitions with backward iteration -------------------------------------- */
	494
	/**
	 * Move the string offset from one code point boundary to the previous one
	 * and get the code point between them.
	 * (Pre-decrementing backward iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The input offset may be the same as the string length.
	 * If the offset is behind a multi-byte sequence, then the macro will read
	 * the whole sequence.
	 * If the offset is behind a lead byte, then that itself
	 * will be returned as the code point.
	 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
	 *
	 * Trail bytes are accumulated from least to most significant, six bits at
	 * a time, until a byte >=0xc0 is reached; that byte is masked according to
	 * the number of trail bytes seen and supplies the top bits. There is no
	 * lower bound check on the offset.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param c output UChar32 variable
	 * @see U8_PREV
	 * @stable ICU 2.4
	 */
	#define U8_PREV_UNSAFE(s, i, c) { \
	    (c)=(uint8_t)(s)[--(i)]; \
	    if(U8_IS_TRAIL(c)) { \
	        uint8_t __b, __count=1, __shift=6; \
	        /* c is a trail byte */ \
	        (c)&=0x3f; \
	        for(;;) { \
	            __b=(uint8_t)(s)[--(i)]; \
	            if(__b>=0xc0) { \
	                U8_MASK_LEAD_BYTE(__b, __count); \
	                (c)|=(UChar32)__b<<__shift; \
	                break; \
	            } else { \
	                (c)|=(UChar32)(__b&0x3f)<<__shift; \
	                ++__count; \
	                __shift+=6; \
	            } \
	        } \
	    } \
	}
	535
	/**
	 * Move the string offset from one code point boundary to the previous one
	 * and get the code point between them.
	 * (Pre-decrementing backward iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The input offset may be the same as the string length.
	 * If the offset is behind a multi-byte sequence, then the macro will read
	 * the whole sequence.
	 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
	 *
	 * ASCII bytes are returned directly. A trail byte (0x80..0xbf) is handed to
	 * utf8_prevCharSafeBody(), which also receives the address of i. Any byte
	 * above 0xbf directly before the offset, i.e. a lead byte with no trail
	 * bytes after it, yields U_SENTINEL.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<i
	 * @param c output UChar32 variable, set to <0 in case of an error
	 * @see U8_PREV_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_PREV(s, start, i, c) { \
	    (c)=(uint8_t)(s)[--(i)]; \
	    if((c)>=0x80) { \
	        if((c)<=0xbf) { \
	            (c)=utf8_prevCharSafeBody((const uint8_t *)(s), (start), &(i), (c), -1); \
	        } else { \
	            (c)=U_SENTINEL; \
	        } \
	    } \
	}
	566
	/**
	 * Move the string offset from one code point boundary to the previous one.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset is decremented at least once and then for as long as the byte
	 * it lands on is a trail byte; there is no lower bound check.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_BACK_1
	 * @stable ICU 2.4
	 */
	#define U8_BACK_1_UNSAFE(s, i) { \
	    while(U8_IS_TRAIL((s)[--(i)])) {} \
	}
	581
	/**
	 * Move the string offset from one code point boundary to the previous one.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The offset is always decremented once. If the byte it then points to is
	 * a trail byte, the offset is replaced by the result of
	 * utf8_back1SafeBody() for (s, start, i).
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<i
	 * @see U8_BACK_1_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_BACK_1(s, start, i) { \
	    if(U8_IS_TRAIL((s)[--(i)])) { \
	        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
	    } \
	}
	599
	/**
	 * Move the string offset from one code point boundary to the n-th one before it,
	 * i.e., move backward by n code points.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * n is copied into a local int32_t counter once; a count of zero or less
	 * leaves the offset unchanged. No start boundary is checked.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param n number of code points to skip
	 * @see U8_BACK_N
	 * @stable ICU 2.4
	 */
	#define U8_BACK_N_UNSAFE(s, i, n) { \
	    int32_t __N=(n); \
	    while(__N>0) { \
	        U8_BACK_1_UNSAFE(s, i); \
	        --__N; \
	    } \
	}
	620
	/**
	 * Move the string offset from one code point boundary to the n-th one before it,
	 * i.e., move backward by n code points.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * n is copied into a local int32_t counter once. The loop stops early when
	 * the offset reaches start, so fewer than n code points may be skipped.
	 *
	 * @param s const uint8_t * string
	 * @param start index of the start of the string
	 * @param i string offset, must be start<i
	 * @param n number of code points to skip
	 * @see U8_BACK_N_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_BACK_N(s, start, i, n) { \
	    int32_t __N=(n); \
	    while(__N>0 && (i)>(start)) { \
	        U8_BACK_1(s, start, i); \
	        --__N; \
	    } \
	}
	642
	/**
	 * Adjust a random-access offset to a code point boundary after a code point.
	 * If the offset is behind a partial multi-byte sequence,
	 * then the offset is incremented to behind the whole sequence.
	 * Otherwise, it is not modified.
	 * The input offset may be the same as the string length.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * Implemented as one step back to the start of the preceding code point
	 * followed by one step forward over it.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_SET_CP_LIMIT
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
	    U8_BACK_1_UNSAFE(s, i); \
	    U8_FWD_1_UNSAFE(s, i); \
	}
	660
	/**
	 * Adjust a random-access offset to a code point boundary after a code point.
	 * If the offset is behind a partial multi-byte sequence,
	 * then the offset is incremented to behind the whole sequence.
	 * Otherwise, it is not modified.
	 * The input offset may be the same as the string length.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * Nothing happens unless start<i<length: an offset at either end of the
	 * string is left alone. Otherwise the offset takes one safe step back
	 * and one safe step forward.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<=i<=length
	 * @param length string length
	 * @see U8_SET_CP_LIMIT_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_LIMIT(s, start, i, length) { \
	    if((start)<(i) && (i)<(length)) { \
	        U8_BACK_1(s, start, i); \
	        U8_FWD_1(s, i, length); \
	    } \
	}
	682
	683	#endif