git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/unicode/utf8.h

... / ...

Commit	Line	Data
	1	/*
	2	*******************************************************************************
	3	*
	4	* Copyright (C) 1999-2007, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: utf8.h
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 1999sep13
	14	* created by: Markus W. Scherer
	15	*/
	16
	17	/**
	18	* \file
	19	* \brief C API: 8-bit Unicode handling macros
	20	*
	21	* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
	22	* utf8.h is included by utf.h after unicode/umachine.h
	23	* and some common definitions.
	24	*
	25	* For more information see utf.h and the ICU User Guide Strings chapter
	26	* (http://icu-project.org/userguide/strings.html).
	27	*
	28	* <em>Usage:</em>
	29	* ICU coding guidelines for if() statements should be followed when using these macros.
	30	* Compound statements (curly braces {}) must be used for if-else-while...
	31	* bodies and all macro statements should be terminated with semicolon.
	32	*/
	33
	34	#ifndef __UTF8_H__
	35	#define __UTF8_H__
	36
	37	/* utf.h must be included first. */
	38	#ifndef __UTF_H__
	39	# include "unicode/utf.h"
	40	#endif
	41
	42	/* internal definitions ----------------------------------------------------- */
	43
	44	/**
	45	* \var utf8_countTrailBytes
	46	* Internal array with numbers of trail bytes for any given byte used in
	47	* lead byte position.
	48	* @internal
	49	*/
	50	#ifdef U_UTF8_IMPL
	51	U_EXPORT const uint8_t
	52	#elif defined(U_STATIC_IMPLEMENTATION) \|\| defined(U_COMMON_IMPLEMENTATION)
	53	U_CFUNC const uint8_t
	54	#else
	55	U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? / /U_IMPORT*/
	56	#endif
	57	utf8_countTrailBytes[256];
	58
	/**
	 * Count the trail bytes for a UTF-8 lead byte.
	 * Looks the byte up in utf8_countTrailBytes[].
	 * The argument is parenthesized before the cast to uint8_t so that
	 * an expression argument is evaluated completely and only then truncated.
	 * @param leadByte byte value in lead byte position
	 * @return the table entry for that byte
	 * @internal
	 */
	#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
	64
	/**
	 * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
	 * Modifies leadByte in place (compound assignment), keeping its lowest
	 * 6-countTrailBytes bits.
	 * @param leadByte modifiable variable holding the lead byte
	 * @param countTrailBytes number of trail bytes that follow the lead byte
	 * @internal
	 */
	#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
	70
	71	/**
	72	* Function for handling "next code point" with error-checking.
	73	* @internal
	74	*/
	75	U_INTERNAL UChar32 U_EXPORT2
	76	utf8_nextCharSafeBody(const uint8_t s, int32_t pi, int32_t length, UChar32 c, UBool strict);
	77
	78	/**
	79	* Function for handling "append code point" with error-checking.
	80	* @internal
	81	*/
	82	U_INTERNAL int32_t U_EXPORT2
	83	utf8_appendCharSafeBody(uint8_t s, int32_t i, int32_t length, UChar32 c, UBool pIsError);
	84
	85	/**
	86	* Function for handling "previous code point" with error-checking.
	87	* @internal
	88	*/
	89	U_INTERNAL UChar32 U_EXPORT2
	90	utf8_prevCharSafeBody(const uint8_t s, int32_t start, int32_t pi, UChar32 c, UBool strict);
	91
	92	/**
	93	* Function for handling "skip backward one code point" with error-checking.
	94	* @internal
	95	*/
	96	U_INTERNAL int32_t U_EXPORT2
	97	utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
	98
	99	/* single-code point definitions -------------------------------------------- */
	100
	/**
	 * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
	 * True when bit 7 of the value is clear.
	 * @param c 8-bit code unit (byte)
	 * @return TRUE or FALSE
	 * @stable ICU 2.4
	 */
	#define U8_IS_SINGLE(c) (((c)&0x80)==0)
	108
	/**
	 * Is this code unit (byte) a UTF-8 lead byte?
	 * True for the byte values 0xc0..0xfd: the value is shifted down by 0xc0,
	 * truncated to 8 bits and compared against 0x3e, which checks both
	 * range ends with a single comparison.
	 * @param c 8-bit code unit (byte)
	 * @return TRUE or FALSE
	 * @stable ICU 2.4
	 */
	#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
	116
	/**
	 * Is this code unit (byte) a UTF-8 trail byte?
	 * True when the two top bits of the byte are 10 (values 0x80..0xbf).
	 * @param c 8-bit code unit (byte)
	 * @return TRUE or FALSE
	 * @stable ICU 2.4
	 */
	#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
	124
	/**
	 * How many code units (bytes) are used for the UTF-8 encoding
	 * of this Unicode code point?
	 * The argument is compared as uint32_t, so negative values fall into
	 * the "not a Unicode code point" case.
	 * @param c 32-bit code point
	 * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
	 * @stable ICU 2.4
	 */
	#define U8_LENGTH(c) \
	    ((uint32_t)(c)<=0x7f ? 1 : \
	        ((uint32_t)(c)<=0x7ff ? 2 : \
	            ((uint32_t)(c)<=0xd7ff ? 3 : \
	                ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
	                    ((uint32_t)(c)<=0xffff ? 3 : 4)\
	                ) \
	            ) \
	        ) \
	    )
	142
	/**
	 * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
	 * Usable as a compile-time constant, for example as an array size.
	 * @return 4
	 * @stable ICU 2.4
	 */
	#define U8_MAX_LENGTH 4
	149
	/**
	 * Get a code point from a string at a random-access offset,
	 * without changing the offset.
	 * The offset may point to either the lead byte or one of the trail bytes
	 * for a code point, in which case the macro will read all of the bytes
	 * for the code point.
	 * The result is undefined if the offset points to an illegal UTF-8
	 * byte sequence.
	 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
	 *
	 * The macro works on a private int32_t copy of the offset: it first moves
	 * the copy back to the start of the code point and then reads forward.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param c output UChar32 variable
	 * @see U8_GET
	 * @stable ICU 2.4
	 */
	#define U8_GET_UNSAFE(s, i, c) { \
	    int32_t _u8_get_unsafe_index=(int32_t)(i); \
	    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
	    U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
	}
	171
	/**
	 * Get a code point from a string at a random-access offset,
	 * without changing the offset.
	 * The offset may point to either the lead byte or one of the trail bytes
	 * for a code point, in which case the macro will read all of the bytes
	 * for the code point.
	 * If the offset points to an illegal UTF-8 byte sequence, then
	 * c is set to a negative value.
	 * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
	 *
	 * The macro works on a private int32_t copy of the offset: it first moves
	 * the copy back to the start of the code point and then reads forward.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset
	 * @param i string offset, must be start<=i<length
	 * @param length string length
	 * @param c output UChar32 variable, set to <0 in case of an error
	 * @see U8_GET_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_GET(s, start, i, length, c) { \
	    int32_t _u8_get_index=(int32_t)(i); \
	    U8_SET_CP_START(s, start, _u8_get_index); \
	    U8_NEXT(s, _u8_get_index, length, c); \
	}
	195
	196	/* definitions with forward iteration --------------------------------------- */
	197
	/**
	 * Get a code point from a string at a code point boundary offset,
	 * and advance the offset to the next code point boundary.
	 * (Post-incrementing forward iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset may point to the lead byte of a multi-byte sequence,
	 * in which case the macro will read the whole sequence.
	 * The result is undefined if the offset points to a trail byte
	 * or an illegal UTF-8 sequence.
	 *
	 * The first byte is read into c. If it lies in 0xc0..0xf4, its trail byte
	 * count is looked up, the lead byte is masked down to its payload bits,
	 * and each trail byte contributes its low 6 bits.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param c output UChar32 variable
	 * @see U8_NEXT
	 * @stable ICU 2.4
	 */
	#define U8_NEXT_UNSAFE(s, i, c) { \
	    (c)=(uint8_t)(s)[(i)++]; \
	    if((uint8_t)((c)-0xc0)<0x35) { \
	        uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
	        U8_MASK_LEAD_BYTE(c, __count); \
	        switch(__count) { \
	        /* each following branch falls through to the next one */ \
	        case 3: \
	            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
	        case 2: \
	            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
	        case 1: \
	            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
	        /* no other branches to optimize switch() */ \
	            break; \
	        } \
	    } \
	}
	233
	/**
	 * Get a code point from a string at a code point boundary offset,
	 * and advance the offset to the next code point boundary.
	 * (Post-incrementing forward iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The offset may point to the lead byte of a multi-byte sequence,
	 * in which case the macro will read the whole sequence.
	 * If the offset points to a trail byte or an illegal UTF-8 sequence, then
	 * c is set to a negative value.
	 *
	 * Two common cases are decoded inline: three-byte sequences with a lead
	 * byte in 0xe1..0xec and two-byte sequences with a lead byte in 0xc2..0xdf.
	 * Any other lead byte is passed to utf8_nextCharSafeBody();
	 * a non-ASCII byte that is not a lead byte yields U_SENTINEL.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset, must be i<length
	 * @param length string length
	 * @param c output UChar32 variable, set to <0 in case of an error
	 * @see U8_NEXT_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_NEXT(s, i, length, c) { \
	    (c)=(uint8_t)(s)[(i)++]; \
	    if((c)>=0x80) { \
	        uint8_t __t1, __t2; \
	        if( /* handle U+1000..U+CFFF inline */ \
	            (0xe0<(c) && (c)<=0xec) && \
	            (((i)+1)<(length)) && \
	            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
	            (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
	        ) { \
	            /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
	            (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
	            (i)+=2; \
	        } else if( /* handle U+0080..U+07FF inline */ \
	            ((c)<0xe0 && (c)>=0xc2) && \
	            ((i)<(length)) && \
	            (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
	        ) { \
	            (c)=(UChar)((((c)&0x1f)<<6)|__t1); \
	            ++(i); \
	        } else if(U8_IS_LEAD(c)) { \
	            /* function call for "complicated" and error cases */ \
	            (c)=utf8_nextCharSafeBody((const uint8_t *)(s), &(i), (int32_t)(length), c, -1); \
	        } else { \
	            (c)=U_SENTINEL; \
	        } \
	    } \
	}
	280
	/**
	 * Append a code point to a string, overwriting 1 to 4 bytes.
	 * The offset points to the current end of the string contents
	 * and is advanced (post-increment).
	 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
	 * Otherwise, the result is undefined.
	 *
	 * The lead byte is chosen by the size of c; the nested else branches
	 * then emit the remaining trail bytes, so the last two stores are shared
	 * between the 3-byte and 4-byte forms and the final store by all
	 * multi-byte forms.
	 *
	 * @param s uint8_t * string buffer (written to)
	 * @param i string offset
	 * @param c code point to append
	 * @see U8_APPEND
	 * @stable ICU 2.4
	 */
	#define U8_APPEND_UNSAFE(s, i, c) { \
	    if((uint32_t)(c)<=0x7f) { \
	        (s)[(i)++]=(uint8_t)(c); \
	    } else { \
	        if((uint32_t)(c)<=0x7ff) { \
	            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
	        } else { \
	            if((uint32_t)(c)<=0xffff) { \
	                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
	            } else { \
	                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
	                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
	            } \
	            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
	        } \
	        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
	    } \
	}
	312
	/**
	 * Append a code point to a string, overwriting 1 to 4 bytes.
	 * The offset points to the current end of the string contents
	 * and is advanced (post-increment).
	 * "Safe" macro, checks for a valid code point.
	 * If a non-ASCII code point is written, checks for sufficient space in the string.
	 * If the code point is not valid or trail bytes do not fit,
	 * then isError is set to TRUE.
	 *
	 * Code points up to U+D7FF that fit into the buffer are written inline;
	 * everything else, including the error cases, is handled by
	 * utf8_appendCharSafeBody(), whose return value becomes the new offset.
	 *
	 * @param s uint8_t * string buffer (written to)
	 * @param i string offset, must be i<capacity
	 * @param capacity size of the string buffer
	 * @param c code point to append
	 * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
	 * @see U8_APPEND_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_APPEND(s, i, capacity, c, isError) { \
	    if((uint32_t)(c)<=0x7f) { \
	        (s)[(i)++]=(uint8_t)(c); \
	    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
	        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
	        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
	    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
	        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
	        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
	        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
	    } else { \
	        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
	    } \
	}
	344
	/**
	 * Advance the string offset from one code point boundary to the next.
	 * (Post-incrementing iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset is increased by one plus the trail byte count looked up
	 * for the byte at the current offset.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_FWD_1
	 * @stable ICU 2.4
	 */
	#define U8_FWD_1_UNSAFE(s, i) { \
	    (i)+=1+U8_COUNT_TRAIL_BYTES((s)[(i)]); \
	}
	358
	/**
	 * Advance the string offset from one code point boundary to the next.
	 * (Post-incrementing iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * After the first byte has been consumed, at most as many bytes as the
	 * lead byte announces are skipped, limited to the bytes left before
	 * length, and only while they really are trail bytes.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset, must be i<length
	 * @param length string length
	 * @see U8_FWD_1_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_FWD_1(s, i, length) { \
	    uint8_t __b=(uint8_t)(s)[(i)++]; \
	    if(U8_IS_LEAD(__b)) { \
	        uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
	        if((i)+__count>(length)) { \
	            __count=(uint8_t)((length)-(i)); \
	        } \
	        while(__count>0 && U8_IS_TRAIL((s)[i])) { \
	            ++(i); \
	            --__count; \
	        } \
	    } \
	}
	383
	/**
	 * Advance the string offset from one code point boundary to the n-th next one,
	 * i.e., move forward by n code points.
	 * (Post-incrementing iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * n is evaluated once into a local counter; nothing happens for n<=0.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param n number of code points to skip
	 * @see U8_FWD_N
	 * @stable ICU 2.4
	 */
	#define U8_FWD_N_UNSAFE(s, i, n) { \
	    int32_t __N=(n); \
	    while(__N>0) { \
	        U8_FWD_1_UNSAFE(s, i); \
	        --__N; \
	    } \
	}
	403
	/**
	 * Advance the string offset from one code point boundary to the n-th next one,
	 * i.e., move forward by n code points.
	 * (Post-incrementing iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * n is evaluated once into a local counter; the loop also stops as soon
	 * as the offset reaches length.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset, must be i<length
	 * @param length string length
	 * @param n number of code points to skip
	 * @see U8_FWD_N_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_FWD_N(s, i, length, n) { \
	    int32_t __N=(n); \
	    while(__N>0 && (i)<(length)) { \
	        U8_FWD_1(s, i, length); \
	        --__N; \
	    } \
	}
	424
	/**
	 * Adjust a random-access offset to a code point boundary
	 * at the start of a code point.
	 * If the offset points to a UTF-8 trail byte,
	 * then the offset is moved backward to the corresponding lead byte.
	 * Otherwise, it is not modified.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The loop has no lower bound check: it steps back for as long as the
	 * byte at the offset is a trail byte.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_SET_CP_START
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_START_UNSAFE(s, i) { \
	    while(U8_IS_TRAIL((s)[i])) { --(i); } \
	}
	441
	/**
	 * Adjust a random-access offset to a code point boundary
	 * at the start of a code point.
	 * If the offset points to a UTF-8 trail byte,
	 * then the offset is moved backward to the corresponding lead byte.
	 * Otherwise, it is not modified.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The backward search itself is done by utf8_back1SafeBody(),
	 * whose return value becomes the new offset.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<=i
	 * @see U8_SET_CP_START_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_START(s, start, i) { \
	    if(U8_IS_TRAIL((s)[(i)])) { \
	        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
	    } \
	}
	461
	462	/* definitions with backward iteration -------------------------------------- */
	463
	/**
	 * Move the string offset from one code point boundary to the previous one
	 * and get the code point between them.
	 * (Pre-decrementing backward iteration.)
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The input offset may be the same as the string length.
	 * If the offset is behind a multi-byte sequence, then the macro will read
	 * the whole sequence.
	 * If the offset is behind a lead byte, then that itself
	 * will be returned as the code point.
	 * The result is undefined if the offset is behind an illegal UTF-8 sequence.
	 *
	 * Trail bytes are collected from last to first, each shifted 6 bits
	 * further to the left, until a byte >=0xc0 is found; that byte is
	 * masked as a lead byte for the number of trail bytes seen so far.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param c output UChar32 variable
	 * @see U8_PREV
	 * @stable ICU 2.4
	 */
	#define U8_PREV_UNSAFE(s, i, c) { \
	    (c)=(uint8_t)(s)[--(i)]; \
	    if(U8_IS_TRAIL(c)) { \
	        uint8_t __b, __count=1, __shift=6; \
	\
	        /* c is a trail byte */ \
	        (c)&=0x3f; \
	        for(;;) { \
	            __b=(uint8_t)(s)[--(i)]; \
	            if(__b>=0xc0) { \
	                U8_MASK_LEAD_BYTE(__b, __count); \
	                (c)|=(UChar32)__b<<__shift; \
	                break; \
	            } else { \
	                (c)|=(UChar32)(__b&0x3f)<<__shift; \
	                ++__count; \
	                __shift+=6; \
	            } \
	        } \
	    } \
	}
	504
	/**
	 * Move the string offset from one code point boundary to the previous one
	 * and get the code point between them.
	 * (Pre-decrementing backward iteration.)
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The input offset may be the same as the string length.
	 * If the offset is behind a multi-byte sequence, then the macro will read
	 * the whole sequence.
	 * If the byte before the offset is a trail byte (0x80..0xbf), the
	 * sequence is decoded by utf8_prevCharSafeBody().
	 * If that byte is 0xc0 or above (for example a lead byte that is not
	 * followed by its trail bytes), c is set to U_SENTINEL.
	 * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<i
	 * @param c output UChar32 variable, set to <0 in case of an error
	 * @see U8_PREV_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_PREV(s, start, i, c) { \
	    (c)=(uint8_t)(s)[--(i)]; \
	    if((c)>=0x80) { \
	        if((c)<=0xbf) { \
	            (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
	        } else { \
	            (c)=U_SENTINEL; \
	        } \
	    } \
	}
	535
	/**
	 * Move the string offset from one code point boundary to the previous one.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * The offset is decremented at least once and then for as long as the
	 * byte it lands on is a trail byte; there is no lower bound check.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_BACK_1
	 * @stable ICU 2.4
	 */
	#define U8_BACK_1_UNSAFE(s, i) { \
	    while(U8_IS_TRAIL((s)[--(i)])) {} \
	}
	550
	/**
	 * Move the string offset from one code point boundary to the previous one.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * The offset is decremented once; if it then points to a trail byte,
	 * utf8_back1SafeBody() determines the final offset.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<i
	 * @see U8_BACK_1_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_BACK_1(s, start, i) { \
	    if(U8_IS_TRAIL((s)[--(i)])) { \
	        (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
	    } \
	}
	568
	/**
	 * Move the string offset from one code point boundary to the n-th one before it,
	 * i.e., move backward by n code points.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * n is evaluated once into a local counter; nothing happens for n<=0.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @param n number of code points to skip
	 * @see U8_BACK_N
	 * @stable ICU 2.4
	 */
	#define U8_BACK_N_UNSAFE(s, i, n) { \
	    int32_t __N=(n); \
	    while(__N>0) { \
	        U8_BACK_1_UNSAFE(s, i); \
	        --__N; \
	    } \
	}
	589
	/**
	 * Move the string offset from one code point boundary to the n-th one before it,
	 * i.e., move backward by n code points.
	 * (Pre-decrementing backward iteration.)
	 * The input offset may be the same as the string length.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * n is evaluated once into a local counter; the loop also stops as soon
	 * as the offset reaches start.
	 *
	 * @param s const uint8_t * string
	 * @param start index of the start of the string
	 * @param i string offset, must be start<i
	 * @param n number of code points to skip
	 * @see U8_BACK_N_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_BACK_N(s, start, i, n) { \
	    int32_t __N=(n); \
	    while(__N>0 && (i)>(start)) { \
	        U8_BACK_1(s, start, i); \
	        --__N; \
	    } \
	}
	611
	/**
	 * Adjust a random-access offset to a code point boundary after a code point.
	 * If the offset is behind a partial multi-byte sequence,
	 * then the offset is incremented to behind the whole sequence.
	 * Otherwise, it is not modified.
	 * The input offset may be the same as the string length.
	 * "Unsafe" macro, assumes well-formed UTF-8.
	 *
	 * Implemented as one step back to the start of the preceding code point
	 * followed by one step forward over that whole code point.
	 *
	 * @param s const uint8_t * string
	 * @param i string offset
	 * @see U8_SET_CP_LIMIT
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \
	    U8_BACK_1_UNSAFE(s, i); \
	    U8_FWD_1_UNSAFE(s, i); \
	}
	629
	/**
	 * Adjust a random-access offset to a code point boundary after a code point.
	 * If the offset is behind a partial multi-byte sequence,
	 * then the offset is incremented to behind the whole sequence.
	 * Otherwise, it is not modified.
	 * The input offset may be the same as the string length.
	 * "Safe" macro, checks for illegal sequences and for string boundaries.
	 *
	 * Nothing is done when the offset is at start or at/after length;
	 * otherwise the macro steps back one code point and forward again.
	 *
	 * @param s const uint8_t * string
	 * @param start starting string offset (usually 0)
	 * @param i string offset, must be start<=i<=length
	 * @param length string length
	 * @see U8_SET_CP_LIMIT_UNSAFE
	 * @stable ICU 2.4
	 */
	#define U8_SET_CP_LIMIT(s, start, i, length) { \
	    if((start)<(i) && (i)<(length)) { \
	        U8_BACK_1(s, start, i); \
	        U8_FWD_1(s, i, length); \
	    } \
	}
	651
	652	#endif