X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/f3c0d7a59d99c2a94c6b8822291f0e42be3773c9..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/common/unicode/utf16.h diff --git a/icuSources/common/unicode/utf16.h b/icuSources/common/unicode/utf16.h index 9cc73f7b..f3bdb823 100644 --- a/icuSources/common/unicode/utf16.h +++ b/icuSources/common/unicode/utf16.h @@ -163,7 +163,7 @@ * @see U16_GET * @stable ICU 2.4 */ -#define U16_GET_UNSAFE(s, i, c) { \ +#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[i]; \ if(U16_IS_SURROGATE(c)) { \ if(U16_IS_SURROGATE_LEAD(c)) { \ @@ -172,7 +172,7 @@ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a random-access offset, @@ -185,8 +185,8 @@ * * The length can be negative for a NUL-terminated string. * - * If the offset points to a single, unpaired surrogate, then that itself - * will be returned as the code point. + * If the offset points to a single, unpaired surrogate, then + * c is set to that unpaired surrogate. * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. * * @param s const UChar * string @@ -197,21 +197,64 @@ * @see U16_GET_UNSAFE * @stable ICU 2.4 */ -#define U16_GET(s, start, i, length, c) { \ +#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2 = 0; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ } \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /* definitions with forward iteration --------------------------------------- */ @@ -234,12 +277,12 @@ * @see U16_NEXT * @stable ICU 2.4 */ -#define U16_NEXT_UNSAFE(s, i, c) { \ +#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[(i)++]; \ if(U16_IS_LEAD(c)) { \ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Get a code point from a string at a code point boundary offset, @@ -253,8 +296,7 @@ * for a supplementary code point, in which case the macro will read * the following trail surrogate as well. * If the offset points to a trail surrogate or - * to a single, unpaired lead surrogate, then that itself - * will be returned as the code point. + * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. * * @param s const UChar * string * @param i string offset, must be i>10)+0xd7c0); \ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Append a code point to a string, overwriting 1 or 2 code units. @@ -313,7 +389,7 @@ * @see U16_APPEND_UNSAFE * @stable ICU 2.4 */ -#define U16_APPEND(s, i, capacity, c, isError) { \ +#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ if((uint32_t)(c)<=0xffff) { \ (s)[(i)++]=(uint16_t)(c); \ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ @@ -322,7 +398,7 @@ } else /* c>0x10ffff or not enough space */ { \ (isError)=TRUE; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. @@ -334,11 +410,11 @@ * @see U16_FWD_1 * @stable ICU 2.4 */ -#define U16_FWD_1_UNSAFE(s, i) { \ +#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)++])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the next. @@ -353,11 +429,11 @@ * @see U16_FWD_1_UNSAFE * @stable ICU 2.4 */ -#define U16_FWD_1(s, i, length) { \ +#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, @@ -371,13 +447,13 @@ * @see U16_FWD_N * @stable ICU 2.4 */ -#define U16_FWD_N_UNSAFE(s, i, n) { \ +#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U16_FWD_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Advance the string offset from one code point boundary to the n-th next one, @@ -394,13 +470,13 @@ * @see U16_FWD_N_UNSAFE * @stable ICU 2.4 */ -#define U16_FWD_N(s, i, length, n) { \ +#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ U16_FWD_1(s, i, length); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary @@ -415,11 +491,11 @@ * @see U16_SET_CP_START * @stable ICU 2.4 */ -#define U16_SET_CP_START_UNSAFE(s, i) { \ +#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[i])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary @@ -435,11 +511,11 @@ * @see U16_SET_CP_START_UNSAFE * @stable ICU 2.4 */ -#define U16_SET_CP_START(s, start, i) { \ +#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /* definitions with backward iteration -------------------------------------- */ @@ -463,12 +539,12 @@ * @see U16_PREV * @stable ICU 2.4 */ -#define U16_PREV_UNSAFE(s, i, c) { \ +#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_TRAIL(c)) { \ (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one @@ -481,8 +557,7 @@ * for a supplementary code point, then the macro will read * the preceding lead surrogate as well. * If the offset is behind a lead surrogate or behind a single, unpaired - * trail surrogate, then that itself - * will be returned as the code point. + * trail surrogate, then c is set to that unpaired surrogate. * * @param s const UChar * string * @param start starting string offset (usually 0) @@ -491,16 +566,49 @@ * @see U16_PREV_UNSAFE * @stable ICU 2.4 */ -#define U16_PREV(s, start, i, c) { \ +#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ (c)=(s)[--(i)]; \ if(U16_IS_TRAIL(c)) { \ - uint16_t __c2; \ + uint16_t __c2 = 0; \ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ --(i); \ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to U+FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. @@ -513,11 +621,11 @@ * @see U16_BACK_1 * @stable ICU 2.4 */ -#define U16_BACK_1_UNSAFE(s, i) { \ +#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[--(i)])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the previous one. @@ -531,11 +639,11 @@ * @see U16_BACK_1_UNSAFE * @stable ICU 2.4 */ -#define U16_BACK_1(s, start, i) { \ +#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ --(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, @@ -550,13 +658,13 @@ * @see U16_BACK_N * @stable ICU 2.4 */ -#define U16_BACK_N_UNSAFE(s, i, n) { \ +#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0) { \ U16_BACK_1_UNSAFE(s, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Move the string offset from one code point boundary to the n-th one before it, @@ -572,13 +680,13 @@ * @see U16_BACK_N_UNSAFE * @stable ICU 2.4 */ -#define U16_BACK_N(s, start, i, n) { \ +#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ int32_t __N=(n); \ while(__N>0 && (i)>(start)) { \ U16_BACK_1(s, start, i); \ --__N; \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. @@ -593,11 +701,11 @@ * @see U16_SET_CP_LIMIT * @stable ICU 2.4 */ -#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ if(U16_IS_LEAD((s)[(i)-1])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END /** * Adjust a random-access offset to a code point boundary after a code point. @@ -616,10 +724,10 @@ * @see U16_SET_CP_LIMIT_UNSAFE * @stable ICU 2.4 */ -#define U16_SET_CP_LIMIT(s, start, i, length) { \ +#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ ++(i); \ } \ -} +} UPRV_BLOCK_MACRO_END #endif