X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..HEAD:/icuSources/common/utrie2.h diff --git a/icuSources/common/utrie2.h b/icuSources/common/utrie2.h index 1bac06ac..671f44e1 100644 --- a/icuSources/common/utrie2.h +++ b/icuSources/common/utrie2.h @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * -* Copyright (C) 2001-2011, International Business Machines +* Copyright (C) 2001-2014, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: utrie2.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -18,8 +20,8 @@ #define __UTRIE2_H__ #include "unicode/utypes.h" +#include "unicode/utf8.h" #include "putilimp.h" -#include "udataswp.h" U_CDECL_BEGIN @@ -52,6 +54,8 @@ typedef struct UTrie UTrie; * is truncated, omitting both the BMP portion and the high range. * - There is a special small index for 2-byte UTF-8, and the initial data * entries are designed for fast 1/2-byte UTF-8 lookup. + * Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all, + * and the associated 2-byte indexes are unused. */ /** @@ -319,46 +323,12 @@ utrie2_isFrozen(const UTrie2 *trie); * @see utrie2_openFromSerialized() */ U_CAPI int32_t U_EXPORT2 -utrie2_serialize(UTrie2 *trie, +utrie2_serialize(const UTrie2 *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); /* Public UTrie2 API: miscellaneous functions ------------------------------- */ -/** - * Get the UTrie version from 32-bit-aligned memory containing the serialized form - * of either a UTrie (version 1) or a UTrie2 (version 2). - * - * @param data a pointer to 32-bit-aligned memory containing the serialized form - * of a UTrie, version 1 or 2 - * @param length the number of bytes available at data; - * can be more than necessary (see return value) - * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. - * If TRUE, opposite-endian serialized forms are recognized as well. - * @return the UTrie version of the serialized form, or 0 if it is not - * recognized as a serialized UTrie - */ -U_CAPI int32_t U_EXPORT2 -utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk); - -/** - * Swap a serialized UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swap a serialized UTrie or UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - /** * Build a UTrie2 (version 2) from a UTrie (version 1). * Enumerates all values in the UTrie and builds a UTrie2 with the same values. @@ -659,19 +629,6 @@ public: const UChar *limit; }; -class UTrie2Singleton { -public: - UTrie2Singleton(SimpleSingleton &s) : singleton(s) {} - void deleteInstance() { - utrie2_close((UTrie2 *)singleton.fInstance); - singleton.reset(); - } - UTrie2 *getInstance(InstantiatorFn *instantiator, const void *context, - UErrorCode &errorCode); -private: - SimpleSingleton &singleton; -}; - U_NAMESPACE_END #endif @@ -717,6 +674,10 @@ struct UTrie2 { UBool padding1; int16_t padding2; UNewTrie2 *newTrie; /* builder object; NULL when frozen */ + +#ifdef UTRIE2_DEBUG + const char *name; +#endif }; /** @@ -910,7 +871,7 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)] /** Internal next-post-increment: get the next code point (c) and its data. */ -#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) { \ +#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \ { \ uint16_t __c2; \ (c)=*(src)++; \ @@ -924,10 +885,10 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal pre-decrement-previous: get the previous code point (c) and its data */ -#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) { \ +#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \ { \ uint16_t __c2; \ (c)=*--(src); \ @@ -939,34 +900,34 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal UTF-8 next-post-increment: get the next code point's data. */ -#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \ +#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \ uint8_t __lead=(uint8_t)*(src)++; \ - if(__lead<0xc0) { \ + if(U8_IS_SINGLE(__lead)) { \ (result)=(trie)->ascii[__lead]; \ } else { \ uint8_t __t1, __t2; \ - if( /* handle U+0000..U+07FF inline */ \ - __lead<0xe0 && (src)<(limit) && \ - (__t1=(uint8_t)(*(src)-0x80))<=0x3f \ - ) { \ - ++(src); \ - (result)=(trie)->data[ \ - (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \ - __t1]; \ - } else if( /* handle U+0000..U+CFFF inline */ \ - __lead<0xed && ((src)+1)<(limit) && \ - (__t1=(uint8_t)(*(src)-0x80))<=0x3f && (__lead>0xe0 || __t1>=0x20) && \ + if( /* handle U+0800..U+FFFF inline */ \ + 0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \ + U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \ (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \ ) { \ (src)+=2; \ (result)=(trie)->data[ \ ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \ - (__t1<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \ + ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \ <=0xc2 && (src)<(limit) && \ + (__t1=(uint8_t)(*(src)-0x80))<=0x3f \ + ) { \ + ++(src); \ + (result)=(trie)->data[ \ + (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \ + __t1]; \ } else { \ int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \ (const uint8_t *)(limit)); \ @@ -974,12 +935,12 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (result)=(trie)->data[__index>>3]; \ } \ } \ -} +} UPRV_BLOCK_MACRO_END /** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */ -#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \ +#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \ uint8_t __b=(uint8_t)*--(src); \ - if(__b<0x80) { \ + if(U8_IS_SINGLE(__b)) { \ (result)=(trie)->ascii[__b]; \ } else { \ int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \ @@ -987,15 +948,8 @@ utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, (src)-=__index&7; \ (result)=(trie)->data[__index>>3]; \ } \ -} +} UPRV_BLOCK_MACRO_END U_CDECL_END -/** - * Work around MSVC 2003 optimization bugs. - */ -#if defined (U_HAVE_MSVC_2003_OR_EARLIER) -#pragma optimize("", off) -#endif - #endif