+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
-* Copyright (C) 2001-2011, International Business Machines
+* Copyright (C) 2001-2014, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: utrie2.h
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#define __UTRIE2_H__
#include "unicode/utypes.h"
+#include "unicode/utf8.h"
#include "putilimp.h"
-#include "udataswp.h"
U_CDECL_BEGIN
* is truncated, omitting both the BMP portion and the high range.
* - There is a special small index for 2-byte UTF-8, and the initial data
* entries are designed for fast 1/2-byte UTF-8 lookup.
+ * Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all,
+ * and the associated 2-byte indexes are unused.
*/
/**
* @see utrie2_openFromSerialized()
*/
U_CAPI int32_t U_EXPORT2
-utrie2_serialize(UTrie2 *trie,
+utrie2_serialize(const UTrie2 *trie,
void *data, int32_t capacity,
UErrorCode *pErrorCode);
/* Public UTrie2 API: miscellaneous functions ------------------------------- */
-/**
- * Get the UTrie version from 32-bit-aligned memory containing the serialized form
- * of either a UTrie (version 1) or a UTrie2 (version 2).
- *
- * @param data a pointer to 32-bit-aligned memory containing the serialized form
- * of a UTrie, version 1 or 2
- * @param length the number of bytes available at data;
- * can be more than necessary (see return value)
- * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized.
- * If TRUE, opposite-endian serialized forms are recognized as well.
- * @return the UTrie version of the serialized form, or 0 if it is not
- * recognized as a serialized UTrie
- */
-U_CAPI int32_t U_EXPORT2
-utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk);
-
-/**
- * Swap a serialized UTrie2.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-utrie2_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
-/**
- * Swap a serialized UTrie or UTrie2.
- * @internal
- */
-U_CAPI int32_t U_EXPORT2
-utrie2_swapAnyVersion(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode);
-
/**
* Build a UTrie2 (version 2) from a UTrie (version 1).
* Enumerates all values in the UTrie and builds a UTrie2 with the same values.
const UChar *limit;
};
-class UTrie2Singleton {
-public:
- UTrie2Singleton(SimpleSingleton &s) : singleton(s) {}
- void deleteInstance() {
- utrie2_close((UTrie2 *)singleton.fInstance);
- singleton.reset();
- }
- UTrie2 *getInstance(InstantiatorFn *instantiator, const void *context,
- UErrorCode &errorCode);
-private:
- SimpleSingleton &singleton;
-};
-
U_NAMESPACE_END
#endif
UBool padding1;
int16_t padding2;
UNewTrie2 *newTrie; /* builder object; NULL when frozen */
+
+#ifdef UTRIE2_DEBUG
+ const char *name;
+#endif
};
/**
(trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)]
/** Internal next-post-increment: get the next code point (c) and its data. */
-#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) { \
+#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
{ \
uint16_t __c2; \
(c)=*(src)++; \
(result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
} \
} \
-}
+} UPRV_BLOCK_MACRO_END
/** Internal pre-decrement-previous: get the previous code point (c) and its data */
-#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) { \
+#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
{ \
uint16_t __c2; \
(c)=*--(src); \
(result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \
} \
} \
-}
+} UPRV_BLOCK_MACRO_END
/** Internal UTF-8 next-post-increment: get the next code point's data. */
-#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) { \
+#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
uint8_t __lead=(uint8_t)*(src)++; \
- if(__lead<0xc0) { \
+ if(U8_IS_SINGLE(__lead)) { \
(result)=(trie)->ascii[__lead]; \
} else { \
uint8_t __t1, __t2; \
- if( /* handle U+0000..U+07FF inline */ \
- __lead<0xe0 && (src)<(limit) && \
- (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
- ) { \
- ++(src); \
- (result)=(trie)->data[ \
- (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
- __t1]; \
- } else if( /* handle U+0000..U+CFFF inline */ \
- __lead<0xed && ((src)+1)<(limit) && \
- (__t1=(uint8_t)(*(src)-0x80))<=0x3f && (__lead>0xe0 || __t1>=0x20) && \
+ if( /* handle U+0800..U+FFFF inline */ \
+ 0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \
+ U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \
(__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \
) { \
(src)+=2; \
(result)=(trie)->data[ \
((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \
- (__t1<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
+ ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \
<<UTRIE2_INDEX_SHIFT)+ \
(__t2&UTRIE2_DATA_MASK)]; \
+ } else if( /* handle U+0080..U+07FF inline */ \
+ __lead<0xe0 && __lead>=0xc2 && (src)<(limit) && \
+ (__t1=(uint8_t)(*(src)-0x80))<=0x3f \
+ ) { \
+ ++(src); \
+ (result)=(trie)->data[ \
+ (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \
+ __t1]; \
} else { \
int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \
(const uint8_t *)(limit)); \
(result)=(trie)->data[__index>>3]; \
} \
} \
-}
+} UPRV_BLOCK_MACRO_END
/** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */
-#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) { \
+#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
uint8_t __b=(uint8_t)*--(src); \
- if(__b<0x80) { \
+ if(U8_IS_SINGLE(__b)) { \
(result)=(trie)->ascii[__b]; \
} else { \
int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \
(src)-=__index&7; \
(result)=(trie)->data[__index>>3]; \
} \
-}
+} UPRV_BLOCK_MACRO_END
U_CDECL_END
-/**
- * Work around MSVC 2003 optimization bugs.
- */
-#if defined (U_HAVE_MSVC_2003_OR_EARLIER)
-#pragma optimize("", off)
-#endif
-
#endif