X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/fd0068a84e9996f225edba706498f6ed413d0673..46f4442e9a5a4f3b98b7c1083586332f6a8a99a4:/icuSources/test/perf/unisetperf/draft/trieset.cpp diff --git a/icuSources/test/perf/unisetperf/draft/trieset.cpp b/icuSources/test/perf/unisetperf/draft/trieset.cpp new file mode 100644 index 00000000..6c472088 --- /dev/null +++ b/icuSources/test/perf/unisetperf/draft/trieset.cpp @@ -0,0 +1,111 @@ +/* +********************************************************************** +* Copyright (C) 2007, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: trieset.cpp +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007jan15 +* created by: Markus Scherer +* +* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet +* using a UTrie with 8-bit (byte) results per code point. +* Modifies the trie index to make the BMP linear, and uses the original set +* for supplementary code points. +*/ + +#include "unicode/utypes.h" +#include "unicont.h" + +#define UTRIE_GET8_LATIN1(trie) ((const uint8_t *)(trie)->data32+UTRIE_DATA_BLOCK_LENGTH) + +#define UTRIE_GET8_FROM_LEAD(trie, c16) \ + ((const uint8_t *)(trie)->data32)[ \ + ((int32_t)((trie)->index[(c16)>>UTRIE_SHIFT])<0xffff) { + break; + } + if(end>0xffff) { + end=0xffff; + } + if(!utrie_setRange32(newTrie, start, end+1, TRUE, TRUE)) { + errorCode=U_INTERNAL_PROGRAM_ERROR; + return; + } + } + + // Preflight the trie length. 
+        // The call passes no output buffer (NULL, capacity 0), so construction
+        // only continues when it reports U_BUFFER_OVERFLOW_ERROR together with
+        // the required length; any other status aborts here.
+        int32_t length=utrie_serialize(newTrie, NULL, 0, NULL, 8, &errorCode);
+        if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
+            return;
+        }
+
+        // NOTE(review): none of the paths below release newTrie. If it was
+        // obtained from utrie_open(), a matching utrie_close(newTrie) is
+        // presumably needed on every exit -- confirm against the start of
+        // the constructor.
+        trieData=(uint32_t *)uprv_malloc(length);
+        if(trieData==NULL) {
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+
+        // Reset the status left over from the preflight call, then serialize
+        // for real and load the result into the read-only trie.
+        errorCode=U_ZERO_ERROR;
+        utrie_serialize(newTrie, trieData, length, NULL, 8, &errorCode);
+        utrie_unserialize(&trie, trieData, length, &errorCode);  // TODO: Implement for 8-bit UTrie!
+
+        if(U_SUCCESS(errorCode)) {
+            // Copy the indexes for surrogate code points into the BMP range
+            // for simple access across the entire BMP.
+            uprv_memcpy((uint16_t *)trie.index+(0xd800>>UTRIE_SHIFT),
+                        trie.index+UTRIE_BMP_INDEX_LENGTH,
+                        (0x800>>UTRIE_SHIFT)*2);
+            latin1=UTRIE_GET8_LATIN1(&trie);
+        }
+
+        // contains() answers U+0000..U+FFFF from the trie, so those code
+        // points are dropped from the fallback set.
+        // restSet is a pointer member and must be dereferenced with "->".
+        restSet->remove(0, 0xffff);
+    }
+
+    /** Frees the serialized trie buffer and deletes the fallback set. */
+    ~TrieSet() {
+        uprv_free(trieData);
+        delete restSet;
+    }
+
+    /**
+     * Tests whether the code point c is in the set.
+     *
+     * U+0000..U+00FF are read directly from the latin1 byte table,
+     * the rest of U+0100..U+FFFF through the trie, and everything else
+     * (including negative values, which the unsigned cast makes large)
+     * is delegated to restSet.
+     *
+     * @param c the code point to test
+     * @return the stored byte for c converted to UBool, or the result of
+     *         restSet->contains(c)
+     */
+    UBool contains(UChar32 c) const {
+        if((uint32_t)c<=0xff) {
+            return (UBool)latin1[c];
+        } else if((uint32_t)c<=0xffff) {
+            // Inclusive bound: the constructor stores U+FFFF in the trie and
+            // removes it from restSet, so it must be looked up here.
+            return (UBool)UTRIE_GET8_FROM_LEAD(&trie, c);
+        } else {
+            return restSet->contains(c);
+        }
+    }
+
+private:
+    uint32_t *trieData;     // uprv_malloc'ed buffer holding the serialized trie
+    const uint8_t *latin1;  // UTRIE_GET8_LATIN1(&trie); assigned only if the trie was built successfully
+    UTrie trie;             // unserialized from trieData
+    UnicodeSet *restSet;    // consulted for code points above U+FFFF; deleted in the destructor
+};