]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/perf/unisetperf/draft/trieset.cpp
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / test / perf / unisetperf / draft / trieset.cpp
CommitLineData
46f4442e
A
1/*
2**********************************************************************
3* Copyright (C) 2007, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* file name: trieset.cpp
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2007jan15
12* created by: Markus Scherer
13*
14* Idea for a "compiled", fast, read-only (immutable) version of a UnicodeSet
15* using a UTrie with 8-bit (byte) results per code point.
16* Modifies the trie index to make the BMP linear, and uses the original set
17* for supplementary code points.
18*/
19
20#include "unicode/utypes.h"
21#include "unicont.h"
22
/*
 * Returns a const uint8_t pointer into the trie's data array, viewed as
 * bytes, at byte offset UTRIE_DATA_BLOCK_LENGTH.
 * TrieSet indexes the result directly with code points 0..0xff.
 * trie: pointer to a structure with a data32 member (e.g. a UTrie).
 */
#define UTRIE_GET8_LATIN1(trie) ((const uint8_t *)(trie)->data32+UTRIE_DATA_BLOCK_LENGTH)
24
/*
 * Looks up the byte stored for the 16-bit unit c16 in a trie whose data
 * array is read as bytes: the index entry for c16's block is shifted left
 * by UTRIE_INDEX_SHIFT to get the block's byte offset, and the low bits of
 * c16 (UTRIE_MASK) select the byte within the block.
 *
 * trie: pointer to a structure with index and data32 members (e.g. a UTrie).
 * c16:  evaluated twice; do not pass an expression with side effects.
 */
#define UTRIE_GET8_FROM_LEAD(trie, c16) \
    (((const uint8_t *)(trie)->data32)[ \
        ((int32_t)((trie)->index[(c16)>>UTRIE_SHIFT])<<UTRIE_INDEX_SHIFT)+ \
        ((c16)&UTRIE_MASK) \
    ])
30
31class TrieSet : public UObject, public UnicodeContainable {
32public:
33 TrieSet(const UnicodeSet &set, UErrorCode &errorCode)
34 : trieData(NULL), latin1(NULL), restSet(set.clone()) {
35 if(U_FAILURE(errorCode)) {
36 return;
37 }
38 if(restSet==NULL) {
39 errorCode=U_MEMORY_ALLOCATION_ERROR;
40 return;
41 }
42
43 UNewTrie *newTrie=utrie_open(NULL, NULL, 0x11000, 0, 0, TRUE);
44 UChar32 start, end;
45
46 UnicodeSetIterator iter(set);
47
48 while(iter.nextRange() && !iter.isString()) {
49 start=iter.getCodepoint();
50 end=iter.getCodepointEnd();
51 if(start>0xffff) {
52 break;
53 }
54 if(end>0xffff) {
55 end=0xffff;
56 }
57 if(!utrie_setRange32(newTrie, start, end+1, TRUE, TRUE)) {
58 errorCode=U_INTERNAL_PROGRAM_ERROR;
59 return;
60 }
61 }
62
63 // Preflight the trie length.
64 int32_t length=utrie_serialize(newTrie, NULL, 0, NULL, 8, &errorCode);
65 if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
66 return;
67 }
68
69 trieData=(uint32_t *)uprv_malloc(length);
70 if(trieData==NULL) {
71 errorCode=U_MEMORY_ALLOCATION_ERROR;
72 return;
73 }
74
75 errorCode=U_ZERO_ERROR;
76 utrie_serialize(newTrie, trieData, length, NULL, 8, &errorCode);
77 utrie_unserialize(&trie, trieData, length, &errorCode); // TODO: Implement for 8-bit UTrie!
78
79 if(U_SUCCESS(errorCode)) {
80 // Copy the indexes for surrogate code points into the BMP range
81 // for simple access across the entire BMP.
82 uprv_memcpy((uint16_t *)trie.index+(0xd800>>UTRIE_SHIFT),
83 trie.index+UTRIE_BMP_INDEX_LENGTH,
84 (0x800>>UTRIE_SHIFT)*2);
85 latin1=UTRIE_GET8_LATIN1(&trie);
86 }
87
88 restSet.remove(0, 0xffff);
89 }
90
91 ~TrieSet() {
92 uprv_free(trieData);
93 delete restSet;
94 }
95
96 UBool contains(UChar32 c) const {
97 if((uint32_t)c<=0xff) {
98 return (UBool)latin1[c];
99 } else if((uint32_t)c<0xffff) {
100 return (UBool)UTRIE_GET8_FROM_LEAD(&trie, c);
101 } else {
102 return restSet->contains(c);
103 }
104 }
105
106private:
107 uint32_t *trieData;
108 const uint8_t *latin1;
109 UTrie trie;
110 UnicodeSet *restSet;
111};