]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
b331163b | 4 | * Copyright (C) 1999-2014 International Business Machines |
374ca955 A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: rbbidata.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * RBBI data formats Includes | |
14 | * | |
15 | * Structs that describe the format of the binary RBBI data, | |
16 | * as it is stored in ICU's data file. | |
17 | * | |
18 | * RBBIDataWrapper - Instances of this class sit between the | |
19 | * raw data structs and the RulesBasedBreakIterator objects | |
20 | * that are created by applications. The wrapper class | |
21 | * provides reference counting for the underlying data, | |
22 | * and direct pointers to data that would not otherwise | |
23 | * be accessible without ugly pointer arithmetic. The | |
24 | * wrapper does not attempt to provide any higher level | |
25 | * abstractions for the data itself. | |
26 | * | |
27 | * There will be only one instance of RBBIDataWrapper for any | |
28 | * set of RBBI run time data being shared by instances | |
29 | * (clones) of RulesBasedBreakIterator. | |
30 | */ | |
b75a7d8f A |
31 | |
32 | #ifndef __RBBIDATA_H__ | |
33 | #define __RBBIDATA_H__ | |
34 | ||
35 | #include "unicode/utypes.h" | |
374ca955 A |
36 | #include "unicode/udata.h" |
37 | #include "udataswp.h" | |
38 | ||
39 | /** | |
40 | * Swap RBBI data. See udataswp.h for the UDataSwapper contract. Presumably inData holds 'length' bytes of input and outData receives the swapped copy -- TODO confirm there. | |
41 | * @internal | |
42 | */ | |
43 | U_CAPI int32_t U_EXPORT2 | |
44 | ubrk_swap(const UDataSwapper *ds, | |
45 | const void *inData, int32_t length, void *outData, | |
46 | UErrorCode *pErrorCode); | |
47 | ||
4388f060 | 48 | #ifdef __cplusplus |
374ca955 | 49 | |
b75a7d8f A |
50 | #include "unicode/uobject.h" |
51 | #include "unicode/unistr.h" | |
57a6839d | 52 | #include "umutex.h" |
b75a7d8f A |
53 | #include "utrie.h" |
54 | ||
b75a7d8f A |
55 | U_NAMESPACE_BEGIN |
56 | ||
374ca955 A |
57 | /* |
58 | * The following structs map exactly onto the raw data from the ICU common data file. | |
59 | */ | |
b75a7d8f | 60 | struct RBBIDataHeader { |
73c04bcf A |
61 | uint32_t fMagic; /* == 0xb1a0 */ |
62 | uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ | |
63 | /* if there is one associated with this data. */ | |
64 | /* (version originates in rbbi, is copied to UDataInfo) */ | |
65 | /* For ICU 3.2 and earlier, this field was */ | |
66 | /* uint32_t fVersion */ | |
67 | /* with a value of 1. */ | |
68 | uint32_t fLength; /* Total length in bytes of this RBBI Data, */ | |
69 | /* including all sections, not just the header. */ | |
70 | uint32_t fCatCount; /* Number of character categories. */ | |
71 | ||
72 | /* */ | |
73 | /* Offsets and sizes of each of the subsections within the RBBI data. */ | |
74 | /* All offsets are bytes from the start of the RBBIDataHeader. */ | |
75 | /* All sizes are in bytes. */ | |
76 | /* */ | |
374ca955 | 77 | uint32_t fFTable; /* Offset to the forward state transition table. */ |
b75a7d8f | 78 | uint32_t fFTableLen; |
374ca955 | 79 | uint32_t fRTable; /* Offset to the reverse state transition table. */ |
b75a7d8f | 80 | uint32_t fRTableLen; |
374ca955 A |
81 | uint32_t fSFTable; /* Offset to the safe point forward transition table. */ |
82 | uint32_t fSFTableLen; | |
83 | uint32_t fSRTable; /* Offset to the safe point reverse transition table. */ | |
84 | uint32_t fSRTableLen; | |
85 | uint32_t fTrie; /* Offset to Trie data for character categories */ | |
b75a7d8f | 86 | uint32_t fTrieLen; |
374ca955 A |
87 | uint32_t fRuleSource; /* Offset to the source for the break */ |
88 | uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ | |
89 | uint32_t fStatusTable; /* Offset to the table of rule status values */ | |
90 | uint32_t fStatusTableLen; | |
b75a7d8f | 91 | |
374ca955 | 92 | uint32_t fReserved[6]; /* Reserved for expansion */ |
b75a7d8f A |
93 | |
94 | }; | |
95 | ||
96 | ||
97 | ||
98 | struct RBBIStateTableRow { | |
374ca955 A |
99 | int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ |
100 | /* Value 0: not an accepting state. */ | |
101 | /* -1: Unconditional Accepting state. */ | |
102 | /* positive: Look-ahead match has completed. */ | |
103 | /* Actual boundary position happened earlier */ | |
104 | /* Value here == fLookAhead in earlier */ | |
105 | /* state, at actual boundary pos. */ | |
106 | int16_t fLookAhead; /* Non-zero if this row is for a state that */ | |
107 | /* corresponds to a '/' in the rule source. */ | |
108 | /* Value is the same as the fAccepting */ | |
109 | /* value for the rule (which will appear */ | |
110 | /* in a different state). */ | |
111 | int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ | |
112 | /* from a rule. Value is the index in the */ | |
113 | /* StatusTable of the set of matching */ | |
114 | /* tags (rule status values). */ | |
b75a7d8f | 115 | int16_t fReserved; |
374ca955 | 116 | uint16_t fNextState[2]; /* Next State, indexed by char category. */ |
4388f060 A |
117 | /* The declared size of 2 is a placeholder: this array does not have two elements. */ |
118 | /* Array size is actually fData->fHeader->fCatCount. */ | |
374ca955 A |
119 | /* CAUTION: see RBBITableBuilder::getTableSize() */ |
120 | /* before changing anything here. */ | |
b75a7d8f A |
121 | }; |
122 | ||
123 | ||
124 | struct RBBIStateTable { | |
374ca955 A |
125 | uint32_t fNumStates; /* Number of states. */ |
126 | uint32_t fRowLen; /* Length of a state table row, in bytes. */ | |
127 | uint32_t fFlags; /* Option flags for this state table (presumably RBBIStateTableFlags values -- TODO confirm). */ | |
128 | uint32_t fReserved; /* Reserved. */ | |
129 | char fTableData[4]; /* First RBBIStateTableRow begins here; the declared size of 4 is only a placeholder. */ | |
130 | /* (Making it char[] simplifies ugly address */ | |
131 | /* arithmetic for indexing variable length rows.) */ | |
b75a7d8f A |
132 | }; |
133 | ||
374ca955 | 134 | typedef enum { /* Two distinct bit values; presumably OR-ed into RBBIStateTable::fFlags -- TODO confirm against the code that reads fFlags. */ |
73c04bcf A |
135 | RBBI_LOOKAHEAD_HARD_BREAK = 1, |
136 | RBBI_BOF_REQUIRED = 2 | |
374ca955 A |
137 | } RBBIStateTableFlags; |
138 | ||
b75a7d8f | 139 | |
374ca955 A |
140 | /* */ |
141 | /* The reference counting wrapper class. Sits between the raw data structs and the RulesBasedBreakIterator objects that share them, and exposes direct pointers into the data. */ | |
142 | /* */ | |
b75a7d8f A |
143 | class RBBIDataWrapper : public UMemory { |
144 | public: | |
46f4442e A |
145 | enum EDontAdopt { |
146 | kDontAdopt | |
147 | }; | |
b75a7d8f | 148 | RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); |
46f4442e | 149 | RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); |
b75a7d8f A |
150 | RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); |
151 | ~RBBIDataWrapper(); | |
152 | ||
b331163b | 153 | void init0(); |
b75a7d8f A |
154 | void init(const RBBIDataHeader *data, UErrorCode &status); |
155 | RBBIDataWrapper *addReference(); | |
156 | void removeReference(); | |
157 | UBool operator ==(const RBBIDataWrapper &other) const; | |
158 | int32_t hashCode(); | |
374ca955 A |
159 | const UnicodeString &getRuleSourceString() const; |
160 | #ifdef RBBI_DEBUG | |
b75a7d8f | 161 | void printData(); |
374ca955 A |
162 | void printTable(const char *heading, const RBBIStateTable *table); |
163 | #else /* Without RBBI_DEBUG, printData() and printTable(...) are macros that expand to nothing. */ | |
164 | #define printData() | |
165 | #define printTable(heading, table) | |
166 | #endif | |
b75a7d8f | 167 | |
374ca955 A |
168 | /* */ |
169 | /* Pointers to items within the data (presumably set up by init() from the header's offsets -- confirm in the implementation file). */ | |
170 | /* */ | |
b75a7d8f A |
171 | const RBBIDataHeader *fHeader; |
172 | const RBBIStateTable *fForwardTable; | |
173 | const RBBIStateTable *fReverseTable; | |
374ca955 A |
174 | const RBBIStateTable *fSafeFwdTable; |
175 | const RBBIStateTable *fSafeRevTable; | |
b75a7d8f | 176 | const UChar *fRuleSource; |
374ca955 A |
177 | const int32_t *fRuleStatusTable; |
178 | ||
179 | /* Number of int32_t values in the rule status table. Used to sanity check indexing. */ | |
180 | int32_t fStatusMaxIdx; | |
b75a7d8f A |
181 | |
182 | UTrie fTrie; | |
183 | ||
184 | private: | |
57a6839d | 185 | u_atomic_int32_t fRefCount; |
b331163b | 186 | UDataMemory *fUDataMem; |
b75a7d8f | 187 | UnicodeString fRuleString; |
46f4442e | 188 | UBool fDontFreeData; |
b75a7d8f | 189 | |
374ca955 A |
190 | RBBIDataWrapper(const RBBIDataWrapper &other); /* Private copy constructor: forbid copying of this class. */ |
191 | RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* Private assignment operator: forbid copying of this class. */ | |
b75a7d8f A |
192 | }; |
193 | ||
374ca955 A |
194 | |
195 | ||
b75a7d8f A |
196 | U_NAMESPACE_END |
197 | ||
374ca955 | 198 | #endif /* C++ */ |
b75a7d8f | 199 | |
374ca955 | 200 | #endif |