]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbidata.h
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / common / rbbidata.h
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2005,2008 International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: rbbidata.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * RBBI data formats Includes
14 *
15 * Structs that describe the format of the Binary RBBI data,
16 * as it is stored in ICU's data file.
17 *
18 * RBBIDataWrapper - Instances of this class sit between the
19 * raw data structs and the RulesBasedBreakIterator objects
20 * that are created by applications. The wrapper class
21 * provides reference counting for the underlying data,
22 * and direct pointers to data that would not otherwise
23 * be accessible without ugly pointer arithmetic. The
24 * wrapper does not attempt to provide any higher level
25 * abstractions for the data itself.
26 *
27 * There will be only one instance of RBBIDataWrapper for any
28 * set of RBBI run time data being shared by instances
29 * (clones) of RulesBasedBreakIterator.
30 */
31
32 #ifndef __RBBIDATA_H__
33 #define __RBBIDATA_H__
34
35 #include "unicode/utypes.h"
36 #include "unicode/udata.h"
37 #include "udataswp.h"
38
39 /**
40 * Swap RBBI data. See udataswp.h.
 *
 * This prototype is declared ahead of the XP_CPLUSPLUS-only section of this
 * header and uses U_CAPI linkage, so it is visible to C as well as C++
 * translation units.
 *
 * NOTE(review): the parameters presumably follow the common swap-function
 * convention documented in udataswp.h (ds = swapper, inData/length = input
 * buffer, outData = destination, pErrorCode = in/out status) - confirm there.
41 * @internal
42 */
43 U_CAPI int32_t U_EXPORT2
44 ubrk_swap(const UDataSwapper *ds,
45 const void *inData, int32_t length, void *outData,
46 UErrorCode *pErrorCode);
47
48 #ifdef XP_CPLUSPLUS
49
50 #include "unicode/uobject.h"
51 #include "unicode/unistr.h"
52 #include "utrie.h"
53
54 U_NAMESPACE_BEGIN
55
56 /*
57 * The following structs map exactly onto the raw data from the ICU common data file.
58 */
/* Fixed-layout header found at the start of a block of binary RBBI data. */
/* Every section offset stored here is relative to the start of this struct. */
59 struct RBBIDataHeader {
60 uint32_t fMagic; /* == 0xb1a0 */
61 uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
62 /* if there is one associated with this data. */
63 /* (version originates in rbbi, is copied to UDataInfo) */
64 /* For ICU 3.2 and earlier, this field was */
65 /* uint32_t fVersion */
66 /* with a value of 1. */
67 uint32_t fLength; /* Total length in bytes of this RBBI Data, */
68 /* including all sections, not just the header. */
69 uint32_t fCatCount; /* Number of character categories. */
70
71 /* */
72 /* Offsets and sizes of each of the subsections within the RBBI data. */
73 /* All offsets are bytes from the start of the RBBIDataHeader. */
74 /* All sizes are in bytes. */
75 /* */
76 uint32_t fFTable; /* Offset to the forward state transition table. */
77 uint32_t fFTableLen;
78 uint32_t fRTable; /* Offset to the reverse state transition table. */
79 uint32_t fRTableLen;
80 uint32_t fSFTable; /* Offset to the safe point forward transition table */
81 uint32_t fSFTableLen;
82 uint32_t fSRTable; /* Offset to the safe point reverse transition table */
83 uint32_t fSRTableLen;
84 uint32_t fTrie; /* Offset to Trie data for character categories */
85 uint32_t fTrieLen;
86 uint32_t fRuleSource; /* Offset to the source for the break */
87 uint32_t fRuleSourceLen; /* rules. Stored UChar *. */
88 uint32_t fStatusTable; /* Offset to the table of rule status values */
89 uint32_t fStatusTableLen;
90
91 uint32_t fReserved[6]; /* Reserved for expansion */
92
93 };
94
95
96
/* One row (one state) of a state transition table. Rows are variable length: */
/* the fixed fields below are followed by one fNextState entry per column. */
97 struct RBBIStateTableRow {
98 int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
99 /* Value 0: not an accepting state. */
100 /* -1: Unconditional Accepting state. */
101 /* positive: Look-ahead match has completed. */
102 /* Actual boundary position happened earlier */
103 /* Value here == fLookAhead in earlier */
104 /* state, at actual boundary pos. */
105 int16_t fLookAhead; /* Non-zero if this row is for a state that */
106 /* corresponds to a '/' in the rule source. */
107 /* Value is the same as the fAccepting */
108 /* value for the rule (which will appear */
109 /* in a different state). */
110 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
111 /* from a rule. Value is the index in the */
112 /* StatusTable of the set of matching */
113 /* tags (rule status values) */
114 int16_t fReserved;
115 uint16_t fNextState[2]; /* Next State, indexed by char category. */
116 /* Array Size is fNumCols from the */
117 /* state table header. */
118 /* CAUTION: see RBBITableBuilder::getTableSize() */
119 /* before changing anything here. */
/* NOTE(review): RBBIStateTable as declared in this */
/* header has no fNumCols member; it carries the row */
/* size in bytes as fRowLen. The declared size of 2 */
/* is only a placeholder. Presumably the real column */
/* count is RBBIDataHeader::fCatCount - confirm. */
120 };
121
122
/* Header of one state transition table. The rows themselves start at */
/* fTableData and are each fRowLen bytes long, so the declared array size */
/* of 4 is nominal rather than the real extent of the table data. */
123 struct RBBIStateTable {
124 uint32_t fNumStates; /* Number of states. */
125 uint32_t fRowLen; /* Length of a state table row, in bytes. */
126 uint32_t fFlags; /* Option Flags for this state table */
127 uint32_t fReserved; /* reserved */
128 char fTableData[4]; /* First RBBIStateTableRow begins here. */
129 /* (making it char[] simplifies ugly address */
130 /* arithmetic for indexing variable length rows.) */
131 };
132
/* NOTE(review): presumably the bit values that may be set in */
/* RBBIStateTable::fFlags (the values 1 and 2 are distinct bits) - confirm */
/* against the code that reads fFlags. */
133 typedef enum {
134 RBBI_LOOKAHEAD_HARD_BREAK = 1,
135 RBBI_BOF_REQUIRED = 2
136 } RBBIStateTableFlags;
137
138
139 /* */
140 /* The reference counting wrapper class */
141 /* */
142 class RBBIDataWrapper : public UMemory {
143 public:
/* Tag type whose only purpose is to select the three-argument constructor. */
/* NOTE(review): presumably that overload leaves ownership of the data with */
/* the caller (compare the private fDontFreeData flag) - confirm in the */
/* implementation file. */
144 enum EDontAdopt {
145 kDontAdopt
146 };
/* Three ways to wrap RBBI data: from a raw header pointer, from a raw */
/* header pointer with the kDontAdopt tag, or from a UDataMemory handle. */
/* Each constructor reports failure through the UErrorCode argument. */
147 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
148 RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
149 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
150 ~RBBIDataWrapper();
151
152 void init(const RBBIDataHeader *data, UErrorCode &status);
/* Reference counting interface; addReference() returns a wrapper pointer */
/* so the call can be used directly where a pointer is being stored. */
153 RBBIDataWrapper *addReference();
154 void removeReference();
155 UBool operator ==(const RBBIDataWrapper &other) const;
156 int32_t hashCode();
157 const UnicodeString &getRuleSourceString() const;
/* Debug printing exists only when RBBI_DEBUG is defined. Otherwise the two */
/* names are defined as function-like macros with empty expansions, so any */
/* call to them compiles to nothing. Being preprocessor macros, they are */
/* not scoped to this class: they affect every later use of these names in */
/* a translation unit that includes this header. */
158 #ifdef RBBI_DEBUG
159 void printData();
160 void printTable(const char *heading, const RBBIStateTable *table);
161 #else
162 #define printData()
163 #define printTable(heading, table)
164 #endif
165
166 /* */
167 /* Pointers to items within the data */
168 /* */
169 const RBBIDataHeader *fHeader;
170 const RBBIStateTable *fForwardTable;
171 const RBBIStateTable *fReverseTable;
172 const RBBIStateTable *fSafeFwdTable;
173 const RBBIStateTable *fSafeRevTable;
174 const UChar *fRuleSource;
175 const int32_t *fRuleStatusTable;
176
177 /* number of int32_t values in the rule status table. Used to sanity check indexing */
178 int32_t fStatusMaxIdx;
179
180 UTrie fTrie;
181
182 private:
/* Bookkeeping for the reference count and for releasing the wrapped data. */
/* NOTE(review): fUDataMem is presumably set only by the UDataMemory */
/* constructor, and fDontFreeData only by the kDontAdopt one - confirm. */
183 int32_t fRefCount;
184 UDataMemory *fUDataMem;
185 UnicodeString fRuleString;
186 UBool fDontFreeData;
187
188 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
189 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
190 };
191
192
193
194 U_NAMESPACE_END
195
196 #endif /* C++ */
197
198 #endif