]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbidata.h
ICU-59152.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbidata.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2014 International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: rbbidata.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * RBBI data formats Includes
16 *
17 * Structs that describe the format of the Binary RBBI data,
18 * as it is stored in ICU's data file.
19 *
20 * RBBIDataWrapper - Instances of this class sit between the
21 * raw data structs and the RulesBasedBreakIterator objects
22 * that are created by applications. The wrapper class
23 * provides reference counting for the underlying data,
24 * and direct pointers to data that would not otherwise
25 * be accessible without ugly pointer arithmetic. The
26 * wrapper does not attempt to provide any higher level
27 * abstractions for the data itself.
28 *
29 * There will be only one instance of RBBIDataWrapper for any
30 * set of RBBI run time data being shared by instances
31 * (clones) of RulesBasedBreakIterator.
32 */
33
34 #ifndef __RBBIDATA_H__
35 #define __RBBIDATA_H__
36
37 #include "unicode/utypes.h"
38 #include "unicode/udata.h"
39 #include "udataswp.h"
40
41 /**
42 * Swap RBBI data. See udataswp.h (included above) for the swapping conventions.
43 * @internal
44 */
45 U_CAPI int32_t U_EXPORT2
46 ubrk_swap(const UDataSwapper *ds, /* swapper to use */
47 const void *inData, int32_t length, void *outData, /* NOTE(review): presumably length is the size of inData in bytes -- confirm in udataswp.h */
48 UErrorCode *pErrorCode); /* ICU error code, passed by pointer */
49
50 #ifdef __cplusplus
51
52 #include "unicode/uobject.h"
53 #include "unicode/unistr.h"
54 #include "umutex.h"
55 #include "utrie.h"
56
57 U_NAMESPACE_BEGIN
58
59 /*
60 * The following structs map exactly onto the raw data from the ICU common data file.
61 */
62 struct RBBIDataHeader { /* Header at the start of the binary RBBI data; records where each subsection lives. */
63 uint32_t fMagic; /* Magic number, == 0xb1a0. (This comment used to read "0xbla0", which is not valid hex; TODO confirm the value against the check in the implementation.) */
64 uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
65 /* if there is one associated with this data. */
66 /* (version originates in rbbi, is copied to UDataInfo) */
67 /* For ICU 3.2 and earlier, this field was */
68 /* uint32_t fVersion */
69 /* with a value of 1. */
70 uint32_t fLength; /* Total length in bytes of this RBBI Data, */
71 /* including all sections, not just the header. */
72 uint32_t fCatCount; /* Number of character categories. */
73
74 /* */
75 /* Offsets and sizes of each of the subsections within the RBBI data. */
76 /* All offsets are bytes from the start of the RBBIDataHeader. */
77 /* All sizes are in bytes. */
78 /* */
79 uint32_t fFTable; /* Offset to the forward state transition table. */
80 uint32_t fFTableLen; /* Size of the forward table, in bytes. */
81 uint32_t fRTable; /* Offset to the reverse state transition table. */
82 uint32_t fRTableLen; /* Size of the reverse table, in bytes. */
83 uint32_t fSFTable; /* Offset to the safe point forward transition table. */
84 uint32_t fSFTableLen; /* Size of the safe point forward table, in bytes. */
85 uint32_t fSRTable; /* Offset to the safe point reverse transition table. */
86 uint32_t fSRTableLen; /* Size of the safe point reverse table, in bytes. */
87 uint32_t fTrie; /* Offset to Trie data for character categories */
88 uint32_t fTrieLen; /* Size of the Trie data, in bytes. */
89 uint32_t fRuleSource; /* Offset to the source for the break */
90 uint32_t fRuleSourceLen; /* rules. Stored UChar *. (This field: size of that source, in bytes.) */
91 uint32_t fStatusTable; /* Offset to the table of rule status values */
92 uint32_t fStatusTableLen; /* Size of the rule status table, in bytes. */
93
94 uint32_t fReserved[6]; /* Reserved for expansion */
95
96 };
97
98
99
100 struct RBBIStateTableRow { /* One row of a state table: per-state fields followed by the next-state array. */
101 int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
102 /* Value 0: not an accepting state. */
103 /* -1: Unconditional Accepting state. */
104 /* positive: Look-ahead match has completed. */
105 /* Actual boundary position happened earlier */
106 /* Value here == fLookAhead in earlier */
107 /* state, at actual boundary pos. */
108 int16_t fLookAhead; /* Non-zero if this row is for a state that */
109 /* corresponds to a '/' in the rule source. */
110 /* Value is the same as the fAccepting */
111 /* value for the rule (which will appear */
112 /* in a different state). */
113 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
114 /* from a rule. Value is the index in the */
115 /* StatusTable of the set of matching */
116 /* tags (rule status values) */
117 int16_t fReserved; /* Reserved. */
118 uint16_t fNextState[2]; /* Next State, indexed by char category. */
119 /* The declared size of 2 is only a placeholder: */
120 /* Array Size is actually fData->fHeader->fCatCount */
121 /* CAUTION: see RBBITableBuilder::getTableSize() */
122 /* before changing anything here. */
123 };
124
125
126 struct RBBIStateTable { /* Fixed-size table header immediately followed by the variable-length rows. */
127 uint32_t fNumStates; /* Number of states. */
128 uint32_t fRowLen; /* Length of a state table row, in bytes. */
129 uint32_t fFlags; /* Option Flags for this state table (presumably RBBIStateTableFlags values -- confirm) */
130 uint32_t fReserved; /* reserved */
131 char fTableData[4]; /* First RBBIStateTableRow begins here. */
132 /* (making it char[] simplifies ugly address */
133 /* arithmetic for indexing variable length rows.) */
134 };
135
136 typedef enum { /* Flag values; presumably stored in RBBIStateTable::fFlags -- confirm against users of fFlags. */
137 RBBI_LOOKAHEAD_HARD_BREAK = 1, /* NOTE(review): semantics not documented in this header. */
138 RBBI_BOF_REQUIRED = 2 /* NOTE(review): semantics not documented in this header. */
139 } RBBIStateTableFlags;
140
141
142 /* */
143 /* The reference counting wrapper class */
144 /* */
145 class RBBIDataWrapper : public UMemory { /* Holds direct pointers into one set of RBBI run time data, plus a reference count. */
146 public:
147 enum EDontAdopt { /* Tag type used only to select the constructor overload below. */
148 kDontAdopt
149 };
150 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); /* Wrap raw header data. */
151 RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); /* As above; presumably does not take ownership of data (see fDontFreeData) -- confirm in the implementation. */
152 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); /* Wrap data obtained through a UDataMemory. */
153 ~RBBIDataWrapper();
154
155 void init0(); /* NOTE(review): defined elsewhere; presumably resets members before init() -- confirm. */
156 void init(const RBBIDataHeader *data, UErrorCode &status); /* NOTE(review): defined elsewhere; presumably fills in the pointers below from data -- confirm. */
157 RBBIDataWrapper *addReference(); /* Reference counting; presumably increments fRefCount -- confirm. */
158 void removeReference(); /* Reference counting; presumably decrements fRefCount -- confirm. */
159 UBool operator ==(const RBBIDataWrapper &other) const;
160 int32_t hashCode(); /* NOTE(review): not const-qualified, unlike operator== above. */
161 const UnicodeString &getRuleSourceString() const;
162 #ifdef RBBI_DEBUG
163 void printData(); /* Debug-only member functions. */
164 void printTable(const char *heading, const RBBIStateTable *table);
165 #else /* Without RBBI_DEBUG both names become empty function-like macros. Macros ignore class scope, so they apply to every later use of these names in any file that includes this header. */
166 #define printData()
167 #define printTable(heading, table)
168 #endif
169
170 /* */
171 /* Pointers to items within the data */
172 /* */
173 const RBBIDataHeader *fHeader;
174 const RBBIStateTable *fForwardTable;
175 const RBBIStateTable *fReverseTable;
176 const RBBIStateTable *fSafeFwdTable;
177 const RBBIStateTable *fSafeRevTable;
178 const UChar *fRuleSource;
179 const int32_t *fRuleStatusTable;
180
181 /* Number of int32_t values in the rule status table. Used to sanity check indexing. */
182 int32_t fStatusMaxIdx;
183
184 UTrie fTrie; /* Trie object held by value (type from utrie.h); the header's fTrie field is described as the offset to Trie data for character categories. */
185
186 private:
187 u_atomic_int32_t fRefCount; /* Reference count (atomic integer type). */
188 UDataMemory *fUDataMem; /* Presumably the UDataMemory passed to the third constructor -- confirm. */
189 UnicodeString fRuleString; /* Presumably what getRuleSourceString() returns -- confirm. */
190 UBool fDontFreeData; /* Presumably set by the EDontAdopt constructor -- confirm. */
191
192 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
193 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
194 };
195
196
197
198 U_NAMESPACE_END
199
200 #endif /* C++ */
201
202 #endif