]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbidata.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbidata.h
CommitLineData
374ca955
A
1/*
2*******************************************************************************
3*
b331163b 4* Copyright (C) 1999-2014 International Business Machines
374ca955
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: rbbidata.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* RBBI data formats Includes
14*
15* Structs that describe the format of the Binary RBBI data,
16* as it is stored in ICU's data file.
17*
18* RBBIDataWrapper - Instances of this class sit between the
19* raw data structs and the RulesBasedBreakIterator objects
20* that are created by applications. The wrapper class
21* provides reference counting for the underlying data,
22* and direct pointers to data that would not otherwise
23* be accessible without ugly pointer arithmetic. The
24* wrapper does not attempt to provide any higher level
25* abstractions for the data itself.
26*
27* There will be only one instance of RBBIDataWrapper for any
28* set of RBBI run time data being shared by instances
29* (clones) of RulesBasedBreakIterator.
30*/
b75a7d8f
A
31
32#ifndef __RBBIDATA_H__
33#define __RBBIDATA_H__
34
35#include "unicode/utypes.h"
374ca955
A
36#include "unicode/udata.h"
37#include "udataswp.h"
38
39/**
40 * Swap RBBI data. See udataswp.h.
 * This prototype is declared before the "#ifdef __cplusplus" section of this
 * header, so it is visible to C as well as C++ includers.
 * NOTE(review): parameter and return-value semantics are not described here;
 * presumably they follow the general swapper conventions in udataswp.h - confirm.
41 * @internal
42 */
43U_CAPI int32_t U_EXPORT2
44ubrk_swap(const UDataSwapper *ds,
45 const void *inData, int32_t length, void *outData,
46 UErrorCode *pErrorCode);
47
4388f060 48#ifdef __cplusplus
374ca955 49
b75a7d8f
A
50#include "unicode/uobject.h"
51#include "unicode/unistr.h"
57a6839d 52#include "umutex.h"
b75a7d8f
A
53#include "utrie.h"
54
b75a7d8f
A
55U_NAMESPACE_BEGIN
56
374ca955
A
57/*
58 * The following structs map exactly onto the raw data from the ICU common data file.
59 */
b75a7d8f 60struct RBBIDataHeader {
73c04bcf
A
61 uint32_t fMagic; /* == 0xb1a0 */
62 uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */
63 /* if there is one associated with this data. */
64 /* (version originates in rbbi, is copied to UDataInfo) */
65 /* For ICU 3.2 and earlier, this field was */
66 /* uint32_t fVersion */
67 /* with a value of 1. */
68 uint32_t fLength; /* Total length in bytes of this RBBI Data, */
69 /* including all sections, not just the header. */
70 uint32_t fCatCount; /* Number of character categories. */
71
72 /* */
73 /* Offsets and sizes of each of the subsections within the RBBI data. */
74 /* All offsets are bytes from the start of the RBBIDataHeader. */
75 /* All sizes are in bytes. */
76 /* */
374ca955 77 uint32_t fFTable; /* Offset to the forward state transition table. */
b75a7d8f 78 uint32_t fFTableLen; /* Size in bytes of the forward table. */
374ca955 79 uint32_t fRTable; /* Offset to the reverse state transition table. */
b75a7d8f 80 uint32_t fRTableLen; /* Size in bytes of the reverse table. */
374ca955
A
81 uint32_t fSFTable; /* Offset to the safe point forward transition table */
82 uint32_t fSFTableLen; /* Size in bytes of the safe point forward table. */
83 uint32_t fSRTable; /* Offset to the safe point reverse transition table */
84 uint32_t fSRTableLen; /* Size in bytes of the safe point reverse table. */
85 uint32_t fTrie; /* Offset to Trie data for character categories */
b75a7d8f 86 uint32_t fTrieLen; /* Size in bytes of the Trie data. */
374ca955
A
87 uint32_t fRuleSource; /* Offset to the source for the break */
88 uint32_t fRuleSourceLen; /* rules. Stored as UChar *. Length is in bytes. */
89 uint32_t fStatusTable; /* Offset to the table of rule status values */
90 uint32_t fStatusTableLen; /* Size in bytes of the rule status table. */
b75a7d8f 91
374ca955 92 uint32_t fReserved[6]; /* Reserved for expansion */
b75a7d8f
A
93
94};
95
96
97
98struct RBBIStateTableRow {
374ca955
A
99 int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
100 /* Value 0: not an accepting state. */
101 /* -1: Unconditional Accepting state. */
102 /* positive: Look-ahead match has completed. */
103 /* Actual boundary position happened earlier. */
104 /* Value here == fLookAhead in earlier */
105 /* state, at actual boundary pos. */
106 int16_t fLookAhead; /* Non-zero if this row is for a state that */
107 /* corresponds to a '/' in the rule source. */
108 /* Value is the same as the fAccepting */
109 /* value for the rule (which will appear */
110 /* in a different state). */
111 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
112 /* from a rule. Value is the index in the */
113 /* StatusTable of the set of matching */
114 /* tags (rule status values) */
b75a7d8f 115 int16_t fReserved;
374ca955 116 uint16_t fNextState[2]; /* Next State, indexed by char category. */
4388f060
A
117 /* The declared size of 2 is only a placeholder; */
118 /* the actual array size is fData->fHeader->fCatCount. */
374ca955
A
119 /* CAUTION: see RBBITableBuilder::getTableSize() */
120 /* before changing anything here. */
b75a7d8f
A
121};
122
123
124struct RBBIStateTable {
374ca955
A
125 uint32_t fNumStates; /* Number of states. */
126 uint32_t fRowLen; /* Length of a state table row, in bytes. */
127 uint32_t fFlags; /* Option Flags for this state table */
128 uint32_t fReserved; /* reserved */
129 char fTableData[4]; /* First RBBIStateTableRow begins here. */
130 /* (making it char[] simplifies ugly address */
131 /* arithmetic for indexing variable length rows.) */
 /* NOTE(review): the declared size of 4 looks like a */
 /* placeholder, since rows are variable length - confirm. */
b75a7d8f
A
132};
133
/* Named flag values for a state table. */
/* NOTE(review): presumably bit values stored in RBBIStateTable::fFlags - confirm. */
374ca955 134typedef enum {
73c04bcf
A
135 RBBI_LOOKAHEAD_HARD_BREAK = 1,
136 RBBI_BOF_REQUIRED = 2
374ca955
A
137} RBBIStateTableFlags;
138
b75a7d8f 139
374ca955
A
140/* */
141/* The reference counting wrapper class */
142/* */
b75a7d8f
A
143class RBBIDataWrapper : public UMemory {
144public:
46f4442e
A
 /* Tag type used only to select the constructor overload that takes an */
 /* EDontAdopt argument. NOTE(review): presumably means the wrapper must */
 /* not free the data (see fDontFreeData) - confirm. */
145 enum EDontAdopt {
146 kDontAdopt
147 };
 /* Three ways to construct a wrapper: from a raw header pointer, from a */
 /* raw header pointer with the kDontAdopt tag, or from a UDataMemory. */
b75a7d8f 148 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
46f4442e 149 RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
b75a7d8f
A
150 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
151 ~RBBIDataWrapper();
152
b331163b 153 void init0();
b75a7d8f
A
154 void init(const RBBIDataHeader *data, UErrorCode &status);
 /* Reference counting interface (count is kept in fRefCount). */
155 RBBIDataWrapper *addReference();
156 void removeReference();
157 UBool operator ==(const RBBIDataWrapper &other) const;
158 int32_t hashCode();
374ca955
A
159 const UnicodeString &getRuleSourceString() const;
 /* Debug printing exists only when RBBI_DEBUG is defined. Otherwise */
 /* printData() and printTable() are macros that expand to nothing. */
160#ifdef RBBI_DEBUG
b75a7d8f 161 void printData();
374ca955
A
162 void printTable(const char *heading, const RBBIStateTable *table);
163#else
164 #define printData()
165 #define printTable(heading, table)
166#endif
b75a7d8f 167
374ca955
A
168 /* */
169 /* Pointers to items within the data */
170 /* */
b75a7d8f
A
171 const RBBIDataHeader *fHeader;
172 const RBBIStateTable *fForwardTable;
173 const RBBIStateTable *fReverseTable;
374ca955
A
174 const RBBIStateTable *fSafeFwdTable;
175 const RBBIStateTable *fSafeRevTable;
b75a7d8f 176 const UChar *fRuleSource;
374ca955
A
177 const int32_t *fRuleStatusTable;
178
179 /* number of int32_t values in the rule status table. Used to sanity check indexing */
180 int32_t fStatusMaxIdx;
b75a7d8f
A
181
182 UTrie fTrie;
183
184private:
 /* fRefCount: reference count for this wrapper. */
 /* fUDataMem: NOTE(review): presumably the UDataMemory passed to the */
 /* UDataMemory constructor - confirm. */
 /* fRuleString: NOTE(review): presumably backs getRuleSourceString() - confirm. */
 /* fDontFreeData: NOTE(review): presumably set by the kDontAdopt constructor - confirm. */
57a6839d 185 u_atomic_int32_t fRefCount;
b331163b 186 UDataMemory *fUDataMem;
b75a7d8f 187 UnicodeString fRuleString;
46f4442e 188 UBool fDontFreeData;
b75a7d8f 189
374ca955
A
190 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
191 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
b75a7d8f
A
192};
193
374ca955
A
194
195
b75a7d8f
A
196U_NAMESPACE_END
197
374ca955 198#endif /* C++ */
b75a7d8f 199
374ca955 200#endif