]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbidata.h
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbidata.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/*
4*******************************************************************************
5*
b331163b 6* Copyright (C) 1999-2014 International Business Machines
374ca955
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: rbbidata.h
f3c0d7a5 11* encoding: UTF-8
374ca955
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* RBBI data formats Includes
16*
17* Structs that describe the format of the Binary RBBI data,
18* as it is stored in ICU's data file.
19*
20* RBBIDataWrapper - Instances of this class sit between the
21* raw data structs and the RulesBasedBreakIterator objects
22* that are created by applications. The wrapper class
23* provides reference counting for the underlying data,
24* and direct pointers to data that would not otherwise
25* be accessible without ugly pointer arithmetic. The
26* wrapper does not attempt to provide any higher level
27* abstractions for the data itself.
28*
29* There will be only one instance of RBBIDataWrapper for any
30* set of RBBI run time data being shared by instances
31* (clones) of RulesBasedBreakIterator.
32*/
b75a7d8f
A
33
34#ifndef __RBBIDATA_H__
35#define __RBBIDATA_H__
36
37#include "unicode/utypes.h"
374ca955
A
38#include "unicode/udata.h"
39#include "udataswp.h"
40
41/**
42 * Swap RBBI data. See udataswp.h.
 * Declared before the __cplusplus guard, so the prototype is visible to C
 * translation units as well as C++ ones.
 * NOTE(review): the parameters (ds, inData/length, outData, pErrorCode) are
 * presumed to follow the swap-function convention described in udataswp.h;
 * confirm their exact semantics and the meaning of the return value there.
43 * @internal
44 */
45U_CAPI int32_t U_EXPORT2
46ubrk_swap(const UDataSwapper *ds,
47 const void *inData, int32_t length, void *outData,
48 UErrorCode *pErrorCode);
49
4388f060 50#ifdef __cplusplus
374ca955 51
b75a7d8f
A
52#include "unicode/uobject.h"
53#include "unicode/unistr.h"
0f5d89e8 54#include "unicode/uversion.h"
57a6839d 55#include "umutex.h"
0f5d89e8 56#include "utrie2.h"
b75a7d8f 57
b75a7d8f
A
58U_NAMESPACE_BEGIN
59
0f5d89e8
A
60// The current RBBI data format version.
// NOTE(review): presumably the value that RBBIDataHeader::fFormatVersion is
// checked against (see RBBIDataWrapper::isDataVersionAcceptable) -- confirm
// in the implementation file.
61static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {5, 0, 0, 0};
62
374ca955
A
63/*
64 * The following structs map exactly onto the raw data from ICU common data file.
65 */
b75a7d8f 66struct RBBIDataHeader {
/* Describes one set of binary RBBI data: its format version, its total */
/* length, and the offset and size of every section it contains. */
73c04bcf 67 uint32_t fMagic; /* Magic number, == 0xb1a0 */
0f5d89e8 68 UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */
73c04bcf
A
69 /* if there is one associated with this data. */
70 /* (version originates in rbbi, is copied to UDataInfo) */
73c04bcf
A
71 uint32_t fLength; /* Total length in bytes of this RBBI Data, */
72 /* including all sections, not just the header. */
73 uint32_t fCatCount; /* Number of character categories. */
74
75 /* */
76 /* Offsets and sizes of each of the subsections within the RBBI data. */
77 /* All offsets are bytes from the start of the RBBIDataHeader. */
78 /* All sizes are in bytes. */
79 /* */
374ca955 80 uint32_t fFTable; /* Offset to the forward state transition table. */
b75a7d8f 81 uint32_t fFTableLen; /* Size in bytes of the forward state table. */
374ca955 82 uint32_t fRTable; /* Offset to the reverse state transition table. */
b75a7d8f 83 uint32_t fRTableLen; /* Size in bytes of the reverse state table. */
374ca955 84 uint32_t fTrie; /* Offset to Trie data for character categories */
b75a7d8f 85 uint32_t fTrieLen; /* Size in bytes of the Trie data. */
374ca955
A
86 uint32_t fRuleSource; /* Offset to the source for the break */
87 uint32_t fRuleSourceLen; /* rules. Stored as UChar *. */
88 uint32_t fStatusTable; /* Offset to the table of rule status values */
89 uint32_t fStatusTableLen; /* Size in bytes of the rule status table. */
b75a7d8f 90
374ca955 91 uint32_t fReserved[6]; /* Reserved for expansion */
b75a7d8f
A
92
93};
94
95
96
97struct RBBIStateTableRow {
/* One row of a state table: the per-state fields below, followed by the */
/* next-state entries, one for each character category. */
374ca955
A
98 int16_t fAccepting; /* Non-zero if this row is for an accepting state. */
99 /* Value 0: not an accepting state. */
100 /* -1: Unconditional Accepting state. */
101 /* positive: Look-ahead match has completed. */
102 /* Actual boundary position happened earlier */
103 /* Value here == fLookAhead in earlier */
104 /* state, at actual boundary pos. */
105 int16_t fLookAhead; /* Non-zero if this row is for a state that */
106 /* corresponds to a '/' in the rule source. */
107 /* Value is the same as the fAccepting */
108 /* value for the rule (which will appear */
109 /* in a different state). */
110 int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */
111 /* from a rule. Value is the index in the */
112 /* StatusTable of the set of matching */
113 /* tags (rule status values) */
b75a7d8f 114 int16_t fReserved;
0f5d89e8
A
115 uint16_t fNextState[1]; /* Next State, indexed by char category. */
116 /* Variable-length array declared with length 1 */
117 /* to disable bounds checkers. */
118 /* Array size is actually fData->fHeader->fCatCount. */
374ca955
A
119 /* CAUTION: see RBBITableBuilder::getTableSize() */
120 /* before changing anything here. */
b75a7d8f
A
121};
122
123
124struct RBBIStateTable {
/* A state table: a small fixed header followed by the rows themselves. */
/* Rows are variable length, so fRowLen gives the size of each row in bytes. */
374ca955
A
125 uint32_t fNumStates; /* Number of states. */
126 uint32_t fRowLen; /* Length of a state table row, in bytes. */
127 uint32_t fFlags; /* Option Flags for this state table */
 /* NOTE(review): presumably values from */
 /* RBBIStateTableFlags -- confirm at use sites. */
128 uint32_t fReserved; /* reserved */
0f5d89e8
A
129 char fTableData[1]; /* First RBBIStateTableRow begins here. */
130 /* Variable-length array declared with length 1 */
131 /* to disable bounds checkers. */
374ca955
A
132 /* (making it char[] simplifies ugly address */
133 /* arithmetic for indexing variable length rows.) */
b75a7d8f
A
134};
135
374ca955 136typedef enum {
/* NOTE(review): by name these are the option flags kept in */
/* RBBIStateTable::fFlags; the values 1 and 2 suggest they can be combined */
/* as bits -- confirm at the use sites. */
73c04bcf
A
137 RBBI_LOOKAHEAD_HARD_BREAK = 1,
138 RBBI_BOF_REQUIRED = 2
374ca955
A
139} RBBIStateTableFlags;
140
b75a7d8f 141
374ca955
A
142/* */
143/* The reference counting wrapper class */
144/* */
b75a7d8f
A
145class RBBIDataWrapper : public UMemory {
/* Sits between the raw RBBI data structs and the RulesBasedBreakIterator */
/* objects that use them. It provides reference counting for the underlying */
/* data and direct pointers to the individual sections; it adds no higher */
/* level abstraction over the data itself. */
146public:
46f4442e
A
147 enum EDontAdopt {
148 kDontAdopt
149 };
 /* Three ways to construct a wrapper: from a raw header, from a raw header */
 /* with the kDontAdopt tag, or from a UDataMemory. */
 /* NOTE(review): ownership of the data in each case is not visible in this */
 /* header -- confirm in the implementation file. */
b75a7d8f 150 RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
46f4442e 151 RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
b75a7d8f
A
152 RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
153 ~RBBIDataWrapper();
154
0f5d89e8
A
155 static UBool isDataVersionAcceptable(const UVersionInfo version);
156
b331163b 157 void init0();
b75a7d8f
A
158 void init(const RBBIDataHeader *data, UErrorCode &status);
 /* Reference counting for the shared data. */
159 RBBIDataWrapper *addReference();
160 void removeReference();
161 UBool operator ==(const RBBIDataWrapper &other) const;
162 int32_t hashCode();
374ca955 163 const UnicodeString &getRuleSourceString() const;
b75a7d8f 164 void printData();
374ca955 165 void printTable(const char *heading, const RBBIStateTable *table);
b75a7d8f 166
374ca955
A
167 /* */
168 /* Pointers to items within the data */
169 /* */
b75a7d8f
A
170 const RBBIDataHeader *fHeader;
171 const RBBIStateTable *fForwardTable;
172 const RBBIStateTable *fReverseTable;
173 const UChar *fRuleSource;
374ca955
A
174 const int32_t *fRuleStatusTable;
175
176 /* Number of int32_t values in the rule status table. Used to sanity check indexing. */
177 int32_t fStatusMaxIdx;
b75a7d8f 178
0f5d89e8 179 UTrie2 *fTrie;
b75a7d8f
A
180
181private:
57a6839d 182 u_atomic_int32_t fRefCount; /* Atomic counter; by name, the reference count. */
0f5d89e8 183 UDataMemory *fUDataMem;
b75a7d8f 184 UnicodeString fRuleString;
46f4442e 185 UBool fDontFreeData; /* NOTE(review): presumably set by the kDontAdopt constructor -- confirm. */
b75a7d8f 186
374ca955
A
187 RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */
188 RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */
b75a7d8f
A
189};
190
374ca955
A
191
192
b75a7d8f
A
193U_NAMESPACE_END
194
374ca955 195#endif /* C++ */
b75a7d8f 196
374ca955 197#endif