]>
Commit | Line | Data |
---|---|---|
0f5d89e8 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * | |
6 | * Copyright (C) 1999-2014 International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ******************************************************************************* | |
10 | * | |
11 | ********************************************************************** | |
12 | * Legacy version of RBBIDataHeader and RBBIDataWrapper from ICU 57, | |
13 | * only for use by Apple RuleBasedTokenizer | |
14 | ********************************************************************** | |
15 | * | |
16 | * RBBI data formats Includes | |
17 | * | |
18 | * Structs that describe the format of the binary RBBI data, | |
19 | * as it is stored in ICU's data file. | |
20 | * | |
21 | * RBBIDataWrapper - Instances of this class sit between the | |
22 | * raw data structs and the RuleBasedBreakIterator objects | |
23 | * that are created by applications. The wrapper class | |
24 | * provides reference counting for the underlying data, | |
25 | * and direct pointers to data that would not otherwise | |
26 | * be accessible without ugly pointer arithmetic. The | |
27 | * wrapper does not attempt to provide any higher level | |
28 | * abstractions for the data itself. | |
29 | * | |
30 | * There will be only one instance of RBBIDataWrapper for any | |
31 | * set of RBBI run time data being shared by instances | |
32 | * (clones) of RuleBasedBreakIterator. | |
33 | */ | |
34 | ||
35 | #ifndef __RBBIDATA57_H__ | |
36 | #define __RBBIDATA57_H__ | |
37 | ||
38 | #include "unicode/utypes.h" | |
39 | #include "unicode/udata.h" | |
40 | #include "udataswp.h" | |
41 | #include "rbbidata.h" | |
42 | ||
43 | #ifdef __cplusplus | |
44 | ||
45 | #include "unicode/uobject.h" | |
46 | #include "unicode/unistr.h" | |
47 | #include "umutex.h" | |
48 | #include "utrie.h" | |
49 | ||
50 | U_NAMESPACE_BEGIN | |
51 | ||
52 | /* | |
53 | * The following struct maps exactly onto the raw data from the ICU common data file (the state table structs come from rbbidata.h). | |
54 | */ | |
55 | struct RBBIDataHeader57 { | |
56 | uint32_t fMagic; /* == 0xb1a0 */ | |
57 | uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ | |
58 | /* if there is one associated with this data. */ | |
59 | /* (version originates in rbbi, is copied to UDataInfo) */ | |
60 | /* For ICU 3.2 and earlier, this field was */ | |
61 | /* uint32_t fVersion */ | |
62 | /* with a value of 1. */ | |
63 | uint32_t fLength; /* Total length in bytes of this RBBI Data, */ | |
64 | /* including all sections, not just the header. */ | |
65 | uint32_t fCatCount; /* Number of character categories. */ | |
66 | ||
67 | /* */ | |
68 | /* Offsets and sizes of each of the subsections within the RBBI data. */ | |
69 | /* All offsets are bytes from the start of the RBBIDataHeader57. */ | |
70 | /* All sizes are in bytes. */ | |
71 | /* */ | |
72 | uint32_t fFTable; /* Offset to the forward state transition table. */ | |
73 | uint32_t fFTableLen; /* Size of the forward state transition table. */ | |
74 | uint32_t fRTable; /* Offset to the reverse state transition table. */ | |
75 | uint32_t fRTableLen; /* Size of the reverse state transition table. */ | |
76 | uint32_t fSFTable; /* Offset to the safe point forward transition table. */ | |
77 | uint32_t fSFTableLen; /* Size of the safe point forward transition table. */ | |
78 | uint32_t fSRTable; /* Offset to the safe point reverse transition table. */ | |
79 | uint32_t fSRTableLen; /* Size of the safe point reverse transition table. */ | |
80 | uint32_t fTrie; /* Offset to Trie data for character categories */ | |
81 | uint32_t fTrieLen; /* Size of the Trie data. */ | |
82 | uint32_t fRuleSource; /* Offset to the source for the break */ | |
83 | uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ | |
84 | uint32_t fStatusTable; /* Offset to the table of rule status values */ | |
85 | uint32_t fStatusTableLen; /* Size of the rule status table. */ | |
86 | ||
87 | uint32_t fReserved[6]; /* Reserved for expansion */ | |
88 | ||
89 | }; | |
90 | ||
91 | ||
92 | ||
93 | // struct RBBIStateTableRow: standard one from rbbidata.h | |
94 | ||
95 | ||
96 | // struct RBBIStateTable: standard one from rbbidata.h | |
97 | ||
98 | /* */ | |
99 | /* The reference counting wrapper class: holds direct pointers to items within one set of RBBI data */ | |
100 | /* */ | |
101 | class RBBIDataWrapper57 : public UMemory { | |
102 | public: | |
103 | enum EDontAdopt { /* Tag type: selects the constructor overload that takes an EDontAdopt argument */ | |
104 | kDontAdopt | |
105 | }; | |
106 | RBBIDataWrapper57(const RBBIDataHeader57 *data, UErrorCode &status); | |
107 | RBBIDataWrapper57(const RBBIDataHeader57 *data, enum EDontAdopt dontAdopt, UErrorCode &status); /* NOTE(review): name suggests the wrapper does not take ownership of data - confirm in the implementation */ | |
108 | RBBIDataWrapper57(UDataMemory* udm, UErrorCode &status); | |
109 | ~RBBIDataWrapper57(); | |
110 | ||
111 | void init0(); | |
112 | void init(const RBBIDataHeader57 *data, UErrorCode &status); | |
113 | RBBIDataWrapper57 *addReference(); /* NOTE(review): presumably increments fRefCount - confirm in the implementation */ | |
114 | void removeReference(); /* NOTE(review): presumably decrements fRefCount - confirm in the implementation */ | |
115 | UBool operator ==(const RBBIDataWrapper57 &other) const; | |
116 | int32_t hashCode(); | |
117 | const UnicodeString &getRuleSourceString() const; | |
118 | #ifdef RBBI_DEBUG | |
119 | void printData(); | |
120 | void printTable(const char *heading, const RBBIStateTable *table); | |
121 | #else /* RBBI_DEBUG not defined: printData()/printTable() are empty macros, so calls expand to nothing */ | |
122 | #define printData() | |
123 | #define printTable(heading, table) | |
124 | #endif | |
125 | ||
126 | /* */ | |
127 | /* Pointers to items within the data */ | |
128 | /* */ | |
129 | const RBBIDataHeader57 *fHeader; | |
130 | const RBBIStateTable *fForwardTable; | |
131 | const RBBIStateTable *fReverseTable; | |
132 | const RBBIStateTable *fSafeFwdTable; | |
133 | const RBBIStateTable *fSafeRevTable; | |
134 | const UChar *fRuleSource; | |
135 | const int32_t *fRuleStatusTable; | |
136 | ||
137 | /* number of int32_t values in the rule status table. Used to sanity check indexing */ | |
138 | int32_t fStatusMaxIdx; | |
139 | ||
140 | UTrie fTrie; | |
141 | ||
142 | private: | |
143 | u_atomic_int32_t fRefCount; /* Reference count for the wrapped data */ | |
144 | UDataMemory *fUDataMem; /* NOTE(review): presumably the UDataMemory given to the UDataMemory* constructor - confirm */ | |
145 | UnicodeString fRuleString; /* NOTE(review): presumably the string returned by getRuleSourceString() - confirm */ | |
146 | UBool fDontFreeData; /* NOTE(review): presumably set by the kDontAdopt constructor - confirm */ | |
147 | ||
148 | RBBIDataWrapper57(const RBBIDataWrapper57 &other); /* forbid copying of this class */ | |
149 | RBBIDataWrapper57 &operator=(const RBBIDataWrapper57 &other); /* forbid copying of this class */ | |
150 | }; | |
151 | ||
152 | ||
153 | ||
154 | U_NAMESPACE_END | |
155 | ||
156 | #endif /* C++ */ | |
157 | ||
158 | #endif |