2 *******************************************************************************
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: rbbidata.h
10 * tab size: 8 (not used)
13 * RBBI data formats Includes
15 * Structs that describe the format of the Binary RBBI data,
16 * as it is stored in ICU's data file.
18 * RBBIDataWrapper - Instances of this class sit between the
19 * raw data structs and the RulesBasedBreakIterator objects
20 * that are created by applications. The wrapper class
21 * provides reference counting for the underlying data,
22 * and direct pointers to data that would not otherwise
23 * be accessible without ugly pointer arithmetic. The
24 * wrapper does not attempt to provide any higher level
25 * abstractions for the data itself.
27 * There will be only one instance of RBBIDataWrapper for any
28 * set of RBBI run time data being shared by instances
29 * (clones) of RulesBasedBreakIterator.
32 #ifndef __RBBIDATA_H__
33 #define __RBBIDATA_H__
35 #include "unicode/utypes.h"
36 #include "unicode/udata.h"
40 * Swap RBBI data. See udataswp.h.
43 U_CAPI
int32_t U_EXPORT2
44 ubrk_swap(const UDataSwapper
*ds
,
45 const void *inData
, int32_t length
, void *outData
,
46 UErrorCode
*pErrorCode
);
50 #include "unicode/uobject.h"
51 #include "unicode/unistr.h"
57 * The following structs map exactly onto the raw data from ICU common data file.
/*
 * Header found at the start of a block of binary RBBI data.
 *
 * The member order and types define the on-disk layout, so they must not be
 * reordered or resized.
 */
struct RBBIDataHeader {
    uint32_t fMagic;           /* Magic number, == 0xb1a0.                          */
                               /* NOTE(review): the comment used to read "0xbla0",  */
                               /* which is not valid hex; 0xb1a0 is presumed --     */
                               /* confirm against the code that checks the header.  */
    uint32_t fVersion;         /* == 1                                              */
    uint32_t fLength;          /* Total length in bytes of this RBBI data,          */
                               /* including all sections, not just the header.      */
    uint32_t fCatCount;        /* Number of character categories.                   */

    /*
     * Offsets and sizes of each of the subsections within the RBBI data.
     * All offsets are bytes from the start of the RBBIDataHeader.
     * All sizes are in bytes.
     *
     * NOTE(review): only the rule source and the status table are followed by
     * a length member in this listing, although the note above speaks of a
     * size for every subsection; confirm the member list against the data
     * builder before relying on this layout.
     */
    uint32_t fFTable;          /* Offset to the forward state transition table.     */
    uint32_t fRTable;          /* Offset to the reverse state transition table.     */
    uint32_t fSFTable;         /* Offset to the safe point forward transition table.*/
    uint32_t fSRTable;         /* Offset to the safe point reverse transition table.*/
    uint32_t fTrie;            /* Offset to Trie data for character categories.     */
    uint32_t fRuleSource;      /* Offset to the source for the break rules,         */
                               /* stored as UChar.                                  */
    uint32_t fRuleSourceLen;   /* Size of the rule source (see size note above).    */
    uint32_t fStatusTable;     /* Offset to the table of rule status values.        */
    uint32_t fStatusTableLen;  /* Size of the status table (see size note above).   */

    uint32_t fReserved[6];     /* Reserved for expansion.                           */
};
/*
 * One row of a state transition table.
 *
 * CAUTION: see RBBITableBuilder::getTableSize() before changing anything
 * here; the member order and sizes are part of the binary data format.
 */
struct RBBIStateTableRow {
    int16_t  fAccepting;     /* Non-zero if this row is for an accepting state.     */
                             /*   0:        not an accepting state.                 */
                             /*  -1:        unconditional accepting state.          */
                             /*  positive:  look-ahead match has completed; the     */
                             /*             actual boundary position happened       */
                             /*             earlier.  The value equals fLookAhead   */
                             /*             in the earlier state, at the actual     */
                             /*             boundary position.                      */
    int16_t  fLookAhead;     /* Non-zero if this row is for a state that            */
                             /* corresponds to a '/' in the rule source.  The value */
                             /* is the same as the fAccepting value for the rule    */
                             /* (which will appear in a different state).           */
    int16_t  fTagIdx;        /* Non-zero if this row covers a {tagged} position     */
                             /* from a rule.  The value is the index in the         */
                             /* StatusTable of the set of matching tags (rule       */
                             /* status values).                                     */
    uint16_t fNextState[2];  /* Next state, indexed by char category.  Declared     */
                             /* with 2 entries; per the original comment the real   */
                             /* array size is fNumCols from the state table header. */
};
/*
 * Header of a state transition table, immediately followed by the rows.
 *
 * The member order and types are part of the binary data format.
 */
struct RBBIStateTable {
    uint32_t fNumStates;     /* Number of states.                                   */
    uint32_t fRowLen;        /* Length of a state table row, in bytes.              */
    uint32_t fFlags;         /* Option flags for this state table.                  */
    uint32_t fReserved;      /* Reserved.                                           */
    char     fTableData[4];  /* First RBBIStateTableRow begins here.  Declaring it  */
                             /* as a char array simplifies the address arithmetic   */
                             /* needed to index variable length rows.               */
};
/*
 * Named flag values for a state table.
 * NOTE(review): presumably these are the bits stored in the fFlags member of
 * RBBIStateTable; confirm against the code that reads that member.
 * NOTE(review): the "typedef enum {" opener was absent from this listing and
 * has been reconstructed from the trailing "} RBBIStateTableFlags;".
 */
typedef enum {
    RBBI_LOOKAHEAD_HARD_BREAK = 1
} RBBIStateTableFlags;
134 /* The reference counting wrapper class */
136 class RBBIDataWrapper
: public UMemory
{
138 RBBIDataWrapper(const RBBIDataHeader
*data
, UErrorCode
&status
);
139 RBBIDataWrapper(UDataMemory
* udm
, UErrorCode
&status
);
142 void init(const RBBIDataHeader
*data
, UErrorCode
&status
);
143 RBBIDataWrapper
*addReference();
144 void removeReference();
145 UBool
operator ==(const RBBIDataWrapper
&other
) const;
147 const UnicodeString
&getRuleSourceString() const;
150 void printTable(const char *heading
, const RBBIStateTable
*table
);
153 #define printTable(heading, table)
157 /* Pointers to items within the data */
159 const RBBIDataHeader
*fHeader
;
160 const RBBIStateTable
*fForwardTable
;
161 const RBBIStateTable
*fReverseTable
;
162 const RBBIStateTable
*fSafeFwdTable
;
163 const RBBIStateTable
*fSafeRevTable
;
164 const UChar
*fRuleSource
;
165 const int32_t *fRuleStatusTable
;
167 /* number of int32_t values in the rule status table. Used to sanity check indexing */
168 int32_t fStatusMaxIdx
;
174 UDataMemory
*fUDataMem
;
175 UnicodeString fRuleString
;
177 RBBIDataWrapper(const RBBIDataWrapper
&other
); /* forbid copying of this class */
178 RBBIDataWrapper
&operator=(const RBBIDataWrapper
&other
); /* forbid copying of this class */