1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1999-2014 International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
11 **********************************************************************
12 * Legacy version of RBBIDataHeader and RBBIDataWrapper from ICU 57,
13 * only for use by Apple RuleBasedTokenizer
14 **********************************************************************
16 * RBBI data formats Includes
18 * Structs that describe the format of the Binary RBBI data,
19 * as it is stored in ICU's data file.
21 * RBBIDataWrapper - Instances of this class sit between the
22 * raw data structs and the RuleBasedBreakIterator objects
23 * that are created by applications. The wrapper class
24 * provides reference counting for the underlying data,
25 * and direct pointers to data that would not otherwise
26 * be accessible without ugly pointer arithmetic. The
27 * wrapper does not attempt to provide any higher level
28 * abstractions for the data itself.
30 * There will be only one instance of RBBIDataWrapper for any
31 * set of RBBI run time data being shared by instances
32 * (clones) of RuleBasedBreakIterator.
35 #ifndef __RBBIDATA57_H__
36 #define __RBBIDATA57_H__
38 #include "unicode/utypes.h"
39 #include "unicode/udata.h"
45 #include "unicode/uobject.h"
46 #include "unicode/unistr.h"
53 * The following structs map exactly onto the raw data from ICU common data file.
/*
 * RBBIDataHeader57: header describing a block of binary RBBI data in the
 * legacy ICU 57 layout. The fields map directly onto the raw data, so their
 * order and types must not be changed. Section offsets are byte offsets
 * from the start of this header, and section sizes are in bytes.
 */
55 struct RBBIDataHeader57
{
56 uint32_t fMagic
; /* == 0xb1a0 (magic number identifying RBBI data) */
57 uint8_t fFormatVersion
[4]; /* Data Format. Same as the value in struct UDataInfo */
58 /* if there is one associated with this data. */
59 /* (version originates in rbbi, is copied to UDataInfo) */
60 /* For ICU 3.2 and earlier, this field was */
61 /* uint32_t fVersion */
62 /* with a value of 1. */
63 uint32_t fLength
; /* Total length in bytes of this RBBI Data, */
64 /* including all sections, not just the header. */
65 uint32_t fCatCount
; /* Number of character categories. */
68 /* Offsets and sizes of each of the subsections within the RBBI data. */
69 /* All offsets are bytes from the start of the RBBIDataHeader57. */
70 /* All sizes are in bytes. */
72 uint32_t fFTable
; /* Offset to the forward state transition table. */
74 uint32_t fRTable
; /* Offset to the reverse state transition table. */
76 uint32_t fSFTable
; /* Offset to the safe point forward transition table. */
78 uint32_t fSRTable
; /* Offset to the safe point reverse transition table. */
80 uint32_t fTrie
; /* Offset to Trie data for character categories */
82 uint32_t fRuleSource
; /* Offset to the source for the break */
83 uint32_t fRuleSourceLen
; /* rules. Stored UChar *. */
84 uint32_t fStatusTable
; /* Offset to the table of rule status values */
85 uint32_t fStatusTableLen
; /* Size of the rule status table section (sizes are in bytes, see above). */
87 uint32_t fReserved
[6]; /* Reserved for expansion */
93 // struct RBBIStateTableRow: standard one from rbbidata.h
96 // struct RBBIStateTable: standard one from rbbidata.h
99 /* The reference counting wrapper class */
101 class RBBIDataWrapper57
: public UMemory
{
106 RBBIDataWrapper57(const RBBIDataHeader57
*data
, UErrorCode
&status
);
107 RBBIDataWrapper57(const RBBIDataHeader57
*data
, enum EDontAdopt dontAdopt
, UErrorCode
&status
);
108 RBBIDataWrapper57(UDataMemory
* udm
, UErrorCode
&status
);
109 ~RBBIDataWrapper57();
112 void init(const RBBIDataHeader57
*data
, UErrorCode
&status
);
113 RBBIDataWrapper57
*addReference();
114 void removeReference();
115 UBool
operator ==(const RBBIDataWrapper57
&other
) const;
117 const UnicodeString
&getRuleSourceString() const;
120 void printTable(const char *heading
, const RBBIStateTable
*table
);
123 #define printTable(heading, table)
127 /* Pointers to items within the data */
129 const RBBIDataHeader57
*fHeader
;
130 const RBBIStateTable
*fForwardTable
;
131 const RBBIStateTable
*fReverseTable
;
132 const RBBIStateTable
*fSafeFwdTable
;
133 const RBBIStateTable
*fSafeRevTable
;
134 const UChar
*fRuleSource
;
135 const int32_t *fRuleStatusTable
;
137 /* number of int32_t values in the rule status table. Used to sanity check indexing */
138 int32_t fStatusMaxIdx
;
143 u_atomic_int32_t fRefCount
;
144 UDataMemory
*fUDataMem
;
145 UnicodeString fRuleString
;
148 RBBIDataWrapper57(const RBBIDataWrapper57
&other
); /* forbid copying of this class */
149 RBBIDataWrapper57
&operator=(const RBBIDataWrapper57
&other
); /* forbid copying of this class */