X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..57a6839dcb3bba09e8228b822b290604668416fe:/icuSources/common/rbbidata.h diff --git a/icuSources/common/rbbidata.h b/icuSources/common/rbbidata.h index c7ee0096..78a4ac76 100644 --- a/icuSources/common/rbbidata.h +++ b/icuSources/common/rbbidata.h @@ -1,105 +1,152 @@ -// file: rbbidata.h -// -//********************************************************************** -// Copyright (C) 1999 IBM Corp. All rights reserved. -//********************************************************************** -// -// RBBI data formats Includes -// -// Structs that describes the format of the Binary RBBI data, -// as it is stored in ICU's data file. -// -// RBBIDataWrapper - Instances of this class sit between the -// raw data structs and the RulesBasedBreakIterator objects -// that are created by applications. The wrapper class -// provides reference counting for the underlying data, -// and direct pointers to data that would not otherwise -// be accessible without ugly pointer arithmetic. The -// wrapper does not attempt to provide any higher level -// abstractions for the data itself. -// -// There will be only one instance of RBBIDataWrapper for any -// set of RBBI run time data being shared by instances -// (clones) of RulesBasedBreakIterator. -// +/* +******************************************************************************* +* +* Copyright (C) 1999-2013 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: rbbidata.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* RBBI data formats Includes +* +* Structs that describes the format of the Binary RBBI data, +* as it is stored in ICU's data file. +* +* RBBIDataWrapper - Instances of this class sit between the +* raw data structs and the RulesBasedBreakIterator objects +* that are created by applications. The wrapper class +* provides reference counting for the underlying data, +* and direct pointers to data that would not otherwise +* be accessible without ugly pointer arithmetic. The +* wrapper does not attempt to provide any higher level +* abstractions for the data itself. +* +* There will be only one instance of RBBIDataWrapper for any +* set of RBBI run time data being shared by instances +* (clones) of RulesBasedBreakIterator. +*/ #ifndef __RBBIDATA_H__ #define __RBBIDATA_H__ #include "unicode/utypes.h" +#include "unicode/udata.h" +#include "udataswp.h" + +/** + * Swap RBBI data. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ubrk_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#ifdef __cplusplus + #include "unicode/uobject.h" #include "unicode/unistr.h" -#include "unicode/udata.h" +#include "umutex.h" #include "utrie.h" - U_NAMESPACE_BEGIN -// -// The following structs map exactly onto the raw data from ICU common data file. -// +/* + * The following structs map exactly onto the raw data from ICU common data file. + */ struct RBBIDataHeader { - uint32_t fMagic; // == 0xbla0 - uint32_t fVersion; // == 1 - uint32_t fLength; // Total length in bytes of this RBBI Data, - // including all sections, not just the header. - uint32_t fCatCount; // Number of character categories. - - // - // Offsets and sizes of each of the subsections within the RBBI data. - // All offsets are bytes from the start of the RBBIDataHeader. - // All sizes are in bytes. - // - uint32_t fFTable; // forward state transition table. + uint32_t fMagic; /* == 0xbla0 */ + uint8_t fFormatVersion[4]; /* Data Format. Same as the value in struct UDataInfo */ + /* if there is one associated with this data. */ + /* (version originates in rbbi, is copied to UDataInfo) */ + /* For ICU 3.2 and earlier, this field was */ + /* uint32_t fVersion */ + /* with a value of 1. */ + uint32_t fLength; /* Total length in bytes of this RBBI Data, */ + /* including all sections, not just the header. */ + uint32_t fCatCount; /* Number of character categories. */ + + /* */ + /* Offsets and sizes of each of the subsections within the RBBI data. */ + /* All offsets are bytes from the start of the RBBIDataHeader. */ + /* All sizes are in bytes. */ + /* */ + uint32_t fFTable; /* forward state transition table. */ uint32_t fFTableLen; - uint32_t fRTable; // Offset to the reverse state transition table. + uint32_t fRTable; /* Offset to the reverse state transition table. */ uint32_t fRTableLen; - uint32_t fTrie; // Offset to Trie data for character categories + uint32_t fSFTable; /* safe point forward transition table */ + uint32_t fSFTableLen; + uint32_t fSRTable; /* safe point reverse transition table */ + uint32_t fSRTableLen; + uint32_t fTrie; /* Offset to Trie data for character categories */ uint32_t fTrieLen; - uint32_t fRuleSource; // Offset to the source for for the break - uint32_t fRuleSourceLen; // rules. Stored UChar *. + uint32_t fRuleSource; /* Offset to the source for for the break */ + uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ + uint32_t fStatusTable; /* Offset to the table of rule status values */ + uint32_t fStatusTableLen; - uint32_t fReserved[8]; // Reserved for expansion + uint32_t fReserved[6]; /* Reserved for expansion */ }; struct RBBIStateTableRow { - int16_t fAccepting; // Non-zero if this row is for an accepting state. - // Value is the {nnn} value to return to calling - // application. - int16_t fLookAhead; // Non-zero if this row is for a state that - // corresponds to a '/' in the rule source. - // Value is the same as the fAccepting - // value for the rule (which will appear - // in a different state. - int16_t fTag; // Non-zero if this row covers a {tagged} position - // from a rule. value is the tag number. + int16_t fAccepting; /* Non-zero if this row is for an accepting state. */ + /* Value 0: not an accepting state. */ + /* -1: Unconditional Accepting state. */ + /* positive: Look-ahead match has completed. */ + /* Actual boundary position happened earlier */ + /* Value here == fLookAhead in earlier */ + /* state, at actual boundary pos. */ + int16_t fLookAhead; /* Non-zero if this row is for a state that */ + /* corresponds to a '/' in the rule source. */ + /* Value is the same as the fAccepting */ + /* value for the rule (which will appear */ + /* in a different state. */ + int16_t fTagIdx; /* Non-zero if this row covers a {tagged} position */ + /* from a rule. Value is the index in the */ + /* StatusTable of the set of matching */ + /* tags (rule status values) */ int16_t fReserved; - uint16_t fNextState[2]; // Next State, indexed by char category. - // Array Size is fNumCols from the - // state table header. - // CAUTION: see RBBITableBuilder::getTableSize() - // before changing anything here. + uint16_t fNextState[2]; /* Next State, indexed by char category. */ + /* This array does not have two elements */ + /* Array Size is actually fData->fHeader->fCatCount */ + /* CAUTION: see RBBITableBuilder::getTableSize() */ + /* before changing anything here. */ }; struct RBBIStateTable { - uint32_t fNumStates; // Number of states. - uint32_t fRowLen; // Length of a state table row, in bytes. - char fTableData[4]; // First RBBIStateTableRow begins here. - // (making it char[] simplifies ugly address - // arithmetic for indexing variable length rows.) + uint32_t fNumStates; /* Number of states. */ + uint32_t fRowLen; /* Length of a state table row, in bytes. */ + uint32_t fFlags; /* Option Flags for this state table */ + uint32_t fReserved; /* reserved */ + char fTableData[4]; /* First RBBIStateTableRow begins here. */ + /* (making it char[] simplifies ugly address */ + /* arithmetic for indexing variable length rows.) */ }; +typedef enum { + RBBI_LOOKAHEAD_HARD_BREAK = 1, + RBBI_BOF_REQUIRED = 2 +} RBBIStateTableFlags; + -// -// The reference counting wrapper class -// +/* */ +/* The reference counting wrapper class */ +/* */ class RBBIDataWrapper : public UMemory { public: + enum EDontAdopt { + kDontAdopt + }; RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); + RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); ~RBBIDataWrapper(); @@ -108,29 +155,45 @@ public: void removeReference(); UBool operator ==(const RBBIDataWrapper &other) const; int32_t hashCode(); - const UnicodeString &getRuleSourceString(); + const UnicodeString &getRuleSourceString() const; +#ifdef RBBI_DEBUG void printData(); + void printTable(const char *heading, const RBBIStateTable *table); +#else + #define printData() + #define printTable(heading, table) +#endif - // - // Pointers to items within the data - // + /* */ + /* Pointers to items within the data */ + /* */ const RBBIDataHeader *fHeader; const RBBIStateTable *fForwardTable; const RBBIStateTable *fReverseTable; + const RBBIStateTable *fSafeFwdTable; + const RBBIStateTable *fSafeRevTable; const UChar *fRuleSource; + const int32_t *fRuleStatusTable; + + /* number of int32_t values in the rule status table. Used to sanity check indexing */ + int32_t fStatusMaxIdx; UTrie fTrie; private: - int32_t fRefCount; + u_atomic_int32_t fRefCount; UDataMemory *fUDataMem; UnicodeString fRuleString; + UBool fDontFreeData; - RBBIDataWrapper(const RBBIDataWrapper &other); // forbid copying of this class - RBBIDataWrapper &operator=(const RBBIDataWrapper &other); // forbid copying of this class + RBBIDataWrapper(const RBBIDataWrapper &other); /* forbid copying of this class */ + RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /* forbid copying of this class */ }; + + U_NAMESPACE_END -#endif +#endif /* C++ */ +#endif