]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/rbbidata.h
ICU-531.30.tar.gz
[apple/icu.git] / icuSources / common / rbbidata.h
index c7ee0096d1d2a2cb4a5b8ee08677aff0357ded99..78a4ac762c88fd13dddf496d449de8de9cb718c2 100644 (file)
-//  file:  rbbidata.h
-//
-//**********************************************************************
-//   Copyright (C) 1999 IBM Corp. All rights reserved.
-//**********************************************************************
-//
-//   RBBI data formats  Includes
-//
-//                          Structs that describes the format of the Binary RBBI data,
-//                          as it is stored in ICU's data file.
-//
-//      RBBIDataWrapper  -  Instances of this class sit between the
-//                          raw data structs and the RulesBasedBreakIterator objects
-//                          that are created by applications.  The wrapper class
-//                          provides reference counting for the underlying data,
-//                          and direct pointers to data that would not otherwise
-//                          be accessible without ugly pointer arithmetic.  The
-//                          wrapper does not attempt to provide any higher level
-//                          abstractions for the data itself.
-//
-//                          There will be only one instance of RBBIDataWrapper for any
-//                          set of RBBI run time data being shared by instances
-//                          (clones) of RulesBasedBreakIterator.
-//
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2013 International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  rbbidata.h
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   RBBI data formats  Includes
+*
+*                          Structs that describe the format of the Binary RBBI data,
+*                          as it is stored in ICU's data file.
+*
+*      RBBIDataWrapper  -  Instances of this class sit between the
+*                          raw data structs and the RulesBasedBreakIterator objects
+*                          that are created by applications.  The wrapper class
+*                          provides reference counting for the underlying data,
+*                          and direct pointers to data that would not otherwise
+*                          be accessible without ugly pointer arithmetic.  The
+*                          wrapper does not attempt to provide any higher level
+*                          abstractions for the data itself.
+*
+*                          There will be only one instance of RBBIDataWrapper for any
+*                          set of RBBI run time data being shared by instances
+*                          (clones) of RulesBasedBreakIterator.
+*/
 
 #ifndef __RBBIDATA_H__
 #define __RBBIDATA_H__
 
 #include "unicode/utypes.h"
+#include "unicode/udata.h"
+#include "udataswp.h"
+
+/**
+ * Swap RBBI data. See udataswp.h.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_swap(const UDataSwapper *ds,
+          const void *inData, int32_t length, void *outData,
+          UErrorCode *pErrorCode);
+
+#ifdef __cplusplus
+
 #include "unicode/uobject.h"
 #include "unicode/unistr.h"
-#include "unicode/udata.h"
+#include "umutex.h"
 #include "utrie.h"
 
-
 U_NAMESPACE_BEGIN
 
-//
-//  The following structs map exactly onto the raw data from ICU common data file.
-//
+/*  
+ *   The following structs map exactly onto the raw data from ICU common data file. 
+ */
 struct RBBIDataHeader {
-    uint32_t         fMagic;       // == 0xbla0
-    uint32_t         fVersion;     // == 1
-    uint32_t         fLength;      // Total length in bytes of this RBBI Data,
-                                   //     including all sections, not just the header.
-    uint32_t         fCatCount;    // Number of character categories.
-
-    //
-    // Offsets and sizes of each of the subsections within the RBBI data.
-    // All offsets are bytes from the start of the RBBIDataHeader.
-    // All sizes are in bytes.
-    //
-    uint32_t         fFTable;      // forward state transition table.
+    uint32_t         fMagic;           /*  == 0xb1a0                                               */
+    uint8_t          fFormatVersion[4]; /* Data Format.  Same as the value in struct UDataInfo      */
+                                       /*   if there is one associated with this data.             */
+                                       /*     (version originates in rbbi, is copied to UDataInfo) */
+                                       /*   For ICU 3.2 and earlier, this field was                */
+                                       /*       uint32_t  fVersion                                 */
+                                       /*   with a value of 1.                                     */
+    uint32_t         fLength;          /*  Total length in bytes of this RBBI Data,                */
+                                       /*      including all sections, not just the header.        */
+    uint32_t         fCatCount;        /*  Number of character categories.                         */
+
+    /*                                                                        */
+    /*  Offsets and sizes of each of the subsections within the RBBI data.    */
+    /*  All offsets are bytes from the start of the RBBIDataHeader.           */
+    /*  All sizes are in bytes.                                               */
+    /*                                                                        */
+    uint32_t         fFTable;         /*  forward state transition table. */
     uint32_t         fFTableLen;
-    uint32_t         fRTable;      // Offset to the reverse state transition table.
+    uint32_t         fRTable;         /*  Offset to the reverse state transition table. */
     uint32_t         fRTableLen;
-    uint32_t         fTrie;        // Offset to Trie data for character categories
+    uint32_t         fSFTable;        /*  safe point forward transition table */
+    uint32_t         fSFTableLen;
+    uint32_t         fSRTable;        /*  safe point reverse transition table */
+    uint32_t         fSRTableLen;
+    uint32_t         fTrie;           /*  Offset to Trie data for character categories */
     uint32_t         fTrieLen;
-    uint32_t         fRuleSource;  // Offset to the source for for the break
-    uint32_t         fRuleSourceLen;  //   rules.  Stored UChar *.
+    uint32_t         fRuleSource;     /*  Offset to the source for the break */
+    uint32_t         fRuleSourceLen;  /*    rules.  Stored UChar *. */
+    uint32_t         fStatusTable;    /* Offset to the table of rule status values */
+    uint32_t         fStatusTableLen;
 
-    uint32_t         fReserved[8]; // Reserved for expansion
+    uint32_t         fReserved[6];    /*  Reserved for expansion */
 
 };
 
 
 
 struct  RBBIStateTableRow {
-    int16_t          fAccepting;    // Non-zero if this row is for an accepting state.
-                                    // Value is the {nnn} value to return to calling
-                                    //    application.
-    int16_t          fLookAhead;    // Non-zero if this row is for a state that
-                                    //   corresponds to a '/' in the rule source.
-                                    //   Value is the same as the fAccepting
-                                    //     value for the rule (which will appear
-                                    //     in a different state.
-    int16_t          fTag;          // Non-zero if this row covers a {tagged} position
-                                    //    from a rule.  value is the tag number.
+    int16_t          fAccepting;    /*  Non-zero if this row is for an accepting state.   */
+                                    /*  Value 0: not an accepting state.                  */
+                                    /*       -1: Unconditional Accepting state.           */
+                                    /*    positive:  Look-ahead match has completed.      */
+                                    /*           Actual boundary position happened earlier */
+                                    /*           Value here == fLookAhead in earlier      */
+                                    /*              state, at actual boundary pos.        */
+    int16_t          fLookAhead;    /*  Non-zero if this row is for a state that          */
+                                    /*    corresponds to a '/' in the rule source.        */
+                                    /*    Value is the same as the fAccepting             */
+                                    /*      value for the rule (which will appear         */
+                                    /*      in a different state).                        */
+    int16_t          fTagIdx;       /*  Non-zero if this row covers a {tagged} position   */
+                                    /*     from a rule.  Value is the index in the        */
+                                    /*     StatusTable of the set of matching             */
+                                    /*     tags (rule status values)                      */
     int16_t          fReserved;
-    uint16_t         fNextState[2]; // Next State, indexed by char category.
-                                    //   Array Size is fNumCols from the
-                                    //   state table header.
-                                    //   CAUTION:  see RBBITableBuilder::getTableSize()
-                                    //             before changing anything here.
+    uint16_t         fNextState[2]; /*  Next State, indexed by char category.             */
+                                    /*  This array does not have two elements             */
+                                    /*    Array Size is actually fData->fHeader->fCatCount         */
+                                    /*    CAUTION:  see RBBITableBuilder::getTableSize()  */
+                                    /*              before changing anything here.        */
 };
 
 
 struct RBBIStateTable {
-    uint32_t         fNumStates;    // Number of states.
-    uint32_t         fRowLen;       // Length of a state table row, in bytes.
-    char             fTableData[4]; // First RBBIStateTableRow begins here.
-                                    //   (making it char[] simplifies ugly address
-                                    //    arithmetic for indexing variable length rows.)
+    uint32_t         fNumStates;    /*  Number of states.                                 */
+    uint32_t         fRowLen;       /*  Length of a state table row, in bytes.            */
+    uint32_t         fFlags;        /*  Option Flags for this state table                 */
+    uint32_t         fReserved;     /*  reserved                                          */
+    char             fTableData[4]; /*  First RBBIStateTableRow begins here.              */
+                                    /*    (making it char[] simplifies ugly address       */
+                                    /*     arithmetic for indexing variable length rows.) */
 };
 
+typedef enum {
+    RBBI_LOOKAHEAD_HARD_BREAK = 1,
+    RBBI_BOF_REQUIRED = 2
+} RBBIStateTableFlags;
+
 
-//
-//  The reference counting wrapper class
-//
+/*                                        */
+/*   The reference counting wrapper class */
+/*                                        */
 class RBBIDataWrapper : public UMemory {
 public:
+    enum EDontAdopt {
+        kDontAdopt
+    };
     RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status);
+    RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status);
     RBBIDataWrapper(UDataMemory* udm, UErrorCode &status);
     ~RBBIDataWrapper();
 
@@ -108,29 +155,45 @@ public:
     void                  removeReference();
     UBool                 operator ==(const RBBIDataWrapper &other) const;
     int32_t               hashCode();
-    const UnicodeString  &getRuleSourceString();
+    const UnicodeString  &getRuleSourceString() const;
+#ifdef RBBI_DEBUG
     void                  printData();
+    void                  printTable(const char *heading, const RBBIStateTable *table);
+#else
+    #define printData()
+    #define printTable(heading, table)
+#endif
 
-    //
-    //  Pointers to items within the data
-    //
+    /*                                     */
+    /*   Pointers to items within the data */
+    /*                                     */
     const RBBIDataHeader     *fHeader;
     const RBBIStateTable     *fForwardTable;
     const RBBIStateTable     *fReverseTable;
+    const RBBIStateTable     *fSafeFwdTable;
+    const RBBIStateTable     *fSafeRevTable;
     const UChar              *fRuleSource;
+    const int32_t            *fRuleStatusTable; 
+
+    /* number of int32_t values in the rule status table.   Used to sanity check indexing */
+    int32_t             fStatusMaxIdx;
 
     UTrie               fTrie;
 
 private:
-    int32_t             fRefCount;
+    u_atomic_int32_t    fRefCount;
     UDataMemory        *fUDataMem;
     UnicodeString       fRuleString;
+    UBool               fDontFreeData;
 
-    RBBIDataWrapper(const RBBIDataWrapper &other); // forbid copying of this class
-    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); // forbid copying of this class
+    RBBIDataWrapper(const RBBIDataWrapper &other); /*  forbid copying of this class */
+    RBBIDataWrapper &operator=(const RBBIDataWrapper &other); /*  forbid copying of this class */
 };
 
+
+
 U_NAMESPACE_END
 
-#endif
+#endif /* C++ */
 
+#endif