ICU-59180.0.1.tar.gz

[apple/icu.git] / icuSources / i18n / rbt_pars.h
diff --git a/icuSources/i18n/rbt_pars.h b/icuSources/i18n/rbt_pars.h

index d025d00bf4b533c8543e7942d1b5aa515914db74..61ce9727e05f9d2cad4faaa9515734830eca0dcb 100644 (file)
--- a/icuSources/i18n/rbt_pars.h
+++ b/icuSources/i18n/rbt_pars.h
@@ -1,5 +1,9 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
-* Copyright (C) {1999-2003}, International Business Machines Corporation and others. All Rights Reserved.
+**********************************************************************
+* Copyright (C) 1999-2011, International Business Machines Corporation
+* and others. All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
  *   11/17/99    aliu        Creation.
@@ -11,11 +15,14 @@
  #include "unicode/utypes.h"
  
  #if !UCONFIG_NO_TRANSLITERATION
+#ifdef __cplusplus
  
  #include "unicode/uobject.h"
  #include "unicode/parseerr.h"
  #include "unicode/unorm.h"
  #include "rbt.h"
+#include "hash.h"
+#include "uvector.h"
  
  U_NAMESPACE_BEGIN
  
@@ -24,7 +31,6 @@ class UnicodeFunctor;
  class ParseData;
  class RuleHalf;
  class ParsePosition;
-class UVector;
  class StringMatcher;
  
  class TransliteratorParser : public UMemory {
@@ -32,27 +38,16 @@ class TransliteratorParser : public UMemory {
   public:
  
      /**
-     * PUBLIC data member containing the parsed data object, or null if
-     * there were no rules.
-     */
-    TransliterationRuleData* data;
-
-    /**
-     * PUBLIC data member.
-     * The block of ::IDs, both at the top and at the bottom.
-     * Inserted into these may be additional rules at the
-     * idSplitPoint.
+     * A Vector of TransliterationRuleData objects, one for each discrete group
+     * of rules in the rule set
       */
-    UnicodeString idBlock;
+    UVector dataVector;
  
      /**
       * PUBLIC data member.
-     * In a compound RBT, the index at which the RBT rules are
-     * inserted into the ID block.  Index 0 means before any IDs
-     * in the block.  Index idBlock.length() means after all IDs
-     * in the block.  Index is a string index.
+     * A Vector of UnicodeStrings containing all of the ID blocks in the rule set
       */
-    int32_t idSplitPoint;
+    UVector idBlockVector;
  
      /**
       * PUBLIC data member containing the parsed compound filter, if any.
@@ -61,18 +56,12 @@ class TransliteratorParser : public UMemory {
  
   private:
  
-    // The number of rules parsed.  This tells us if there were
-    // any actual transliterator rules, or if there were just ::ID
-    // block IDs.
-    int32_t ruleCount;
-
-    UTransDirection direction;
-
      /**
-     * We use a single error code during parsing.  Rather than pass it
-     * through each API, we keep it here.
+     * The current data object for which we are parsing rules
       */
-    UErrorCode status;
+    TransliterationRuleData* curData;
+
+    UTransDirection direction;
  
      /**
       * Parse error information.
@@ -89,8 +78,14 @@ class TransliteratorParser : public UMemory {
       * is copied into the array data.variables.  As with data.variables,
       * element 0 corresponds to character data.variablesBase.
       */
-    UVector* variablesVector;
+    UVector variablesVector;
  
+    /**
+     * Temporary table of variable names.  When parsing is complete, this is
+     * copied into data.variableNames.
+     */
+    Hashtable variableNames;    
+    
      /**
       * String of standins for segments.  Used during the parsing of a single
       * rule.  segmentStandins.charAt(0) is the standin for "$1" and corresponds
@@ -104,7 +99,7 @@ class TransliteratorParser : public UMemory {
       * segmentStandins.charAt(0) is the standin for "$1" and corresponds
       * to StringMatcher object segmentObjects.elementAt(0), etc.
       */
-    UVector* segmentObjects;
+    UVector segmentObjects;
  
      /**
       * The next available stand-in for variables.  This starts at some point in
@@ -142,7 +137,7 @@ public:
      /**
       * Constructor.
       */
-    TransliteratorParser();
+    TransliteratorParser(UErrorCode &statusReturn);
  
      /**
       * Destructor.
@@ -176,12 +171,6 @@ public:
       */ 
      UnicodeSet* orphanCompoundFilter();
  
-    /**
-     * Return the data object parsed by parse().  Caller owns result.
-     * @return the data object parsed by parse().
-     */
-    TransliterationRuleData* orphanData();
-
  private:
  
      /**
@@ -190,7 +179,8 @@ private:
       * @param direction  either FORWARD or REVERSE.
       */
      void parseRules(const UnicodeString& rules,
-                    UTransDirection direction);
+                    UTransDirection direction,
+                    UErrorCode& status);
  
      /**
       * MAIN PARSER.  Parse the next rule in the given rule string, starting
@@ -209,14 +199,14 @@ private:
       * @param limit      pointer past the last character of the rule.
       * @return           the index after the last character parsed.
       */
-    int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit);
+    int32_t parseRule(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
  
      /**
       * Set the variable range to [start, end] (inclusive).
       * @param start    the start value of the range.
       * @param end      the end value of the range.
       */
-    void setVariableRange(int32_t start, int32_t end);
+    void setVariableRange(int32_t start, int32_t end, UErrorCode& status);
  
      /**
       * Assert that the given character is NOT within the variable range.
@@ -259,7 +249,7 @@ private:
       * @return the position index after the final ';' of the pragma,
       * or -1 on failure.
       */
-    int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit);
+    int32_t parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status);
  
      /**
       * Called by main parser upon syntax error.  Search the rule string
@@ -271,7 +261,8 @@ private:
       * @param start position of first character of current rule.
       * @return start position of first character of current rule.
       */
-    int32_t syntaxError(UErrorCode parseErrorCode, const UnicodeString&, int32_t start);
+    int32_t syntaxError(UErrorCode parseErrorCode, const UnicodeString&, int32_t start,
+                        UErrorCode& status);
  
      /**
       * Parse a UnicodeSet out, store it, and return the stand-in character
@@ -282,7 +273,8 @@ private:
       * @return        the stand-in character used to represent it.
       */
      UChar parseSet(const UnicodeString& rule,
-                   ParsePosition& pos);
+                   ParsePosition& pos,
+                   UErrorCode& status);
  
      /**
       * Generate and return a stand-in for a new UnicodeFunctor.  Store
@@ -290,28 +282,28 @@ private:
       * @param adopted the UnicodeFunctor to be adopted.
       * @return        a stand-in for a new UnicodeFunctor.
       */
-    UChar generateStandInFor(UnicodeFunctor* adopted);
+    UChar generateStandInFor(UnicodeFunctor* adopted, UErrorCode& status);
  
      /**
       * Return the standin for segment seg (1-based).
       * @param seg    the given segment.
       * @return       the standIn character for the given segment.
       */
-    UChar getSegmentStandin(int32_t seg);
+    UChar getSegmentStandin(int32_t seg, UErrorCode& status);
  
      /**
       * Set the object for segment seg (1-based).
       * @param seg      the given segment.
       * @param adopted  the StringMatcher to be adopted.
       */
-    void setSegmentObject(int32_t seg, StringMatcher* adopted);
+    void setSegmentObject(int32_t seg, StringMatcher* adopted, UErrorCode& status);
  
      /**
       * Return the stand-in for the dot set.  It is allocated the first
       * time and reused thereafter.
       * @return    the stand-in for the dot set.
       */
-    UChar getDotStandIn();
+    UChar getDotStandIn(UErrorCode& status);
  
      /**
       * Append the value of the given variable name to the given
@@ -320,13 +312,14 @@ private:
       * @param buf     the given UnicodeString to append to.
       */
      void appendVariableDef(const UnicodeString& name,
-                           UnicodeString& buf);
+                           UnicodeString& buf,
+                           UErrorCode& status);
  
      /**
       * Glue method to get around access restrictions in C++.
       */
-    static Transliterator* createBasicInstance(const UnicodeString& id,
-                                               const UnicodeString* canonID);
+    /*static Transliterator* createBasicInstance(const UnicodeString& id,
+                                               const UnicodeString* canonID);*/
  
      friend class RuleHalf;
  
@@ -344,6 +337,21 @@ private:
  
  U_NAMESPACE_END
  
+#endif /* #ifdef __cplusplus */
+
+/**
+ * Strip/convert the following from the transliterator rules:
+ * comments
+ * newlines
+ * white space at the beginning and end of a line
+ * unescape \u notation
+ *
+ * The target must be equal in size as the source.
+ * @internal
+ */
+U_CAPI int32_t
+utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status);
+
  #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  
  #endif