ICU-64260.0.1.tar.gz

[apple/icu.git] / icuSources / i18n / regeximp.h
diff --git a/icuSources/i18n/regeximp.h b/icuSources/i18n/regeximp.h

index 3ee9102df1a5f945e76560f7f6cd52b4faeda19c..51db88216789b919b1bd7728d550676e2c46c760 100644 (file)
--- a/icuSources/i18n/regeximp.h
+++ b/icuSources/i18n/regeximp.h
@@ -1,5 +1,7 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  //
-//   Copyright (C) 2002-2010 International Business Machines Corporation
+//   Copyright (C) 2002-2015 International Business Machines Corporation
  //   and others. All rights reserved.
  //
  //   file:  regeximp.h
@@ -12,11 +14,21 @@
  #ifndef _REGEXIMP_H
  #define _REGEXIMP_H
  
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uniset.h"
+#include "unicode/utext.h"
+
  #include "cmemory.h"
+#include "ucase.h"
  
  U_NAMESPACE_BEGIN
  
-#ifdef REGEX_DEBUG   /* For debugging, define REGEX_DEBUG in regex.h, not here in this file. */
+// For debugging, define REGEX_DEBUG
+// To define with configure,
+//   CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux 
+
+#ifdef REGEX_DEBUG
  //
  //  debugging options.  Enable one or more of the three #defines immediately following
  //
@@ -36,19 +48,6 @@ U_NAMESPACE_BEGIN
  #define REGEX_SCAN_DEBUG_PRINTF(a)
  #endif
  
-#ifdef REGEX_DUMP_DEBUG
-#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
-#else
-#define REGEX_DUMP_DEBUG_PRINTF(a)
-#endif
-
-#ifdef REGEX_RUN_DEBUG
-#define REGEX_RUN_DEBUG_PRINTF(a) printf a
-#define REGEX_DUMP_DEBUG_PRINTF(a) printf a
-#else
-#define REGEX_RUN_DEBUG_PRINTF(a)
-#endif
-
  
  //
  //  Opcode types     In the compiled form of the regexp, these are the type, or opcodes,
@@ -176,7 +175,10 @@ enum {
       URX_BACKSLASH_BU  = 53,   // \b or \B in UREGEX_UWORD mode, using Unicode style
                                 //   word boundaries.
       URX_DOLLAR_D      = 54,   // $ end of input test, in UNIX_LINES mode.
-     URX_DOLLAR_MD     = 55    // $ end of input test, in MULTI_LINE and UNIX_LINES mode.
+     URX_DOLLAR_MD     = 55,   // $ end of input test, in MULTI_LINE and UNIX_LINES mode.
+     URX_BACKSLASH_H   = 56,   // Value field:  0:  \h    1:  \H
+     URX_BACKSLASH_R   = 57,   // Any line break sequence.
+     URX_BACKSLASH_V   = 58    // Value field:  0:  \v    1:  \V
  
  };
  
@@ -238,13 +240,15 @@ enum {
          "LOOP_DOT_I",          \
          "BACKSLASH_BU",        \
          "DOLLAR_D",            \
-        "DOLLAR_MD"
+        "DOLLAR_MD",           \
+        "URX_BACKSLASH_H",     \
+        "URX_BACKSLASH_R",     \
+        "URX_BACKSLASH_V" 
  
  
  //
  //  Convenience macros for assembling and disassembling a compiled operation.
  //
-#define URX_BUILD(type, val) (int32_t)((type << 24) | (val))
  #define URX_TYPE(x)          ((uint32_t)(x) >> 24)
  #define URX_VAL(x)           ((x) & 0xffffff)
  
@@ -352,6 +356,59 @@ inline void Regex8BitSet::operator = (const Regex8BitSet &s) {
     uprv_memcpy(d, s.d, sizeof(d));
  }
  
+
+//  Case folded UText Iterator helper class.
+//  Wraps a UText, provides a case-folded enumeration over its contents.
+//  Used in implementing case insensitive matching constructs.
+//  Implementation in rematch.cpp
+
+class CaseFoldingUTextIterator: public UMemory {
+      public:
+        CaseFoldingUTextIterator(UText &text);
+        ~CaseFoldingUTextIterator();
+
+        UChar32 next();           // Next case folded character
+
+        UBool   inExpansion();    // True if last char returned from next() and the
+                                  //  next to be returned both originated from a string
+                                  //  folding of the same code point from the original UText.
+      private:
+        UText             &fUText;
+        const  UChar      *fFoldChars;
+        int32_t            fFoldLength;
+        int32_t            fFoldIndex;
+
+};
+
+
+// Case folded UChar * string iterator.
+//  Wraps a UChar  *, provides a case-folded enumeration over its contents.
+//  Used in implementing case insensitive matching constructs.
+//  Implementation in rematch.cpp
+
+class CaseFoldingUCharIterator: public UMemory {
+      public:
+        CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit);
+        ~CaseFoldingUCharIterator();
+
+        UChar32 next();           // Next case folded character
+
+        UBool   inExpansion();    // True if last char returned from next() and the
+                                  //  next to be returned both originated from a string
+                                  //  folding of the same code point from the original UChar string.
+
+        int64_t  getIndex();      // Return the current input buffer index.
+
+      private:
+        const  UChar      *fChars;
+        int64_t            fIndex;
+        int64_t            fLimit;
+        const  UChar      *fFoldChars;
+        int32_t            fFoldLength;
+        int32_t            fFoldIndex;
+
+};
+
  U_NAMESPACE_END
  #endif