X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..b801cf366c7671a99bdcef84d1e9c0ec64b36723:/icuSources/i18n/regeximp.h diff --git a/icuSources/i18n/regeximp.h b/icuSources/i18n/regeximp.h index 3ee9102d..52ea6626 100644 --- a/icuSources/i18n/regeximp.h +++ b/icuSources/i18n/regeximp.h @@ -1,5 +1,5 @@ // -// Copyright (C) 2002-2010 International Business Machines Corporation +// Copyright (C) 2002-2015 International Business Machines Corporation // and others. All rights reserved. // // file: regeximp.h @@ -12,11 +12,21 @@ #ifndef _REGEXIMP_H #define _REGEXIMP_H +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/uniset.h" +#include "unicode/utext.h" + #include "cmemory.h" +#include "ucase.h" U_NAMESPACE_BEGIN -#ifdef REGEX_DEBUG /* For debugging, define REGEX_DEBUG in regex.h, not here in this file. */ +// For debugging, define REGEX_DEBUG +// To define with configure, +// CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release Linux + +#ifdef REGEX_DEBUG // // debugging options. Enable one or more of the three #defines immediately following // @@ -36,19 +46,6 @@ U_NAMESPACE_BEGIN #define REGEX_SCAN_DEBUG_PRINTF(a) #endif -#ifdef REGEX_DUMP_DEBUG -#define REGEX_DUMP_DEBUG_PRINTF(a) printf a -#else -#define REGEX_DUMP_DEBUG_PRINTF(a) -#endif - -#ifdef REGEX_RUN_DEBUG -#define REGEX_RUN_DEBUG_PRINTF(a) printf a -#define REGEX_DUMP_DEBUG_PRINTF(a) printf a -#else -#define REGEX_RUN_DEBUG_PRINTF(a) -#endif - // // Opcode types In the compiled form of the regexp, these are the type, or opcodes, @@ -176,7 +173,10 @@ enum { URX_BACKSLASH_BU = 53, // \b or \B in UREGEX_UWORD mode, using Unicode style // word boundaries. URX_DOLLAR_D = 54, // $ end of input test, in UNIX_LINES mode. - URX_DOLLAR_MD = 55 // $ end of input test, in MULTI_LINE and UNIX_LINES mode. + URX_DOLLAR_MD = 55, // $ end of input test, in MULTI_LINE and UNIX_LINES mode. + URX_BACKSLASH_H = 56, // Value field: 0: \h 1: \H + URX_BACKSLASH_R = 57, // Any line break sequence. + URX_BACKSLASH_V = 58 // Value field: 0: \v 1: \V }; @@ -238,13 +238,15 @@ enum { "LOOP_DOT_I", \ "BACKSLASH_BU", \ "DOLLAR_D", \ - "DOLLAR_MD" + "DOLLAR_MD", \ + "URX_BACKSLASH_H", \ + "URX_BACKSLASH_R", \ + "URX_BACKSLASH_V" // // Convenience macros for assembling and disassembling a compiled operation. // -#define URX_BUILD(type, val) (int32_t)((type << 24) | (val)) #define URX_TYPE(x) ((uint32_t)(x) >> 24) #define URX_VAL(x) ((x) & 0xffffff) @@ -352,6 +354,61 @@ inline void Regex8BitSet::operator = (const Regex8BitSet &s) { uprv_memcpy(d, s.d, sizeof(d)); } + +// Case folded UText Iterator helper class. +// Wraps a UText, provides a case-folded enumeration over its contents. +// Used in implementing case insensitive matching constructs. +// Implementation in rematch.cpp + +class CaseFoldingUTextIterator: public UMemory { + public: + CaseFoldingUTextIterator(UText &text); + ~CaseFoldingUTextIterator(); + + UChar32 next(); // Next case folded character + + UBool inExpansion(); // True if last char returned from next() and the + // next to be returned both originated from a string + // folding of the same code point from the orignal UText. + private: + UText &fUText; + const UCaseProps *fcsp; + const UChar *fFoldChars; + int32_t fFoldLength; + int32_t fFoldIndex; + +}; + + +// Case folded UChar * string iterator. +// Wraps a UChar *, provides a case-folded enumeration over its contents. +// Used in implementing case insensitive matching constructs. +// Implementation in rematch.cpp + +class CaseFoldingUCharIterator: public UMemory { + public: + CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limit); + ~CaseFoldingUCharIterator(); + + UChar32 next(); // Next case folded character + + UBool inExpansion(); // True if last char returned from next() and the + // next to be returned both originated from a string + // folding of the same code point from the orignal UText. + + int64_t getIndex(); // Return the current input buffer index. + + private: + const UChar *fChars; + int64_t fIndex; + int64_t fLimit; + const UCaseProps *fcsp; + const UChar *fFoldChars; + int32_t fFoldLength; + int32_t fFoldIndex; + +}; + U_NAMESPACE_END #endif