X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..ef6cf650f4a75c3f97de06b51fa104f2069b9ea2:/icuSources/common/util.cpp?ds=sidebyside diff --git a/icuSources/common/util.cpp b/icuSources/common/util.cpp index 7a588b5a..acb15854 100644 --- a/icuSources/common/util.cpp +++ b/icuSources/common/util.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (c) 2001-2006, International Business Machines +* Copyright (c) 2001-2011, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -8,8 +8,10 @@ ********************************************************************** */ -#include "util.h" #include "unicode/unimatch.h" +#include "unicode/utf16.h" +#include "patternprops.h" +#include "util.h" // Define UChar constants using hex for EBCDIC compatibility @@ -61,9 +63,6 @@ UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n, return result; } -static const UChar HEX[16] = {48,49,50,51,52,53,54,55, // 0-7 - 56,57,65,66,67,68,69,70}; // 8-9 A-F - /** * Return true if the character is NOT printable ASCII. */ @@ -82,17 +81,17 @@ UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) { result.append(BACKSLASH); if (c & ~0xFFFF) { result.append(UPPER_U); - result.append(HEX[0xF&(c>>28)]); - result.append(HEX[0xF&(c>>24)]); - result.append(HEX[0xF&(c>>20)]); - result.append(HEX[0xF&(c>>16)]); + result.append(DIGITS[0xF&(c>>28)]); + result.append(DIGITS[0xF&(c>>24)]); + result.append(DIGITS[0xF&(c>>20)]); + result.append(DIGITS[0xF&(c>>16)]); } else { result.append(LOWER_U); } - result.append(HEX[0xF&(c>>12)]); - result.append(HEX[0xF&(c>>8)]); - result.append(HEX[0xF&(c>>4)]); - result.append(HEX[0xF&c]); + result.append(DIGITS[0xF&(c>>12)]); + result.append(DIGITS[0xF&(c>>8)]); + result.append(DIGITS[0xF&(c>>4)]); + result.append(DIGITS[0xF&c]); return TRUE; } return FALSE; @@ -134,13 +133,8 @@ int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text, int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, UBool advance) { int32_t p = pos; - while (p < str.length()) { - UChar32 c = str.char32At(p); - if (!uprv_isRuleWhiteSpace(c)) { - break; - } - p += UTF_CHAR_LENGTH(c); - } + const UChar* s = str.getBuffer(); + p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s); if (advance) { pos = p; } @@ -148,8 +142,8 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, } /** - * Skip over whitespace in a Replaceable. Whitespace is defined by - * uprv_isRuleWhiteSpace(). Skipping may be done in the forward or + * Skip over Pattern_White_Space in a Replaceable. + * Skipping may be done in the forward or * reverse direction. In either case, the leftmost index will be * inclusive, and the rightmost index will be exclusive. That is, * given a range defined as [start, limit), the call @@ -175,11 +169,11 @@ int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, //? } //? //? while (pos != stop && -//? uprv_isRuleWhiteSpace(c = text.char32At(pos))) { +//? PatternProps::isWhiteSpace(c = text.char32At(pos))) { //? if (isForward) { -//? pos += UTF_CHAR_LENGTH(c); +//? pos += U16_LENGTH(c); //? } else { -//? pos -= UTF_CHAR_LENGTH(c); +//? pos -= U16_LENGTH(c); //? } //? } //? @@ -219,7 +213,7 @@ UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) { * pattern. Characters are matched literally and case-sensitively * except for the following special characters: * - * ~ zero or more uprv_isRuleWhiteSpace chars + * ~ zero or more Pattern_White_Space chars * * If end of pattern is reached with all matches along the way, * pos is advanced to the first unparsed index and returned. @@ -248,8 +242,8 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat, // parse \s* if (cpat == 126 /*~*/) { - if (uprv_isRuleWhiteSpace(c)) { - index += UTF_CHAR_LENGTH(c); + if (PatternProps::isWhiteSpace(c)) { + index += U16_LENGTH(c); continue; } else { if (++ipat == pat.length()) { @@ -261,8 +255,8 @@ int32_t ICU_Utility::parsePattern(const UnicodeString& pat, // parse literal else if (c == cpat) { - index += UTF_CHAR_LENGTH(c); - ipat += UTF_CHAR_LENGTH(cpat); + index += U16_LENGTH(c); + ipat += U16_LENGTH(cpat); if (ipat == pat.length()) { return index; // success; c parsed } @@ -373,7 +367,7 @@ void ICU_Utility::appendToRule(UnicodeString& rule, !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || - uprv_isRuleWhiteSpace(c)) { + PatternProps::isWhiteSpace(c)) { quoteBuf.append(c); // Double ' within a quote if (c == APOSTROPHE) { @@ -413,18 +407,3 @@ void ICU_Utility::appendToRule(UnicodeString& rule, } U_NAMESPACE_END - -U_CAPI UBool U_EXPORT2 -uprv_isRuleWhiteSpace(UChar32 c) { - /* "white space" in the sense of ICU rule parsers - This is a FIXED LIST that is NOT DEPENDENT ON UNICODE PROPERTIES. - See UAX #31 Identifier and Pattern Syntax: http://www.unicode.org/reports/tr31/ - U+0009..U+000D, U+0020, U+0085, U+200E..U+200F, and U+2028..U+2029 - Equivalent to test for Pattern_White_Space Unicode property. - */ - return (c >= 0x0009 && c <= 0x2029 && - (c <= 0x000D || c == 0x0020 || c == 0x0085 || - c == 0x200E || c == 0x200F || c >= 0x2028)); -} - -//eof