icuSources/i18n/numparse_stringsegment.cpp

   1 // © 2018 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3
   4 #include "unicode/utypes.h"
   5
   6 #if !UCONFIG_NO_FORMATTING
   7
   8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
   9 // Helpful in toString methods and elsewhere.
  10 #define UNISTR_FROM_STRING_EXPLICIT
  11
  12 #include "numparse_types.h"
  13 #include "numparse_stringsegment.h"
  14 #include "putilimp.h"
  15 #include "unicode/utf16.h"
  16 #include "unicode/uniset.h"
  17
  18 using namespace icu;
  19 using namespace icu::numparse;
  20 using namespace icu::numparse::impl;
  21
  22
  23 StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)
  24         : fStr(str), fStart(0), fEnd(str.length()),
  25           fFoldCase(ignoreCase) {}
  26
  27 int32_t StringSegment::getOffset() const {
  28     return fStart;
  29 }
  30
  31 void StringSegment::setOffset(int32_t start) {
  32     fStart = start;
  33 }
  34
  35 void StringSegment::adjustOffset(int32_t delta) {
  36     fStart += delta;
  37 }
  38
  39 void StringSegment::adjustOffsetByCodePoint() {
  40     fStart += U16_LENGTH(getCodePoint());
  41 }
  42
  43 void StringSegment::setLength(int32_t length) {
  44     fEnd = fStart + length;
  45 }
  46
  47 void StringSegment::resetLength() {
  48     fEnd = fStr.length();
  49 }
  50
  51 int32_t StringSegment::length() const {
  52     return fEnd - fStart;
  53 }
  54
  55 char16_t StringSegment::charAt(int32_t index) const {
  56     return fStr.charAt(index + fStart);
  57 }
  58
  59 UChar32 StringSegment::codePointAt(int32_t index) const {
  60     return fStr.char32At(index + fStart);
  61 }
  62
  63 UnicodeString StringSegment::toUnicodeString() const {
  64     return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart);
  65 }
  66
  67 const UnicodeString StringSegment::toTempUnicodeString() const {
  68     // Use the readonly-aliasing constructor for efficiency.
  69     return UnicodeString(FALSE, fStr.getBuffer() + fStart, fEnd - fStart);
  70 }
  71
  72 UChar32 StringSegment::getCodePoint() const {
  73     char16_t lead = fStr.charAt(fStart);
  74     if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) {
  75         return fStr.char32At(fStart);
  76     } else if (U16_IS_SURROGATE(lead)) {
  77         return -1;
  78     } else {
  79         return lead;
  80     }
  81 }
  82
  83 bool StringSegment::startsWith(UChar32 otherCp) const {
  84     return codePointsEqual(getCodePoint(), otherCp, fFoldCase);
  85 }
  86
  87 bool StringSegment::startsWith(const UnicodeSet& uniset) const {
  88     // TODO: Move UnicodeSet case-folding logic here.
  89     // TODO: Handle string matches here instead of separately.
  90     UChar32 cp = getCodePoint();
  91     if (cp == -1) {
  92         return false;
  93     }
  94     return uniset.contains(cp);
  95 }
  96
  97 bool StringSegment::startsWith(const UnicodeString& other) const {
  98     if (other.isBogus() || other.length() == 0 || length() == 0) {
  99         return false;
 100     }
 101     int cp1 = getCodePoint();
 102     int cp2 = other.char32At(0);
 103     return codePointsEqual(cp1, cp2, fFoldCase);
 104 }
 105
 106 int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
 107     return getPrefixLengthInternal(other, fFoldCase);
 108 }
 109
 110 int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) {
 111     return getPrefixLengthInternal(other, false);
 112 }
 113
 114 int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
 115     U_ASSERT(other.length() > 0);
 116     int32_t offset = 0;
 117     for (; offset < uprv_min(length(), other.length());) {
 118         // TODO: case-fold code points, not chars
 119         char16_t c1 = charAt(offset);
 120         char16_t c2 = other.charAt(offset);
 121         if (!codePointsEqual(c1, c2, foldCase)) {
 122             break;
 123         }
 124         offset++;
 125     }
 126     return offset;
 127 }
 128
 129 bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
 130     if (cp1 == cp2) {
 131         return true;
 132     }
 133     if (!foldCase) {
 134         return false;
 135     }
 136     cp1 = u_foldCase(cp1, TRUE);
 137     cp2 = u_foldCase(cp2, TRUE);
 138     return cp1 == cp2;
 139 }
 140
 141 bool StringSegment::operator==(const UnicodeString& other) const {
 142     return toTempUnicodeString() == other;
 143 }
 144
 145
 146 #endif /* #if !UCONFIG_NO_FORMATTING */