]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/numparse_stringsegment.cpp
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
12 #include "numparse_types.h"
13 #include "numparse_stringsegment.h"
15 #include "unicode/utf16.h"
16 #include "unicode/uniset.h"
19 using namespace icu::numparse
;
20 using namespace icu::numparse::impl
;
23 StringSegment::StringSegment(const UnicodeString
& str
, bool ignoreCase
)
24 : fStr(str
), fStart(0), fEnd(str
.length()),
25 fFoldCase(ignoreCase
) {}
// NOTE(review): only the opening line of getOffset() is present in this extract; the
// embedded numbering jumps from 27 to 31, so the body and closing brace are missing.
// Presumably returns fStart -- confirm against the full file.
27 int32_t StringSegment::getOffset() const {
// NOTE(review): only the signature of setOffset() is present in this extract; the
// embedded numbering jumps from 31 to 35, so the body and closing brace are missing.
// Presumably assigns `start` to fStart -- confirm against the full file.
31 void StringSegment::setOffset(int32_t start
) {
// NOTE(review): only the signature of adjustOffset() is present in this extract; the
// embedded numbering jumps from 35 to 39, so the body and closing brace are missing.
// Presumably adds `delta` to fStart -- confirm against the full file.
35 void StringSegment::adjustOffset(int32_t delta
) {
39 void StringSegment::adjustOffsetByCodePoint() {
40 fStart
+= U16_LENGTH(getCodePoint());
43 void StringSegment::setLength(int32_t length
) {
44 fEnd
= fStart
+ length
;
// NOTE(review): only the opening line of resetLength() is present in this extract; the
// embedded numbering jumps from 47 to 51, so the body and closing brace are missing.
47 void StringSegment::resetLength() {
// NOTE(review): only the opening line of length() is present in this extract; the
// embedded numbering jumps from 51 to 55, so the body and closing brace are missing.
// Presumably returns fEnd - fStart -- confirm against the full file.
51 int32_t StringSegment::length() const {
55 char16_t StringSegment::charAt(int32_t index
) const {
56 return fStr
.charAt(index
+ fStart
);
59 UChar32
StringSegment::codePointAt(int32_t index
) const {
60 return fStr
.char32At(index
+ fStart
);
63 UnicodeString
StringSegment::toUnicodeString() const {
64 return UnicodeString(fStr
.getBuffer() + fStart
, fEnd
- fStart
);
67 const UnicodeString
StringSegment::toTempUnicodeString() const {
68 // Use the readonly-aliasing constructor for efficiency.
69 return UnicodeString(FALSE
, fStr
.getBuffer() + fStart
, fEnd
- fStart
);
// Returns the code point at the start of the segment. Visible part of the logic:
//  - reads the UTF-16 code unit at fStart;
//  - if that unit is a lead surrogate and at least one more code unit lies before
//    fEnd, returns the full code point via char32At(fStart).
// NOTE(review): the body is truncated in this extract (embedded numbering jumps from
// 76 to 83); what is returned for the remaining surrogate case and for ordinary code
// units is not visible here.
72 UChar32
StringSegment::getCodePoint() const {
73 char16_t lead
= fStr
.charAt(fStart
);
74 if (U16_IS_LEAD(lead
) && fStart
+ 1 < fEnd
) {
75 return fStr
.char32At(fStart
);
76 } else if (U16_IS_SURROGATE(lead
)) {
83 bool StringSegment::startsWith(UChar32 otherCp
) const {
84 return codePointsEqual(getCodePoint(), otherCp
, fFoldCase
);
// Tests the code point at the start of the segment (from getCodePoint()) for
// membership in `uniset` via UnicodeSet::contains().
// NOTE(review): embedded lines 91-93, between reading `cp` and the contains() call,
// are missing from this extract, as is the closing brace.
87 bool StringSegment::startsWith(const UnicodeSet
& uniset
) const {
88 // TODO: Move UnicodeSet case-folding logic here.
89 // TODO: Handle string matches here instead of separately.
90 UChar32 cp
= getCodePoint();
94 return uniset
.contains(cp
);
// Compares the first code point of the segment with the first code point of `other`
// through codePointsEqual(), using this segment's fFoldCase setting.
// A guard handles a bogus or empty `other` and an empty segment.
// NOTE(review): the guard's body (embedded lines 99-100) and the closing brace are
// missing from this extract, so the value returned in the guarded case is not visible.
97 bool StringSegment::startsWith(const UnicodeString
& other
) const {
98 if (other
.isBogus() || other
.length() == 0 || length() == 0) {
101 int cp1
= getCodePoint();
102 int cp2
= other
.char32At(0);
103 return codePointsEqual(cp1
, cp2
, fFoldCase
);
106 int32_t StringSegment::getCommonPrefixLength(const UnicodeString
& other
) {
107 return getPrefixLengthInternal(other
, fFoldCase
);
110 int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString
& other
) {
111 return getPrefixLengthInternal(other
, false);
// Shared implementation behind getCommonPrefixLength() and
// getCaseSensitivePrefixLength(). Asserts that `other` is non-empty, then walks the
// segment and `other` in parallel, bounded by the shorter of the two lengths, and
// compares UTF-16 code units (not code points, see the TODO) with codePointsEqual().
// NOTE(review): the declaration of `offset` (embedded line 116) and everything after
// the mismatch test (embedded lines 122-127: mismatch handling, increment, return)
// are missing from this extract. Presumably returns the number of matching leading
// code units -- confirm against the full file.
114 int32_t StringSegment::getPrefixLengthInternal(const UnicodeString
& other
, bool foldCase
) {
115 U_ASSERT(other
.length() > 0);
117 for (; offset
< uprv_min(length(), other
.length());) {
118 // TODO: case-fold code points, not chars
119 char16_t c1
= charAt(offset
);
120 char16_t c2
= other
.charAt(offset
);
121 if (!codePointsEqual(c1
, c2
, foldCase
)) {
// Compares two code points, optionally with case folding.
// Visible part of the logic: both cp1 and cp2 are replaced by u_foldCase(cp, TRUE).
// NOTE(review): embedded lines 130-135 and 138-140 are missing from this extract, so
// the conditions under which folding is applied (presumably governed by `foldCase`)
// and the final comparison/return are not visible here.
129 bool StringSegment::codePointsEqual(UChar32 cp1
, UChar32 cp2
, bool foldCase
) {
136 cp1
= u_foldCase(cp1
, TRUE
);
137 cp2
= u_foldCase(cp2
, TRUE
);
141 bool StringSegment::operator==(const UnicodeString
& other
) const {
142 return toTempUnicodeString() == other
;
146 #endif /* #if !UCONFIG_NO_FORMATTING */