]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/string_segment.cpp
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 #include "unicode/utypes.h"
6 #if !UCONFIG_NO_FORMATTING
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
12 #include "numparse_types.h"
13 #include "string_segment.h"
15 #include "unicode/utf16.h"
16 #include "unicode/uniset.h"
21 StringSegment::StringSegment(const UnicodeString
& str
, bool ignoreCase
)
22 : fStr(str
), fStart(0), fEnd(str
.length()),
23 fFoldCase(ignoreCase
) {}
25 int32_t StringSegment::getOffset() const {
29 void StringSegment::setOffset(int32_t start
) {
33 void StringSegment::adjustOffset(int32_t delta
) {
37 void StringSegment::adjustOffsetByCodePoint() {
38 fStart
+= U16_LENGTH(getCodePoint());
41 void StringSegment::setLength(int32_t length
) {
42 fEnd
= fStart
+ length
;
45 void StringSegment::resetLength() {
49 int32_t StringSegment::length() const {
53 char16_t StringSegment::charAt(int32_t index
) const {
54 return fStr
.charAt(index
+ fStart
);
57 UChar32
StringSegment::codePointAt(int32_t index
) const {
58 return fStr
.char32At(index
+ fStart
);
61 UnicodeString
StringSegment::toUnicodeString() const {
62 return UnicodeString(fStr
.getBuffer() + fStart
, fEnd
- fStart
);
65 const UnicodeString
StringSegment::toTempUnicodeString() const {
66 // Use the readonly-aliasing constructor for efficiency.
67 return UnicodeString(FALSE
, fStr
.getBuffer() + fStart
, fEnd
- fStart
);
70 UChar32
StringSegment::getCodePoint() const {
71 char16_t lead
= fStr
.charAt(fStart
);
72 if (U16_IS_LEAD(lead
) && fStart
+ 1 < fEnd
) {
73 return fStr
.char32At(fStart
);
74 } else if (U16_IS_SURROGATE(lead
)) {
81 bool StringSegment::startsWith(UChar32 otherCp
) const {
82 return codePointsEqual(getCodePoint(), otherCp
, fFoldCase
);
85 bool StringSegment::startsWith(const UnicodeSet
& uniset
) const {
86 // TODO: Move UnicodeSet case-folding logic here.
87 // TODO: Handle string matches here instead of separately.
88 UChar32 cp
= getCodePoint();
92 return uniset
.contains(cp
);
95 bool StringSegment::startsWith(const UnicodeString
& other
) const {
96 if (other
.isBogus() || other
.length() == 0 || length() == 0) {
99 int cp1
= getCodePoint();
100 int cp2
= other
.char32At(0);
101 return codePointsEqual(cp1
, cp2
, fFoldCase
);
104 int32_t StringSegment::getCommonPrefixLength(const UnicodeString
& other
) {
105 return getPrefixLengthInternal(other
, fFoldCase
);
108 int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString
& other
) {
109 return getPrefixLengthInternal(other
, false);
112 int32_t StringSegment::getPrefixLengthInternal(const UnicodeString
& other
, bool foldCase
) {
113 U_ASSERT(other
.length() > 0);
115 for (; offset
< uprv_min(length(), other
.length());) {
116 // TODO: case-fold code points, not chars
117 char16_t c1
= charAt(offset
);
118 char16_t c2
= other
.charAt(offset
);
119 if (!codePointsEqual(c1
, c2
, foldCase
)) {
127 bool StringSegment::codePointsEqual(UChar32 cp1
, UChar32 cp2
, bool foldCase
) {
134 cp1
= u_foldCase(cp1
, TRUE
);
135 cp2
= u_foldCase(cp2
, TRUE
);
139 bool StringSegment::operator==(const UnicodeString
& other
) const {
140 return toTempUnicodeString() == other
;
145 #endif /* #if !UCONFIG_NO_FORMATTING */