// Source: git.saurik.com Git - apple/icu.git/blob - icuSources/common/bytesinkutil.cpp
// Snapshot: ICU-62108.0.1.tar.gz
// Path: [apple/icu.git] / icuSources / common / bytesinkutil.cpp
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // bytesinkutil.cpp
5 // created: 2017sep14 Markus W. Scherer
6
7 #include "unicode/utypes.h"
8 #include "unicode/bytestream.h"
9 #include "unicode/edits.h"
10 #include "unicode/stringoptions.h"
11 #include "unicode/utf8.h"
12 #include "unicode/utf16.h"
13 #include "bytesinkutil.h"
14 #include "cmemory.h"
15 #include "uassert.h"
16
17 U_NAMESPACE_BEGIN
18
19 UBool
20 ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
21 ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
22 if (U_FAILURE(errorCode)) { return FALSE; }
23 char scratch[200];
24 int32_t s8Length = 0;
25 for (int32_t i = 0; i < s16Length;) {
26 int32_t capacity;
27 int32_t desiredCapacity = s16Length - i;
28 if (desiredCapacity < (INT32_MAX / 3)) {
29 desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit
30 } else if (desiredCapacity < (INT32_MAX / 2)) {
31 desiredCapacity *= 2;
32 } else {
33 desiredCapacity = INT32_MAX;
34 }
35 char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
36 scratch, UPRV_LENGTHOF(scratch), &capacity);
37 capacity -= U8_MAX_LENGTH - 1;
38 int32_t j = 0;
39 for (; i < s16Length && j < capacity;) {
40 UChar32 c;
41 U16_NEXT_UNSAFE(s16, i, c);
42 U8_APPEND_UNSAFE(buffer, j, c);
43 }
44 if (j > (INT32_MAX - s8Length)) {
45 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
46 return FALSE;
47 }
48 sink.Append(buffer, j);
49 s8Length += j;
50 }
51 if (edits != nullptr) {
52 edits->addReplace(length, s8Length);
53 }
54 return TRUE;
55 }
56
57 UBool
58 ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
59 const char16_t *s16, int32_t s16Length,
60 ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
61 if (U_FAILURE(errorCode)) { return FALSE; }
62 if ((limit - s) > INT32_MAX) {
63 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
64 return FALSE;
65 }
66 return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
67 }
68
69 void
70 ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
71 char s8[U8_MAX_LENGTH];
72 int32_t s8Length = 0;
73 U8_APPEND_UNSAFE(s8, s8Length, c);
74 if (edits != nullptr) {
75 edits->addReplace(length, s8Length);
76 }
77 sink.Append(s8, s8Length);
78 }
79
80 namespace {
81
82 // See unicode/utf8.h U8_APPEND_UNSAFE().
83 inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
84 inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
85
86 } // namespace
87
88 void
89 ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
90 U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
91 char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
92 sink.Append(s8, 2);
93 }
94
95 void
96 ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
97 ByteSink &sink, uint32_t options, Edits *edits) {
98 U_ASSERT(length > 0);
99 if (edits != nullptr) {
100 edits->addUnchanged(length);
101 }
102 if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
103 sink.Append(reinterpret_cast<const char *>(s), length);
104 }
105 }
106
107 UBool
108 ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
109 ByteSink &sink, uint32_t options, Edits *edits,
110 UErrorCode &errorCode) {
111 if (U_FAILURE(errorCode)) { return FALSE; }
112 if ((limit - s) > INT32_MAX) {
113 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
114 return FALSE;
115 }
116 int32_t length = (int32_t)(limit - s);
117 if (length > 0) {
118 appendNonEmptyUnchanged(s, length, sink, options, edits);
119 }
120 return TRUE;
121 }
122
123 U_NAMESPACE_END