]>
git.saurik.com Git - wxWidgets.git/blob - src/stc/scintilla/src/UniConversion.cxx
863eb82cdc97586d480b0bdb3d77ac17c50ac478
1 // Scintilla source code edit control
2 /** @file UniConversion.cxx
3 ** Functions to handle UTF-8 and UTF-16 strings.
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
10 #include "UniConversion.h"
// Boundaries of the UTF-16 surrogate code unit range. A code unit between
// SURROGATE_LEAD_FIRST and SURROGATE_TRAIL_LAST (inclusive) is treated below
// as half of a surrogate pair; SURROGATE_TRAIL_FIRST is the base value added
// when building the second (trail) unit of a pair.
12 enum { SURROGATE_LEAD_FIRST
= 0xD800 };
13 enum { SURROGATE_TRAIL_FIRST
= 0xDC00 };
14 enum { SURROGATE_TRAIL_LAST
= 0xDFFF };
16 unsigned int UTF8Length(const wchar_t *uptr
, unsigned int tlen
) {
18 for (unsigned int i
= 0; i
< tlen
&& uptr
[i
];) {
19 unsigned int uch
= uptr
[i
];
22 } else if (uch
< 0x800) {
24 } else if ((uch
>= SURROGATE_LEAD_FIRST
) &&
25 (uch
<= SURROGATE_TRAIL_LAST
)) {
36 void UTF8FromUTF16(const wchar_t *uptr
, unsigned int tlen
, char *putf
, unsigned int len
) {
38 for (unsigned int i
= 0; i
< tlen
&& uptr
[i
];) {
39 unsigned int uch
= uptr
[i
];
41 putf
[k
++] = static_cast<char>(uch
);
42 } else if (uch
< 0x800) {
43 putf
[k
++] = static_cast<char>(0xC0 | (uch
>> 6));
44 putf
[k
++] = static_cast<char>(0x80 | (uch
& 0x3f));
45 } else if ((uch
>= SURROGATE_LEAD_FIRST
) &&
46 (uch
<= SURROGATE_TRAIL_LAST
)) {
47 // Half a surrogate pair
49 unsigned int xch
= 0x10000 + ((uch
& 0x3ff) << 10) + (uptr
[i
] & 0x3ff);
50 putf
[k
++] = static_cast<char>(0xF0 | (xch
>> 18));
51 putf
[k
++] = static_cast<char>(0x80 | (xch
>> 12) & 0x3f);
52 putf
[k
++] = static_cast<char>(0x80 | ((xch
>> 6) & 0x3f));
53 putf
[k
++] = static_cast<char>(0x80 | (xch
& 0x3f));
55 putf
[k
++] = static_cast<char>(0xE0 | (uch
>> 12));
56 putf
[k
++] = static_cast<char>(0x80 | ((uch
>> 6) & 0x3f));
57 putf
[k
++] = static_cast<char>(0x80 | (uch
& 0x3f));
64 unsigned int UTF16Length(const char *s
, unsigned int len
) {
65 unsigned int ulen
= 0;
67 for (unsigned int i
=0;i
<len
;) {
68 unsigned char ch
= static_cast<unsigned char>(s
[i
]);
71 } else if (ch
< 0x80 + 0x40 + 0x20) {
73 } else if (ch
< 0x80 + 0x40 + 0x20 + 0x10) {
85 unsigned int UTF16FromUTF8(const char *s
, unsigned int len
, wchar_t *tbuf
, unsigned int tlen
) {
87 const unsigned char *us
= reinterpret_cast<const unsigned char *>(s
);
89 while ((i
<len
) && (ui
<tlen
)) {
90 unsigned char ch
= us
[i
++];
93 } else if (ch
< 0x80 + 0x40 + 0x20) {
94 tbuf
[ui
] = static_cast<wchar_t>((ch
& 0x1F) << 6);
96 tbuf
[ui
] = static_cast<wchar_t>(tbuf
[ui
] + (ch
& 0x7F));
97 } else if (ch
< 0x80 + 0x40 + 0x20 + 0x10) {
98 tbuf
[ui
] = static_cast<wchar_t>((ch
& 0xF) << 12);
100 tbuf
[ui
] = static_cast<wchar_t>(tbuf
[ui
] + ((ch
& 0x7F) << 6));
102 tbuf
[ui
] = static_cast<wchar_t>(tbuf
[ui
] + (ch
& 0x7F));
104 // Outside the BMP so need two surrogates
105 int val
= (ch
& 0x7) << 18;
107 val
+= (ch
& 0x3F) << 12;
109 val
+= (ch
& 0x3F) << 6;
112 tbuf
[ui
] = static_cast<wchar_t>(((val
- 0x10000) >> 10) + SURROGATE_LEAD_FIRST
);
114 tbuf
[ui
] = static_cast<wchar_t>((val
& 0x3ff) + SURROGATE_TRAIL_FIRST
);