]>
Commit | Line | Data |
---|---|---|
65ec6247 RD |
1 | // Scintilla source code edit control |
2 | /** @file UniConversion.cxx | |
3 | ** Functions to handle UTF-8 and UCS-2 strings. | |
4 | **/ | |
5 | // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org> | |
f6bcfd97 BP |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | ||
8 | #include <stdlib.h> | |
9 | ||
10 | #include "UniConversion.h" | |
11 | ||
/**
 * Count the bytes needed to hold the UTF-8 encoding of a UCS-2 string.
 * Scanning stops after @a tlen elements or at the first zero element,
 * whichever comes first. Each element costs 1 byte below 0x80, 2 bytes
 * below 0x800 and 3 bytes otherwise. No terminator is included in the count.
 */
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
	unsigned int total = 0;
	unsigned int pos = 0;
	while (pos < tlen && uptr[pos]) {
		const unsigned int value = uptr[pos];
		++pos;
		if (value >= 0x800) {
			total += 3;
		} else if (value >= 0x80) {
			total += 2;
		} else {
			total += 1;
		}
	}
	return total;
}
25 | ||
/**
 * Encode a UCS-2 string as UTF-8.
 *
 * @param uptr  Source elements; conversion stops at the first zero element.
 * @param tlen  Maximum number of source elements to convert.
 * @param putf  Destination buffer. Must have room for @a len + 1 chars
 *              because the terminator is stored at putf[len].
 * @param len   Number of encoded bytes the destination may receive
 *              (normally the value returned by UTF8Length).
 *
 * No more than @a len encoded bytes are ever written: conversion stops
 * before the first character whose encoding would not fit completely, so an
 * undersized @a len truncates the output instead of overrunning the buffer.
 */
void UTF8FromUCS2(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
	unsigned int k = 0;
	for (unsigned int i = 0; i < tlen && uptr[i]; i++) {
		const unsigned int uch = uptr[i];
		const unsigned int needed = (uch < 0x80) ? 1 : ((uch < 0x800) ? 2 : 3);
		// k never exceeds len, so len - k cannot wrap around.
		if (needed > len - k)
			break;
		if (needed == 1) {
			putf[k++] = static_cast<char>(uch);
		} else if (needed == 2) {
			putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
		} else {
			putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
			putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
			putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
		}
	}
	// Callers rely on the terminator sitting at index len.
	putf[len] = '\0';
}
43 | ||
/**
 * Count the UCS-2 elements that the first @a len bytes of the UTF-8 text
 * @a s decode to.
 *
 * Every byte that is not a trail byte (0x80..0xBF) starts a character and
 * is counted once. The lead-byte range begins at 0xC0 inclusive, which keeps
 * the count in step with UCS2FromUTF8, where 0xC0 is decoded as the start of
 * a two-byte sequence.
 */
unsigned int UCS2Length(const char *s, unsigned int len) {
	unsigned int ulen = 0;
	for (unsigned int i = 0; i < len; i++) {
		const unsigned char ch = static_cast<unsigned char>(s[i]);
		const bool isTrail = (ch >= 0x80) && (ch < (0x80 + 0x40));
		if (!isTrail)
			ulen++;
	}
	return ulen;
}
53 | ||
/**
 * Decode UTF-8 text into UCS-2 elements.
 *
 * @param s     UTF-8 input bytes.
 * @param len   Number of input bytes available in @a s.
 * @param tbuf  Destination buffer for decoded elements.
 * @param tlen  Capacity of @a tbuf in elements.
 * @return      Number of elements stored in @a tbuf.
 *
 * A lead byte below 0x80 is a single-byte character, one below 0xE0 starts
 * a two-byte sequence, and anything else starts a three-byte sequence.
 * Reads never go past @a len: when a sequence is cut short by the end of
 * the input, the missing trail bytes contribute zero bits.
 */
unsigned int UCS2FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
	unsigned int ui = 0;
	const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
	unsigned int i = 0;
	while ((i < len) && (ui < tlen)) {
		const unsigned char lead = us[i++];
		unsigned int value;
		unsigned int trailCount;
		if (lead < 0x80) {
			value = lead;
			trailCount = 0;
		} else if (lead < 0x80 + 0x40 + 0x20) {
			value = lead & 0x1F;
			trailCount = 1;
		} else {
			value = lead & 0xF;
			trailCount = 2;
		}
		for (; trailCount > 0; trailCount--) {
			value <<= 6;
			// Only consume a trail byte that really lies inside the input.
			if (i < len)
				value += us[i++] & 0x7F;
		}
		tbuf[ui++] = static_cast<wchar_t>(value);
	}
	return ui;
}