]>
Commit | Line | Data |
---|---|---|
3d9156a7 A |
1 | --- utf2.c.orig Fri Feb 18 15:49:55 2005 |
2 | +++ utf2.c Fri Feb 18 15:52:07 2005 | |
3 | @@ -25,8 +25,11 @@ | |
4 | */ | |
5 | ||
6 | #include <sys/param.h> | |
7 | +/* UTF2: UTF-8 restricted to sequences of at most 3 bytes (wide chars up to 0xffff) */ | |
8 | __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.11 2004/07/27 06:29:48 tjr Exp $"); | |
9 | ||
10 | +#include "xlocale_private.h" | |
11 | + | |
12 | #include <errno.h> | |
13 | #include <limits.h> | |
14 | #include <runetype.h> | |
15 | @@ -35,54 +38,55 @@ | |
16 | #include <wchar.h> | |
17 | #include "mblocal.h" | |
18 | ||
19 | -size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, | |
20 | - mbstate_t * __restrict); | |
21 | -int _UTF8_mbsinit(const mbstate_t *); | |
22 | -size_t _UTF8_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict, | |
23 | - size_t, size_t, mbstate_t * __restrict); | |
24 | -size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); | |
25 | -size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, | |
26 | - size_t, size_t, mbstate_t * __restrict); | |
27 | +#define UTF2_MB_CUR_MAX 3 | |
28 | + | |
29 | +static size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, | |
30 | + mbstate_t * __restrict, locale_t); | |
31 | +static int _UTF2_mbsinit(const mbstate_t *, locale_t); | |
32 | +static size_t _UTF2_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict, | |
33 | + size_t, size_t, mbstate_t * __restrict, locale_t); | |
34 | +static size_t _UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict, locale_t); | |
35 | +static size_t _UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, | |
36 | + size_t, size_t, mbstate_t * __restrict, locale_t); | |
37 | ||
38 | typedef struct { | |
39 | wchar_t ch; | |
40 | int want; | |
41 | wchar_t lbound; | |
42 | -} _UTF8State; | |
43 | +} _UTF2State; | |
44 | ||
45 | -int | |
46 | -_UTF8_init(_RuneLocale *rl) | |
47 | +__private_extern__ int | |
48 | +_UTF2_init(struct __xlocale_st_runelocale *xrl) | |
49 | { | |
50 | ||
51 | - __mbrtowc = _UTF8_mbrtowc; | |
52 | - __wcrtomb = _UTF8_wcrtomb; | |
53 | - __mbsinit = _UTF8_mbsinit; | |
54 | - __mbsnrtowcs = _UTF8_mbsnrtowcs; | |
55 | - __wcsnrtombs = _UTF8_wcsnrtombs; | |
56 | - _CurrentRuneLocale = rl; | |
57 | - __mb_cur_max = 6; | |
58 | + xrl->__mbrtowc = _UTF2_mbrtowc; | |
59 | + xrl->__wcrtomb = _UTF2_wcrtomb; | |
60 | + xrl->__mbsinit = _UTF2_mbsinit; | |
61 | + xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs; | |
62 | + xrl->__wcsnrtombs = _UTF2_wcsnrtombs; | |
63 | + xrl->__mb_cur_max = UTF2_MB_CUR_MAX; | |
64 | ||
65 | return (0); | |
66 | } | |
67 | ||
68 | -int | |
69 | -_UTF8_mbsinit(const mbstate_t *ps) | |
70 | +static int | |
71 | +_UTF2_mbsinit(const mbstate_t *ps, locale_t loc) | |
72 | { | |
73 | ||
74 | - return (ps == NULL || ((const _UTF8State *)ps)->want == 0); | |
75 | + return (ps == NULL || ((const _UTF2State *)ps)->want == 0); | |
76 | } | |
77 | ||
78 | -size_t | |
79 | -_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, | |
80 | - mbstate_t * __restrict ps) | |
81 | +static size_t | |
82 | +_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, | |
83 | + mbstate_t * __restrict ps, locale_t loc) | |
84 | { | |
85 | - _UTF8State *us; | |
86 | + _UTF2State *us; | |
87 | int ch, i, mask, want; | |
88 | wchar_t lbound, wch; | |
89 | ||
90 | - us = (_UTF8State *)ps; | |
91 | + us = (_UTF2State *)ps; | |
92 | ||
93 | - if (us->want < 0 || us->want > 6) { | |
94 | + if (us->want < 0 || us->want > 3) { | |
95 | errno = EINVAL; | |
96 | return ((size_t)-1); | |
97 | } | |
98 | @@ -130,21 +134,9 @@ | |
99 | mask = 0x0f; | |
100 | want = 3; | |
101 | lbound = 0x800; | |
102 | - } else if ((ch & 0xf8) == 0xf0) { | |
103 | - mask = 0x07; | |
104 | - want = 4; | |
105 | - lbound = 0x10000; | |
106 | - } else if ((ch & 0xfc) == 0xf8) { | |
107 | - mask = 0x03; | |
108 | - want = 5; | |
109 | - lbound = 0x200000; | |
110 | - } else if ((ch & 0xfc) == 0xfc) { | |
111 | - mask = 0x01; | |
112 | - want = 6; | |
113 | - lbound = 0x4000000; | |
114 | } else { | |
115 | /* | |
116 | - * Malformed input; input is not UTF-8. | |
117 | + * Malformed input; input is not UTF2. | |
118 | */ | |
119 | errno = EILSEQ; | |
120 | return ((size_t)-1); | |
121 | @@ -194,17 +186,17 @@ | |
122 | return (wch == L'\0' ? 0 : want); | |
123 | } | |
124 | ||
125 | -size_t | |
126 | -_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, | |
127 | - size_t nms, size_t len, mbstate_t * __restrict ps) | |
128 | +static size_t | |
129 | +_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, | |
130 | + size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc) | |
131 | { | |
132 | - _UTF8State *us; | |
133 | + _UTF2State *us; | |
134 | const char *s; | |
135 | size_t nchr; | |
136 | wchar_t wc; | |
137 | size_t nb; | |
138 | ||
139 | - us = (_UTF8State *)ps; | |
140 | + us = (_UTF2State *)ps; | |
141 | ||
142 | s = *src; | |
143 | nchr = 0; | |
144 | @@ -226,7 +218,7 @@ | |
145 | * excluding NUL. | |
146 | */ | |
147 | nb = 1; | |
148 | - else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) == | |
149 | + else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) == | |
150 | (size_t)-1) | |
151 | /* Invalid sequence - mbrtowc() sets errno. */ | |
152 | return ((size_t)-1); | |
153 | @@ -256,7 +248,7 @@ | |
154 | */ | |
155 | *dst = (wchar_t)*s; | |
156 | nb = 1; | |
157 | - } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) == | |
158 | + } else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) == | |
159 | (size_t)-1) { | |
160 | *src = s; | |
161 | return ((size_t)-1); | |
162 | @@ -276,14 +268,14 @@ | |
163 | return (nchr); | |
164 | } | |
165 | ||
166 | -size_t | |
167 | -_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) | |
168 | +static size_t | |
169 | +_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc) | |
170 | { | |
171 | - _UTF8State *us; | |
172 | + _UTF2State *us; | |
173 | unsigned char lead; | |
174 | int i, len; | |
175 | ||
176 | - us = (_UTF8State *)ps; | |
177 | + us = (_UTF2State *)ps; | |
178 | ||
179 | if (us->want != 0) { | |
180 | errno = EINVAL; | |
181 | @@ -315,15 +307,6 @@ | |
182 | } else if ((wc & ~0xffff) == 0) { | |
183 | lead = 0xe0; | |
184 | len = 3; | |
185 | - } else if ((wc & ~0x1fffff) == 0) { | |
186 | - lead = 0xf0; | |
187 | - len = 4; | |
188 | - } else if ((wc & ~0x3ffffff) == 0) { | |
189 | - lead = 0xf8; | |
190 | - len = 5; | |
191 | - } else if ((wc & ~0x7fffffff) == 0) { | |
192 | - lead = 0xfc; | |
193 | - len = 6; | |
194 | } else { | |
195 | errno = EILSEQ; | |
196 | return ((size_t)-1); | |
197 | @@ -344,17 +327,17 @@ | |
198 | return (len); | |
199 | } | |
200 | ||
201 | -size_t | |
202 | -_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, | |
203 | - size_t nwc, size_t len, mbstate_t * __restrict ps) | |
204 | +static size_t | |
205 | +_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, | |
206 | + size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc) | |
207 | { | |
208 | - _UTF8State *us; | |
209 | + _UTF2State *us; | |
210 | char buf[MB_LEN_MAX]; | |
211 | const wchar_t *s; | |
212 | size_t nbytes; | |
213 | size_t nb; | |
214 | ||
215 | - us = (_UTF8State *)ps; | |
216 | + us = (_UTF2State *)ps; | |
217 | ||
218 | if (us->want != 0) { | |
219 | errno = EINVAL; | |
220 | @@ -369,7 +352,7 @@ | |
221 | if (0 <= *s && *s < 0x80) | |
222 | /* Fast path for plain ASCII characters. */ | |
223 | nb = 1; | |
224 | - else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == | |
225 | + else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == | |
226 | (size_t)-1) | |
227 | /* Invalid character - wcrtomb() sets errno. */ | |
228 | return ((size_t)-1); | |
229 | @@ -386,9 +369,9 @@ | |
230 | /* Fast path for plain ASCII characters. */ | |
231 | nb = 1; | |
232 | *dst = *s; | |
233 | - } else if (len > (size_t)MB_CUR_MAX) { | |
234 | + } else if (len > (size_t)UTF2_MB_CUR_MAX) { | |
235 | /* Enough space to translate in-place. */ | |
236 | - if ((nb = (int)_UTF8_wcrtomb(dst, *s, ps)) < 0) { | |
237 | + if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) { /* nb is size_t; wcrtomb() sets errno. */ | |
238 | *src = s; | |
239 | return ((size_t)-1); | |
240 | } | |
241 | @@ -396,7 +379,7 @@ | |
242 | /* | |
243 | * May not be enough space; use temp. buffer. | |
244 | */ | |
245 | - if ((nb = (int)_UTF8_wcrtomb(buf, *s, ps)) < 0) { | |
246 | + if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) { /* nb is size_t; wcrtomb() sets errno. */ | |
247 | *src = s; | |
248 | return ((size_t)-1); | |
249 | } | |