1 --- utf2.c.bsdnew 2009-11-09 17:38:09.000000000 -0800
2 +++ utf2.c 2009-11-09 17:41:17.000000000 -0800
5 __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.16 2007/10/15 09:51:30 ache Exp $");
7 +#include "xlocale_private.h"
12 @@ -35,62 +37,61 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/
16 -extern int __mb_sb_limit;
17 +#define UTF2_MB_CUR_MAX 3
19 -static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
20 - size_t, mbstate_t * __restrict);
21 -static int _UTF8_mbsinit(const mbstate_t *);
22 -static size_t _UTF8_mbsnrtowcs(wchar_t * __restrict,
23 +static size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict,
24 + size_t, mbstate_t * __restrict, locale_t);
25 +static int _UTF2_mbsinit(const mbstate_t *, locale_t);
26 +static size_t _UTF2_mbsnrtowcs(wchar_t * __restrict,
27 const char ** __restrict, size_t, size_t,
28 - mbstate_t * __restrict);
29 -static size_t _UTF8_wcrtomb(char * __restrict, wchar_t,
30 - mbstate_t * __restrict);
31 -static size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
32 - size_t, size_t, mbstate_t * __restrict);
33 + mbstate_t * __restrict, locale_t);
34 +static size_t _UTF2_wcrtomb(char * __restrict, wchar_t,
35 + mbstate_t * __restrict, locale_t);
36 +static size_t _UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
37 + size_t, size_t, mbstate_t * __restrict, locale_t);
47 -_UTF8_init(_RuneLocale *rl)
48 +__private_extern__ int
49 +_UTF2_init(struct __xlocale_st_runelocale *xrl)
52 - __mbrtowc = _UTF8_mbrtowc;
53 - __wcrtomb = _UTF8_wcrtomb;
54 - __mbsinit = _UTF8_mbsinit;
55 - __mbsnrtowcs = _UTF8_mbsnrtowcs;
56 - __wcsnrtombs = _UTF8_wcsnrtombs;
57 - _CurrentRuneLocale = rl;
59 + xrl->__mbrtowc = _UTF2_mbrtowc;
60 + xrl->__wcrtomb = _UTF2_wcrtomb;
61 + xrl->__mbsinit = _UTF2_mbsinit;
62 + xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs;
63 + xrl->__wcsnrtombs = _UTF2_wcsnrtombs;
64 + xrl->__mb_cur_max = UTF2_MB_CUR_MAX;
66 * UCS-4 encoding used as the internal representation, so
67 * slots 0x0080-0x00FF are occuped and must be excluded
68 * from the single byte ctype by setting the limit.
70 - __mb_sb_limit = 128;
71 + xrl->__mb_sb_limit = 128;
77 -_UTF8_mbsinit(const mbstate_t *ps)
78 +_UTF2_mbsinit(const mbstate_t *ps, locale_t loc)
81 - return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
82 + return (ps == NULL || ((const _UTF2State *)ps)->want == 0);
86 -_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
87 - mbstate_t * __restrict ps)
88 +_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
89 + mbstate_t * __restrict ps, locale_t loc)
93 int ch, i, mask, want;
96 - us = (_UTF8State *)ps;
97 + us = (_UTF2State *)ps;
99 if (us->want < 0 || us->want > 6) {
101 @@ -140,21 +141,9 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc,
105 - } else if ((ch & 0xf8) == 0xf0) {
109 - } else if ((ch & 0xfc) == 0xf8) {
113 - } else if ((ch & 0xfe) == 0xfc) {
116 - lbound = 0x4000000;
119 - * Malformed input; input is not UTF-8.
120 + * Malformed input; input is not UTF2.
124 @@ -205,16 +194,16 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc,
128 -_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
129 - size_t nms, size_t len, mbstate_t * __restrict ps)
130 +_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
131 + size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc)
140 - us = (_UTF8State *)ps;
141 + us = (_UTF2State *)ps;
145 @@ -236,7 +225,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
149 - else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
150 + else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) ==
152 /* Invalid sequence - mbrtowc() sets errno. */
154 @@ -266,7 +255,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
158 - } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
159 + } else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) ==
163 @@ -287,13 +276,13 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
167 -_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
168 +_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc)
175 - us = (_UTF8State *)ps;
176 + us = (_UTF2State *)ps;
180 @@ -325,15 +314,6 @@ _UTF8_wcrtomb(char * __restrict s, wchar
181 } else if ((wc & ~0xffff) == 0) {
184 - } else if ((wc & ~0x1fffff) == 0) {
187 - } else if ((wc & ~0x3ffffff) == 0) {
190 - } else if ((wc & ~0x7fffffff) == 0) {
196 @@ -355,16 +335,16 @@ _UTF8_wcrtomb(char * __restrict s, wchar
200 -_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
201 - size_t nwc, size_t len, mbstate_t * __restrict ps)
202 +_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
203 + size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc)
207 char buf[MB_LEN_MAX];
212 - us = (_UTF8State *)ps;
213 + us = (_UTF2State *)ps;
217 @@ -379,7 +359,7 @@ _UTF8_wcsnrtombs(char * __restrict dst,
218 if (0 <= *s && *s < 0x80)
219 /* Fast path for plain ASCII characters. */
221 - else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
222 + else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) ==
224 /* Invalid character - wcrtomb() sets errno. */
226 @@ -396,9 +376,9 @@ _UTF8_wcsnrtombs(char * __restrict dst,
227 /* Fast path for plain ASCII characters. */
230 - } else if (len > (size_t)MB_CUR_MAX) {
231 + } else if (len > (size_t)UTF2_MB_CUR_MAX) {
232 /* Enough space to translate in-place. */
233 - if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
234 + if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) {
238 @@ -406,7 +386,7 @@ _UTF8_wcsnrtombs(char * __restrict dst,
240 * May not be enough space; use temp. buffer.
242 - if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
243 + if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) {