]>
Commit | Line | Data |
---|---|---|
ad3c9f2a A |
1 | /* |
2 | tre_regcomp.c - TRE POSIX compatible regex compilation functions. | |
3 | ||
4 | This software is released under a BSD-style license. | |
5 | See the file LICENSE for details and copyright. | |
6 | ||
7 | */ | |
8 | ||
9 | #ifdef HAVE_CONFIG_H | |
10 | #include <config.h> | |
11 | #endif /* HAVE_CONFIG_H */ | |
12 | ||
13 | #include <string.h> | |
14 | #include <errno.h> | |
15 | #include <stdlib.h> | |
16 | ||
17 | #include "tre.h" | |
18 | #include "tre-internal.h" | |
19 | #include "xmalloc.h" | |
20 | ||
21 | #ifndef BUILDING_VARIANT | |
22 | int | |
23 | tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags, locale_t loc) | |
24 | { | |
25 | int ret; | |
26 | #if TRE_WCHAR | |
27 | tre_char_t *wregex; | |
28 | size_t wlen; | |
29 | ||
30 | wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); | |
31 | if (wregex == NULL) | |
32 | return REG_ESPACE; | |
33 | #ifdef __LIBC__ | |
34 | NORMALIZE_LOCALE(loc); | |
35 | #endif /* __LIBC__ */ | |
36 | ||
37 | /* If the current locale uses the standard single byte encoding of | |
38 | characters, we don't do a multibyte string conversion. If we did, | |
39 | many applications which use the default locale would break since | |
40 | the default "C" locale uses the 7-bit ASCII character set, and | |
41 | all characters with the eighth bit set would be considered invalid. */ | |
42 | #if TRE_MULTIBYTE | |
43 | if (TRE_MB_CUR_MAX_L(loc) == 1) | |
44 | #endif /* TRE_MULTIBYTE */ | |
45 | { | |
46 | unsigned int i; | |
47 | const unsigned char *str = (const unsigned char *)regex; | |
48 | tre_char_t *wstr = wregex; | |
49 | ||
50 | for (i = 0; i < n; i++) | |
51 | *(wstr++) = *(str++); | |
52 | wlen = n; | |
53 | } | |
54 | #if TRE_MULTIBYTE | |
55 | else | |
56 | { | |
57 | size_t consumed; | |
58 | tre_char_t *wcptr = wregex; | |
59 | #ifdef HAVE_MBSTATE_T | |
60 | mbstate_t state; | |
61 | memset(&state, '\0', sizeof(state)); | |
62 | #endif /* HAVE_MBSTATE_T */ | |
63 | while (n > 0) | |
64 | { | |
65 | consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc); | |
66 | ||
67 | switch (consumed) | |
68 | { | |
69 | case 0: | |
70 | if (*regex == '\0') | |
71 | consumed = 1; | |
72 | else | |
73 | { | |
74 | xfree(wregex); | |
75 | return REG_BADPAT; | |
76 | } | |
77 | break; | |
78 | case (size_t)-1: | |
79 | case (size_t)-2: | |
80 | DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); | |
81 | xfree(wregex); | |
82 | return REG_ILLSEQ; | |
83 | } | |
84 | regex += consumed; | |
85 | n -= consumed; | |
86 | wcptr++; | |
87 | } | |
88 | wlen = wcptr - wregex; | |
89 | } | |
90 | #endif /* TRE_MULTIBYTE */ | |
91 | ||
92 | wregex[wlen] = L'\0'; | |
93 | ret = tre_compile(preg, wregex, wlen, cflags, loc); | |
94 | xfree(wregex); | |
95 | #else /* !TRE_WCHAR */ | |
96 | #ifdef __LIBC__ | |
97 | NORMALIZE_LOCALE(loc); | |
98 | #endif /* __LIBC__ */ | |
99 | ||
100 | ret = tre_compile(preg, (const tre_char_t *)regex, n, cflags, loc); | |
101 | #endif /* !TRE_WCHAR */ | |
102 | ||
103 | return ret; | |
104 | } | |
105 | ||
106 | int | |
107 | tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags) | |
108 | { | |
109 | locale_t loc; | |
110 | ||
111 | #ifdef __LIBC__ | |
112 | loc = __current_locale(); | |
113 | #else /* !__LIBC__ */ | |
114 | loc = duplocale(NULL); | |
115 | if (!loc) return REG_ESPACE; | |
116 | #endif /* !__LIBC__ */ | |
117 | ||
118 | return tre_regncomp_l(preg, regex, n, cflags, loc); | |
119 | } | |
120 | ||
121 | int | |
122 | tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc) | |
123 | { | |
124 | size_t len; | |
125 | ||
126 | if (cflags & REG_PEND) | |
127 | { | |
128 | if ((const char *)(preg->re_endp) < regex) | |
129 | return REG_INVARG; | |
130 | len = (const char *)(preg->re_endp) - regex; | |
131 | } | |
132 | else | |
133 | len = strlen(regex); | |
134 | return tre_regncomp_l(preg, regex, len, cflags, loc); | |
135 | } | |
136 | #endif /* !BUILDING_VARIANT */ | |
137 | ||
138 | int | |
139 | tre_regcomp(regex_t *preg, const char *regex, int cflags) | |
140 | { | |
141 | locale_t loc; | |
142 | ||
143 | #ifdef __LIBC__ | |
144 | loc = __current_locale(); | |
145 | #else /* !__LIBC__ */ | |
146 | loc = duplocale(NULL); | |
147 | if (!loc) return REG_ESPACE; | |
148 | #endif /* !__LIBC__ */ | |
149 | ||
150 | return tre_regcomp_l(preg, regex, cflags, loc); | |
151 | } | |
152 | ||
153 | ||
154 | #ifndef BUILDING_VARIANT | |
155 | #ifdef TRE_WCHAR | |
156 | int | |
157 | tre_regwncomp_l(regex_t *preg, const wchar_t *regex, size_t n, int cflags, locale_t loc) | |
158 | { | |
159 | #ifdef __LIBC__ | |
160 | NORMALIZE_LOCALE(loc); | |
161 | #endif /* __LIBC__ */ | |
162 | return tre_compile(preg, regex, n, cflags, loc); | |
163 | } | |
164 | ||
165 | int | |
166 | tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags) | |
167 | { | |
168 | locale_t loc; | |
169 | ||
170 | #ifdef __LIBC__ | |
171 | loc = __current_locale(); | |
172 | #else /* !__LIBC__ */ | |
173 | loc = duplocale(NULL); | |
174 | if (!loc) return REG_ESPACE; | |
175 | #endif /* !__LIBC__ */ | |
176 | ||
177 | return tre_compile(preg, regex, n, cflags, loc); | |
178 | } | |
179 | ||
180 | int | |
181 | tre_regwcomp_l(regex_t *preg, const wchar_t *regex, int cflags, locale_t loc) | |
182 | { | |
183 | #ifdef __LIBC__ | |
184 | NORMALIZE_LOCALE(loc); | |
185 | #endif /* __LIBC__ */ | |
186 | return tre_compile(preg, regex, wcslen(regex), cflags, loc); | |
187 | } | |
188 | ||
189 | int | |
190 | tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags) | |
191 | { | |
192 | return tre_regwncomp(preg, regex, wcslen(regex), cflags); | |
193 | } | |
194 | #endif /* TRE_WCHAR */ | |
195 | ||
196 | void | |
197 | tre_regfree(regex_t *preg) | |
198 | { | |
199 | tre_free(preg); | |
200 | } | |
201 | #endif /* !BUILDING_VARIANT */ | |
202 | ||
203 | /* EOF */ |