]>
Commit | Line | Data |
---|---|---|
ff4a34be | 1 | /* quotearg.c - quote arguments for output |
b0ce6046 | 2 | Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. |
ff4a34be AD |
3 | |
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software Foundation, | |
16 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
17 | ||
18 | /* Written by Paul Eggert <eggert@twinsun.com> */ | |
19 | ||
20 | #if HAVE_CONFIG_H | |
21 | # include <config.h> | |
22 | #endif | |
23 | ||
b0ce6046 AD |
24 | #if HAVE_STDDEF_H |
25 | # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */ | |
26 | #endif | |
ff4a34be AD |
27 | #include <sys/types.h> |
28 | #include <quotearg.h> | |
29 | #include <xalloc.h> | |
30 | ||
31 | #include <ctype.h> | |
32 | ||
33 | #if ENABLE_NLS | |
34 | # include <libintl.h> | |
35 | # define _(text) gettext (text) | |
36 | #else | |
37 | # define _(text) text | |
38 | #endif | |
39 | #define N_(text) text | |
40 | ||
41 | #if HAVE_LIMITS_H | |
42 | # include <limits.h> | |
43 | #endif | |
44 | #ifndef CHAR_BIT | |
45 | # define CHAR_BIT 8 | |
46 | #endif | |
47 | #ifndef UCHAR_MAX | |
48 | # define UCHAR_MAX ((unsigned char) -1) | |
49 | #endif | |
50 | ||
51 | #if HAVE_C_BACKSLASH_A | |
52 | # define ALERT_CHAR '\a' | |
53 | #else | |
54 | # define ALERT_CHAR '\7' | |
55 | #endif | |
56 | ||
57 | #if HAVE_STDLIB_H | |
58 | # include <stdlib.h> | |
59 | #endif | |
60 | ||
61 | #if HAVE_STRING_H | |
62 | # include <string.h> | |
63 | #endif | |
64 | ||
65 | #if HAVE_WCHAR_H | |
66 | # include <wchar.h> | |
67 | #endif | |
68 | ||
b0ce6046 | 69 | #if !HAVE_MBRTOWC |
ff4a34be AD |
70 | /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the |
71 | other macros are defined only for documentation and to satisfy C | |
72 | syntax. */ | |
73 | # undef MB_CUR_MAX | |
74 | # define MB_CUR_MAX 1 | |
75 | # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) | |
76 | # define mbsinit(ps) 1 | |
77 | # define iswprint(wc) ISPRINT ((unsigned char) (wc)) | |
78 | #endif | |
79 | ||
80 | #ifndef iswprint | |
81 | # if HAVE_WCTYPE_H | |
82 | # include <wctype.h> | |
83 | # endif | |
84 | # if !defined iswprint && !HAVE_ISWPRINT | |
85 | # define iswprint(wc) 1 | |
86 | # endif | |
87 | #endif | |
88 | ||
/* Number of bits in an int; the unit used to index the
   quote_these_too bit vector.  */
#define INT_BITS (CHAR_BIT * sizeof (int))

/* IN_CTYPE_DOMAIN (C) is nonzero if C may safely be handed to the
   <ctype.h> classification macros.  The isascii guard is applied only
   when the headers are not known to be standard and isascii is
   available.  */
#if !defined STDC_HEADERS && (defined isascii || defined HAVE_ISASCII)
# define IN_CTYPE_DOMAIN(c) isascii (c)
#else
# define IN_CTYPE_DOMAIN(c) 1
#endif

/* Drop any earlier ISPRINT (wctype.h on solaris2.6 defines one) and
   supply our own: 1 if C is in the ctype domain and printable, else 0.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) ? isprint (c) != 0 : 0)
100 | ||
101 | struct quoting_options | |
102 | { | |
103 | /* Basic quoting style. */ | |
104 | enum quoting_style style; | |
105 | ||
106 | /* Quote the characters indicated by this bit vector even if the | |
107 | quoting style would not normally require them to be quoted. */ | |
108 | int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; | |
109 | }; | |
110 | ||
/* User-visible names of the quoting styles, terminated by a null
   pointer.  Entry K names the style stored in quoting_style_vals[K].  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
123 | ||
124 | /* Correspondences to quoting style names. */ | |
125 | enum quoting_style const quoting_style_vals[] = | |
126 | { | |
127 | literal_quoting_style, | |
128 | shell_quoting_style, | |
129 | shell_always_quoting_style, | |
130 | c_quoting_style, | |
131 | escape_quoting_style, | |
132 | locale_quoting_style, | |
133 | clocale_quoting_style | |
134 | }; | |
135 | ||
136 | /* The default quoting options. */ | |
137 | static struct quoting_options default_quoting_options; | |
138 | ||
139 | /* Allocate a new set of quoting options, with contents initially identical | |
140 | to O if O is not null, or to the default if O is null. | |
141 | It is the caller's responsibility to free the result. */ | |
142 | struct quoting_options * | |
143 | clone_quoting_options (struct quoting_options *o) | |
144 | { | |
145 | struct quoting_options *p | |
146 | = (struct quoting_options *) xmalloc (sizeof (struct quoting_options)); | |
147 | *p = *(o ? o : &default_quoting_options); | |
148 | return p; | |
149 | } | |
150 | ||
151 | /* Get the value of O's quoting style. If O is null, use the default. */ | |
152 | enum quoting_style | |
153 | get_quoting_style (struct quoting_options *o) | |
154 | { | |
155 | return (o ? o : &default_quoting_options)->style; | |
156 | } | |
157 | ||
158 | /* In O (or in the default if O is null), | |
159 | set the value of the quoting style to S. */ | |
160 | void | |
161 | set_quoting_style (struct quoting_options *o, enum quoting_style s) | |
162 | { | |
163 | (o ? o : &default_quoting_options)->style = s; | |
164 | } | |
165 | ||
166 | /* In O (or in the default if O is null), | |
167 | set the value of the quoting options for character C to I. | |
168 | Return the old value. Currently, the only values defined for I are | |
169 | 0 (the default) and 1 (which means to quote the character even if | |
170 | it would not otherwise be quoted). */ | |
171 | int | |
172 | set_char_quoting (struct quoting_options *o, char c, int i) | |
173 | { | |
174 | unsigned char uc = c; | |
175 | int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; | |
176 | int shift = uc % INT_BITS; | |
177 | int r = (*p >> shift) & 1; | |
178 | *p ^= ((i & 1) ^ r) << shift; | |
179 | return r; | |
180 | } | |
181 | ||
182 | /* MSGID approximates a quotation mark. Return its translation if it | |
183 | has one; otherwise, return either it or "\"", depending on S. */ | |
184 | static char const * | |
185 | gettext_quote (char const *msgid, enum quoting_style s) | |
186 | { | |
187 | char const *translation = _(msgid); | |
188 | if (translation == msgid && s == clocale_quoting_style) | |
189 | translation = "\""; | |
190 | return translation; | |
191 | } | |
192 | ||
193 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
194 | argument ARG (of size ARGSIZE), using QUOTING_STYLE and the | |
195 | non-quoting-style part of O to control quoting. | |
196 | Terminate the output with a null character, and return the written | |
197 | size of the output, not counting the terminating null. | |
198 | If BUFFERSIZE is too small to store the output string, return the | |
199 | value that would have been returned had BUFFERSIZE been large enough. | |
200 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. | |
201 | ||
202 | This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, | |
203 | ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting | |
204 | style specified by O, and O may not be null. */ | |
205 | ||
206 | static size_t | |
207 | quotearg_buffer_restyled (char *buffer, size_t buffersize, | |
208 | char const *arg, size_t argsize, | |
209 | enum quoting_style quoting_style, | |
210 | struct quoting_options const *o) | |
211 | { | |
212 | size_t i; | |
213 | size_t len = 0; | |
214 | char const *quote_string = 0; | |
215 | size_t quote_string_len = 0; | |
216 | int backslash_escapes = 0; | |
217 | int unibyte_locale = MB_CUR_MAX == 1; | |
218 | ||
219 | #define STORE(c) \ | |
220 | do \ | |
221 | { \ | |
222 | if (len < buffersize) \ | |
223 | buffer[len] = (c); \ | |
224 | len++; \ | |
225 | } \ | |
226 | while (0) | |
227 | ||
228 | switch (quoting_style) | |
229 | { | |
230 | case c_quoting_style: | |
231 | STORE ('"'); | |
232 | backslash_escapes = 1; | |
233 | quote_string = "\""; | |
234 | quote_string_len = 1; | |
235 | break; | |
236 | ||
237 | case escape_quoting_style: | |
238 | backslash_escapes = 1; | |
239 | break; | |
240 | ||
241 | case locale_quoting_style: | |
242 | case clocale_quoting_style: | |
243 | { | |
244 | /* Get translations for open and closing quotation marks. | |
245 | ||
246 | The message catalog should translate "`" to a left | |
247 | quotation mark suitable for the locale, and similarly for | |
248 | "'". If the catalog has no translation, | |
249 | locale_quoting_style quotes `like this', and | |
250 | clocale_quoting_style quotes "like this". | |
251 | ||
252 | For example, an American English Unicode locale should | |
253 | translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and | |
254 | should translate "'" to U+201D (RIGHT DOUBLE QUOTATION | |
255 | MARK). A British English Unicode locale should instead | |
256 | translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and | |
257 | U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */ | |
258 | ||
259 | char const *left = gettext_quote (N_("`"), quoting_style); | |
260 | char const *right = gettext_quote (N_("'"), quoting_style); | |
261 | for (quote_string = left; *quote_string; quote_string++) | |
262 | STORE (*quote_string); | |
263 | backslash_escapes = 1; | |
264 | quote_string = right; | |
265 | quote_string_len = strlen (quote_string); | |
266 | } | |
267 | break; | |
268 | ||
269 | case shell_always_quoting_style: | |
270 | STORE ('\''); | |
271 | quote_string = "'"; | |
272 | quote_string_len = 1; | |
273 | break; | |
274 | ||
275 | default: | |
276 | break; | |
277 | } | |
278 | ||
279 | for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++) | |
280 | { | |
281 | unsigned char c; | |
282 | unsigned char esc; | |
283 | ||
284 | if (backslash_escapes | |
285 | && quote_string_len | |
286 | && i + quote_string_len <= argsize | |
287 | && memcmp (arg + i, quote_string, quote_string_len) == 0) | |
288 | STORE ('\\'); | |
289 | ||
290 | c = arg[i]; | |
291 | switch (c) | |
292 | { | |
293 | case '?': | |
294 | switch (quoting_style) | |
295 | { | |
296 | case shell_quoting_style: | |
297 | goto use_shell_always_quoting_style; | |
298 | ||
299 | case c_quoting_style: | |
300 | if (i + 2 < argsize && arg[i + 1] == '?') | |
301 | switch (arg[i + 2]) | |
302 | { | |
303 | case '!': case '\'': | |
304 | case '(': case ')': case '-': case '/': | |
305 | case '<': case '=': case '>': | |
306 | /* Escape the second '?' in what would otherwise be | |
307 | a trigraph. */ | |
308 | i += 2; | |
309 | c = arg[i + 2]; | |
310 | STORE ('?'); | |
311 | STORE ('\\'); | |
312 | STORE ('?'); | |
313 | break; | |
314 | } | |
315 | break; | |
316 | ||
317 | default: | |
318 | break; | |
319 | } | |
320 | break; | |
321 | ||
322 | case ALERT_CHAR: esc = 'a'; goto c_escape; | |
323 | case '\b': esc = 'b'; goto c_escape; | |
324 | case '\f': esc = 'f'; goto c_escape; | |
325 | case '\n': esc = 'n'; goto c_and_shell_escape; | |
326 | case '\r': esc = 'r'; goto c_and_shell_escape; | |
327 | case '\t': esc = 't'; goto c_and_shell_escape; | |
328 | case '\v': esc = 'v'; goto c_escape; | |
329 | case '\\': esc = c; goto c_and_shell_escape; | |
330 | ||
331 | c_and_shell_escape: | |
332 | if (quoting_style == shell_quoting_style) | |
333 | goto use_shell_always_quoting_style; | |
334 | c_escape: | |
335 | if (backslash_escapes) | |
336 | { | |
337 | c = esc; | |
338 | goto store_escape; | |
339 | } | |
340 | break; | |
341 | ||
342 | case '#': case '~': | |
343 | if (i != 0) | |
344 | break; | |
345 | /* Fall through. */ | |
346 | case ' ': | |
347 | case '!': /* special in bash */ | |
348 | case '"': case '$': case '&': | |
349 | case '(': case ')': case '*': case ';': | |
350 | case '<': case '>': case '[': | |
351 | case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ | |
352 | case '`': case '|': | |
353 | /* A shell special character. In theory, '$' and '`' could | |
354 | be the first bytes of multibyte characters, which means | |
355 | we should check them with mbrtowc, but in practice this | |
356 | doesn't happen so it's not worth worrying about. */ | |
357 | if (quoting_style == shell_quoting_style) | |
358 | goto use_shell_always_quoting_style; | |
359 | break; | |
360 | ||
361 | case '\'': | |
362 | switch (quoting_style) | |
363 | { | |
364 | case shell_quoting_style: | |
365 | goto use_shell_always_quoting_style; | |
366 | ||
367 | case shell_always_quoting_style: | |
368 | STORE ('\''); | |
369 | STORE ('\\'); | |
370 | STORE ('\''); | |
371 | break; | |
372 | ||
373 | default: | |
374 | break; | |
375 | } | |
376 | break; | |
377 | ||
378 | case '%': case '+': case ',': case '-': case '.': case '/': | |
379 | case '0': case '1': case '2': case '3': case '4': case '5': | |
380 | case '6': case '7': case '8': case '9': case ':': case '=': | |
381 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
382 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
383 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
384 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
385 | case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': | |
386 | case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': | |
387 | case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': | |
388 | case 'o': case 'p': case 'q': case 'r': case 's': case 't': | |
389 | case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': | |
390 | case '{': case '}': | |
391 | /* These characters don't cause problems, no matter what the | |
392 | quoting style is. They cannot start multibyte sequences. */ | |
393 | break; | |
394 | ||
395 | default: | |
396 | /* If we have a multibyte sequence, copy it until we reach | |
397 | its end, find an error, or come back to the initial shift | |
398 | state. For C-like styles, if the sequence has | |
399 | unprintable characters, escape the whole sequence, since | |
400 | we can't easily escape single characters within it. */ | |
401 | { | |
402 | /* Length of multibyte sequence found so far. */ | |
403 | size_t m; | |
404 | ||
405 | int printable; | |
406 | ||
407 | if (unibyte_locale) | |
408 | { | |
409 | m = 1; | |
410 | printable = ISPRINT (c); | |
411 | } | |
412 | else | |
413 | { | |
414 | mbstate_t mbstate; | |
415 | memset (&mbstate, 0, sizeof mbstate); | |
416 | ||
417 | m = 0; | |
418 | printable = 1; | |
419 | if (argsize == (size_t) -1) | |
420 | argsize = strlen (arg); | |
421 | ||
422 | do | |
423 | { | |
424 | wchar_t w; | |
425 | size_t bytes = mbrtowc (&w, &arg[i + m], | |
426 | argsize - (i + m), &mbstate); | |
427 | if (bytes == 0) | |
428 | break; | |
429 | else if (bytes == (size_t) -1) | |
430 | { | |
431 | printable = 0; | |
432 | break; | |
433 | } | |
434 | else if (bytes == (size_t) -2) | |
435 | { | |
436 | printable = 0; | |
437 | while (i + m < argsize && arg[i + m]) | |
438 | m++; | |
439 | break; | |
440 | } | |
441 | else | |
442 | { | |
443 | if (! iswprint (w)) | |
444 | printable = 0; | |
445 | m += bytes; | |
446 | } | |
447 | } | |
448 | while (! mbsinit (&mbstate)); | |
449 | } | |
450 | ||
451 | if (1 < m || (backslash_escapes && ! printable)) | |
452 | { | |
453 | /* Output a multibyte sequence, or an escaped | |
454 | unprintable unibyte character. */ | |
455 | size_t ilim = i + m; | |
456 | ||
457 | for (;;) | |
458 | { | |
459 | if (backslash_escapes && ! printable) | |
460 | { | |
461 | STORE ('\\'); | |
462 | STORE ('0' + (c >> 6)); | |
463 | STORE ('0' + ((c >> 3) & 7)); | |
464 | c = '0' + (c & 7); | |
465 | } | |
466 | if (ilim <= i + 1) | |
467 | break; | |
468 | STORE (c); | |
469 | c = arg[++i]; | |
470 | } | |
471 | ||
472 | goto store_c; | |
473 | } | |
474 | } | |
475 | } | |
476 | ||
477 | if (! (backslash_escapes | |
478 | && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) | |
479 | goto store_c; | |
480 | ||
481 | store_escape: | |
482 | STORE ('\\'); | |
483 | ||
484 | store_c: | |
485 | STORE (c); | |
486 | } | |
487 | ||
488 | if (quote_string) | |
489 | for (; *quote_string; quote_string++) | |
490 | STORE (*quote_string); | |
491 | ||
492 | if (len < buffersize) | |
493 | buffer[len] = '\0'; | |
494 | return len; | |
495 | ||
496 | use_shell_always_quoting_style: | |
497 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
498 | shell_always_quoting_style, o); | |
499 | } | |
500 | ||
501 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
502 | argument ARG (of size ARGSIZE), using O to control quoting. | |
503 | If O is null, use the default. | |
504 | Terminate the output with a null character, and return the written | |
505 | size of the output, not counting the terminating null. | |
506 | If BUFFERSIZE is too small to store the output string, return the | |
507 | value that would have been returned had BUFFERSIZE been large enough. | |
508 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ | |
509 | size_t | |
510 | quotearg_buffer (char *buffer, size_t buffersize, | |
511 | char const *arg, size_t argsize, | |
512 | struct quoting_options const *o) | |
513 | { | |
514 | struct quoting_options const *p = o ? o : &default_quoting_options; | |
515 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
516 | p->style, p); | |
517 | } | |
518 | ||
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; the function aborts otherwise.  N is
   deliberately declared with type "int" to allow for future
   extensions (using negative values).  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (n < 0)
    abort ();

  if (nslots <= (unsigned int) n)
    {
      /* Grow the vector to N + 1 slots.  The count is computed in
         unsigned arithmetic so that N + 1 cannot overflow, and the
         byte size is checked for wraparound.  */
      unsigned int n1 = (unsigned int) n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (n1 != s / sizeof (struct slotvec))
        abort ();

      /* The initial one-element vector is static and cannot be handed
         to xrealloc; move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));

      /* Record the new slot count (not the highest index), so that
         slot N is not treated as missing and wiped on the next call.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The slot is too small: enlarge it to fit exactly and quote
           again.  slot0 is static storage, so it is replaced by a
           fresh allocation rather than passed to xrealloc.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
572 | ||
573 | char * | |
574 | quotearg_n (unsigned int n, char const *arg) | |
575 | { | |
576 | return quotearg_n_options (n, arg, &default_quoting_options); | |
577 | } | |
578 | ||
/* Quote ARG with the default quoting options, using storage slot 0.  */
char *
quotearg (char const *arg)
{
  unsigned int const slot = 0;
  return quotearg_n (slot, arg);
}
584 | ||
585 | char * | |
586 | quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg) | |
587 | { | |
588 | struct quoting_options o; | |
589 | o.style = s; | |
590 | memset (o.quote_these_too, 0, sizeof o.quote_these_too); | |
591 | return quotearg_n_options (n, arg, &o); | |
592 | } | |
593 | ||
594 | char * | |
595 | quotearg_style (enum quoting_style s, char const *arg) | |
596 | { | |
597 | return quotearg_n_style (0, s, arg); | |
598 | } | |
599 | ||
600 | char * | |
601 | quotearg_char (char const *arg, char ch) | |
602 | { | |
603 | struct quoting_options options; | |
604 | options = default_quoting_options; | |
605 | set_char_quoting (&options, ch, 1); | |
606 | return quotearg_n_options (0, arg, &options); | |
607 | } | |
608 | ||
/* Quote ARG as quotearg_char does, with ':' as the extra character
   marked for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';
  return quotearg_char (arg, colon);
}