]>
Commit | Line | Data |
---|---|---|
1 | /* quotearg.c - quote arguments for output | |
2 | Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software Foundation, | |
16 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
17 | ||
18 | /* Written by Paul Eggert <eggert@twinsun.com> */ | |
19 | ||
20 | #if HAVE_CONFIG_H | |
21 | # include <config.h> | |
22 | #endif | |
23 | ||
24 | #if HAVE_STDDEF_H | |
25 | # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */ | |
26 | #endif | |
27 | #include <sys/types.h> | |
28 | #include <quotearg.h> | |
29 | #include <xalloc.h> | |
30 | ||
31 | #include <ctype.h> | |
32 | ||
33 | #if ENABLE_NLS | |
34 | # include <libintl.h> | |
35 | # define _(text) gettext (text) | |
36 | #else | |
37 | # define _(text) text | |
38 | #endif | |
39 | #define N_(text) text | |
40 | ||
41 | #if HAVE_LIMITS_H | |
42 | # include <limits.h> | |
43 | #endif | |
44 | #ifndef CHAR_BIT | |
45 | # define CHAR_BIT 8 | |
46 | #endif | |
47 | #ifndef UCHAR_MAX | |
48 | # define UCHAR_MAX ((unsigned char) -1) | |
49 | #endif | |
50 | ||
51 | #if HAVE_C_BACKSLASH_A | |
52 | # define ALERT_CHAR '\a' | |
53 | #else | |
54 | # define ALERT_CHAR '\7' | |
55 | #endif | |
56 | ||
57 | #if HAVE_STDLIB_H | |
58 | # include <stdlib.h> | |
59 | #endif | |
60 | ||
61 | #if HAVE_STRING_H | |
62 | # include <string.h> | |
63 | #endif | |
64 | ||
65 | #if HAVE_WCHAR_H | |
66 | ||
67 | /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */ | |
68 | # include <stdio.h> | |
69 | # include <time.h> | |
70 | ||
71 | # include <wchar.h> | |
72 | #endif | |
73 | ||
74 | #if !HAVE_MBRTOWC | |
75 | /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the | |
76 | other macros are defined only for documentation and to satisfy C | |
77 | syntax. */ | |
78 | # undef MB_CUR_MAX | |
79 | # define MB_CUR_MAX 1 | |
80 | # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) | |
81 | # define iswprint(wc) ISPRINT ((unsigned char) (wc)) | |
82 | # undef HAVE_MBSINIT | |
83 | #endif | |
84 | ||
85 | #if !defined mbsinit && !HAVE_MBSINIT | |
86 | # define mbsinit(ps) 1 | |
87 | #endif | |
88 | ||
89 | #ifndef iswprint | |
90 | # if HAVE_WCTYPE_H | |
91 | # include <wctype.h> | |
92 | # endif | |
93 | # if !defined iswprint && !HAVE_ISWPRINT | |
94 | # define iswprint(wc) 1 | |
95 | # endif | |
96 | #endif | |
97 | ||
98 | #define INT_BITS (sizeof (int) * CHAR_BIT) | |
99 | ||
100 | #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) | |
101 | # define IN_CTYPE_DOMAIN(c) 1 | |
102 | #else | |
103 | # define IN_CTYPE_DOMAIN(c) isascii(c) | |
104 | #endif | |
105 | ||
106 | /* Undefine to protect against the definition in wctype.h of solaris2.6. */ | |
107 | #undef ISPRINT | |
108 | #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) | |
109 | ||
110 | struct quoting_options | |
111 | { | |
112 | /* Basic quoting style. */ | |
113 | enum quoting_style style; | |
114 | ||
115 | /* Quote the characters indicated by this bit vector even if the | |
116 | quoting style would not normally require them to be quoted. */ | |
117 | int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; | |
118 | }; | |
119 | ||
/* User-visible names of the quoting styles, terminated by a null
   pointer.  Entry K names the style stored at index K of
   quoting_style_vals.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
132 | ||
133 | /* Correspondences to quoting style names. */ | |
134 | enum quoting_style const quoting_style_vals[] = | |
135 | { | |
136 | literal_quoting_style, | |
137 | shell_quoting_style, | |
138 | shell_always_quoting_style, | |
139 | c_quoting_style, | |
140 | escape_quoting_style, | |
141 | locale_quoting_style, | |
142 | clocale_quoting_style | |
143 | }; | |
144 | ||
145 | /* The default quoting options. */ | |
146 | static struct quoting_options default_quoting_options; | |
147 | ||
148 | /* Allocate a new set of quoting options, with contents initially identical | |
149 | to O if O is not null, or to the default if O is null. | |
150 | It is the caller's responsibility to free the result. */ | |
151 | struct quoting_options * | |
152 | clone_quoting_options (struct quoting_options *o) | |
153 | { | |
154 | struct quoting_options *p | |
155 | = (struct quoting_options *) xmalloc (sizeof (struct quoting_options)); | |
156 | *p = *(o ? o : &default_quoting_options); | |
157 | return p; | |
158 | } | |
159 | ||
160 | /* Get the value of O's quoting style. If O is null, use the default. */ | |
161 | enum quoting_style | |
162 | get_quoting_style (struct quoting_options *o) | |
163 | { | |
164 | return (o ? o : &default_quoting_options)->style; | |
165 | } | |
166 | ||
167 | /* In O (or in the default if O is null), | |
168 | set the value of the quoting style to S. */ | |
169 | void | |
170 | set_quoting_style (struct quoting_options *o, enum quoting_style s) | |
171 | { | |
172 | (o ? o : &default_quoting_options)->style = s; | |
173 | } | |
174 | ||
175 | /* In O (or in the default if O is null), | |
176 | set the value of the quoting options for character C to I. | |
177 | Return the old value. Currently, the only values defined for I are | |
178 | 0 (the default) and 1 (which means to quote the character even if | |
179 | it would not otherwise be quoted). */ | |
180 | int | |
181 | set_char_quoting (struct quoting_options *o, char c, int i) | |
182 | { | |
183 | unsigned char uc = c; | |
184 | int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; | |
185 | int shift = uc % INT_BITS; | |
186 | int r = (*p >> shift) & 1; | |
187 | *p ^= ((i & 1) ^ r) << shift; | |
188 | return r; | |
189 | } | |
190 | ||
191 | /* MSGID approximates a quotation mark. Return its translation if it | |
192 | has one; otherwise, return either it or "\"", depending on S. */ | |
193 | static char const * | |
194 | gettext_quote (char const *msgid, enum quoting_style s) | |
195 | { | |
196 | char const *translation = _(msgid); | |
197 | if (translation == msgid && s == clocale_quoting_style) | |
198 | translation = "\""; | |
199 | return translation; | |
200 | } | |
201 | ||
202 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
203 | argument ARG (of size ARGSIZE), using QUOTING_STYLE and the | |
204 | non-quoting-style part of O to control quoting. | |
205 | Terminate the output with a null character, and return the written | |
206 | size of the output, not counting the terminating null. | |
207 | If BUFFERSIZE is too small to store the output string, return the | |
208 | value that would have been returned had BUFFERSIZE been large enough. | |
209 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. | |
210 | ||
211 | This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, | |
212 | ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting | |
213 | style specified by O, and O may not be null. */ | |
214 | ||
215 | static size_t | |
216 | quotearg_buffer_restyled (char *buffer, size_t buffersize, | |
217 | char const *arg, size_t argsize, | |
218 | enum quoting_style quoting_style, | |
219 | struct quoting_options const *o) | |
220 | { | |
221 | size_t i; | |
222 | size_t len = 0; | |
223 | char const *quote_string = 0; | |
224 | size_t quote_string_len = 0; | |
225 | int backslash_escapes = 0; | |
226 | int unibyte_locale = MB_CUR_MAX == 1; | |
227 | ||
228 | #define STORE(c) \ | |
229 | do \ | |
230 | { \ | |
231 | if (len < buffersize) \ | |
232 | buffer[len] = (c); \ | |
233 | len++; \ | |
234 | } \ | |
235 | while (0) | |
236 | ||
237 | switch (quoting_style) | |
238 | { | |
239 | case c_quoting_style: | |
240 | STORE ('"'); | |
241 | backslash_escapes = 1; | |
242 | quote_string = "\""; | |
243 | quote_string_len = 1; | |
244 | break; | |
245 | ||
246 | case escape_quoting_style: | |
247 | backslash_escapes = 1; | |
248 | break; | |
249 | ||
250 | case locale_quoting_style: | |
251 | case clocale_quoting_style: | |
252 | { | |
253 | /* Get translations for open and closing quotation marks. | |
254 | ||
255 | The message catalog should translate "`" to a left | |
256 | quotation mark suitable for the locale, and similarly for | |
257 | "'". If the catalog has no translation, | |
258 | locale_quoting_style quotes `like this', and | |
259 | clocale_quoting_style quotes "like this". | |
260 | ||
261 | For example, an American English Unicode locale should | |
262 | translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and | |
263 | should translate "'" to U+201D (RIGHT DOUBLE QUOTATION | |
264 | MARK). A British English Unicode locale should instead | |
265 | translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and | |
266 | U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */ | |
267 | ||
268 | char const *left = gettext_quote (N_("`"), quoting_style); | |
269 | char const *right = gettext_quote (N_("'"), quoting_style); | |
270 | for (quote_string = left; *quote_string; quote_string++) | |
271 | STORE (*quote_string); | |
272 | backslash_escapes = 1; | |
273 | quote_string = right; | |
274 | quote_string_len = strlen (quote_string); | |
275 | } | |
276 | break; | |
277 | ||
278 | case shell_always_quoting_style: | |
279 | STORE ('\''); | |
280 | quote_string = "'"; | |
281 | quote_string_len = 1; | |
282 | break; | |
283 | ||
284 | default: | |
285 | break; | |
286 | } | |
287 | ||
288 | for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++) | |
289 | { | |
290 | unsigned char c; | |
291 | unsigned char esc; | |
292 | ||
293 | if (backslash_escapes | |
294 | && quote_string_len | |
295 | && i + quote_string_len <= argsize | |
296 | && memcmp (arg + i, quote_string, quote_string_len) == 0) | |
297 | STORE ('\\'); | |
298 | ||
299 | c = arg[i]; | |
300 | switch (c) | |
301 | { | |
302 | case '?': | |
303 | switch (quoting_style) | |
304 | { | |
305 | case shell_quoting_style: | |
306 | goto use_shell_always_quoting_style; | |
307 | ||
308 | case c_quoting_style: | |
309 | if (i + 2 < argsize && arg[i + 1] == '?') | |
310 | switch (arg[i + 2]) | |
311 | { | |
312 | case '!': case '\'': | |
313 | case '(': case ')': case '-': case '/': | |
314 | case '<': case '=': case '>': | |
315 | /* Escape the second '?' in what would otherwise be | |
316 | a trigraph. */ | |
317 | i += 2; | |
318 | c = arg[i + 2]; | |
319 | STORE ('?'); | |
320 | STORE ('\\'); | |
321 | STORE ('?'); | |
322 | break; | |
323 | } | |
324 | break; | |
325 | ||
326 | default: | |
327 | break; | |
328 | } | |
329 | break; | |
330 | ||
331 | case ALERT_CHAR: esc = 'a'; goto c_escape; | |
332 | case '\b': esc = 'b'; goto c_escape; | |
333 | case '\f': esc = 'f'; goto c_escape; | |
334 | case '\n': esc = 'n'; goto c_and_shell_escape; | |
335 | case '\r': esc = 'r'; goto c_and_shell_escape; | |
336 | case '\t': esc = 't'; goto c_and_shell_escape; | |
337 | case '\v': esc = 'v'; goto c_escape; | |
338 | case '\\': esc = c; goto c_and_shell_escape; | |
339 | ||
340 | c_and_shell_escape: | |
341 | if (quoting_style == shell_quoting_style) | |
342 | goto use_shell_always_quoting_style; | |
343 | c_escape: | |
344 | if (backslash_escapes) | |
345 | { | |
346 | c = esc; | |
347 | goto store_escape; | |
348 | } | |
349 | break; | |
350 | ||
351 | case '#': case '~': | |
352 | if (i != 0) | |
353 | break; | |
354 | /* Fall through. */ | |
355 | case ' ': | |
356 | case '!': /* special in bash */ | |
357 | case '"': case '$': case '&': | |
358 | case '(': case ')': case '*': case ';': | |
359 | case '<': case '>': case '[': | |
360 | case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ | |
361 | case '`': case '|': | |
362 | /* A shell special character. In theory, '$' and '`' could | |
363 | be the first bytes of multibyte characters, which means | |
364 | we should check them with mbrtowc, but in practice this | |
365 | doesn't happen so it's not worth worrying about. */ | |
366 | if (quoting_style == shell_quoting_style) | |
367 | goto use_shell_always_quoting_style; | |
368 | break; | |
369 | ||
370 | case '\'': | |
371 | switch (quoting_style) | |
372 | { | |
373 | case shell_quoting_style: | |
374 | goto use_shell_always_quoting_style; | |
375 | ||
376 | case shell_always_quoting_style: | |
377 | STORE ('\''); | |
378 | STORE ('\\'); | |
379 | STORE ('\''); | |
380 | break; | |
381 | ||
382 | default: | |
383 | break; | |
384 | } | |
385 | break; | |
386 | ||
387 | case '%': case '+': case ',': case '-': case '.': case '/': | |
388 | case '0': case '1': case '2': case '3': case '4': case '5': | |
389 | case '6': case '7': case '8': case '9': case ':': case '=': | |
390 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
391 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
392 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
393 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
394 | case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': | |
395 | case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': | |
396 | case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': | |
397 | case 'o': case 'p': case 'q': case 'r': case 's': case 't': | |
398 | case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': | |
399 | case '{': case '}': | |
400 | /* These characters don't cause problems, no matter what the | |
401 | quoting style is. They cannot start multibyte sequences. */ | |
402 | break; | |
403 | ||
404 | default: | |
405 | /* If we have a multibyte sequence, copy it until we reach | |
406 | its end, find an error, or come back to the initial shift | |
407 | state. For C-like styles, if the sequence has | |
408 | unprintable characters, escape the whole sequence, since | |
409 | we can't easily escape single characters within it. */ | |
410 | { | |
411 | /* Length of multibyte sequence found so far. */ | |
412 | size_t m; | |
413 | ||
414 | int printable; | |
415 | ||
416 | if (unibyte_locale) | |
417 | { | |
418 | m = 1; | |
419 | printable = ISPRINT (c); | |
420 | } | |
421 | else | |
422 | { | |
423 | mbstate_t mbstate; | |
424 | memset (&mbstate, 0, sizeof mbstate); | |
425 | ||
426 | m = 0; | |
427 | printable = 1; | |
428 | if (argsize == (size_t) -1) | |
429 | argsize = strlen (arg); | |
430 | ||
431 | do | |
432 | { | |
433 | wchar_t w; | |
434 | size_t bytes = mbrtowc (&w, &arg[i + m], | |
435 | argsize - (i + m), &mbstate); | |
436 | if (bytes == 0) | |
437 | break; | |
438 | else if (bytes == (size_t) -1) | |
439 | { | |
440 | printable = 0; | |
441 | break; | |
442 | } | |
443 | else if (bytes == (size_t) -2) | |
444 | { | |
445 | printable = 0; | |
446 | while (i + m < argsize && arg[i + m]) | |
447 | m++; | |
448 | break; | |
449 | } | |
450 | else | |
451 | { | |
452 | if (! iswprint (w)) | |
453 | printable = 0; | |
454 | m += bytes; | |
455 | } | |
456 | } | |
457 | while (! mbsinit (&mbstate)); | |
458 | } | |
459 | ||
460 | if (1 < m || (backslash_escapes && ! printable)) | |
461 | { | |
462 | /* Output a multibyte sequence, or an escaped | |
463 | unprintable unibyte character. */ | |
464 | size_t ilim = i + m; | |
465 | ||
466 | for (;;) | |
467 | { | |
468 | if (backslash_escapes && ! printable) | |
469 | { | |
470 | STORE ('\\'); | |
471 | STORE ('0' + (c >> 6)); | |
472 | STORE ('0' + ((c >> 3) & 7)); | |
473 | c = '0' + (c & 7); | |
474 | } | |
475 | if (ilim <= i + 1) | |
476 | break; | |
477 | STORE (c); | |
478 | c = arg[++i]; | |
479 | } | |
480 | ||
481 | goto store_c; | |
482 | } | |
483 | } | |
484 | } | |
485 | ||
486 | if (! (backslash_escapes | |
487 | && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) | |
488 | goto store_c; | |
489 | ||
490 | store_escape: | |
491 | STORE ('\\'); | |
492 | ||
493 | store_c: | |
494 | STORE (c); | |
495 | } | |
496 | ||
497 | if (quote_string) | |
498 | for (; *quote_string; quote_string++) | |
499 | STORE (*quote_string); | |
500 | ||
501 | if (len < buffersize) | |
502 | buffer[len] = '\0'; | |
503 | return len; | |
504 | ||
505 | use_shell_always_quoting_style: | |
506 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
507 | shell_always_quoting_style, o); | |
508 | } | |
509 | ||
510 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
511 | argument ARG (of size ARGSIZE), using O to control quoting. | |
512 | If O is null, use the default. | |
513 | Terminate the output with a null character, and return the written | |
514 | size of the output, not counting the terminating null. | |
515 | If BUFFERSIZE is too small to store the output string, return the | |
516 | value that would have been returned had BUFFERSIZE been large enough. | |
517 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ | |
518 | size_t | |
519 | quotearg_buffer (char *buffer, size_t buffersize, | |
520 | char const *arg, size_t argsize, | |
521 | struct quoting_options const *o) | |
522 | { | |
523 | struct quoting_options const *p = o ? o : &default_quoting_options; | |
524 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
525 | p->style, p); | |
526 | } | |
527 | ||
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative.  N is deliberately declared with type "int"
   to allow for future extensions (using negative values).
   Abort if N is negative or if the slot vector size would overflow.  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (nslots <= n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);

      /* Reject a nonpositive slot count and a byte size that wrapped.  */
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();

      /* The initial vector is a static object and cannot be handed to
         xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));

      /* The vector now holds N1 slots.  Recording anything smaller
         would make a later call with the same N grow again and wipe
         slot N, losing its buffer.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The slot is too small: enlarge it to fit and quote again.
           SLOT0 is static storage, so it is replaced rather than
           reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
581 | ||
582 | char * | |
583 | quotearg_n (unsigned int n, char const *arg) | |
584 | { | |
585 | return quotearg_n_options (n, arg, &default_quoting_options); | |
586 | } | |
587 | ||
/* Quote ARG into storage slot 0 using the default quoting options.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
593 | ||
594 | char * | |
595 | quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg) | |
596 | { | |
597 | struct quoting_options o; | |
598 | o.style = s; | |
599 | memset (o.quote_these_too, 0, sizeof o.quote_these_too); | |
600 | return quotearg_n_options (n, arg, &o); | |
601 | } | |
602 | ||
603 | char * | |
604 | quotearg_style (enum quoting_style s, char const *arg) | |
605 | { | |
606 | return quotearg_n_style (0, s, arg); | |
607 | } | |
608 | ||
609 | char * | |
610 | quotearg_char (char const *arg, char ch) | |
611 | { | |
612 | struct quoting_options options; | |
613 | options = default_quoting_options; | |
614 | set_char_quoting (&options, ch, 1); | |
615 | return quotearg_n_options (0, arg, &options); | |
616 | } | |
617 | ||
/* Like quotearg_char, with ':' as the extra character to quote.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}