]>
Commit | Line | Data |
---|---|---|
ff4a34be AD |
1 | /* quotearg.c - quote arguments for output |
2 | Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software Foundation, | |
16 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
17 | ||
18 | /* Written by Paul Eggert <eggert@twinsun.com> */ | |
19 | ||
20 | #if HAVE_CONFIG_H | |
21 | # include <config.h> | |
22 | #endif | |
23 | ||
24 | #include <sys/types.h> | |
25 | #include <quotearg.h> | |
26 | #include <xalloc.h> | |
27 | ||
28 | #include <ctype.h> | |
29 | ||
30 | #if ENABLE_NLS | |
31 | # include <libintl.h> | |
32 | # define _(text) gettext (text) | |
33 | #else | |
34 | # define _(text) text | |
35 | #endif | |
36 | #define N_(text) text | |
37 | ||
38 | #if HAVE_LIMITS_H | |
39 | # include <limits.h> | |
40 | #endif | |
41 | #ifndef CHAR_BIT | |
42 | # define CHAR_BIT 8 | |
43 | #endif | |
44 | #ifndef UCHAR_MAX | |
45 | # define UCHAR_MAX ((unsigned char) -1) | |
46 | #endif | |
47 | ||
48 | #if HAVE_C_BACKSLASH_A | |
49 | # define ALERT_CHAR '\a' | |
50 | #else | |
51 | # define ALERT_CHAR '\7' | |
52 | #endif | |
53 | ||
54 | #if HAVE_STDLIB_H | |
55 | # include <stdlib.h> | |
56 | #endif | |
57 | ||
58 | #if HAVE_STRING_H | |
59 | # include <string.h> | |
60 | #endif | |
61 | ||
62 | #if HAVE_WCHAR_H | |
63 | # include <wchar.h> | |
64 | #endif | |
65 | ||
66 | #if HAVE_MBRTOWC | |
67 | size_t mbrtowc (); | |
68 | # ifdef mbstate_t | |
69 | # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0) | |
70 | # define mbsinit(ps) 1 | |
71 | # endif | |
72 | #else | |
73 | /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the | |
74 | other macros are defined only for documentation and to satisfy C | |
75 | syntax. */ | |
76 | # undef MB_CUR_MAX | |
77 | # define MB_CUR_MAX 1 | |
78 | # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) | |
79 | # define mbsinit(ps) 1 | |
80 | # define iswprint(wc) ISPRINT ((unsigned char) (wc)) | |
81 | #endif | |
82 | ||
83 | #ifndef iswprint | |
84 | # if HAVE_WCTYPE_H | |
85 | # include <wctype.h> | |
86 | # endif | |
87 | # if !defined iswprint && !HAVE_ISWPRINT | |
88 | # define iswprint(wc) 1 | |
89 | # endif | |
90 | #endif | |
91 | ||
92 | #define INT_BITS (sizeof (int) * CHAR_BIT) | |
93 | ||
94 | #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) | |
95 | # define IN_CTYPE_DOMAIN(c) 1 | |
96 | #else | |
97 | # define IN_CTYPE_DOMAIN(c) isascii(c) | |
98 | #endif | |
99 | ||
100 | /* Undefine to protect against the definition in wctype.h of solaris2.6. */ | |
101 | #undef ISPRINT | |
102 | #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) | |
103 | ||
104 | struct quoting_options | |
105 | { | |
106 | /* Basic quoting style. */ | |
107 | enum quoting_style style; | |
108 | ||
109 | /* Quote the characters indicated by this bit vector even if the | |
110 | quoting style would not normally require them to be quoted. */ | |
111 | int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; | |
112 | }; | |
113 | ||
114 | /* Names of quoting styles. */ | |
115 | char const *const quoting_style_args[] = | |
116 | { | |
117 | "literal", | |
118 | "shell", | |
119 | "shell-always", | |
120 | "c", | |
121 | "escape", | |
122 | "locale", | |
123 | "clocale", | |
124 | 0 | |
125 | }; | |
126 | ||
127 | /* Correspondences to quoting style names. */ | |
128 | enum quoting_style const quoting_style_vals[] = | |
129 | { | |
130 | literal_quoting_style, | |
131 | shell_quoting_style, | |
132 | shell_always_quoting_style, | |
133 | c_quoting_style, | |
134 | escape_quoting_style, | |
135 | locale_quoting_style, | |
136 | clocale_quoting_style | |
137 | }; | |
138 | ||
139 | /* The default quoting options. */ | |
140 | static struct quoting_options default_quoting_options; | |
141 | ||
142 | /* Allocate a new set of quoting options, with contents initially identical | |
143 | to O if O is not null, or to the default if O is null. | |
144 | It is the caller's responsibility to free the result. */ | |
145 | struct quoting_options * | |
146 | clone_quoting_options (struct quoting_options *o) | |
147 | { | |
148 | struct quoting_options *p | |
149 | = (struct quoting_options *) xmalloc (sizeof (struct quoting_options)); | |
150 | *p = *(o ? o : &default_quoting_options); | |
151 | return p; | |
152 | } | |
153 | ||
154 | /* Get the value of O's quoting style. If O is null, use the default. */ | |
155 | enum quoting_style | |
156 | get_quoting_style (struct quoting_options *o) | |
157 | { | |
158 | return (o ? o : &default_quoting_options)->style; | |
159 | } | |
160 | ||
161 | /* In O (or in the default if O is null), | |
162 | set the value of the quoting style to S. */ | |
163 | void | |
164 | set_quoting_style (struct quoting_options *o, enum quoting_style s) | |
165 | { | |
166 | (o ? o : &default_quoting_options)->style = s; | |
167 | } | |
168 | ||
169 | /* In O (or in the default if O is null), | |
170 | set the value of the quoting options for character C to I. | |
171 | Return the old value. Currently, the only values defined for I are | |
172 | 0 (the default) and 1 (which means to quote the character even if | |
173 | it would not otherwise be quoted). */ | |
174 | int | |
175 | set_char_quoting (struct quoting_options *o, char c, int i) | |
176 | { | |
177 | unsigned char uc = c; | |
178 | int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; | |
179 | int shift = uc % INT_BITS; | |
180 | int r = (*p >> shift) & 1; | |
181 | *p ^= ((i & 1) ^ r) << shift; | |
182 | return r; | |
183 | } | |
184 | ||
185 | /* MSGID approximates a quotation mark. Return its translation if it | |
186 | has one; otherwise, return either it or "\"", depending on S. */ | |
187 | static char const * | |
188 | gettext_quote (char const *msgid, enum quoting_style s) | |
189 | { | |
190 | char const *translation = _(msgid); | |
191 | if (translation == msgid && s == clocale_quoting_style) | |
192 | translation = "\""; | |
193 | return translation; | |
194 | } | |
195 | ||
196 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
197 | argument ARG (of size ARGSIZE), using QUOTING_STYLE and the | |
198 | non-quoting-style part of O to control quoting. | |
199 | Terminate the output with a null character, and return the written | |
200 | size of the output, not counting the terminating null. | |
201 | If BUFFERSIZE is too small to store the output string, return the | |
202 | value that would have been returned had BUFFERSIZE been large enough. | |
203 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. | |
204 | ||
205 | This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, | |
206 | ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting | |
207 | style specified by O, and O may not be null. */ | |
208 | ||
209 | static size_t | |
210 | quotearg_buffer_restyled (char *buffer, size_t buffersize, | |
211 | char const *arg, size_t argsize, | |
212 | enum quoting_style quoting_style, | |
213 | struct quoting_options const *o) | |
214 | { | |
215 | size_t i; | |
216 | size_t len = 0; | |
217 | char const *quote_string = 0; | |
218 | size_t quote_string_len = 0; | |
219 | int backslash_escapes = 0; | |
220 | int unibyte_locale = MB_CUR_MAX == 1; | |
221 | ||
222 | #define STORE(c) \ | |
223 | do \ | |
224 | { \ | |
225 | if (len < buffersize) \ | |
226 | buffer[len] = (c); \ | |
227 | len++; \ | |
228 | } \ | |
229 | while (0) | |
230 | ||
231 | switch (quoting_style) | |
232 | { | |
233 | case c_quoting_style: | |
234 | STORE ('"'); | |
235 | backslash_escapes = 1; | |
236 | quote_string = "\""; | |
237 | quote_string_len = 1; | |
238 | break; | |
239 | ||
240 | case escape_quoting_style: | |
241 | backslash_escapes = 1; | |
242 | break; | |
243 | ||
244 | case locale_quoting_style: | |
245 | case clocale_quoting_style: | |
246 | { | |
247 | /* Get translations for open and closing quotation marks. | |
248 | ||
249 | The message catalog should translate "`" to a left | |
250 | quotation mark suitable for the locale, and similarly for | |
251 | "'". If the catalog has no translation, | |
252 | locale_quoting_style quotes `like this', and | |
253 | clocale_quoting_style quotes "like this". | |
254 | ||
255 | For example, an American English Unicode locale should | |
256 | translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and | |
257 | should translate "'" to U+201D (RIGHT DOUBLE QUOTATION | |
258 | MARK). A British English Unicode locale should instead | |
259 | translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and | |
260 | U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */ | |
261 | ||
262 | char const *left = gettext_quote (N_("`"), quoting_style); | |
263 | char const *right = gettext_quote (N_("'"), quoting_style); | |
264 | for (quote_string = left; *quote_string; quote_string++) | |
265 | STORE (*quote_string); | |
266 | backslash_escapes = 1; | |
267 | quote_string = right; | |
268 | quote_string_len = strlen (quote_string); | |
269 | } | |
270 | break; | |
271 | ||
272 | case shell_always_quoting_style: | |
273 | STORE ('\''); | |
274 | quote_string = "'"; | |
275 | quote_string_len = 1; | |
276 | break; | |
277 | ||
278 | default: | |
279 | break; | |
280 | } | |
281 | ||
282 | for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++) | |
283 | { | |
284 | unsigned char c; | |
285 | unsigned char esc; | |
286 | ||
287 | if (backslash_escapes | |
288 | && quote_string_len | |
289 | && i + quote_string_len <= argsize | |
290 | && memcmp (arg + i, quote_string, quote_string_len) == 0) | |
291 | STORE ('\\'); | |
292 | ||
293 | c = arg[i]; | |
294 | switch (c) | |
295 | { | |
296 | case '?': | |
297 | switch (quoting_style) | |
298 | { | |
299 | case shell_quoting_style: | |
300 | goto use_shell_always_quoting_style; | |
301 | ||
302 | case c_quoting_style: | |
303 | if (i + 2 < argsize && arg[i + 1] == '?') | |
304 | switch (arg[i + 2]) | |
305 | { | |
306 | case '!': case '\'': | |
307 | case '(': case ')': case '-': case '/': | |
308 | case '<': case '=': case '>': | |
309 | /* Escape the second '?' in what would otherwise be | |
310 | a trigraph. */ | |
311 | i += 2; | |
312 | c = arg[i + 2]; | |
313 | STORE ('?'); | |
314 | STORE ('\\'); | |
315 | STORE ('?'); | |
316 | break; | |
317 | } | |
318 | break; | |
319 | ||
320 | default: | |
321 | break; | |
322 | } | |
323 | break; | |
324 | ||
325 | case ALERT_CHAR: esc = 'a'; goto c_escape; | |
326 | case '\b': esc = 'b'; goto c_escape; | |
327 | case '\f': esc = 'f'; goto c_escape; | |
328 | case '\n': esc = 'n'; goto c_and_shell_escape; | |
329 | case '\r': esc = 'r'; goto c_and_shell_escape; | |
330 | case '\t': esc = 't'; goto c_and_shell_escape; | |
331 | case '\v': esc = 'v'; goto c_escape; | |
332 | case '\\': esc = c; goto c_and_shell_escape; | |
333 | ||
334 | c_and_shell_escape: | |
335 | if (quoting_style == shell_quoting_style) | |
336 | goto use_shell_always_quoting_style; | |
337 | c_escape: | |
338 | if (backslash_escapes) | |
339 | { | |
340 | c = esc; | |
341 | goto store_escape; | |
342 | } | |
343 | break; | |
344 | ||
345 | case '#': case '~': | |
346 | if (i != 0) | |
347 | break; | |
348 | /* Fall through. */ | |
349 | case ' ': | |
350 | case '!': /* special in bash */ | |
351 | case '"': case '$': case '&': | |
352 | case '(': case ')': case '*': case ';': | |
353 | case '<': case '>': case '[': | |
354 | case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ | |
355 | case '`': case '|': | |
356 | /* A shell special character. In theory, '$' and '`' could | |
357 | be the first bytes of multibyte characters, which means | |
358 | we should check them with mbrtowc, but in practice this | |
359 | doesn't happen so it's not worth worrying about. */ | |
360 | if (quoting_style == shell_quoting_style) | |
361 | goto use_shell_always_quoting_style; | |
362 | break; | |
363 | ||
364 | case '\'': | |
365 | switch (quoting_style) | |
366 | { | |
367 | case shell_quoting_style: | |
368 | goto use_shell_always_quoting_style; | |
369 | ||
370 | case shell_always_quoting_style: | |
371 | STORE ('\''); | |
372 | STORE ('\\'); | |
373 | STORE ('\''); | |
374 | break; | |
375 | ||
376 | default: | |
377 | break; | |
378 | } | |
379 | break; | |
380 | ||
381 | case '%': case '+': case ',': case '-': case '.': case '/': | |
382 | case '0': case '1': case '2': case '3': case '4': case '5': | |
383 | case '6': case '7': case '8': case '9': case ':': case '=': | |
384 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
385 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
386 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
387 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
388 | case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': | |
389 | case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': | |
390 | case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': | |
391 | case 'o': case 'p': case 'q': case 'r': case 's': case 't': | |
392 | case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': | |
393 | case '{': case '}': | |
394 | /* These characters don't cause problems, no matter what the | |
395 | quoting style is. They cannot start multibyte sequences. */ | |
396 | break; | |
397 | ||
398 | default: | |
399 | /* If we have a multibyte sequence, copy it until we reach | |
400 | its end, find an error, or come back to the initial shift | |
401 | state. For C-like styles, if the sequence has | |
402 | unprintable characters, escape the whole sequence, since | |
403 | we can't easily escape single characters within it. */ | |
404 | { | |
405 | /* Length of multibyte sequence found so far. */ | |
406 | size_t m; | |
407 | ||
408 | int printable; | |
409 | ||
410 | if (unibyte_locale) | |
411 | { | |
412 | m = 1; | |
413 | printable = ISPRINT (c); | |
414 | } | |
415 | else | |
416 | { | |
417 | mbstate_t mbstate; | |
418 | memset (&mbstate, 0, sizeof mbstate); | |
419 | ||
420 | m = 0; | |
421 | printable = 1; | |
422 | if (argsize == (size_t) -1) | |
423 | argsize = strlen (arg); | |
424 | ||
425 | do | |
426 | { | |
427 | wchar_t w; | |
428 | size_t bytes = mbrtowc (&w, &arg[i + m], | |
429 | argsize - (i + m), &mbstate); | |
430 | if (bytes == 0) | |
431 | break; | |
432 | else if (bytes == (size_t) -1) | |
433 | { | |
434 | printable = 0; | |
435 | break; | |
436 | } | |
437 | else if (bytes == (size_t) -2) | |
438 | { | |
439 | printable = 0; | |
440 | while (i + m < argsize && arg[i + m]) | |
441 | m++; | |
442 | break; | |
443 | } | |
444 | else | |
445 | { | |
446 | if (! iswprint (w)) | |
447 | printable = 0; | |
448 | m += bytes; | |
449 | } | |
450 | } | |
451 | while (! mbsinit (&mbstate)); | |
452 | } | |
453 | ||
454 | if (1 < m || (backslash_escapes && ! printable)) | |
455 | { | |
456 | /* Output a multibyte sequence, or an escaped | |
457 | unprintable unibyte character. */ | |
458 | size_t ilim = i + m; | |
459 | ||
460 | for (;;) | |
461 | { | |
462 | if (backslash_escapes && ! printable) | |
463 | { | |
464 | STORE ('\\'); | |
465 | STORE ('0' + (c >> 6)); | |
466 | STORE ('0' + ((c >> 3) & 7)); | |
467 | c = '0' + (c & 7); | |
468 | } | |
469 | if (ilim <= i + 1) | |
470 | break; | |
471 | STORE (c); | |
472 | c = arg[++i]; | |
473 | } | |
474 | ||
475 | goto store_c; | |
476 | } | |
477 | } | |
478 | } | |
479 | ||
480 | if (! (backslash_escapes | |
481 | && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) | |
482 | goto store_c; | |
483 | ||
484 | store_escape: | |
485 | STORE ('\\'); | |
486 | ||
487 | store_c: | |
488 | STORE (c); | |
489 | } | |
490 | ||
491 | if (quote_string) | |
492 | for (; *quote_string; quote_string++) | |
493 | STORE (*quote_string); | |
494 | ||
495 | if (len < buffersize) | |
496 | buffer[len] = '\0'; | |
497 | return len; | |
498 | ||
499 | use_shell_always_quoting_style: | |
500 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
501 | shell_always_quoting_style, o); | |
502 | } | |
503 | ||
504 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
505 | argument ARG (of size ARGSIZE), using O to control quoting. | |
506 | If O is null, use the default. | |
507 | Terminate the output with a null character, and return the written | |
508 | size of the output, not counting the terminating null. | |
509 | If BUFFERSIZE is too small to store the output string, return the | |
510 | value that would have been returned had BUFFERSIZE been large enough. | |
511 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ | |
512 | size_t | |
513 | quotearg_buffer (char *buffer, size_t buffersize, | |
514 | char const *arg, size_t argsize, | |
515 | struct quoting_options const *o) | |
516 | { | |
517 | struct quoting_options const *p = o ? o : &default_quoting_options; | |
518 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
519 | p->style, p); | |
520 | } | |
521 | ||
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative.  N is deliberately declared with type "int"
   to allow for future extensions (using negative values).

   The slot vector grows on demand to N + 1 entries, and each slot's
   buffer grows on demand to fit the quoted string.  The function
   aborts if N + 1 or the byte size of the slot vector overflows.  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (nslots <= n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();
      /* The initial one-element vector is static and cannot be passed
         to xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* The vector now holds N1 slots.  Recording N here instead would
         make the next call with the same N grow the vector again and
         zero slot N, leaking the buffer it owned.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    /* First pass: quote into the existing buffer and learn the size
       actually needed.  */
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* Too small: enlarge the buffer and quote again.  SLOT0 is
           static storage, so it is replaced rather than reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
575 | ||
576 | char * | |
577 | quotearg_n (unsigned int n, char const *arg) | |
578 | { | |
579 | return quotearg_n_options (n, arg, &default_quoting_options); | |
580 | } | |
581 | ||
/* Return ARG quoted according to the default quoting options, using
   storage slot 0.  */
char *
quotearg (char const *arg)
{
  unsigned int const slot = 0;

  return quotearg_n (slot, arg);
}
587 | ||
588 | char * | |
589 | quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg) | |
590 | { | |
591 | struct quoting_options o; | |
592 | o.style = s; | |
593 | memset (o.quote_these_too, 0, sizeof o.quote_these_too); | |
594 | return quotearg_n_options (n, arg, &o); | |
595 | } | |
596 | ||
597 | char * | |
598 | quotearg_style (enum quoting_style s, char const *arg) | |
599 | { | |
600 | return quotearg_n_style (0, s, arg); | |
601 | } | |
602 | ||
603 | char * | |
604 | quotearg_char (char const *arg, char ch) | |
605 | { | |
606 | struct quoting_options options; | |
607 | options = default_quoting_options; | |
608 | set_char_quoting (&options, ch, 1); | |
609 | return quotearg_n_options (0, arg, &options); | |
610 | } | |
611 | ||
/* Return ARG quoted like quotearg_char does, with ':' as the extra
   character marked for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}