]>
Commit | Line | Data |
---|---|---|
1 | /* quotearg.c - quote arguments for output | |
2 | Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2, or (at your option) | |
7 | any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software Foundation, | |
16 | Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
17 | ||
18 | /* Written by Paul Eggert <eggert@twinsun.com> */ | |
19 | ||
20 | #if HAVE_CONFIG_H | |
21 | # include <config.h> | |
22 | #endif | |
23 | ||
24 | #include "quotearg.h" | |
25 | ||
26 | #include "xalloc.h" | |
27 | ||
28 | #include <ctype.h> | |
29 | #include <errno.h> | |
30 | #include <limits.h> | |
31 | #include <stdlib.h> | |
32 | #include <string.h> | |
33 | ||
34 | #include "gettext.h" | |
35 | #define _(msgid) gettext (msgid) | |
36 | #define N_(msgid) msgid | |
37 | ||
38 | #if HAVE_WCHAR_H | |
39 | ||
40 | /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */ | |
41 | # include <stdio.h> | |
42 | # include <time.h> | |
43 | ||
44 | # include <wchar.h> | |
45 | #endif | |
46 | ||
#if !HAVE_MBRTOWC
/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
   other macros are defined only for documentation and to satisfy C
   syntax.  The stand-in mbrtowc copies one byte into *PWC and yields
   1 for a nonzero byte and 0 for a null byte; the stand-in iswprint
   defers to isprint on the value converted to unsigned char.
   HAVE_MBSINIT is undefined so that the mbsinit fallback below
   applies unless mbsinit is already a macro.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
# define iswprint(wc) isprint ((unsigned char) (wc))
# undef HAVE_MBSINIT
#endif

/* If mbsinit is neither a macro nor known to exist, treat every
   conversion state as the initial state.  */
#if !defined mbsinit && !HAVE_MBSINIT
# define mbsinit(ps) 1
#endif

/* If iswprint is not already a macro, pull in <wctype.h> when it is
   available; if that still provides no iswprint, treat every wide
   character as printable.  */
#ifndef iswprint
# if HAVE_WCTYPE_H
# include <wctype.h>
# endif
# if !defined iswprint && !HAVE_ISWPRINT
# define iswprint(wc) 1
# endif
#endif
70 | ||
/* Fallback for headers that do not define SIZE_MAX: the largest
   value representable in size_t.  */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Number of bits in an int.  Used to size and index the
   quote_these_too bit vector in struct quoting_options.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)
76 | ||
77 | struct quoting_options | |
78 | { | |
79 | /* Basic quoting style. */ | |
80 | enum quoting_style style; | |
81 | ||
82 | /* Quote the characters indicated by this bit vector even if the | |
83 | quoting style would not normally require them to be quoted. */ | |
84 | int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; | |
85 | }; | |
86 | ||
/* Null-terminated list of quoting style names.  Entry K names the
   style stored at index K of quoting_style_vals.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  NULL
};
99 | ||
100 | /* Correspondences to quoting style names. */ | |
101 | enum quoting_style const quoting_style_vals[] = | |
102 | { | |
103 | literal_quoting_style, | |
104 | shell_quoting_style, | |
105 | shell_always_quoting_style, | |
106 | c_quoting_style, | |
107 | escape_quoting_style, | |
108 | locale_quoting_style, | |
109 | clocale_quoting_style | |
110 | }; | |
111 | ||
/* The default quoting options, used whenever a caller passes a null
   options pointer.  Being static, it starts out zero-initialized: no
   extra characters are quoted and the style is whichever enumerator
   has value 0 (presumably literal_quoting_style -- confirm against
   quotearg.h).  */
static struct quoting_options default_quoting_options;
114 | ||
115 | /* Allocate a new set of quoting options, with contents initially identical | |
116 | to O if O is not null, or to the default if O is null. | |
117 | It is the caller's responsibility to free the result. */ | |
118 | struct quoting_options * | |
119 | clone_quoting_options (struct quoting_options *o) | |
120 | { | |
121 | int e = errno; | |
122 | struct quoting_options *p = xmalloc (sizeof *p); | |
123 | *p = *(o ? o : &default_quoting_options); | |
124 | errno = e; | |
125 | return p; | |
126 | } | |
127 | ||
128 | /* Get the value of O's quoting style. If O is null, use the default. */ | |
129 | enum quoting_style | |
130 | get_quoting_style (struct quoting_options *o) | |
131 | { | |
132 | return (o ? o : &default_quoting_options)->style; | |
133 | } | |
134 | ||
135 | /* In O (or in the default if O is null), | |
136 | set the value of the quoting style to S. */ | |
137 | void | |
138 | set_quoting_style (struct quoting_options *o, enum quoting_style s) | |
139 | { | |
140 | (o ? o : &default_quoting_options)->style = s; | |
141 | } | |
142 | ||
143 | /* In O (or in the default if O is null), | |
144 | set the value of the quoting options for character C to I. | |
145 | Return the old value. Currently, the only values defined for I are | |
146 | 0 (the default) and 1 (which means to quote the character even if | |
147 | it would not otherwise be quoted). */ | |
148 | int | |
149 | set_char_quoting (struct quoting_options *o, char c, int i) | |
150 | { | |
151 | unsigned char uc = c; | |
152 | int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; | |
153 | int shift = uc % INT_BITS; | |
154 | int r = (*p >> shift) & 1; | |
155 | *p ^= ((i & 1) ^ r) << shift; | |
156 | return r; | |
157 | } | |
158 | ||
159 | /* MSGID approximates a quotation mark. Return its translation if it | |
160 | has one; otherwise, return either it or "\"", depending on S. */ | |
161 | static char const * | |
162 | gettext_quote (char const *msgid, enum quoting_style s) | |
163 | { | |
164 | char const *translation = _(msgid); | |
165 | if (translation == msgid && s == clocale_quoting_style) | |
166 | translation = "\""; | |
167 | return translation; | |
168 | } | |
169 | ||
170 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
171 | argument ARG (of size ARGSIZE), using QUOTING_STYLE and the | |
172 | non-quoting-style part of O to control quoting. | |
173 | Terminate the output with a null character, and return the written | |
174 | size of the output, not counting the terminating null. | |
175 | If BUFFERSIZE is too small to store the output string, return the | |
176 | value that would have been returned had BUFFERSIZE been large enough. | |
177 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. | |
178 | ||
179 | This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, | |
180 | ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting | |
181 | style specified by O, and O may not be null. */ | |
182 | ||
183 | static size_t | |
184 | quotearg_buffer_restyled (char *buffer, size_t buffersize, | |
185 | char const *arg, size_t argsize, | |
186 | enum quoting_style quoting_style, | |
187 | struct quoting_options const *o) | |
188 | { | |
189 | size_t i; | |
190 | size_t len = 0; | |
191 | char const *quote_string = 0; | |
192 | size_t quote_string_len = 0; | |
193 | int backslash_escapes = 0; | |
194 | int unibyte_locale = MB_CUR_MAX == 1; | |
195 | ||
196 | #define STORE(c) \ | |
197 | do \ | |
198 | { \ | |
199 | if (len < buffersize) \ | |
200 | buffer[len] = (c); \ | |
201 | len++; \ | |
202 | } \ | |
203 | while (0) | |
204 | ||
205 | switch (quoting_style) | |
206 | { | |
207 | case c_quoting_style: | |
208 | STORE ('"'); | |
209 | backslash_escapes = 1; | |
210 | quote_string = "\""; | |
211 | quote_string_len = 1; | |
212 | break; | |
213 | ||
214 | case escape_quoting_style: | |
215 | backslash_escapes = 1; | |
216 | break; | |
217 | ||
218 | case locale_quoting_style: | |
219 | case clocale_quoting_style: | |
220 | { | |
221 | /* Get translations for open and closing quotation marks. | |
222 | ||
223 | The message catalog should translate "`" to a left | |
224 | quotation mark suitable for the locale, and similarly for | |
225 | "'". If the catalog has no translation, | |
226 | locale_quoting_style quotes `like this', and | |
227 | clocale_quoting_style quotes "like this". | |
228 | ||
229 | For example, an American English Unicode locale should | |
230 | translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and | |
231 | should translate "'" to U+201D (RIGHT DOUBLE QUOTATION | |
232 | MARK). A British English Unicode locale should instead | |
233 | translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and | |
234 | U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */ | |
235 | ||
236 | char const *left = gettext_quote (N_("`"), quoting_style); | |
237 | char const *right = gettext_quote (N_("'"), quoting_style); | |
238 | for (quote_string = left; *quote_string; quote_string++) | |
239 | STORE (*quote_string); | |
240 | backslash_escapes = 1; | |
241 | quote_string = right; | |
242 | quote_string_len = strlen (quote_string); | |
243 | } | |
244 | break; | |
245 | ||
246 | case shell_always_quoting_style: | |
247 | STORE ('\''); | |
248 | quote_string = "'"; | |
249 | quote_string_len = 1; | |
250 | break; | |
251 | ||
252 | default: | |
253 | break; | |
254 | } | |
255 | ||
256 | for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) | |
257 | { | |
258 | unsigned char c; | |
259 | unsigned char esc; | |
260 | ||
261 | if (backslash_escapes | |
262 | && quote_string_len | |
263 | && i + quote_string_len <= argsize | |
264 | && memcmp (arg + i, quote_string, quote_string_len) == 0) | |
265 | STORE ('\\'); | |
266 | ||
267 | c = arg[i]; | |
268 | switch (c) | |
269 | { | |
270 | case '\0': | |
271 | if (backslash_escapes) | |
272 | { | |
273 | STORE ('\\'); | |
274 | STORE ('0'); | |
275 | STORE ('0'); | |
276 | c = '0'; | |
277 | } | |
278 | break; | |
279 | ||
280 | case '?': | |
281 | switch (quoting_style) | |
282 | { | |
283 | case shell_quoting_style: | |
284 | goto use_shell_always_quoting_style; | |
285 | ||
286 | case c_quoting_style: | |
287 | if (i + 2 < argsize && arg[i + 1] == '?') | |
288 | switch (arg[i + 2]) | |
289 | { | |
290 | case '!': case '\'': | |
291 | case '(': case ')': case '-': case '/': | |
292 | case '<': case '=': case '>': | |
293 | /* Escape the second '?' in what would otherwise be | |
294 | a trigraph. */ | |
295 | c = arg[i + 2]; | |
296 | i += 2; | |
297 | STORE ('?'); | |
298 | STORE ('\\'); | |
299 | STORE ('?'); | |
300 | break; | |
301 | } | |
302 | break; | |
303 | ||
304 | default: | |
305 | break; | |
306 | } | |
307 | break; | |
308 | ||
309 | case '\a': esc = 'a'; goto c_escape; | |
310 | case '\b': esc = 'b'; goto c_escape; | |
311 | case '\f': esc = 'f'; goto c_escape; | |
312 | case '\n': esc = 'n'; goto c_and_shell_escape; | |
313 | case '\r': esc = 'r'; goto c_and_shell_escape; | |
314 | case '\t': esc = 't'; goto c_and_shell_escape; | |
315 | case '\v': esc = 'v'; goto c_escape; | |
316 | case '\\': esc = c; goto c_and_shell_escape; | |
317 | ||
318 | c_and_shell_escape: | |
319 | if (quoting_style == shell_quoting_style) | |
320 | goto use_shell_always_quoting_style; | |
321 | c_escape: | |
322 | if (backslash_escapes) | |
323 | { | |
324 | c = esc; | |
325 | goto store_escape; | |
326 | } | |
327 | break; | |
328 | ||
329 | case '#': case '~': | |
330 | if (i != 0) | |
331 | break; | |
332 | /* Fall through. */ | |
333 | case ' ': | |
334 | case '!': /* special in bash */ | |
335 | case '"': case '$': case '&': | |
336 | case '(': case ')': case '*': case ';': | |
337 | case '<': case '>': case '[': | |
338 | case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ | |
339 | case '`': case '|': | |
340 | /* A shell special character. In theory, '$' and '`' could | |
341 | be the first bytes of multibyte characters, which means | |
342 | we should check them with mbrtowc, but in practice this | |
343 | doesn't happen so it's not worth worrying about. */ | |
344 | if (quoting_style == shell_quoting_style) | |
345 | goto use_shell_always_quoting_style; | |
346 | break; | |
347 | ||
348 | case '\'': | |
349 | switch (quoting_style) | |
350 | { | |
351 | case shell_quoting_style: | |
352 | goto use_shell_always_quoting_style; | |
353 | ||
354 | case shell_always_quoting_style: | |
355 | STORE ('\''); | |
356 | STORE ('\\'); | |
357 | STORE ('\''); | |
358 | break; | |
359 | ||
360 | default: | |
361 | break; | |
362 | } | |
363 | break; | |
364 | ||
365 | case '%': case '+': case ',': case '-': case '.': case '/': | |
366 | case '0': case '1': case '2': case '3': case '4': case '5': | |
367 | case '6': case '7': case '8': case '9': case ':': case '=': | |
368 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
369 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
370 | case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': | |
371 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
372 | case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': | |
373 | case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': | |
374 | case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': | |
375 | case 'o': case 'p': case 'q': case 'r': case 's': case 't': | |
376 | case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': | |
377 | case '{': case '}': | |
378 | /* These characters don't cause problems, no matter what the | |
379 | quoting style is. They cannot start multibyte sequences. */ | |
380 | break; | |
381 | ||
382 | default: | |
383 | /* If we have a multibyte sequence, copy it until we reach | |
384 | its end, find an error, or come back to the initial shift | |
385 | state. For C-like styles, if the sequence has | |
386 | unprintable characters, escape the whole sequence, since | |
387 | we can't easily escape single characters within it. */ | |
388 | { | |
389 | /* Length of multibyte sequence found so far. */ | |
390 | size_t m; | |
391 | ||
392 | int printable; | |
393 | ||
394 | if (unibyte_locale) | |
395 | { | |
396 | m = 1; | |
397 | printable = isprint (c); | |
398 | } | |
399 | else | |
400 | { | |
401 | mbstate_t mbstate; | |
402 | memset (&mbstate, 0, sizeof mbstate); | |
403 | ||
404 | m = 0; | |
405 | printable = 1; | |
406 | if (argsize == SIZE_MAX) | |
407 | argsize = strlen (arg); | |
408 | ||
409 | do | |
410 | { | |
411 | wchar_t w; | |
412 | size_t bytes = mbrtowc (&w, &arg[i + m], | |
413 | argsize - (i + m), &mbstate); | |
414 | if (bytes == 0) | |
415 | break; | |
416 | else if (bytes == (size_t) -1) | |
417 | { | |
418 | printable = 0; | |
419 | break; | |
420 | } | |
421 | else if (bytes == (size_t) -2) | |
422 | { | |
423 | printable = 0; | |
424 | while (i + m < argsize && arg[i + m]) | |
425 | m++; | |
426 | break; | |
427 | } | |
428 | else | |
429 | { | |
430 | if (! iswprint (w)) | |
431 | printable = 0; | |
432 | m += bytes; | |
433 | } | |
434 | } | |
435 | while (! mbsinit (&mbstate)); | |
436 | } | |
437 | ||
438 | if (1 < m || (backslash_escapes && ! printable)) | |
439 | { | |
440 | /* Output a multibyte sequence, or an escaped | |
441 | unprintable unibyte character. */ | |
442 | size_t ilim = i + m; | |
443 | ||
444 | for (;;) | |
445 | { | |
446 | if (backslash_escapes && ! printable) | |
447 | { | |
448 | STORE ('\\'); | |
449 | STORE ('0' + (c >> 6)); | |
450 | STORE ('0' + ((c >> 3) & 7)); | |
451 | c = '0' + (c & 7); | |
452 | } | |
453 | if (ilim <= i + 1) | |
454 | break; | |
455 | STORE (c); | |
456 | c = arg[++i]; | |
457 | } | |
458 | ||
459 | goto store_c; | |
460 | } | |
461 | } | |
462 | } | |
463 | ||
464 | if (! (backslash_escapes | |
465 | && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) | |
466 | goto store_c; | |
467 | ||
468 | store_escape: | |
469 | STORE ('\\'); | |
470 | ||
471 | store_c: | |
472 | STORE (c); | |
473 | } | |
474 | ||
475 | if (quote_string) | |
476 | for (; *quote_string; quote_string++) | |
477 | STORE (*quote_string); | |
478 | ||
479 | if (len < buffersize) | |
480 | buffer[len] = '\0'; | |
481 | return len; | |
482 | ||
483 | use_shell_always_quoting_style: | |
484 | return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
485 | shell_always_quoting_style, o); | |
486 | } | |
487 | ||
488 | /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of | |
489 | argument ARG (of size ARGSIZE), using O to control quoting. | |
490 | If O is null, use the default. | |
491 | Terminate the output with a null character, and return the written | |
492 | size of the output, not counting the terminating null. | |
493 | If BUFFERSIZE is too small to store the output string, return the | |
494 | value that would have been returned had BUFFERSIZE been large enough. | |
495 | If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ | |
496 | size_t | |
497 | quotearg_buffer (char *buffer, size_t buffersize, | |
498 | char const *arg, size_t argsize, | |
499 | struct quoting_options const *o) | |
500 | { | |
501 | struct quoting_options const *p = o ? o : &default_quoting_options; | |
502 | int e = errno; | |
503 | size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, | |
504 | p->style, p); | |
505 | errno = e; | |
506 | return r; | |
507 | } | |
508 | ||
/* Return a quoted version of ARG, stored in slot N.
   ARG is ARGSIZE bytes long, or null-terminated if ARGSIZE is -1.
   OPTIONS specifies the quoting options.
   The result lives in static storage owned by this function and may
   be overwritten by the next call that uses the same N.
   N must be nonnegative; a negative N aborts.  N is deliberately an
   "int" to leave room for future extensions using negative values.
   errno is left unchanged.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
  {
    size_t size;
    char *val;
  };

  /* Slot 0 is preallocated, so that the caller can always quote one
     small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static unsigned int nslots = 1;

  int saved_errno = errno;
  unsigned int slot = n;

  if (n < 0)
    abort ();

  /* Grow the slot table so that it includes slot N.  */
  if (slot >= nslots)
    {
      unsigned int wanted = slot + 1;
      size_t nbytes = wanted * sizeof *slotvec;

      /* Die if the byte count overflowed size_t.  */
      if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
          && wanted != nbytes / sizeof *slotvec)
        xalloc_die ();

      /* The initial one-entry table is static; move it to the heap
         before it is first resized.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, nbytes);
      memset (slotvec + nslots, 0, (wanted - nslots) * sizeof *slotvec);
      nslots = wanted;
    }

  {
    struct slotvec *sv = &slotvec[n];
    size_t capacity = sv->size;
    char *out = sv->val;
    size_t needed = quotearg_buffer (out, capacity, arg, argsize, options);

    /* The first pass did not fit: enlarge the slot and quote again.
       slot0 is static storage and must never be handed to xrealloc.  */
    if (needed >= capacity)
      {
        capacity = needed + 1;
        sv->size = capacity;
        out = xrealloc (out == slot0 ? 0 : out, capacity);
        sv->val = out;
        quotearg_buffer (out, capacity, arg, argsize, options);
      }

    errno = saved_errno;
    return out;
  }
}
573 | ||
574 | char * | |
575 | quotearg_n (int n, char const *arg) | |
576 | { | |
577 | return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); | |
578 | } | |
579 | ||
/* Quote the null-terminated string ARG with the default quoting
   options, returning the result in storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
585 | ||
586 | /* Return quoting options for STYLE, with no extra quoting. */ | |
587 | static struct quoting_options | |
588 | quoting_options_from_style (enum quoting_style style) | |
589 | { | |
590 | struct quoting_options o; | |
591 | o.style = style; | |
592 | memset (o.quote_these_too, 0, sizeof o.quote_these_too); | |
593 | return o; | |
594 | } | |
595 | ||
596 | char * | |
597 | quotearg_n_style (int n, enum quoting_style s, char const *arg) | |
598 | { | |
599 | struct quoting_options const o = quoting_options_from_style (s); | |
600 | return quotearg_n_options (n, arg, SIZE_MAX, &o); | |
601 | } | |
602 | ||
603 | char * | |
604 | quotearg_n_style_mem (int n, enum quoting_style s, | |
605 | char const *arg, size_t argsize) | |
606 | { | |
607 | struct quoting_options const o = quoting_options_from_style (s); | |
608 | return quotearg_n_options (n, arg, argsize, &o); | |
609 | } | |
610 | ||
611 | char * | |
612 | quotearg_style (enum quoting_style s, char const *arg) | |
613 | { | |
614 | return quotearg_n_style (0, s, arg); | |
615 | } | |
616 | ||
617 | char * | |
618 | quotearg_char (char const *arg, char ch) | |
619 | { | |
620 | struct quoting_options options; | |
621 | options = default_quoting_options; | |
622 | set_char_quoting (&options, ch, 1); | |
623 | return quotearg_n_options (0, arg, SIZE_MAX, &options); | |
624 | } | |
625 | ||
/* Like quotearg_char with ':' as the additionally quoted character.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}