]> git.saurik.com Git - bison.git/blob - lib/quotearg.c
Update copyright date.
[bison.git] / lib / quotearg.c
1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18 /* Written by Paul Eggert <eggert@twinsun.com> */
19
20 #if HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include "quotearg.h"
25
26 #include "xalloc.h"
27
28 #include <ctype.h>
29 #include <errno.h>
30 #include <limits.h>
31 #include <stdlib.h>
32 #include <string.h>
33
34 #include "gettext.h"
35 #define _(msgid) gettext (msgid)
36 #define N_(msgid) msgid
37
38 #if HAVE_WCHAR_H
39
40 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
41 # include <stdio.h>
42 # include <time.h>
43
44 # include <wchar.h>
45 #endif
46
47 #if !HAVE_MBRTOWC
48 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
49 other macros are defined only for documentation and to satisfy C
50 syntax. */
51 # undef MB_CUR_MAX
52 # define MB_CUR_MAX 1
53 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
54 # define iswprint(wc) isprint ((unsigned char) (wc))
55 # undef HAVE_MBSINIT
56 #endif
57
58 #if !defined mbsinit && !HAVE_MBSINIT
59 # define mbsinit(ps) 1
60 #endif
61
62 #ifndef iswprint
63 # if HAVE_WCTYPE_H
64 # include <wctype.h>
65 # endif
66 # if !defined iswprint && !HAVE_ISWPRINT
67 # define iswprint(wc) 1
68 # endif
69 #endif
70
71 #ifndef SIZE_MAX
72 # define SIZE_MAX ((size_t) -1)
73 #endif
74
75 #define INT_BITS (sizeof (int) * CHAR_BIT)
76
77 struct quoting_options
78 {
79 /* Basic quoting style. */
80 enum quoting_style style;
81
82 /* Quote the characters indicated by this bit vector even if the
83 quoting style would not normally require them to be quoted. */
84 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
85 };
86
/* User-visible names of the quoting styles, listed in the same order
   as the entries of quoting_style_vals and terminated by a null
   pointer.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  NULL
};
99
100 /* Correspondences to quoting style names. */
101 enum quoting_style const quoting_style_vals[] =
102 {
103 literal_quoting_style,
104 shell_quoting_style,
105 shell_always_quoting_style,
106 c_quoting_style,
107 escape_quoting_style,
108 locale_quoting_style,
109 clocale_quoting_style
110 };
111
112 /* The default quoting options. */
113 static struct quoting_options default_quoting_options;
114
115 /* Allocate a new set of quoting options, with contents initially identical
116 to O if O is not null, or to the default if O is null.
117 It is the caller's responsibility to free the result. */
118 struct quoting_options *
119 clone_quoting_options (struct quoting_options *o)
120 {
121 int e = errno;
122 struct quoting_options *p = xmalloc (sizeof *p);
123 *p = *(o ? o : &default_quoting_options);
124 errno = e;
125 return p;
126 }
127
128 /* Get the value of O's quoting style. If O is null, use the default. */
129 enum quoting_style
130 get_quoting_style (struct quoting_options *o)
131 {
132 return (o ? o : &default_quoting_options)->style;
133 }
134
135 /* In O (or in the default if O is null),
136 set the value of the quoting style to S. */
137 void
138 set_quoting_style (struct quoting_options *o, enum quoting_style s)
139 {
140 (o ? o : &default_quoting_options)->style = s;
141 }
142
143 /* In O (or in the default if O is null),
144 set the value of the quoting options for character C to I.
145 Return the old value. Currently, the only values defined for I are
146 0 (the default) and 1 (which means to quote the character even if
147 it would not otherwise be quoted). */
148 int
149 set_char_quoting (struct quoting_options *o, char c, int i)
150 {
151 unsigned char uc = c;
152 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
153 int shift = uc % INT_BITS;
154 int r = (*p >> shift) & 1;
155 *p ^= ((i & 1) ^ r) << shift;
156 return r;
157 }
158
159 /* MSGID approximates a quotation mark. Return its translation if it
160 has one; otherwise, return either it or "\"", depending on S. */
161 static char const *
162 gettext_quote (char const *msgid, enum quoting_style s)
163 {
164 char const *translation = _(msgid);
165 if (translation == msgid && s == clocale_quoting_style)
166 translation = "\"";
167 return translation;
168 }
169
170 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
171 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
172 non-quoting-style part of O to control quoting.
173 Terminate the output with a null character, and return the written
174 size of the output, not counting the terminating null.
175 If BUFFERSIZE is too small to store the output string, return the
176 value that would have been returned had BUFFERSIZE been large enough.
177 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
178
179 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
180 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
181 style specified by O, and O may not be null. */
182
183 static size_t
184 quotearg_buffer_restyled (char *buffer, size_t buffersize,
185 char const *arg, size_t argsize,
186 enum quoting_style quoting_style,
187 struct quoting_options const *o)
188 {
189 size_t i;
190 size_t len = 0;
191 char const *quote_string = 0;
192 size_t quote_string_len = 0;
193 int backslash_escapes = 0;
194 int unibyte_locale = MB_CUR_MAX == 1;
195
196 #define STORE(c) \
197 do \
198 { \
199 if (len < buffersize) \
200 buffer[len] = (c); \
201 len++; \
202 } \
203 while (0)
204
205 switch (quoting_style)
206 {
207 case c_quoting_style:
208 STORE ('"');
209 backslash_escapes = 1;
210 quote_string = "\"";
211 quote_string_len = 1;
212 break;
213
214 case escape_quoting_style:
215 backslash_escapes = 1;
216 break;
217
218 case locale_quoting_style:
219 case clocale_quoting_style:
220 {
221 /* Get translations for open and closing quotation marks.
222
223 The message catalog should translate "`" to a left
224 quotation mark suitable for the locale, and similarly for
225 "'". If the catalog has no translation,
226 locale_quoting_style quotes `like this', and
227 clocale_quoting_style quotes "like this".
228
229 For example, an American English Unicode locale should
230 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
231 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
232 MARK). A British English Unicode locale should instead
233 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
234 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
235
236 char const *left = gettext_quote (N_("`"), quoting_style);
237 char const *right = gettext_quote (N_("'"), quoting_style);
238 for (quote_string = left; *quote_string; quote_string++)
239 STORE (*quote_string);
240 backslash_escapes = 1;
241 quote_string = right;
242 quote_string_len = strlen (quote_string);
243 }
244 break;
245
246 case shell_always_quoting_style:
247 STORE ('\'');
248 quote_string = "'";
249 quote_string_len = 1;
250 break;
251
252 default:
253 break;
254 }
255
256 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
257 {
258 unsigned char c;
259 unsigned char esc;
260
261 if (backslash_escapes
262 && quote_string_len
263 && i + quote_string_len <= argsize
264 && memcmp (arg + i, quote_string, quote_string_len) == 0)
265 STORE ('\\');
266
267 c = arg[i];
268 switch (c)
269 {
270 case '\0':
271 if (backslash_escapes)
272 {
273 STORE ('\\');
274 STORE ('0');
275 STORE ('0');
276 c = '0';
277 }
278 break;
279
280 case '?':
281 switch (quoting_style)
282 {
283 case shell_quoting_style:
284 goto use_shell_always_quoting_style;
285
286 case c_quoting_style:
287 if (i + 2 < argsize && arg[i + 1] == '?')
288 switch (arg[i + 2])
289 {
290 case '!': case '\'':
291 case '(': case ')': case '-': case '/':
292 case '<': case '=': case '>':
293 /* Escape the second '?' in what would otherwise be
294 a trigraph. */
295 c = arg[i + 2];
296 i += 2;
297 STORE ('?');
298 STORE ('\\');
299 STORE ('?');
300 break;
301 }
302 break;
303
304 default:
305 break;
306 }
307 break;
308
309 case '\a': esc = 'a'; goto c_escape;
310 case '\b': esc = 'b'; goto c_escape;
311 case '\f': esc = 'f'; goto c_escape;
312 case '\n': esc = 'n'; goto c_and_shell_escape;
313 case '\r': esc = 'r'; goto c_and_shell_escape;
314 case '\t': esc = 't'; goto c_and_shell_escape;
315 case '\v': esc = 'v'; goto c_escape;
316 case '\\': esc = c; goto c_and_shell_escape;
317
318 c_and_shell_escape:
319 if (quoting_style == shell_quoting_style)
320 goto use_shell_always_quoting_style;
321 c_escape:
322 if (backslash_escapes)
323 {
324 c = esc;
325 goto store_escape;
326 }
327 break;
328
329 case '#': case '~':
330 if (i != 0)
331 break;
332 /* Fall through. */
333 case ' ':
334 case '!': /* special in bash */
335 case '"': case '$': case '&':
336 case '(': case ')': case '*': case ';':
337 case '<': case '>': case '[':
338 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
339 case '`': case '|':
340 /* A shell special character. In theory, '$' and '`' could
341 be the first bytes of multibyte characters, which means
342 we should check them with mbrtowc, but in practice this
343 doesn't happen so it's not worth worrying about. */
344 if (quoting_style == shell_quoting_style)
345 goto use_shell_always_quoting_style;
346 break;
347
348 case '\'':
349 switch (quoting_style)
350 {
351 case shell_quoting_style:
352 goto use_shell_always_quoting_style;
353
354 case shell_always_quoting_style:
355 STORE ('\'');
356 STORE ('\\');
357 STORE ('\'');
358 break;
359
360 default:
361 break;
362 }
363 break;
364
365 case '%': case '+': case ',': case '-': case '.': case '/':
366 case '0': case '1': case '2': case '3': case '4': case '5':
367 case '6': case '7': case '8': case '9': case ':': case '=':
368 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
369 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
370 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
371 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
372 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
373 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
374 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
375 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
376 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
377 case '{': case '}':
378 /* These characters don't cause problems, no matter what the
379 quoting style is. They cannot start multibyte sequences. */
380 break;
381
382 default:
383 /* If we have a multibyte sequence, copy it until we reach
384 its end, find an error, or come back to the initial shift
385 state. For C-like styles, if the sequence has
386 unprintable characters, escape the whole sequence, since
387 we can't easily escape single characters within it. */
388 {
389 /* Length of multibyte sequence found so far. */
390 size_t m;
391
392 int printable;
393
394 if (unibyte_locale)
395 {
396 m = 1;
397 printable = isprint (c);
398 }
399 else
400 {
401 mbstate_t mbstate;
402 memset (&mbstate, 0, sizeof mbstate);
403
404 m = 0;
405 printable = 1;
406 if (argsize == SIZE_MAX)
407 argsize = strlen (arg);
408
409 do
410 {
411 wchar_t w;
412 size_t bytes = mbrtowc (&w, &arg[i + m],
413 argsize - (i + m), &mbstate);
414 if (bytes == 0)
415 break;
416 else if (bytes == (size_t) -1)
417 {
418 printable = 0;
419 break;
420 }
421 else if (bytes == (size_t) -2)
422 {
423 printable = 0;
424 while (i + m < argsize && arg[i + m])
425 m++;
426 break;
427 }
428 else
429 {
430 if (! iswprint (w))
431 printable = 0;
432 m += bytes;
433 }
434 }
435 while (! mbsinit (&mbstate));
436 }
437
438 if (1 < m || (backslash_escapes && ! printable))
439 {
440 /* Output a multibyte sequence, or an escaped
441 unprintable unibyte character. */
442 size_t ilim = i + m;
443
444 for (;;)
445 {
446 if (backslash_escapes && ! printable)
447 {
448 STORE ('\\');
449 STORE ('0' + (c >> 6));
450 STORE ('0' + ((c >> 3) & 7));
451 c = '0' + (c & 7);
452 }
453 if (ilim <= i + 1)
454 break;
455 STORE (c);
456 c = arg[++i];
457 }
458
459 goto store_c;
460 }
461 }
462 }
463
464 if (! (backslash_escapes
465 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
466 goto store_c;
467
468 store_escape:
469 STORE ('\\');
470
471 store_c:
472 STORE (c);
473 }
474
475 if (quote_string)
476 for (; *quote_string; quote_string++)
477 STORE (*quote_string);
478
479 if (len < buffersize)
480 buffer[len] = '\0';
481 return len;
482
483 use_shell_always_quoting_style:
484 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
485 shell_always_quoting_style, o);
486 }
487
488 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
489 argument ARG (of size ARGSIZE), using O to control quoting.
490 If O is null, use the default.
491 Terminate the output with a null character, and return the written
492 size of the output, not counting the terminating null.
493 If BUFFERSIZE is too small to store the output string, return the
494 value that would have been returned had BUFFERSIZE been large enough.
495 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
496 size_t
497 quotearg_buffer (char *buffer, size_t buffersize,
498 char const *arg, size_t argsize,
499 struct quoting_options const *o)
500 {
501 struct quoting_options const *p = o ? o : &default_quoting_options;
502 int e = errno;
503 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
504 p->style, p);
505 errno = e;
506 return r;
507 }
508
/* Return a quoted version of argument ARG, kept in storage slot N.
   ARG is ARGSIZE bytes long, or a null-terminated string if ARGSIZE
   is -1.  OPTIONS specifies the quoting options.
   The result points to static storage that the next call with the
   same N may reuse.  N must be nonnegative (a negative N aborts); it
   is deliberately an "int" to leave room for future extensions that
   use negative values.  errno is left unchanged.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
  {
    size_t size;
    char *val;
  };

  /* Slot 0 has a preallocated buffer, so that the caller can always
     quote one small component of a "memory exhausted" message there.  */
  static char slot0[256];
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static unsigned int nslots = 1;

  int saved_errno = errno;
  unsigned int n0 = n;
  struct slotvec *slot;
  size_t qsize;

  if (n < 0)
    abort ();

  /* Grow the slot table so that it includes slot N.  */
  if (nslots <= n0)
    {
      unsigned int n1 = n0 + 1;
      size_t s = n1 * sizeof *slotvec;

      /* Give up if the table size in bytes overflowed size_t.  */
      if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
          && n1 != s / sizeof *slotvec)
        xalloc_die ();

      /* The initial one-entry table is static and cannot be passed to
         xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
      nslots = n1;
    }

  /* Quote into the slot's current buffer; if the result did not fit,
     enlarge the buffer to the exact size and quote again.  */
  slot = &slotvec[n];
  qsize = quotearg_buffer (slot->val, slot->size, arg, argsize, options);
  if (slot->size <= qsize)
    {
      slot->size = qsize + 1;
      /* slot0 is static storage and must not be reallocated.  */
      slot->val = xrealloc (slot->val == slot0 ? 0 : slot->val, slot->size);
      quotearg_buffer (slot->val, slot->size, arg, argsize, options);
    }

  errno = saved_errno;
  return slot->val;
}
573
574 char *
575 quotearg_n (int n, char const *arg)
576 {
577 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
578 }
579
/* Quote the null-terminated string ARG with the default quoting
   options, using storage slot 0 for the result.  */
char *
quotearg (char const *arg)
{
  int const slot = 0;
  return quotearg_n (slot, arg);
}
585
586 /* Return quoting options for STYLE, with no extra quoting. */
587 static struct quoting_options
588 quoting_options_from_style (enum quoting_style style)
589 {
590 struct quoting_options o;
591 o.style = style;
592 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
593 return o;
594 }
595
596 char *
597 quotearg_n_style (int n, enum quoting_style s, char const *arg)
598 {
599 struct quoting_options const o = quoting_options_from_style (s);
600 return quotearg_n_options (n, arg, SIZE_MAX, &o);
601 }
602
603 char *
604 quotearg_n_style_mem (int n, enum quoting_style s,
605 char const *arg, size_t argsize)
606 {
607 struct quoting_options const o = quoting_options_from_style (s);
608 return quotearg_n_options (n, arg, argsize, &o);
609 }
610
611 char *
612 quotearg_style (enum quoting_style s, char const *arg)
613 {
614 return quotearg_n_style (0, s, arg);
615 }
616
617 char *
618 quotearg_char (char const *arg, char ch)
619 {
620 struct quoting_options options;
621 options = default_quoting_options;
622 set_char_quoting (&options, ch, 1);
623 return quotearg_n_options (0, arg, SIZE_MAX, &options);
624 }
625
/* Quote the null-terminated string ARG as quotearg_char does, with
   ':' as the additionally quoted character.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';
  return quotearg_char (arg, colon);
}