]> git.saurik.com Git - bison.git/blob - lib/quotearg.c
Version 1.30.
[bison.git] / lib / quotearg.c
1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18 /* Written by Paul Eggert <eggert@twinsun.com> */
19
20 #if HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #if HAVE_STDDEF_H
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
26 #endif
27 #include <sys/types.h>
28 #include <quotearg.h>
29 #include <xalloc.h>
30
31 #include <ctype.h>
32
33 #if ENABLE_NLS
34 # include <libintl.h>
35 # define _(text) gettext (text)
36 #else
37 # define _(text) text
38 #endif
39 #define N_(text) text
40
41 #if HAVE_LIMITS_H
42 # include <limits.h>
43 #endif
44 #ifndef CHAR_BIT
45 # define CHAR_BIT 8
46 #endif
47 #ifndef UCHAR_MAX
48 # define UCHAR_MAX ((unsigned char) -1)
49 #endif
50
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
53 #else
54 # define ALERT_CHAR '\7'
55 #endif
56
57 #if HAVE_STDLIB_H
58 # include <stdlib.h>
59 #endif
60
61 #if HAVE_STRING_H
62 # include <string.h>
63 #endif
64
65 #if HAVE_WCHAR_H
66
67 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
68 # include <stdio.h>
69 # include <time.h>
70
71 # include <wchar.h>
72 #endif
73
74 #if !HAVE_MBRTOWC
75 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
76 other macros are defined only for documentation and to satisfy C
77 syntax. */
78 # undef MB_CUR_MAX
79 # define MB_CUR_MAX 1
80 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
81 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
82 # undef HAVE_MBSINIT
83 #endif
84
85 #if !defined mbsinit && !HAVE_MBSINIT
86 # define mbsinit(ps) 1
87 #endif
88
89 #ifndef iswprint
90 # if HAVE_WCTYPE_H
91 # include <wctype.h>
92 # endif
93 # if !defined iswprint && !HAVE_ISWPRINT
94 # define iswprint(wc) 1
95 # endif
96 #endif
97
98 #define INT_BITS (sizeof (int) * CHAR_BIT)
99
100 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
101 # define IN_CTYPE_DOMAIN(c) 1
102 #else
103 # define IN_CTYPE_DOMAIN(c) isascii(c)
104 #endif
105
106 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
107 #undef ISPRINT
108 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
109
110 struct quoting_options
111 {
112 /* Basic quoting style. */
113 enum quoting_style style;
114
115 /* Quote the characters indicated by this bit vector even if the
116 quoting style would not normally require them to be quoted. */
117 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
118 };
119
/* Textual names of the quoting styles, terminated by a null pointer.
   Entry K names the style stored at quoting_style_vals[K].  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
132
133 /* Correspondences to quoting style names. */
134 enum quoting_style const quoting_style_vals[] =
135 {
136 literal_quoting_style,
137 shell_quoting_style,
138 shell_always_quoting_style,
139 c_quoting_style,
140 escape_quoting_style,
141 locale_quoting_style,
142 clocale_quoting_style
143 };
144
145 /* The default quoting options. */
146 static struct quoting_options default_quoting_options;
147
148 /* Allocate a new set of quoting options, with contents initially identical
149 to O if O is not null, or to the default if O is null.
150 It is the caller's responsibility to free the result. */
151 struct quoting_options *
152 clone_quoting_options (struct quoting_options *o)
153 {
154 struct quoting_options *p
155 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
156 *p = *(o ? o : &default_quoting_options);
157 return p;
158 }
159
160 /* Get the value of O's quoting style. If O is null, use the default. */
161 enum quoting_style
162 get_quoting_style (struct quoting_options *o)
163 {
164 return (o ? o : &default_quoting_options)->style;
165 }
166
167 /* In O (or in the default if O is null),
168 set the value of the quoting style to S. */
169 void
170 set_quoting_style (struct quoting_options *o, enum quoting_style s)
171 {
172 (o ? o : &default_quoting_options)->style = s;
173 }
174
175 /* In O (or in the default if O is null),
176 set the value of the quoting options for character C to I.
177 Return the old value. Currently, the only values defined for I are
178 0 (the default) and 1 (which means to quote the character even if
179 it would not otherwise be quoted). */
180 int
181 set_char_quoting (struct quoting_options *o, char c, int i)
182 {
183 unsigned char uc = c;
184 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
185 int shift = uc % INT_BITS;
186 int r = (*p >> shift) & 1;
187 *p ^= ((i & 1) ^ r) << shift;
188 return r;
189 }
190
191 /* MSGID approximates a quotation mark. Return its translation if it
192 has one; otherwise, return either it or "\"", depending on S. */
193 static char const *
194 gettext_quote (char const *msgid, enum quoting_style s)
195 {
196 char const *translation = _(msgid);
197 if (translation == msgid && s == clocale_quoting_style)
198 translation = "\"";
199 return translation;
200 }
201
202 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
203 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
204 non-quoting-style part of O to control quoting.
205 Terminate the output with a null character, and return the written
206 size of the output, not counting the terminating null.
207 If BUFFERSIZE is too small to store the output string, return the
208 value that would have been returned had BUFFERSIZE been large enough.
209 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
210
211 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
212 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
213 style specified by O, and O may not be null. */
214
215 static size_t
216 quotearg_buffer_restyled (char *buffer, size_t buffersize,
217 char const *arg, size_t argsize,
218 enum quoting_style quoting_style,
219 struct quoting_options const *o)
220 {
221 size_t i;
222 size_t len = 0;
223 char const *quote_string = 0;
224 size_t quote_string_len = 0;
225 int backslash_escapes = 0;
226 int unibyte_locale = MB_CUR_MAX == 1;
227
228 #define STORE(c) \
229 do \
230 { \
231 if (len < buffersize) \
232 buffer[len] = (c); \
233 len++; \
234 } \
235 while (0)
236
237 switch (quoting_style)
238 {
239 case c_quoting_style:
240 STORE ('"');
241 backslash_escapes = 1;
242 quote_string = "\"";
243 quote_string_len = 1;
244 break;
245
246 case escape_quoting_style:
247 backslash_escapes = 1;
248 break;
249
250 case locale_quoting_style:
251 case clocale_quoting_style:
252 {
253 /* Get translations for open and closing quotation marks.
254
255 The message catalog should translate "`" to a left
256 quotation mark suitable for the locale, and similarly for
257 "'". If the catalog has no translation,
258 locale_quoting_style quotes `like this', and
259 clocale_quoting_style quotes "like this".
260
261 For example, an American English Unicode locale should
262 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
263 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
264 MARK). A British English Unicode locale should instead
265 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
266 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
267
268 char const *left = gettext_quote (N_("`"), quoting_style);
269 char const *right = gettext_quote (N_("'"), quoting_style);
270 for (quote_string = left; *quote_string; quote_string++)
271 STORE (*quote_string);
272 backslash_escapes = 1;
273 quote_string = right;
274 quote_string_len = strlen (quote_string);
275 }
276 break;
277
278 case shell_always_quoting_style:
279 STORE ('\'');
280 quote_string = "'";
281 quote_string_len = 1;
282 break;
283
284 default:
285 break;
286 }
287
288 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
289 {
290 unsigned char c;
291 unsigned char esc;
292
293 if (backslash_escapes
294 && quote_string_len
295 && i + quote_string_len <= argsize
296 && memcmp (arg + i, quote_string, quote_string_len) == 0)
297 STORE ('\\');
298
299 c = arg[i];
300 switch (c)
301 {
302 case '?':
303 switch (quoting_style)
304 {
305 case shell_quoting_style:
306 goto use_shell_always_quoting_style;
307
308 case c_quoting_style:
309 if (i + 2 < argsize && arg[i + 1] == '?')
310 switch (arg[i + 2])
311 {
312 case '!': case '\'':
313 case '(': case ')': case '-': case '/':
314 case '<': case '=': case '>':
315 /* Escape the second '?' in what would otherwise be
316 a trigraph. */
317 i += 2;
318 c = arg[i + 2];
319 STORE ('?');
320 STORE ('\\');
321 STORE ('?');
322 break;
323 }
324 break;
325
326 default:
327 break;
328 }
329 break;
330
331 case ALERT_CHAR: esc = 'a'; goto c_escape;
332 case '\b': esc = 'b'; goto c_escape;
333 case '\f': esc = 'f'; goto c_escape;
334 case '\n': esc = 'n'; goto c_and_shell_escape;
335 case '\r': esc = 'r'; goto c_and_shell_escape;
336 case '\t': esc = 't'; goto c_and_shell_escape;
337 case '\v': esc = 'v'; goto c_escape;
338 case '\\': esc = c; goto c_and_shell_escape;
339
340 c_and_shell_escape:
341 if (quoting_style == shell_quoting_style)
342 goto use_shell_always_quoting_style;
343 c_escape:
344 if (backslash_escapes)
345 {
346 c = esc;
347 goto store_escape;
348 }
349 break;
350
351 case '#': case '~':
352 if (i != 0)
353 break;
354 /* Fall through. */
355 case ' ':
356 case '!': /* special in bash */
357 case '"': case '$': case '&':
358 case '(': case ')': case '*': case ';':
359 case '<': case '>': case '[':
360 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
361 case '`': case '|':
362 /* A shell special character. In theory, '$' and '`' could
363 be the first bytes of multibyte characters, which means
364 we should check them with mbrtowc, but in practice this
365 doesn't happen so it's not worth worrying about. */
366 if (quoting_style == shell_quoting_style)
367 goto use_shell_always_quoting_style;
368 break;
369
370 case '\'':
371 switch (quoting_style)
372 {
373 case shell_quoting_style:
374 goto use_shell_always_quoting_style;
375
376 case shell_always_quoting_style:
377 STORE ('\'');
378 STORE ('\\');
379 STORE ('\'');
380 break;
381
382 default:
383 break;
384 }
385 break;
386
387 case '%': case '+': case ',': case '-': case '.': case '/':
388 case '0': case '1': case '2': case '3': case '4': case '5':
389 case '6': case '7': case '8': case '9': case ':': case '=':
390 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
391 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
392 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
393 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
394 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
395 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
396 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
397 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
398 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
399 case '{': case '}':
400 /* These characters don't cause problems, no matter what the
401 quoting style is. They cannot start multibyte sequences. */
402 break;
403
404 default:
405 /* If we have a multibyte sequence, copy it until we reach
406 its end, find an error, or come back to the initial shift
407 state. For C-like styles, if the sequence has
408 unprintable characters, escape the whole sequence, since
409 we can't easily escape single characters within it. */
410 {
411 /* Length of multibyte sequence found so far. */
412 size_t m;
413
414 int printable;
415
416 if (unibyte_locale)
417 {
418 m = 1;
419 printable = ISPRINT (c);
420 }
421 else
422 {
423 mbstate_t mbstate;
424 memset (&mbstate, 0, sizeof mbstate);
425
426 m = 0;
427 printable = 1;
428 if (argsize == (size_t) -1)
429 argsize = strlen (arg);
430
431 do
432 {
433 wchar_t w;
434 size_t bytes = mbrtowc (&w, &arg[i + m],
435 argsize - (i + m), &mbstate);
436 if (bytes == 0)
437 break;
438 else if (bytes == (size_t) -1)
439 {
440 printable = 0;
441 break;
442 }
443 else if (bytes == (size_t) -2)
444 {
445 printable = 0;
446 while (i + m < argsize && arg[i + m])
447 m++;
448 break;
449 }
450 else
451 {
452 if (! iswprint (w))
453 printable = 0;
454 m += bytes;
455 }
456 }
457 while (! mbsinit (&mbstate));
458 }
459
460 if (1 < m || (backslash_escapes && ! printable))
461 {
462 /* Output a multibyte sequence, or an escaped
463 unprintable unibyte character. */
464 size_t ilim = i + m;
465
466 for (;;)
467 {
468 if (backslash_escapes && ! printable)
469 {
470 STORE ('\\');
471 STORE ('0' + (c >> 6));
472 STORE ('0' + ((c >> 3) & 7));
473 c = '0' + (c & 7);
474 }
475 if (ilim <= i + 1)
476 break;
477 STORE (c);
478 c = arg[++i];
479 }
480
481 goto store_c;
482 }
483 }
484 }
485
486 if (! (backslash_escapes
487 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
488 goto store_c;
489
490 store_escape:
491 STORE ('\\');
492
493 store_c:
494 STORE (c);
495 }
496
497 if (quote_string)
498 for (; *quote_string; quote_string++)
499 STORE (*quote_string);
500
501 if (len < buffersize)
502 buffer[len] = '\0';
503 return len;
504
505 use_shell_always_quoting_style:
506 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
507 shell_always_quoting_style, o);
508 }
509
510 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
511 argument ARG (of size ARGSIZE), using O to control quoting.
512 If O is null, use the default.
513 Terminate the output with a null character, and return the written
514 size of the output, not counting the terminating null.
515 If BUFFERSIZE is too small to store the output string, return the
516 value that would have been returned had BUFFERSIZE been large enough.
517 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
518 size_t
519 quotearg_buffer (char *buffer, size_t buffersize,
520 char const *arg, size_t argsize,
521 struct quoting_options const *o)
522 {
523 struct quoting_options const *p = o ? o : &default_quoting_options;
524 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
525 p->style, p);
526 }
527
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; the function aborts otherwise, and also if
   the size of the slot vector would overflow.  N is deliberately
   declared with type "int" to allow for future extensions (using
   negative values).  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  /* Number of entries currently valid in SLOTVEC.  */
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  /* Grow the slot vector so that slot N exists.  A negative N converts
     to a large unsigned value here and is rejected by the check on N1
     below.  */
  if (nslots <= n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();
      if (slotvec == &slotvec0)
        {
          /* The initial vector is a static object and cannot be
             handed to xrealloc, so move it to the heap first.  */
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* The vector now holds N1 slots.  Recording N instead would make
         the next call with the same N grow again and zero slot N,
         losing the buffer it had allocated.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The slot's buffer is too small: enlarge it to fit the output
           and its terminating null, then quote again.  SLOT0 is static
           storage, so it is replaced rather than reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
581
582 char *
583 quotearg_n (unsigned int n, char const *arg)
584 {
585 return quotearg_n_options (n, arg, &default_quoting_options);
586 }
587
/* Quote ARG with the default quoting options, returning the result in
   storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
593
594 char *
595 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
596 {
597 struct quoting_options o;
598 o.style = s;
599 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
600 return quotearg_n_options (n, arg, &o);
601 }
602
603 char *
604 quotearg_style (enum quoting_style s, char const *arg)
605 {
606 return quotearg_n_style (0, s, arg);
607 }
608
609 char *
610 quotearg_char (char const *arg, char ch)
611 {
612 struct quoting_options options;
613 options = default_quoting_options;
614 set_char_quoting (&options, ch, 1);
615 return quotearg_n_options (0, arg, &options);
616 }
617
/* Quote ARG like quotearg_char, with ':' as the additional character
   to quote.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}