]> git.saurik.com Git - bison.git/blob - lib/quotearg.c
2a7ba4c445ba42b8aaf913e810c41ce8e5be8c18
[bison.git] / lib / quotearg.c
1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18 /* Written by Paul Eggert <eggert@twinsun.com> */
19
20 #if HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #if HAVE_STDDEF_H
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
26 #endif
27 #include <sys/types.h>
28 #include <quotearg.h>
29 #include <xalloc.h>
30
31 #include <ctype.h>
32
33 #if ENABLE_NLS
34 # include <libintl.h>
35 # define _(text) gettext (text)
36 #else
37 # define _(text) text
38 #endif
39 #define N_(text) text
40
41 #if HAVE_LIMITS_H
42 # include <limits.h>
43 #endif
44 #ifndef CHAR_BIT
45 # define CHAR_BIT 8
46 #endif
47 #ifndef UCHAR_MAX
48 # define UCHAR_MAX ((unsigned char) -1)
49 #endif
50
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
53 #else
54 # define ALERT_CHAR '\7'
55 #endif
56
57 #if HAVE_STDLIB_H
58 # include <stdlib.h>
59 #endif
60
61 #if HAVE_STRING_H
62 # include <string.h>
63 #endif
64
65 #if HAVE_WCHAR_H
66 # include <wchar.h>
67 #endif
68
69 #if !HAVE_MBRTOWC
70 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
71 other macros are defined only for documentation and to satisfy C
72 syntax. */
73 # undef MB_CUR_MAX
74 # define MB_CUR_MAX 1
75 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
76 # define mbsinit(ps) 1
77 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
78 #endif
79
80 #ifndef iswprint
81 # if HAVE_WCTYPE_H
82 # include <wctype.h>
83 # endif
84 # if !defined iswprint && !HAVE_ISWPRINT
85 # define iswprint(wc) 1
86 # endif
87 #endif
88
89 #define INT_BITS (sizeof (int) * CHAR_BIT)
90
91 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
92 # define IN_CTYPE_DOMAIN(c) 1
93 #else
94 # define IN_CTYPE_DOMAIN(c) isascii(c)
95 #endif
96
97 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
98 #undef ISPRINT
99 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
100
/* Options that control how an argument is quoted: a basic style plus
   a per-byte set of additional characters to quote.  */
struct quoting_options
{
  /* Basic quoting style. */
  enum quoting_style style;

  /* Quote the characters indicated by this bit vector even if the
     quoting style would not normally require them to be quoted.
     There is one bit per unsigned char value, packed INT_BITS bits
     to each int: the bit for byte UC is bit (UC % INT_BITS) of
     element (UC / INT_BITS).  */
  int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
};
110
/* Names of quoting styles.  The list ends with a null pointer; the
   names are listed in the same order as the values in
   quoting_style_vals.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
123
/* Correspondences to quoting style names: the quoting style values,
   listed in the same order as the names in quoting_style_args.
   Unlike that array, this one has no terminating entry.  */
enum quoting_style const quoting_style_vals[] =
{
  literal_quoting_style,
  shell_quoting_style,
  shell_always_quoting_style,
  c_quoting_style,
  escape_quoting_style,
  locale_quoting_style,
  clocale_quoting_style
};
135
/* The default quoting options, used by the functions in this file
   whenever the caller passes a null options pointer.  As a static
   object it starts out zero-initialized, so no extra characters are
   marked for quoting until set_char_quoting changes that.  */
static struct quoting_options default_quoting_options;
138
139 /* Allocate a new set of quoting options, with contents initially identical
140 to O if O is not null, or to the default if O is null.
141 It is the caller's responsibility to free the result. */
142 struct quoting_options *
143 clone_quoting_options (struct quoting_options *o)
144 {
145 struct quoting_options *p
146 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
147 *p = *(o ? o : &default_quoting_options);
148 return p;
149 }
150
151 /* Get the value of O's quoting style. If O is null, use the default. */
152 enum quoting_style
153 get_quoting_style (struct quoting_options *o)
154 {
155 return (o ? o : &default_quoting_options)->style;
156 }
157
158 /* In O (or in the default if O is null),
159 set the value of the quoting style to S. */
160 void
161 set_quoting_style (struct quoting_options *o, enum quoting_style s)
162 {
163 (o ? o : &default_quoting_options)->style = s;
164 }
165
166 /* In O (or in the default if O is null),
167 set the value of the quoting options for character C to I.
168 Return the old value. Currently, the only values defined for I are
169 0 (the default) and 1 (which means to quote the character even if
170 it would not otherwise be quoted). */
171 int
172 set_char_quoting (struct quoting_options *o, char c, int i)
173 {
174 unsigned char uc = c;
175 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
176 int shift = uc % INT_BITS;
177 int r = (*p >> shift) & 1;
178 *p ^= ((i & 1) ^ r) << shift;
179 return r;
180 }
181
182 /* MSGID approximates a quotation mark. Return its translation if it
183 has one; otherwise, return either it or "\"", depending on S. */
184 static char const *
185 gettext_quote (char const *msgid, enum quoting_style s)
186 {
187 char const *translation = _(msgid);
188 if (translation == msgid && s == clocale_quoting_style)
189 translation = "\"";
190 return translation;
191 }
192
193 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
194 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
195 non-quoting-style part of O to control quoting.
196 Terminate the output with a null character, and return the written
197 size of the output, not counting the terminating null.
198 If BUFFERSIZE is too small to store the output string, return the
199 value that would have been returned had BUFFERSIZE been large enough.
200 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
201
202 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
203 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
204 style specified by O, and O may not be null. */
205
206 static size_t
207 quotearg_buffer_restyled (char *buffer, size_t buffersize,
208 char const *arg, size_t argsize,
209 enum quoting_style quoting_style,
210 struct quoting_options const *o)
211 {
212 size_t i;
213 size_t len = 0;
214 char const *quote_string = 0;
215 size_t quote_string_len = 0;
216 int backslash_escapes = 0;
217 int unibyte_locale = MB_CUR_MAX == 1;
218
219 #define STORE(c) \
220 do \
221 { \
222 if (len < buffersize) \
223 buffer[len] = (c); \
224 len++; \
225 } \
226 while (0)
227
228 switch (quoting_style)
229 {
230 case c_quoting_style:
231 STORE ('"');
232 backslash_escapes = 1;
233 quote_string = "\"";
234 quote_string_len = 1;
235 break;
236
237 case escape_quoting_style:
238 backslash_escapes = 1;
239 break;
240
241 case locale_quoting_style:
242 case clocale_quoting_style:
243 {
244 /* Get translations for open and closing quotation marks.
245
246 The message catalog should translate "`" to a left
247 quotation mark suitable for the locale, and similarly for
248 "'". If the catalog has no translation,
249 locale_quoting_style quotes `like this', and
250 clocale_quoting_style quotes "like this".
251
252 For example, an American English Unicode locale should
253 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
254 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
255 MARK). A British English Unicode locale should instead
256 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
257 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
258
259 char const *left = gettext_quote (N_("`"), quoting_style);
260 char const *right = gettext_quote (N_("'"), quoting_style);
261 for (quote_string = left; *quote_string; quote_string++)
262 STORE (*quote_string);
263 backslash_escapes = 1;
264 quote_string = right;
265 quote_string_len = strlen (quote_string);
266 }
267 break;
268
269 case shell_always_quoting_style:
270 STORE ('\'');
271 quote_string = "'";
272 quote_string_len = 1;
273 break;
274
275 default:
276 break;
277 }
278
279 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
280 {
281 unsigned char c;
282 unsigned char esc;
283
284 if (backslash_escapes
285 && quote_string_len
286 && i + quote_string_len <= argsize
287 && memcmp (arg + i, quote_string, quote_string_len) == 0)
288 STORE ('\\');
289
290 c = arg[i];
291 switch (c)
292 {
293 case '?':
294 switch (quoting_style)
295 {
296 case shell_quoting_style:
297 goto use_shell_always_quoting_style;
298
299 case c_quoting_style:
300 if (i + 2 < argsize && arg[i + 1] == '?')
301 switch (arg[i + 2])
302 {
303 case '!': case '\'':
304 case '(': case ')': case '-': case '/':
305 case '<': case '=': case '>':
306 /* Escape the second '?' in what would otherwise be
307 a trigraph. */
308 i += 2;
309 c = arg[i + 2];
310 STORE ('?');
311 STORE ('\\');
312 STORE ('?');
313 break;
314 }
315 break;
316
317 default:
318 break;
319 }
320 break;
321
322 case ALERT_CHAR: esc = 'a'; goto c_escape;
323 case '\b': esc = 'b'; goto c_escape;
324 case '\f': esc = 'f'; goto c_escape;
325 case '\n': esc = 'n'; goto c_and_shell_escape;
326 case '\r': esc = 'r'; goto c_and_shell_escape;
327 case '\t': esc = 't'; goto c_and_shell_escape;
328 case '\v': esc = 'v'; goto c_escape;
329 case '\\': esc = c; goto c_and_shell_escape;
330
331 c_and_shell_escape:
332 if (quoting_style == shell_quoting_style)
333 goto use_shell_always_quoting_style;
334 c_escape:
335 if (backslash_escapes)
336 {
337 c = esc;
338 goto store_escape;
339 }
340 break;
341
342 case '#': case '~':
343 if (i != 0)
344 break;
345 /* Fall through. */
346 case ' ':
347 case '!': /* special in bash */
348 case '"': case '$': case '&':
349 case '(': case ')': case '*': case ';':
350 case '<': case '>': case '[':
351 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
352 case '`': case '|':
353 /* A shell special character. In theory, '$' and '`' could
354 be the first bytes of multibyte characters, which means
355 we should check them with mbrtowc, but in practice this
356 doesn't happen so it's not worth worrying about. */
357 if (quoting_style == shell_quoting_style)
358 goto use_shell_always_quoting_style;
359 break;
360
361 case '\'':
362 switch (quoting_style)
363 {
364 case shell_quoting_style:
365 goto use_shell_always_quoting_style;
366
367 case shell_always_quoting_style:
368 STORE ('\'');
369 STORE ('\\');
370 STORE ('\'');
371 break;
372
373 default:
374 break;
375 }
376 break;
377
378 case '%': case '+': case ',': case '-': case '.': case '/':
379 case '0': case '1': case '2': case '3': case '4': case '5':
380 case '6': case '7': case '8': case '9': case ':': case '=':
381 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
382 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
383 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
384 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
385 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
386 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
387 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
388 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
389 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
390 case '{': case '}':
391 /* These characters don't cause problems, no matter what the
392 quoting style is. They cannot start multibyte sequences. */
393 break;
394
395 default:
396 /* If we have a multibyte sequence, copy it until we reach
397 its end, find an error, or come back to the initial shift
398 state. For C-like styles, if the sequence has
399 unprintable characters, escape the whole sequence, since
400 we can't easily escape single characters within it. */
401 {
402 /* Length of multibyte sequence found so far. */
403 size_t m;
404
405 int printable;
406
407 if (unibyte_locale)
408 {
409 m = 1;
410 printable = ISPRINT (c);
411 }
412 else
413 {
414 mbstate_t mbstate;
415 memset (&mbstate, 0, sizeof mbstate);
416
417 m = 0;
418 printable = 1;
419 if (argsize == (size_t) -1)
420 argsize = strlen (arg);
421
422 do
423 {
424 wchar_t w;
425 size_t bytes = mbrtowc (&w, &arg[i + m],
426 argsize - (i + m), &mbstate);
427 if (bytes == 0)
428 break;
429 else if (bytes == (size_t) -1)
430 {
431 printable = 0;
432 break;
433 }
434 else if (bytes == (size_t) -2)
435 {
436 printable = 0;
437 while (i + m < argsize && arg[i + m])
438 m++;
439 break;
440 }
441 else
442 {
443 if (! iswprint (w))
444 printable = 0;
445 m += bytes;
446 }
447 }
448 while (! mbsinit (&mbstate));
449 }
450
451 if (1 < m || (backslash_escapes && ! printable))
452 {
453 /* Output a multibyte sequence, or an escaped
454 unprintable unibyte character. */
455 size_t ilim = i + m;
456
457 for (;;)
458 {
459 if (backslash_escapes && ! printable)
460 {
461 STORE ('\\');
462 STORE ('0' + (c >> 6));
463 STORE ('0' + ((c >> 3) & 7));
464 c = '0' + (c & 7);
465 }
466 if (ilim <= i + 1)
467 break;
468 STORE (c);
469 c = arg[++i];
470 }
471
472 goto store_c;
473 }
474 }
475 }
476
477 if (! (backslash_escapes
478 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
479 goto store_c;
480
481 store_escape:
482 STORE ('\\');
483
484 store_c:
485 STORE (c);
486 }
487
488 if (quote_string)
489 for (; *quote_string; quote_string++)
490 STORE (*quote_string);
491
492 if (len < buffersize)
493 buffer[len] = '\0';
494 return len;
495
496 use_shell_always_quoting_style:
497 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
498 shell_always_quoting_style, o);
499 }
500
501 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
502 argument ARG (of size ARGSIZE), using O to control quoting.
503 If O is null, use the default.
504 Terminate the output with a null character, and return the written
505 size of the output, not counting the terminating null.
506 If BUFFERSIZE is too small to store the output string, return the
507 value that would have been returned had BUFFERSIZE been large enough.
508 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
509 size_t
510 quotearg_buffer (char *buffer, size_t buffersize,
511 char const *arg, size_t argsize,
512 struct quoting_options const *o)
513 {
514 struct quoting_options const *p = o ? o : &default_quoting_options;
515 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
516 p->style, p);
517 }
518
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; the function aborts otherwise, and also if
   the size of the slot vector would overflow.  N is deliberately
   declared with type "int" to allow for future extensions (using
   negative values).  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  /* Number of entries currently in SLOTVEC.  */
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  if (nslots <= n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      /* Reject a negative N and a byte count that overflowed.  */
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();
      if (slotvec == &slotvec0)
        {
          /* The initial vector is a static object, which cannot be
             passed to xrealloc; move it to the heap first.  */
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* Slots 0 through N now exist.  Recording the count as N1, not
         N, keeps a later call with the same N from taking this path
         again and clearing slot N, which would lose its buffer.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The slot is too small: grow it to fit the output and its
           terminating null, then quote again.  SLOT0 is static, so
           it is replaced by a fresh allocation, not reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
572
573 char *
574 quotearg_n (unsigned int n, char const *arg)
575 {
576 return quotearg_n_options (n, arg, &default_quoting_options);
577 }
578
/* Return a quoted version of ARG, built in storage slot 0 according
   to the default quoting options.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
584
585 char *
586 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
587 {
588 struct quoting_options o;
589 o.style = s;
590 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
591 return quotearg_n_options (n, arg, &o);
592 }
593
594 char *
595 quotearg_style (enum quoting_style s, char const *arg)
596 {
597 return quotearg_n_style (0, s, arg);
598 }
599
600 char *
601 quotearg_char (char const *arg, char ch)
602 {
603 struct quoting_options options;
604 options = default_quoting_options;
605 set_char_quoting (&options, ch, 1);
606 return quotearg_n_options (0, arg, &options);
607 }
608
/* Return a quoted version of ARG, built in storage slot 0 using the
   default quoting options with ':' additionally marked for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}