]> git.saurik.com Git - bison.git/blame - lib/quotearg.c
Initial revision.
[bison.git] / lib / quotearg.c
CommitLineData
ff4a34be 1/* quotearg.c - quote arguments for output
8434f222 2 Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
ff4a34be
AD
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
17
18/* Written by Paul Eggert <eggert@twinsun.com> */
19
20#if HAVE_CONFIG_H
21# include <config.h>
22#endif
23
fab5b110 24#include "quotearg.h"
f6cf0f6e 25
fab5b110 26#include "xalloc.h"
ff4a34be
AD
27
28#include <ctype.h>
f6cf0f6e
PE
29#include <errno.h>
30#include <limits.h>
31#include <stdlib.h>
32#include <string.h>
ff4a34be 33
f6cf0f6e
PE
34#include "gettext.h"
35#define _(msgid) gettext (msgid)
36#define N_(msgid) msgid
ff4a34be
AD
37
38#if HAVE_WCHAR_H
342b8b6e
AD
39
40/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
41# include <stdio.h>
42# include <time.h>
43
ff4a34be
AD
44# include <wchar.h>
45#endif
46
b0ce6046 47#if !HAVE_MBRTOWC
ff4a34be
AD
48/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
49 other macros are defined only for documentation and to satisfy C
50 syntax. */
51# undef MB_CUR_MAX
52# define MB_CUR_MAX 1
53# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
f6cf0f6e 54# define iswprint(wc) isprint ((unsigned char) (wc))
342b8b6e
AD
55# undef HAVE_MBSINIT
56#endif
57
58#if !defined mbsinit && !HAVE_MBSINIT
59# define mbsinit(ps) 1
ff4a34be
AD
60#endif
61
62#ifndef iswprint
63# if HAVE_WCTYPE_H
64# include <wctype.h>
65# endif
66# if !defined iswprint && !HAVE_ISWPRINT
67# define iswprint(wc) 1
68# endif
69#endif
70
f6cf0f6e
PE
71#ifndef SIZE_MAX
72# define SIZE_MAX ((size_t) -1)
ff4a34be
AD
73#endif
74
f6cf0f6e 75#define INT_BITS (sizeof (int) * CHAR_BIT)
ff4a34be
AD
76
77struct quoting_options
78{
79 /* Basic quoting style. */
80 enum quoting_style style;
81
82 /* Quote the characters indicated by this bit vector even if the
83 quoting style would not normally require them to be quoted. */
84 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
85};
86
/* Textual names of the quoting styles, listed in the same order as
   the entries of quoting_style_vals.  A null pointer ends the list.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  NULL
};
99
100/* Correspondences to quoting style names. */
101enum quoting_style const quoting_style_vals[] =
102{
103 literal_quoting_style,
104 shell_quoting_style,
105 shell_always_quoting_style,
106 c_quoting_style,
107 escape_quoting_style,
108 locale_quoting_style,
109 clocale_quoting_style
110};
111
112/* The default quoting options. */
113static struct quoting_options default_quoting_options;
114
115/* Allocate a new set of quoting options, with contents initially identical
116 to O if O is not null, or to the default if O is null.
117 It is the caller's responsibility to free the result. */
118struct quoting_options *
119clone_quoting_options (struct quoting_options *o)
120{
f6cf0f6e
PE
121 int e = errno;
122 struct quoting_options *p = xmalloc (sizeof *p);
ff4a34be 123 *p = *(o ? o : &default_quoting_options);
f6cf0f6e 124 errno = e;
ff4a34be
AD
125 return p;
126}
127
128/* Get the value of O's quoting style. If O is null, use the default. */
129enum quoting_style
130get_quoting_style (struct quoting_options *o)
131{
132 return (o ? o : &default_quoting_options)->style;
133}
134
135/* In O (or in the default if O is null),
136 set the value of the quoting style to S. */
137void
138set_quoting_style (struct quoting_options *o, enum quoting_style s)
139{
140 (o ? o : &default_quoting_options)->style = s;
141}
142
143/* In O (or in the default if O is null),
144 set the value of the quoting options for character C to I.
145 Return the old value. Currently, the only values defined for I are
146 0 (the default) and 1 (which means to quote the character even if
147 it would not otherwise be quoted). */
148int
149set_char_quoting (struct quoting_options *o, char c, int i)
150{
151 unsigned char uc = c;
152 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
153 int shift = uc % INT_BITS;
154 int r = (*p >> shift) & 1;
155 *p ^= ((i & 1) ^ r) << shift;
156 return r;
157}
158
159/* MSGID approximates a quotation mark. Return its translation if it
160 has one; otherwise, return either it or "\"", depending on S. */
161static char const *
162gettext_quote (char const *msgid, enum quoting_style s)
163{
164 char const *translation = _(msgid);
165 if (translation == msgid && s == clocale_quoting_style)
166 translation = "\"";
167 return translation;
168}
169
170/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
171 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
172 non-quoting-style part of O to control quoting.
173 Terminate the output with a null character, and return the written
174 size of the output, not counting the terminating null.
175 If BUFFERSIZE is too small to store the output string, return the
176 value that would have been returned had BUFFERSIZE been large enough.
177 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
178
179 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
180 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
181 style specified by O, and O may not be null. */
182
183static size_t
184quotearg_buffer_restyled (char *buffer, size_t buffersize,
185 char const *arg, size_t argsize,
186 enum quoting_style quoting_style,
187 struct quoting_options const *o)
188{
189 size_t i;
190 size_t len = 0;
191 char const *quote_string = 0;
192 size_t quote_string_len = 0;
193 int backslash_escapes = 0;
194 int unibyte_locale = MB_CUR_MAX == 1;
195
196#define STORE(c) \
197 do \
198 { \
199 if (len < buffersize) \
200 buffer[len] = (c); \
201 len++; \
202 } \
203 while (0)
204
205 switch (quoting_style)
206 {
207 case c_quoting_style:
208 STORE ('"');
209 backslash_escapes = 1;
210 quote_string = "\"";
211 quote_string_len = 1;
212 break;
213
214 case escape_quoting_style:
215 backslash_escapes = 1;
216 break;
217
218 case locale_quoting_style:
219 case clocale_quoting_style:
220 {
221 /* Get translations for open and closing quotation marks.
222
223 The message catalog should translate "`" to a left
224 quotation mark suitable for the locale, and similarly for
225 "'". If the catalog has no translation,
226 locale_quoting_style quotes `like this', and
227 clocale_quoting_style quotes "like this".
228
229 For example, an American English Unicode locale should
230 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
231 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
232 MARK). A British English Unicode locale should instead
233 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
234 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
235
236 char const *left = gettext_quote (N_("`"), quoting_style);
237 char const *right = gettext_quote (N_("'"), quoting_style);
238 for (quote_string = left; *quote_string; quote_string++)
239 STORE (*quote_string);
240 backslash_escapes = 1;
241 quote_string = right;
242 quote_string_len = strlen (quote_string);
243 }
244 break;
245
246 case shell_always_quoting_style:
247 STORE ('\'');
248 quote_string = "'";
249 quote_string_len = 1;
250 break;
251
252 default:
253 break;
254 }
255
f6cf0f6e 256 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
ff4a34be
AD
257 {
258 unsigned char c;
259 unsigned char esc;
260
261 if (backslash_escapes
262 && quote_string_len
263 && i + quote_string_len <= argsize
264 && memcmp (arg + i, quote_string, quote_string_len) == 0)
265 STORE ('\\');
266
267 c = arg[i];
268 switch (c)
269 {
8434f222
PE
270 case '\0':
271 if (backslash_escapes)
272 {
273 STORE ('\\');
274 STORE ('0');
275 STORE ('0');
276 c = '0';
277 }
278 break;
279
ff4a34be
AD
280 case '?':
281 switch (quoting_style)
282 {
283 case shell_quoting_style:
284 goto use_shell_always_quoting_style;
285
286 case c_quoting_style:
287 if (i + 2 < argsize && arg[i + 1] == '?')
288 switch (arg[i + 2])
289 {
290 case '!': case '\'':
291 case '(': case ')': case '-': case '/':
292 case '<': case '=': case '>':
293 /* Escape the second '?' in what would otherwise be
294 a trigraph. */
ff4a34be 295 c = arg[i + 2];
9e4c5cb8 296 i += 2;
ff4a34be
AD
297 STORE ('?');
298 STORE ('\\');
299 STORE ('?');
300 break;
301 }
302 break;
303
304 default:
305 break;
306 }
307 break;
308
f6cf0f6e 309 case '\a': esc = 'a'; goto c_escape;
ff4a34be
AD
310 case '\b': esc = 'b'; goto c_escape;
311 case '\f': esc = 'f'; goto c_escape;
312 case '\n': esc = 'n'; goto c_and_shell_escape;
313 case '\r': esc = 'r'; goto c_and_shell_escape;
314 case '\t': esc = 't'; goto c_and_shell_escape;
315 case '\v': esc = 'v'; goto c_escape;
316 case '\\': esc = c; goto c_and_shell_escape;
317
318 c_and_shell_escape:
319 if (quoting_style == shell_quoting_style)
320 goto use_shell_always_quoting_style;
321 c_escape:
322 if (backslash_escapes)
323 {
324 c = esc;
325 goto store_escape;
326 }
327 break;
328
329 case '#': case '~':
330 if (i != 0)
331 break;
332 /* Fall through. */
333 case ' ':
334 case '!': /* special in bash */
335 case '"': case '$': case '&':
336 case '(': case ')': case '*': case ';':
337 case '<': case '>': case '[':
338 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
339 case '`': case '|':
340 /* A shell special character. In theory, '$' and '`' could
341 be the first bytes of multibyte characters, which means
342 we should check them with mbrtowc, but in practice this
343 doesn't happen so it's not worth worrying about. */
344 if (quoting_style == shell_quoting_style)
345 goto use_shell_always_quoting_style;
346 break;
347
348 case '\'':
349 switch (quoting_style)
350 {
351 case shell_quoting_style:
352 goto use_shell_always_quoting_style;
353
354 case shell_always_quoting_style:
355 STORE ('\'');
356 STORE ('\\');
357 STORE ('\'');
358 break;
359
360 default:
361 break;
362 }
363 break;
364
365 case '%': case '+': case ',': case '-': case '.': case '/':
366 case '0': case '1': case '2': case '3': case '4': case '5':
367 case '6': case '7': case '8': case '9': case ':': case '=':
368 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
369 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
370 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
371 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
372 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
373 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
374 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
375 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
376 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
377 case '{': case '}':
378 /* These characters don't cause problems, no matter what the
379 quoting style is. They cannot start multibyte sequences. */
380 break;
381
382 default:
383 /* If we have a multibyte sequence, copy it until we reach
384 its end, find an error, or come back to the initial shift
385 state. For C-like styles, if the sequence has
386 unprintable characters, escape the whole sequence, since
387 we can't easily escape single characters within it. */
388 {
389 /* Length of multibyte sequence found so far. */
390 size_t m;
391
392 int printable;
393
394 if (unibyte_locale)
395 {
396 m = 1;
f6cf0f6e 397 printable = isprint (c);
ff4a34be
AD
398 }
399 else
400 {
401 mbstate_t mbstate;
402 memset (&mbstate, 0, sizeof mbstate);
403
404 m = 0;
405 printable = 1;
f6cf0f6e 406 if (argsize == SIZE_MAX)
ff4a34be
AD
407 argsize = strlen (arg);
408
409 do
410 {
411 wchar_t w;
412 size_t bytes = mbrtowc (&w, &arg[i + m],
413 argsize - (i + m), &mbstate);
414 if (bytes == 0)
415 break;
416 else if (bytes == (size_t) -1)
417 {
418 printable = 0;
419 break;
420 }
421 else if (bytes == (size_t) -2)
422 {
423 printable = 0;
424 while (i + m < argsize && arg[i + m])
425 m++;
426 break;
427 }
428 else
429 {
430 if (! iswprint (w))
431 printable = 0;
432 m += bytes;
433 }
434 }
435 while (! mbsinit (&mbstate));
436 }
437
438 if (1 < m || (backslash_escapes && ! printable))
439 {
440 /* Output a multibyte sequence, or an escaped
441 unprintable unibyte character. */
442 size_t ilim = i + m;
443
444 for (;;)
445 {
446 if (backslash_escapes && ! printable)
447 {
448 STORE ('\\');
449 STORE ('0' + (c >> 6));
450 STORE ('0' + ((c >> 3) & 7));
451 c = '0' + (c & 7);
452 }
453 if (ilim <= i + 1)
454 break;
455 STORE (c);
456 c = arg[++i];
457 }
458
459 goto store_c;
460 }
461 }
462 }
463
464 if (! (backslash_escapes
465 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
466 goto store_c;
467
468 store_escape:
469 STORE ('\\');
470
471 store_c:
472 STORE (c);
473 }
474
475 if (quote_string)
476 for (; *quote_string; quote_string++)
477 STORE (*quote_string);
478
479 if (len < buffersize)
480 buffer[len] = '\0';
481 return len;
482
483 use_shell_always_quoting_style:
484 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
485 shell_always_quoting_style, o);
486}
487
488/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
489 argument ARG (of size ARGSIZE), using O to control quoting.
490 If O is null, use the default.
491 Terminate the output with a null character, and return the written
492 size of the output, not counting the terminating null.
493 If BUFFERSIZE is too small to store the output string, return the
494 value that would have been returned had BUFFERSIZE been large enough.
495 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
496size_t
497quotearg_buffer (char *buffer, size_t buffersize,
498 char const *arg, size_t argsize,
499 struct quoting_options const *o)
500{
501 struct quoting_options const *p = o ? o : &default_quoting_options;
f6cf0f6e
PE
502 int e = errno;
503 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
504 p->style, p);
505 errno = e;
506 return r;
ff4a34be
AD
507}
508
8434f222
PE
/* Return a quoted version of argument ARG, kept in storage slot N.
   ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated
   string.  OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; a negative N aborts the program.  N is
   deliberately declared with type "int" to allow for future
   extensions (using negative values).
   The value of errno on entry is restored before returning.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  int saved_errno = errno;

  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;
      char *val;
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  unsigned int wanted = n;
  struct slotvec *slot;
  size_t needed;

  if (n < 0)
    abort ();

  /* Grow the slot vector so that slot N exists.  */
  if (nslots <= wanted)
    {
      unsigned int new_count = wanted + 1;
      size_t new_bytes = new_count * sizeof *slotvec;

      /* Give up if the byte count above wrapped around.  */
      if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
          && new_count != new_bytes / sizeof *slotvec)
        xalloc_die ();

      /* The initial one-element vector is static and so cannot be
         handed to xrealloc; move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, new_bytes);

      /* New slots start out empty: size 0 and a null buffer.  */
      memset (slotvec + nslots, 0, (new_count - nslots) * sizeof *slotvec);
      nslots = new_count;
    }

  /* First attempt with the slot's current buffer; if the result does
     not fit, enlarge the buffer to the exact size and quote again.  */
  slot = &slotvec[n];
  needed = quotearg_buffer (slot->val, slot->size, arg, argsize, options);

  if (slot->size <= needed)
    {
      slot->size = needed + 1;
      /* SLOT0 is static storage, so it must not be passed to xrealloc.  */
      slot->val = xrealloc (slot->val == slot0 ? 0 : slot->val, slot->size);
      quotearg_buffer (slot->val, slot->size, arg, argsize, options);
    }

  errno = saved_errno;
  return slot->val;
}
573
574char *
f4e421e6 575quotearg_n (int n, char const *arg)
ff4a34be 576{
f6cf0f6e 577 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
ff4a34be
AD
578}
579
580char *
581quotearg (char const *arg)
582{
583 return quotearg_n (0, arg);
584}
585
8434f222
PE
586/* Return quoting options for STYLE, with no extra quoting. */
587static struct quoting_options
588quoting_options_from_style (enum quoting_style style)
ff4a34be
AD
589{
590 struct quoting_options o;
8434f222 591 o.style = style;
ff4a34be 592 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
8434f222
PE
593 return o;
594}
595
596char *
597quotearg_n_style (int n, enum quoting_style s, char const *arg)
598{
599 struct quoting_options const o = quoting_options_from_style (s);
f6cf0f6e 600 return quotearg_n_options (n, arg, SIZE_MAX, &o);
8434f222
PE
601}
602
603char *
604quotearg_n_style_mem (int n, enum quoting_style s,
605 char const *arg, size_t argsize)
606{
607 struct quoting_options const o = quoting_options_from_style (s);
608 return quotearg_n_options (n, arg, argsize, &o);
ff4a34be
AD
609}
610
611char *
612quotearg_style (enum quoting_style s, char const *arg)
613{
614 return quotearg_n_style (0, s, arg);
615}
616
617char *
618quotearg_char (char const *arg, char ch)
619{
620 struct quoting_options options;
621 options = default_quoting_options;
622 set_char_quoting (&options, ch, 1);
f6cf0f6e 623 return quotearg_n_options (0, arg, SIZE_MAX, &options);
ff4a34be
AD
624}
625
/* Return a quoted version of the null-terminated string ARG, kept in
   storage slot 0, with ':' marked to be quoted in addition to
   whatever the default quoting options call for.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';
  return quotearg_char (arg, colon);
}