1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
27 #include <sys/types.h>
35 # define _(text) gettext (text)
48 # define UCHAR_MAX ((unsigned char) -1)
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
54 # define ALERT_CHAR '\7'
67 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
75 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
76 other macros are defined only for documentation and to satisfy C
77 syntax. */
80 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
81 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
85 #if !defined mbsinit && !HAVE_MBSINIT
86 # define mbsinit(ps) 1
93 # if !defined iswprint && !HAVE_ISWPRINT
94 # define iswprint(wc) 1
98 #define INT_BITS (sizeof (int) * CHAR_BIT)
100 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
101 # define IN_CTYPE_DOMAIN(c) 1
103 # define IN_CTYPE_DOMAIN(c) isascii(c)
106 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
108 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
110 struct quoting_options
112 /* Basic quoting style. */
113 enum quoting_style style
;
115 /* Quote the characters indicated by this bit vector even if the
116 quoting style would not normally require them to be quoted. */
117 int quote_these_too
[(UCHAR_MAX
/ INT_BITS
) + 1];
120 /* Names of quoting styles. */
121 char const *const quoting_style_args
[] =
133 /* Correspondences to quoting style names. */
134 enum quoting_style
const quoting_style_vals
[] =
136 literal_quoting_style
,
138 shell_always_quoting_style
,
140 escape_quoting_style
,
141 locale_quoting_style
,
142 clocale_quoting_style
145 /* The default quoting options. */
146 static struct quoting_options default_quoting_options
;
148 /* Allocate a new set of quoting options, with contents initially identical
149 to O if O is not null, or to the default if O is null.
150 It is the caller's responsibility to free the result. */
151 struct quoting_options
*
152 clone_quoting_options (struct quoting_options
*o
)
154 struct quoting_options
*p
155 = (struct quoting_options
*) xmalloc (sizeof (struct quoting_options
));
156 *p
= *(o
? o
: &default_quoting_options
);
160 /* Get the value of O's quoting style. If O is null, use the default. */
162 get_quoting_style (struct quoting_options
*o
)
164 return (o
? o
: &default_quoting_options
)->style
;
167 /* In O (or in the default if O is null),
168 set the value of the quoting style to S. */
170 set_quoting_style (struct quoting_options
*o
, enum quoting_style s
)
172 (o
? o
: &default_quoting_options
)->style
= s
;
175 /* In O (or in the default if O is null),
176 set the value of the quoting options for character C to I.
177 Return the old value. Currently, the only values defined for I are
178 0 (the default) and 1 (which means to quote the character even if
179 it would not otherwise be quoted). */
181 set_char_quoting (struct quoting_options
*o
, char c
, int i
)
183 unsigned char uc
= c
;
184 int *p
= (o
? o
: &default_quoting_options
)->quote_these_too
+ uc
/ INT_BITS
;
185 int shift
= uc
% INT_BITS
;
186 int r
= (*p
>> shift
) & 1;
187 *p
^= ((i
& 1) ^ r
) << shift
;
191 /* MSGID approximates a quotation mark. Return its translation if it
192 has one; otherwise, return either it or "\"", depending on S. */
194 gettext_quote (char const *msgid
, enum quoting_style s
)
196 char const *translation
= _(msgid
);
197 if (translation
== msgid
&& s
== clocale_quoting_style
)
202 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
203 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
204 non-quoting-style part of O to control quoting.
205 Terminate the output with a null character, and return the written
206 size of the output, not counting the terminating null.
207 If BUFFERSIZE is too small to store the output string, return the
208 value that would have been returned had BUFFERSIZE been large enough.
209 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
211 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
212 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
213 style specified by O, and O may not be null. */
216 quotearg_buffer_restyled (char *buffer
, size_t buffersize
,
217 char const *arg
, size_t argsize
,
218 enum quoting_style quoting_style
,
219 struct quoting_options
const *o
)
223 char const *quote_string
= 0;
224 size_t quote_string_len
= 0;
225 int backslash_escapes
= 0;
226 int unibyte_locale
= MB_CUR_MAX
== 1;
231 if (len < buffersize) \
237 switch (quoting_style
)
239 case c_quoting_style
:
241 backslash_escapes
= 1;
243 quote_string_len
= 1;
246 case escape_quoting_style
:
247 backslash_escapes
= 1;
250 case locale_quoting_style
:
251 case clocale_quoting_style
:
253 /* Get translations for open and closing quotation marks.
255 The message catalog should translate "`" to a left
256 quotation mark suitable for the locale, and similarly for
257 "'". If the catalog has no translation,
258 locale_quoting_style quotes `like this', and
259 clocale_quoting_style quotes "like this".
261 For example, an American English Unicode locale should
262 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
263 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
264 MARK). A British English Unicode locale should instead
265 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
266 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
268 char const *left
= gettext_quote (N_("`"), quoting_style
);
269 char const *right
= gettext_quote (N_("'"), quoting_style
);
270 for (quote_string
= left
; *quote_string
; quote_string
++)
271 STORE (*quote_string
);
272 backslash_escapes
= 1;
273 quote_string
= right
;
274 quote_string_len
= strlen (quote_string
);
278 case shell_always_quoting_style
:
281 quote_string_len
= 1;
288 for (i
= 0; ! (argsize
== (size_t) -1 ? arg
[i
] == '\0' : i
== argsize
); i
++)
293 if (backslash_escapes
295 && i
+ quote_string_len
<= argsize
296 && memcmp (arg
+ i
, quote_string
, quote_string_len
) == 0)
303 switch (quoting_style
)
305 case shell_quoting_style
:
306 goto use_shell_always_quoting_style
;
308 case c_quoting_style
:
309 if (i
+ 2 < argsize
&& arg
[i
+ 1] == '?')
313 case '(': case ')': case '-': case '/':
314 case '<': case '=': case '>':
315 /* Escape the second '?' in what would otherwise be
316 a trigraph. */
331 case ALERT_CHAR
: esc
= 'a'; goto c_escape
;
332 case '\b': esc
= 'b'; goto c_escape
;
333 case '\f': esc
= 'f'; goto c_escape
;
334 case '\n': esc
= 'n'; goto c_and_shell_escape
;
335 case '\r': esc
= 'r'; goto c_and_shell_escape
;
336 case '\t': esc
= 't'; goto c_and_shell_escape
;
337 case '\v': esc
= 'v'; goto c_escape
;
338 case '\\': esc
= c
; goto c_and_shell_escape
;
341 if (quoting_style
== shell_quoting_style
)
342 goto use_shell_always_quoting_style
;
344 if (backslash_escapes
)
356 case '!': /* special in bash */
357 case '"': case '$': case '&':
358 case '(': case ')': case '*': case ';':
359 case '<': case '>': case '[':
360 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
362 /* A shell special character. In theory, '$' and '`' could
363 be the first bytes of multibyte characters, which means
364 we should check them with mbrtowc, but in practice this
365 doesn't happen so it's not worth worrying about. */
366 if (quoting_style
== shell_quoting_style
)
367 goto use_shell_always_quoting_style
;
371 switch (quoting_style
)
373 case shell_quoting_style
:
374 goto use_shell_always_quoting_style
;
376 case shell_always_quoting_style
:
387 case '%': case '+': case ',': case '-': case '.': case '/':
388 case '0': case '1': case '2': case '3': case '4': case '5':
389 case '6': case '7': case '8': case '9': case ':': case '=':
390 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
391 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
392 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
393 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
394 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
395 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
396 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
397 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
398 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
400 /* These characters don't cause problems, no matter what the
401 quoting style is. They cannot start multibyte sequences. */
405 /* If we have a multibyte sequence, copy it until we reach
406 its end, find an error, or come back to the initial shift
407 state. For C-like styles, if the sequence has
408 unprintable characters, escape the whole sequence, since
409 we can't easily escape single characters within it. */
411 /* Length of multibyte sequence found so far. */
419 printable
= ISPRINT (c
);
424 memset (&mbstate
, 0, sizeof mbstate
);
428 if (argsize
== (size_t) -1)
429 argsize
= strlen (arg
);
434 size_t bytes
= mbrtowc (&w
, &arg
[i
+ m
],
435 argsize
- (i
+ m
), &mbstate
);
438 else if (bytes
== (size_t) -1)
443 else if (bytes
== (size_t) -2)
446 while (i
+ m
< argsize
&& arg
[i
+ m
])
457 while (! mbsinit (&mbstate
));
460 if (1 < m
|| (backslash_escapes
&& ! printable
))
462 /* Output a multibyte sequence, or an escaped
463 unprintable unibyte character. */
468 if (backslash_escapes
&& ! printable
)
471 STORE ('0' + (c
>> 6));
472 STORE ('0' + ((c
>> 3) & 7));
486 if (! (backslash_escapes
487 && o
->quote_these_too
[c
/ INT_BITS
] & (1 << (c
% INT_BITS
))))
498 for (; *quote_string
; quote_string
++)
499 STORE (*quote_string
);
501 if (len
< buffersize
)
505 use_shell_always_quoting_style
:
506 return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
507 shell_always_quoting_style
, o
);
510 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
511 argument ARG (of size ARGSIZE), using O to control quoting.
512 If O is null, use the default.
513 Terminate the output with a null character, and return the written
514 size of the output, not counting the terminating null.
515 If BUFFERSIZE is too small to store the output string, return the
516 value that would have been returned had BUFFERSIZE been large enough.
517 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
519 quotearg_buffer (char *buffer
, size_t buffersize
,
520 char const *arg
, size_t argsize
,
521 struct quoting_options
const *o
)
523 struct quoting_options
const *p
= o
? o
: &default_quoting_options
;
524 return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
528 /* Use storage slot N to return a quoted version of the string ARG.
529 OPTIONS specifies the quoting options.
530 The returned value points to static storage that can be
531 reused by the next call to this function with the same value of N.
532 N must be nonnegative. N is deliberately declared with type "int"
533 to allow for future extensions (using negative values). */
535 quotearg_n_options (int n
, char const *arg
,
536 struct quoting_options
const *options
)
538 /* Preallocate a slot 0 buffer, so that the caller can always quote
539 one small component of a "memory exhausted" message in slot 0. */
540 static char slot0
[256];
541 static unsigned int nslots
= 1;
547 static struct slotvec slotvec0
= {sizeof slot0
, slot0
};
548 static struct slotvec
*slotvec
= &slotvec0
;
553 size_t s
= n1
* sizeof (struct slotvec
);
554 if (! (0 < n1
&& n1
== s
/ sizeof (struct slotvec
)))
556 if (slotvec
== &slotvec0
)
558 slotvec
= (struct slotvec
*) xmalloc (sizeof (struct slotvec
));
561 slotvec
= (struct slotvec
*) xrealloc (slotvec
, s
);
562 memset (slotvec
+ nslots
, 0, (n1
- nslots
) * sizeof (struct slotvec
));
567 size_t size
= slotvec
[n
].size
;
568 char *val
= slotvec
[n
].val
;
569 size_t qsize
= quotearg_buffer (val
, size
, arg
, (size_t) -1, options
);
573 slotvec
[n
].size
= size
= qsize
+ 1;
574 slotvec
[n
].val
= val
= xrealloc (val
== slot0
? 0 : val
, size
);
575 quotearg_buffer (val
, size
, arg
, (size_t) -1, options
);
583 quotearg_n (unsigned int n
, char const *arg
)
585 return quotearg_n_options (n
, arg
, &default_quoting_options
);
/* Return a quoted version of the string ARG, using storage slot 0
   and the default quoting options.  */
char *
quotearg (char const *arg)
{
  return quotearg_n (0, arg);
}
595 quotearg_n_style (unsigned int n
, enum quoting_style s
, char const *arg
)
597 struct quoting_options o
;
599 memset (o
.quote_these_too
, 0, sizeof o
.quote_these_too
);
600 return quotearg_n_options (n
, arg
, &o
);
604 quotearg_style (enum quoting_style s
, char const *arg
)
606 return quotearg_n_style (0, s
, arg
);
610 quotearg_char (char const *arg
, char ch
)
612 struct quoting_options options
;
613 options
= default_quoting_options
;
614 set_char_quoting (&options
, ch
, 1);
615 return quotearg_n_options (0, arg
, &options
);
/* Return a quoted version of the string ARG in which ':' is also
   marked to be quoted; equivalent to quotearg_char (ARG, ':').  */
char *
quotearg_colon (char const *arg)
{
  return quotearg_char (arg, ':');
}