1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
27 #include <sys/types.h>
35 # define _(text) gettext (text)
48 # define UCHAR_MAX ((unsigned char) -1)
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
54 # define ALERT_CHAR '\7'
70 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
71 other macros are defined only for documentation and to satisfy C
75 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
76 # define mbsinit(ps) 1
77 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
84 # if !defined iswprint && !HAVE_ISWPRINT
85 # define iswprint(wc) 1
89 #define INT_BITS (sizeof (int) * CHAR_BIT)
91 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
92 # define IN_CTYPE_DOMAIN(c) 1
94 # define IN_CTYPE_DOMAIN(c) isascii(c)
97 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
99 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
101 struct quoting_options
103 /* Basic quoting style. */
104 enum quoting_style style
;
106 /* Quote the characters indicated by this bit vector even if the
107 quoting style would not normally require them to be quoted. */
108 int quote_these_too
[(UCHAR_MAX
/ INT_BITS
) + 1];
111 /* Names of quoting styles. */
/* NOTE(review): only the declarator of this table is visible in this
   listing; the initializer (the style-name strings) is not shown.  */
112 char const *const quoting_style_args
[] =
124 /* Correspondences to quoting style names. */
/* Table of enum quoting_style values.  Presumably entry I here is the
   value for the name at index I of quoting_style_args -- confirm
   against the code that looks names up, which is not in this file.
   NOTE(review): the listing's own line numbers skip 128 and 130, so
   some entries may be missing from what is shown below.  */
125 enum quoting_style
const quoting_style_vals
[] =
127 literal_quoting_style
,
129 shell_always_quoting_style
,
131 escape_quoting_style
,
132 locale_quoting_style
,
133 clocale_quoting_style
136 /* The default quoting options. */
/* Used by the functions in this file whenever the caller passes a null
   options pointer.  It has static storage duration and no initializer,
   so it starts out all-zero: style value 0 and no extra characters
   marked for quoting.  */
137 static struct quoting_options default_quoting_options
;
139 /* Allocate a new set of quoting options, with contents initially identical
140 to O if O is not null, or to the default if O is null.
141 It is the caller's responsibility to free the result. */
142 struct quoting_options
*
143 clone_quoting_options (struct quoting_options
*o
)
145 struct quoting_options
*p
146 = (struct quoting_options
*) xmalloc (sizeof (struct quoting_options
));
147 *p
= *(o
? o
: &default_quoting_options
);
151 /* Get the value of O's quoting style. If O is null, use the default. */
153 get_quoting_style (struct quoting_options
*o
)
155 return (o
? o
: &default_quoting_options
)->style
;
158 /* In O (or in the default if O is null),
159 set the value of the quoting style to S. */
161 set_quoting_style (struct quoting_options
*o
, enum quoting_style s
)
163 (o
? o
: &default_quoting_options
)->style
= s
;
166 /* In O (or in the default if O is null),
167 set the value of the quoting options for character C to I.
168 Return the old value. Currently, the only values defined for I are
169 0 (the default) and 1 (which means to quote the character even if
170 it would not otherwise be quoted). */
172 set_char_quoting (struct quoting_options
*o
, char c
, int i
)
174 unsigned char uc
= c
;
175 int *p
= (o
? o
: &default_quoting_options
)->quote_these_too
+ uc
/ INT_BITS
;
176 int shift
= uc
% INT_BITS
;
177 int r
= (*p
>> shift
) & 1;
178 *p
^= ((i
& 1) ^ r
) << shift
;
182 /* MSGID approximates a quotation mark. Return its translation if it
183 has one; otherwise, return either it or "\"", depending on S. */
185 gettext_quote (char const *msgid
, enum quoting_style s
)
187 char const *translation
= _(msgid
);
188 if (translation
== msgid
&& s
== clocale_quoting_style
)
193 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
194 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
195 non-quoting-style part of O to control quoting.
196 Terminate the output with a null character, and return the written
197 size of the output, not counting the terminating null.
198 If BUFFERSIZE is too small to store the output string, return the
199 value that would have been returned had BUFFERSIZE been large enough.
200 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
202 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
203 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
204 style specified by O, and O may not be null. */
/* NOTE(review): this listing of quotearg_buffer_restyled is lossy.  The
   body of the STORE macro, the declarations of i, len, c, esc, m, w,
   mbstate and printable, and the c_escape / c_and_shell_escape labels
   are referenced below but not shown.  The comments added here describe
   only what the visible statements establish.  */
207 quotearg_buffer_restyled (char *buffer
, size_t buffersize
,
208 char const *arg
, size_t argsize
,
209 enum quoting_style quoting_style
,
210 struct quoting_options
const *o
)
/* quote_string / quote_string_len: quote text that is matched inside
   ARG and emitted after the loop; set per style below.
   backslash_escapes: set to 1 by the c, escape, locale and clocale
   cases.  unibyte_locale: nonzero when MB_CUR_MAX is 1.  */
214 char const *quote_string
= 0;
215 size_t quote_string_len
= 0;
216 int backslash_escapes
= 0;
217 int unibyte_locale
= MB_CUR_MAX
== 1;
222 if (len < buffersize) \
228 switch (quoting_style
)
/* Per-style setup of the variables declared above.  */
230 case c_quoting_style
:
232 backslash_escapes
= 1;
234 quote_string_len
= 1;
237 case escape_quoting_style
:
238 backslash_escapes
= 1;
241 case locale_quoting_style
:
242 case clocale_quoting_style
:
244 /* Get translations for open and closing quotation marks.
246 The message catalog should translate "`" to a left
247 quotation mark suitable for the locale, and similarly for
248 "'". If the catalog has no translation,
249 locale_quoting_style quotes `like this', and
250 clocale_quoting_style quotes "like this".
252 For example, an American English Unicode locale should
253 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
254 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
255 MARK). A British English Unicode locale should instead
256 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
257 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
259 char const *left
= gettext_quote (N_("`"), quoting_style
);
260 char const *right
= gettext_quote (N_("'"), quoting_style
);
/* Emit the opening quote now; keep the closing quote in quote_string
   so it can be recognized inside ARG and emitted after the loop.  */
261 for (quote_string
= left
; *quote_string
; quote_string
++)
262 STORE (*quote_string
);
263 backslash_escapes
= 1;
264 quote_string
= right
;
265 quote_string_len
= strlen (quote_string
);
269 case shell_always_quoting_style
:
272 quote_string_len
= 1;
/* Scan ARG one position at a time.  When ARGSIZE is (size_t) -1 the
   scan ends at the terminating null byte; otherwise it ends after
   ARGSIZE bytes.  */
279 for (i
= 0; ! (argsize
== (size_t) -1 ? arg
[i
] == '\0' : i
== argsize
); i
++)
/* Detect an occurrence of the quote text at position I of ARG.  */
284 if (backslash_escapes
286 && i
+ quote_string_len
<= argsize
287 && memcmp (arg
+ i
, quote_string
, quote_string_len
) == 0)
294 switch (quoting_style
)
296 case shell_quoting_style
:
297 goto use_shell_always_quoting_style
;
299 case c_quoting_style
:
300 if (i
+ 2 < argsize
&& arg
[i
+ 1] == '?')
304 case '(': case ')': case '-': case '/':
305 case '<': case '=': case '>':
306 /* Escape the second '?' in what would otherwise be
322 case ALERT_CHAR
: esc
= 'a'; goto c_escape
;
323 case '\b': esc
= 'b'; goto c_escape
;
324 case '\f': esc
= 'f'; goto c_escape
;
325 case '\n': esc
= 'n'; goto c_and_shell_escape
;
326 case '\r': esc
= 'r'; goto c_and_shell_escape
;
327 case '\t': esc
= 't'; goto c_and_shell_escape
;
328 case '\v': esc
= 'v'; goto c_escape
;
329 case '\\': esc
= c
; goto c_and_shell_escape
;
332 if (quoting_style
== shell_quoting_style
)
333 goto use_shell_always_quoting_style
;
335 if (backslash_escapes
)
347 case '!': /* special in bash */
348 case '"': case '$': case '&':
349 case '(': case ')': case '*': case ';':
350 case '<': case '>': case '[':
351 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
353 /* A shell special character. In theory, '$' and '`' could
354 be the first bytes of multibyte characters, which means
355 we should check them with mbrtowc, but in practice this
356 doesn't happen so it's not worth worrying about. */
357 if (quoting_style
== shell_quoting_style
)
358 goto use_shell_always_quoting_style
;
362 switch (quoting_style
)
364 case shell_quoting_style
:
365 goto use_shell_always_quoting_style
;
367 case shell_always_quoting_style
:
378 case '%': case '+': case ',': case '-': case '.': case '/':
379 case '0': case '1': case '2': case '3': case '4': case '5':
380 case '6': case '7': case '8': case '9': case ':': case '=':
381 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
382 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
383 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
384 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
385 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
386 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
387 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
388 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
389 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
391 /* These characters don't cause problems, no matter what the
392 quoting style is. They cannot start multibyte sequences. */
396 /* If we have a multibyte sequence, copy it until we reach
397 its end, find an error, or come back to the initial shift
398 state. For C-like styles, if the sequence has
399 unprintable characters, escape the whole sequence, since
400 we can't easily escape single characters within it. */
402 /* Length of multibyte sequence found so far. */
410 printable
= ISPRINT (c
);
/* Start multibyte decoding from a zeroed conversion state.  */
415 memset (&mbstate
, 0, sizeof mbstate
);
/* The mbrtowc call below is given an explicit byte count, so the
   "use the string length" sentinel is resolved here.  */
419 if (argsize
== (size_t) -1)
420 argsize
= strlen (arg
);
425 size_t bytes
= mbrtowc (&w
, &arg
[i
+ m
],
426 argsize
- (i
+ m
), &mbstate
);
429 else if (bytes
== (size_t) -1)
434 else if (bytes
== (size_t) -2)
437 while (i
+ m
< argsize
&& arg
[i
+ m
])
448 while (! mbsinit (&mbstate
));
451 if (1 < m
|| (backslash_escapes
&& ! printable
))
453 /* Output a multibyte sequence, or an escaped
454 unprintable unibyte character. */
459 if (backslash_escapes
&& ! printable
)
/* Octal digits of C: c >> 6 is the high digit, (c >> 3) & 7 the
   middle one.  */
462 STORE ('0' + (c
>> 6));
463 STORE ('0' + ((c
>> 3) & 7));
/* Test C's bit in the caller-supplied quote_these_too bit vector.  */
477 if (! (backslash_escapes
478 && o
->quote_these_too
[c
/ INT_BITS
] & (1 << (c
% INT_BITS
))))
/* Emit whatever remains of quote_string.  */
489 for (; *quote_string
; quote_string
++)
490 STORE (*quote_string
);
492 if (len
< buffersize
)
/* Reached from the shell_quoting_style checks above: redo the whole
   request in shell_always_quoting_style and return that result.  */
496 use_shell_always_quoting_style
:
497 return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
498 shell_always_quoting_style
, o
);
501 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
502 argument ARG (of size ARGSIZE), using O to control quoting.
503 If O is null, use the default.
504 Terminate the output with a null character, and return the written
505 size of the output, not counting the terminating null.
506 If BUFFERSIZE is too small to store the output string, return the
507 value that would have been returned had BUFFERSIZE been large enough.
508 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
510 quotearg_buffer (char *buffer
, size_t buffersize
,
511 char const *arg
, size_t argsize
,
512 struct quoting_options
const *o
)
514 struct quoting_options
const *p
= o
? o
: &default_quoting_options
;
515 return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
,
519 /* Use storage slot N to return a quoted version of the string ARG.
520 OPTIONS specifies the quoting options.
521 The returned value points to static storage that can be
522 reused by the next call to this function with the same value of N.
523 N must be nonnegative. N is deliberately declared with type "int"
524 to allow for future extensions (using negative values). */
/* NOTE(review): this listing of quotearg_n_options is lossy.  The
   declaration of struct slotvec, the definition of n1, the conditions
   guarding the growth and retry paths, and the return statement are
   not shown.  The comments added here describe only what the visible
   statements do.  */
526 quotearg_n_options (int n
, char const *arg
,
527 struct quoting_options
const *options
)
529 /* Preallocate a slot 0 buffer, so that the caller can always quote
530 one small component of a "memory exhausted" message in slot 0. */
531 static char slot0
[256];
532 static unsigned int nslots
= 1;
/* The slot vector starts out as the single static entry slotvec0,
   whose buffer is slot0.  */
538 static struct slotvec slotvec0
= {sizeof slot0
, slot0
};
539 static struct slotvec
*slotvec
= &slotvec0
;
/* S is the byte size of a vector of n1 entries.  The test that follows
   rejects n1 == 0 and detects overflow of the multiplication by
   dividing back.  What happens on failure is not visible here.  */
544 size_t s
= n1
* sizeof (struct slotvec
);
545 if (! (0 < n1
&& n1
== s
/ sizeof (struct slotvec
)))
/* While the vector is still the static slotvec0, a one-entry heap
   vector is allocated first -- presumably so that xrealloc below never
   receives the address of a static object; confirm.  */
547 if (slotvec
== &slotvec0
)
549 slotvec
= (struct slotvec
*) xmalloc (sizeof (struct slotvec
));
552 slotvec
= (struct slotvec
*) xrealloc (slotvec
, s
);
/* Zero the entries from index nslots up to n1.  */
553 memset (slotvec
+ nslots
, 0, (n1
- nslots
) * sizeof (struct slotvec
));
/* First attempt: quote into slot N's current buffer.  quotearg_buffer
   returns the length the output needs even when the buffer is too
   small, so qsize tells how much room is required.  */
558 size_t size
= slotvec
[n
].size
;
559 char *val
= slotvec
[n
].val
;
560 size_t qsize
= quotearg_buffer (val
, size
, arg
, (size_t) -1, options
);
/* Resize the slot to qsize + 1 bytes and quote again.  slot0 is a
   static array, so for it xrealloc is given a null pointer instead.
   NOTE(review): presumably guarded by a size check that is not shown.  */
564 slotvec
[n
].size
= size
= qsize
+ 1;
565 slotvec
[n
].val
= val
= xrealloc (val
== slot0
? 0 : val
, size
);
566 quotearg_buffer (val
, size
, arg
, (size_t) -1, options
);
574 quotearg_n (unsigned int n
, char const *arg
)
576 return quotearg_n_options (n
, arg
, &default_quoting_options
);
/* Quote ARG into storage slot 0 using the default quoting options.  */
char *
quotearg (char const *arg)
{
  unsigned int const slot = 0;

  return quotearg_n (slot, arg);
}
586 quotearg_n_style (unsigned int n
, enum quoting_style s
, char const *arg
)
588 struct quoting_options o
;
590 memset (o
.quote_these_too
, 0, sizeof o
.quote_these_too
);
591 return quotearg_n_options (n
, arg
, &o
);
595 quotearg_style (enum quoting_style s
, char const *arg
)
597 return quotearg_n_style (0, s
, arg
);
601 quotearg_char (char const *arg
, char ch
)
603 struct quoting_options options
;
604 options
= default_quoting_options
;
605 set_char_quoting (&options
, ch
, 1);
606 return quotearg_n_options (0, arg
, &options
);
/* Quote ARG as quotearg_char does, with ':' as the extra character.  */
char *
quotearg_colon (char const *arg)
{
  char const extra = ':';

  return quotearg_char (arg, extra);
}