1 /* quotearg.c - quote arguments for output 
   2    Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. 
   4    This program is free software; you can redistribute it and/or modify 
   5    it under the terms of the GNU General Public License as published by 
   6    the Free Software Foundation; either version 2, or (at your option) 
        any later version. 
   9    This program is distributed in the hope that it will be useful, 
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  12    GNU General Public License for more details. 
  14    You should have received a copy of the GNU General Public License 
  15    along with this program; if not, write to the Free Software Foundation, 
  16    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */ 
  18 /* Written by Paul Eggert <eggert@twinsun.com> */ 
  25 # include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */ 
  27 #include <sys/types.h> 
  35 # define _(text) gettext (text) 
  48 # define UCHAR_MAX ((unsigned char) -1) 
  51 #if HAVE_C_BACKSLASH_A 
  52 # define ALERT_CHAR '\a' 
  54 # define ALERT_CHAR '\7' 
  70 /* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the 
  71    other macros are defined only for documentation and to satisfy C 
        syntax.  */ 
  75 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) 
  76 # define mbsinit(ps) 1 
  77 # define iswprint(wc) ISPRINT ((unsigned char) (wc)) 
  84 # if !defined iswprint && !HAVE_ISWPRINT 
  85 #  define iswprint(wc) 1 
  89 #define INT_BITS (sizeof (int) * CHAR_BIT) 
  91 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 
  92 # define IN_CTYPE_DOMAIN(c) 1 
  94 # define IN_CTYPE_DOMAIN(c) isascii(c) 
  97 /* Undefine to protect against the definition in wctype.h of solaris2.6.   */ 
  99 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) 
 /* Options that control how an argument is quoted: a basic style plus
    a per-character bitmap of additional characters to quote.  */
 101 struct quoting_options
 
 103   /* Basic quoting style.  */ 
 104   enum quoting_style style
; 
 106   /* Quote the characters indicated by this bit vector even if the 
 107      quoting style would not normally require them to be quoted.  */ 
 /* One bit per unsigned char value, packed INT_BITS bits to each int;
    the element count is chosen so that every value has a bit.  */
 108   int quote_these_too
[(UCHAR_MAX 
/ INT_BITS
) + 1]; 
 111 /* Names of quoting styles.  Presumably each name pairs with the
        entry at the same index of quoting_style_vals -- confirm the
        ordering whenever either table is changed.  */ 
 112 char const *const quoting_style_args
[] = 
 124 /* Correspondences to quoting style names: the enum quoting_style
        value that belongs to each name in quoting_style_args.
        Presumably the two tables are index-aligned -- confirm the
        ordering whenever either table is changed.  */ 
 125 enum quoting_style 
const quoting_style_vals
[] = 
 127   literal_quoting_style
, 
 129   shell_always_quoting_style
, 
 131   escape_quoting_style
, 
 132   locale_quoting_style
, 
 133   clocale_quoting_style
 
 136 /* The default quoting options, used by the functions in this file
        whenever they are handed a null options pointer.  The object has
        static storage and no initializer, so it starts out all zero:
        the style is whichever enum quoting_style value is 0, and no
        extra characters are marked in quote_these_too.  */ 
 137 static struct quoting_options default_quoting_options
; 
 139 /* Allocate a new set of quoting options, with contents initially identical 
 140    to O if O is not null, or to the default if O is null. 
 141    It is the caller's responsibility to free the result.  */ 
 142 struct quoting_options 
* 
 143 clone_quoting_options (struct quoting_options 
*o
) 
 /* The storage for the copy is obtained from xmalloc.  */
 145   struct quoting_options 
*p
 
 146     = (struct quoting_options 
*) xmalloc (sizeof (struct quoting_options
)); 
 /* Structure assignment copies the style and the whole
    quote_these_too bitmap in one step.  */
 147   *p 
= *(o 
? o 
: &default_quoting_options
); 
 151 /* Get the value of O's quoting style.  If O is null, use the default
        (that is, read the style stored in default_quoting_options).  */ 
 153 get_quoting_style (struct quoting_options 
*o
) 
 155   return (o 
? o 
: &default_quoting_options
)->style
; 
 158 /* In O (or in the default if O is null), 
 159    set the value of the quoting style to S.
        Passing a null O therefore modifies default_quoting_options,
        which the other functions in this file also fall back to.  */ 
 161 set_quoting_style (struct quoting_options 
*o
, enum quoting_style s
) 
 163   (o 
? o 
: &default_quoting_options
)->style 
= s
; 
 166 /* In O (or in the default if O is null), 
 167    set the value of the quoting options for character C to I. 
 168    Return the old value.  Currently, the only values defined for I are 
 169    0 (the default) and 1 (which means to quote the character even if 
 170    it would not otherwise be quoted).  */ 
 172 set_char_quoting (struct quoting_options 
*o
, char c
, int i
) 
 /* Go through unsigned char so that the index computed below is
    never negative when plain char is signed.  */
 174   unsigned char uc 
= c
; 
 /* P points at the int of the bitmap that holds C's bit.  */
 175   int *p 
= (o 
? o 
: &default_quoting_options
)->quote_these_too 
+ uc 
/ INT_BITS
; 
 /* SHIFT is the position of C's bit inside *P.  */
 176   int shift 
= uc 
% INT_BITS
; 
 /* R is the current value of C's bit.  */
 177   int r 
= (*p 
>> shift
) & 1; 
 /* XOR with (requested ^ current) flips the bit only when the
    requested value differs from the current one; only the low bit
    of I is used.
    NOTE(review): SHIFT can be INT_BITS - 1, in which case this shifts
    into the sign bit of an int; an unsigned bitmap would avoid
    relying on that -- confirm whether it matters on target
    compilers.  */
 178   *p 
^= ((i 
& 1) ^ r
) << shift
; 
 182 /* MSGID approximates a quotation mark.  Return its translation if it 
 183    has one; otherwise, return either it or "\"", depending on S.  */ 
 185 gettext_quote (char const *msgid
, enum quoting_style s
) 
 187   char const *translation 
= _(msgid
); 
 /* The test is on pointer identity: getting MSGID itself back from
    _() is treated as "no translation available".  Only
    clocale_quoting_style then takes the alternative mentioned in
    the comment above.  */
 188   if (translation 
== msgid 
&& s 
== clocale_quoting_style
) 
 193 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 
 194    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 
 195    non-quoting-style part of O to control quoting. 
 196    Terminate the output with a null character, and return the written 
 197    size of the output, not counting the terminating null. 
 198    If BUFFERSIZE is too small to store the output string, return the 
 199    value that would have been returned had BUFFERSIZE been large enough. 
 200    If ARGSIZE is -1, use the string length of the argument for ARGSIZE. 
 202    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 
 203    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 
 204    style specified by O, and O may not be null.  */ 
 207 quotearg_buffer_restyled (char *buffer
, size_t buffersize
, 
 208                           char const *arg
, size_t argsize
, 
 209                           enum quoting_style quoting_style
, 
 210                           struct quoting_options 
const *o
) 
 214   char const *quote_string 
= 0; 
 215   size_t quote_string_len 
= 0; 
 216   int backslash_escapes 
= 0; 
 /* MB_CUR_MAX == 1 means no character of the current locale is
    longer than one byte.  */
 217   int unibyte_locale 
= MB_CUR_MAX 
== 1; 
 222         if (len < buffersize) \ 
 228   switch (quoting_style
) 
 230     case c_quoting_style
: 
 232       backslash_escapes 
= 1; 
 234       quote_string_len 
= 1; 
 237     case escape_quoting_style
: 
 238       backslash_escapes 
= 1; 
 241     case locale_quoting_style
: 
 242     case clocale_quoting_style
: 
 244         /* Get translations for open and closing quotation marks. 
 246            The message catalog should translate "`" to a left 
 247            quotation mark suitable for the locale, and similarly for 
 248            "'".  If the catalog has no translation, 
 249            locale_quoting_style quotes `like this', and 
 250            clocale_quoting_style quotes "like this". 
 252            For example, an American English Unicode locale should 
 253            translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 
 254            should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 
 255            MARK).  A British English Unicode locale should instead 
 256            translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 
 257            U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */ 
 259         char const *left 
= gettext_quote (N_("`"), quoting_style
); 
 260         char const *right 
= gettext_quote (N_("'"), quoting_style
); 
 /* Emit the opening quote now; the closing quote is remembered in
    QUOTE_STRING / QUOTE_STRING_LEN for later.  */
 261         for (quote_string 
= left
; *quote_string
; quote_string
++) 
 262           STORE (*quote_string
); 
 263         backslash_escapes 
= 1; 
 264         quote_string 
= right
; 
 265         quote_string_len 
= strlen (quote_string
); 
 269     case shell_always_quoting_style
: 
 272       quote_string_len 
= 1; 
 /* With ARGSIZE == (size_t) -1 the loop stops at the terminating null
    byte of ARG; otherwise it stops after exactly ARGSIZE bytes.  */
 279   for (i 
= 0;  ! (argsize 
== (size_t) -1 ? arg
[i
] == '\0' : i 
== argsize
);  i
++) 
 /* Only when backslash escapes are in effect: does the closing quote
    string occur in ARG at position I?  */
 284       if (backslash_escapes
 
 286           && i 
+ quote_string_len 
<= argsize
 
 287           && memcmp (arg 
+ i
, quote_string
, quote_string_len
) == 0) 
 294           switch (quoting_style
) 
 296             case shell_quoting_style
: 
 297               goto use_shell_always_quoting_style
; 
 299             case c_quoting_style
: 
 300               if (i 
+ 2 < argsize 
&& arg
[i 
+ 1] == '?') 
 304                   case '(': case ')': case '-': case '/': 
 305                   case '<': case '=': case '>': 
 306                     /* Escape the second '?' in what would otherwise be 
 322         case ALERT_CHAR
: esc 
= 'a'; goto c_escape
; 
 323         case '\b': esc 
= 'b'; goto c_escape
; 
 324         case '\f': esc 
= 'f'; goto c_escape
; 
 325         case '\n': esc 
= 'n'; goto c_and_shell_escape
; 
 326         case '\r': esc 
= 'r'; goto c_and_shell_escape
; 
 327         case '\t': esc 
= 't'; goto c_and_shell_escape
; 
 328         case '\v': esc 
= 'v'; goto c_escape
; 
 329         case '\\': esc 
= c
; goto c_and_shell_escape
; 
 332           if (quoting_style 
== shell_quoting_style
) 
 333             goto use_shell_always_quoting_style
; 
 335           if (backslash_escapes
) 
 347         case '!': /* special in bash */ 
 348         case '"': case '$': case '&': 
 349         case '(': case ')': case '*': case ';': 
 350         case '<': case '>': case '[': 
 351         case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 
 353           /* A shell special character.  In theory, '$' and '`' could 
 354              be the first bytes of multibyte characters, which means 
 355              we should check them with mbrtowc, but in practice this 
 356              doesn't happen so it's not worth worrying about.  */ 
 357           if (quoting_style 
== shell_quoting_style
) 
 358             goto use_shell_always_quoting_style
; 
 362           switch (quoting_style
) 
 364             case shell_quoting_style
: 
 365               goto use_shell_always_quoting_style
; 
 367             case shell_always_quoting_style
: 
 378         case '%': case '+': case ',': case '-': case '.': case '/': 
 379         case '0': case '1': case '2': case '3': case '4': case '5': 
 380         case '6': case '7': case '8': case '9': case ':': case '=': 
 381         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 
 382         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 
 383         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 
 384         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 
 385         case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 
 386         case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 
 387         case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 
 388         case 'o': case 'p': case 'q': case 'r': case 's': case 't': 
 389         case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 
 391           /* These characters don't cause problems, no matter what the 
 392              quoting style is.  They cannot start multibyte sequences.  */ 
 396           /* If we have a multibyte sequence, copy it until we reach 
 397              its end, find an error, or come back to the initial shift 
 398              state.  For C-like styles, if the sequence has 
 399              unprintable characters, escape the whole sequence, since 
 400              we can't easily escape single characters within it.  */ 
 402             /* Length of multibyte sequence found so far.  */ 
 410                 printable 
= ISPRINT (c
); 
 415                 memset (&mbstate
, 0, sizeof mbstate
); 
 /* mbrtowc is given a byte limit below, so an ARGSIZE of
    (size_t) -1 is replaced by the real string length here.  */
 419                 if (argsize 
== (size_t) -1) 
 420                   argsize 
= strlen (arg
); 
 425                     size_t bytes 
= mbrtowc (&w
, &arg
[i 
+ m
], 
 426                                             argsize 
- (i 
+ m
), &mbstate
); 
 429                     else if (bytes 
== (size_t) -1) 
 434                     else if (bytes 
== (size_t) -2) 
 437                         while (i 
+ m 
< argsize 
&& arg
[i 
+ m
]) 
 448                 while (! mbsinit (&mbstate
)); 
 451             if (1 < m 
|| (backslash_escapes 
&& ! printable
)) 
 453                 /* Output a multibyte sequence, or an escaped 
 454                    unprintable unibyte character.  */ 
 459                     if (backslash_escapes 
&& ! printable
) 
 /* The two high-order octal digits of C.  */
 462                         STORE ('0' + (c 
>> 6)); 
 463                         STORE ('0' + ((c 
>> 3) & 7)); 
 /* Look up C's bit in O's quote_these_too bitmap.  */
 477       if (! (backslash_escapes
 
 478              && o
->quote_these_too
[c 
/ INT_BITS
] & (1 << (c 
% INT_BITS
)))) 
 /* Emit whatever remains of the closing quote string.  */
 489     for (; *quote_string
; quote_string
++) 
 490       STORE (*quote_string
); 
 492   if (len 
< buffersize
) 
 /* Reached by goto from the shell_quoting_style cases above: redo
    the whole argument from the start with
    shell_always_quoting_style.  */
 496  use_shell_always_quoting_style
: 
 497   return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
, 
 498                                    shell_always_quoting_style
, o
); 
 501 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 
 502    argument ARG (of size ARGSIZE), using O to control quoting. 
 503    If O is null, use the default. 
 504    Terminate the output with a null character, and return the written 
 505    size of the output, not counting the terminating null. 
 506    If BUFFERSIZE is too small to store the output string, return the 
 507    value that would have been returned had BUFFERSIZE been large enough. 
 508    If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */ 
 510 quotearg_buffer (char *buffer
, size_t buffersize
, 
 511                  char const *arg
, size_t argsize
, 
 512                  struct quoting_options 
const *o
) 
 /* Resolve a null O to default_quoting_options here, because
    quotearg_buffer_restyled does not accept a null options
    pointer.  */
 514   struct quoting_options 
const *p 
= o 
? o 
: &default_quoting_options
; 
 515   return quotearg_buffer_restyled (buffer
, buffersize
, arg
, argsize
, 
 519 /* Use storage slot N to return a quoted version of the string ARG. 
 520    OPTIONS specifies the quoting options. 
 521    The returned value points to static storage that can be 
 522    reused by the next call to this function with the same value of N. 
 523    N must be nonnegative.  N is deliberately declared with type "int" 
 524    to allow for future extensions (using negative values).  */ 
 526 quotearg_n_options (int n
, char const *arg
, 
 527                     struct quoting_options 
const *options
) 
 529   /* Preallocate a slot 0 buffer, so that the caller can always quote 
 530      one small component of a "memory exhausted" message in slot 0.  */ 
 531   static char slot0
[256]; 
 532   static unsigned int nslots 
= 1; 
 /* The slot vector starts out as the single static element slotvec0,
    which describes slot0.  */
 538   static struct slotvec slotvec0 
= {sizeof slot0
, slot0
}; 
 539   static struct slotvec 
*slotvec 
= &slotvec0
; 
 /* S is the byte size of the enlarged vector; dividing it back by
    the element size detects overflow in the multiplication.  */
 544       size_t s 
= n1 
* sizeof (struct slotvec
); 
 545       if (! (0 < n1 
&& n1 
== s 
/ sizeof (struct slotvec
))) 
 /* While SLOTVEC still points at the static slotvec0 it is replaced
    by an xmalloc'ed block, so that the xrealloc below is never
    handed the address of a static object.  */
 547       if (slotvec 
== &slotvec0
) 
 549           slotvec 
= (struct slotvec 
*) xmalloc (sizeof (struct slotvec
)); 
 552       slotvec 
= (struct slotvec 
*) xrealloc (slotvec
, s
); 
 /* Zero the newly added elements, from index NSLOTS up to N1 - 1.  */
 553       memset (slotvec 
+ nslots
, 0, (n1 
- nslots
) * sizeof (struct slotvec
)); 
 558     size_t size 
= slotvec
[n
].size
; 
 559     char *val 
= slotvec
[n
].val
; 
 /* First attempt, into the slot's current buffer.  QSIZE is the
    length the quoted string needs, whether or not it fitted.  */
 560     size_t qsize 
= quotearg_buffer (val
, size
, arg
, (size_t) -1, options
); 
 /* Resize the slot to QSIZE + 1 bytes (the extra byte is for the
    terminating null) and quote again.  slot0 is a static array, so
    xrealloc is given 0 in its place.  */
 564         slotvec
[n
].size 
= size 
= qsize 
+ 1; 
 565         slotvec
[n
].val 
= val 
= xrealloc (val 
== slot0 
? 0 : val
, size
); 
 566         quotearg_buffer (val
, size
, arg
, (size_t) -1, options
); 
 /* Return a quoted version of ARG in storage slot N, using
    default_quoting_options.
    NOTE(review): N is unsigned int here but is passed to the int
    parameter of quotearg_n_options -- confirm the conversion is
    intended.  */
 574 quotearg_n (unsigned int n
, char const *arg
) 
 576   return quotearg_n_options (n
, arg
, &default_quoting_options
); 
 /* Return a quoted version of ARG: quotearg_n with slot 0.  */
 580 quotearg (char const *arg
) 
 582   return quotearg_n (0, arg
); 
 /* Return a quoted version of ARG in storage slot N, using a local
    options structure O whose quote_these_too bitmap is all zero, so
    that no extra characters are forced to be quoted.
    NOTE(review): confirm that O's style member is set from S before O
    is passed on; O is an automatic variable and is not otherwise
    initialized.  */
 586 quotearg_n_style (unsigned int n
, enum quoting_style s
, char const *arg
) 
 588   struct quoting_options o
; 
 590   memset (o
.quote_these_too
, 0, sizeof o
.quote_these_too
); 
 591   return quotearg_n_options (n
, arg
, &o
); 
 /* Like quotearg_n_style (N, S, ARG) with N fixed at 0.  */
 595 quotearg_style (enum quoting_style s
, char const *arg
) 
 597   return quotearg_n_style (0, s
, arg
); 
 /* Return a quoted version of ARG in storage slot 0, using a copy of
    default_quoting_options in which CH is additionally marked as a
    character to quote.  Only the local copy is changed;
    default_quoting_options itself is left alone.  */
 601 quotearg_char (char const *arg
, char ch
) 
 603   struct quoting_options options
; 
 604   options 
= default_quoting_options
; 
 605   set_char_quoting (&options
, ch
, 1); 
 606   return quotearg_n_options (0, arg
, &options
); 
 /* Like quotearg_char (ARG, ':'): colons in ARG are quoted as well.  */
 610 quotearg_colon (char const *arg
) 
 612   return quotearg_char (arg
, ':');