]> git.saurik.com Git - apple/libc.git/blobdiff - gen/wordexp.c
Libc-498.1.5.tar.gz
[apple/libc.git] / gen / wordexp.c
index a75b9441a40502678da991474cc4016049ab5ab2..1a10891b4c9d0f6bb8d8b727e5652038f020bd39 100644 (file)
 /*
- *   Copyright 1994, University Corporation for Atmospheric Research
- *   See ../COPYRIGHT file for copying and redistribution conditions.
- */
-/*
- * Reproduction of ../COPYRIGHT file:
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
  *
- *********************************************************************
-Copyright 1995-2002 University Corporation for Atmospheric Research/Unidata
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this
+ * file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
 
-Portions of this software were developed by the Unidata Program at the 
-University Corporation for Atmospheric Research.
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <wordexp.h>
+#include <pthread.h>
+#include <regex.h>
+#include <assert.h>
+#include <unistd.h>
+#include <paths.h>
+#include <strings.h>
+#include <sys/errno.h>
 
-Access and use of this software shall impose the following obligations
-and understandings on the user. The user is granted the right, without
-any fee or cost, to use, copy, modify, alter, enhance and distribute
-this software, and any derivative works thereof, and its supporting
-documentation for any purpose whatsoever, provided that this entire
-notice appears in all copies of the software, derivative works and
-supporting documentation.  Further, UCAR requests that the user credit
-UCAR/Unidata in any publications that result from the use of this
-software or in any product that includes this software. The names UCAR
-and/or Unidata, however, may not be used in any advertising or publicity
-to endorse or promote any products or commercial entity unless specific
-written permission is obtained from UCAR/Unidata. The user also
-understands that UCAR/Unidata is not obligated to provide the user with
-any support, consulting, training or assistance of any kind with regard
-to the use, operation and performance of this software nor to provide
-the user with any updates, revisions, new versions or "bug fixes."
+// For _NSGetEnviron() -- which gives us a pointer to environ
+#include <crt_externs.h>
 
-THIS SOFTWARE IS PROVIDED BY UCAR/UNIDATA "AS IS" AND ANY EXPRESS OR
-IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL UCAR/UNIDATA BE LIABLE FOR ANY SPECIAL,
-INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
-FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
-WITH THE ACCESS, USE OR PERFORMANCE OF THIS SOFTWARE.
+extern size_t malloc_good_size(size_t size);
+extern int errno;
 
- *********************************************************************
- */
+pthread_once_t re_init_c = PTHREAD_ONCE_INIT;
+static regex_t re_cmd, re_goodchars, re_subcmd_syntax_err_kludge;
 
-/* $Id: wordexp.c,v 1.13 2002/12/26 16:46:46 steve Exp $ */
+/* Similar to popen, but captures stderr for you.  Doesn't interoperate
+  with pclose.  Call wait4 on your own.
+
+  Runs cmd as "sh -c cmd" (via _PATH_BSHELL) in a vfork()ed child whose
+  stdout and stderr are each redirected into their own pipe.  On
+  success the read ends are handed back as read-mode streams in *out
+  and *err and the child's pid is returned.  Returns 0, with every
+  pipe descriptor closed and *out / *err untouched, if pipe() or
+  vfork() fails. */
+pid_t popen_oe(char *cmd, FILE **out, FILE **err) {
+    int out_pipe[2], err_pipe[2];
+    char *argv[4];
+    pid_t pid;
 
-#if 0
-#include "ldmconfig.h"
-#endif
+    if (pipe(out_pipe) < 0) {
+       return 0;
+    }
+    if (pipe(err_pipe) < 0) {
+       /* don't leak the first pipe when the second can't be made */
+       close(out_pipe[0]);
+       close(out_pipe[1]);
+       return 0;
+    }
 
-/*
- * Hack to provide POSIX 1003.2-1992 _interface_.
- * NOT fully functional
- */
+    /* argument vector for the shell: sh -c <cmd> */
+    argv[0] = "sh";
+    argv[1] = "-c";
+    argv[2] = cmd;
+    argv[3] = NULL;
 
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
+    switch(pid = vfork()) {
+       case -1:
+           /* vfork failed: release both pipes and report failure */
+           close(out_pipe[0]);
+           close(out_pipe[1]);
+           close(err_pipe[0]);
+           close(err_pipe[1]);
+           return 0;
+       case 0:
+           /* child: make the pipe write ends stdout/stderr, drop the
+             read ends, then exec the shell */
+           if (out_pipe[1] != STDOUT_FILENO) {
+               dup2(out_pipe[1], STDOUT_FILENO);
+               close(out_pipe[1]);
+           }
+           close(out_pipe[0]);
+           if (err_pipe[1] != STDERR_FILENO) {
+               dup2(err_pipe[1], STDERR_FILENO);
+               close(err_pipe[1]);
+           }
+           close(err_pipe[0]);
+           execve(_PATH_BSHELL, argv, *_NSGetEnviron());
+           /* only reached when execve() itself failed */
+           _exit(127);
+       default:
+           /* parent: wrap the read ends in streams and close the write
+             ends so EOF is seen once the child is done writing.
+             NOTE(review): an fdopen() failure is only caught by
+             assert(); there is no error return for it */
+           *out = fdopen(out_pipe[0], "r");
+           assert(*out);
+           close(out_pipe[1]);
+           *err = fdopen(err_pipe[0], "r");
+           assert(*err);
+           close(err_pipe[1]);
 
-#include "wordexp.h"
+           return pid;
+    }
+}
 
+/* Compiles the three regular expressions used by wordexp(), which runs
+  this through pthread_once(&re_init_c, re_init):
+    re_cmd                      - an unescaped backtick or "$(" (command
+                                  substitution)
+    re_goodchars                - input made only of characters and quoted
+                                  forms that are accepted
+    re_subcmd_syntax_err_kludge - the shell's command substitution
+                                  syntax error message
+  All are compiled as extended, no-subexpression patterns.  The
+  regcomp() return codes are not checked here. */
+void re_init(void) {
+    const int cflags = REG_EXTENDED|REG_NOSUB;
+    /* XXX I'm not sure the { } stuff is correct,
+      it may be overly restrictive.
+      NOTE(review): in the "$(...)" alternative below the paren right
+      after \\$ is an unescaped group opener, not a literal "(" --
+      confirm that is what was intended */
+    const char *goodchars_rx =
+      "^([^\\\"'|&;<>(){}]"
+      "|\\\\."
+      "|'([^']|\\\\')*'"
+      "|\"([^\"]|\\\\\")*\""
+      "|`([^`]|\\\\`)*`"
+      "|\\$(([^)]|\\\\))*\\)"  /* can't do nesting in a regex */
+      "|\\$\\{[^}]*\\}"
+      /* XXX: { } ? */
+      ")*$";
+
+    (void)regcomp(&re_cmd, "(^|[^\\])(`|\\$\\()", cflags);
+    (void)regcomp(&re_goodchars, goodchars_rx, cflags);
+    (void)regcomp(&re_subcmd_syntax_err_kludge,
+      "command substitution.*syntax error", cflags);
+}
 
-/*
- * Translate return value from wordexp() into a string
- */
-const char *
-s_wrde_err(int wrde_err)
-{
-       switch(wrde_err) {
-       case 0: return "No Error";
-       case WRDE_BADCHAR: return "WRDE_BADCHAR";
-       case WRDE_BADVAL: return "WRDE_BADVAL";
-       case WRDE_CMDSUB: return "WRDE_CMDSUB";
-       case WRDE_NOSPACE: return "WRDE_NOSPACE";
-       case WRDE_SYNTAX: return "WRDE_SYNTAX";
-       }
-       /* default */
-       return "Unknown Error";
+/* Makes sure pwe->we_wordv has room for more than `want` pointers.
+  *have is the current capacity in char* slots; when it is already
+  greater than `want` nothing happens.  Otherwise the vector is
+  reallocated to roughly twice `want` slots (rounded up by
+  malloc_good_size) and *have is updated to the new capacity.
+  Returns 1 on success and 0 if the reallocation failed, in which
+  case pwe->we_wordv is NULL (reallocf releases the old vector). */
+static int word_alloc(size_t want, wordexp_t *__restrict__ pwe, size_t *have) {
+    if (want >= *have) {
+       size_t nbytes = malloc_good_size(2 * want * sizeof(char *));
+       char **grown = reallocf(pwe->we_wordv, nbytes);
+
+       pwe->we_wordv = grown;
+       if (grown == NULL) {
+           return 0;
+       }
+       *have = nbytes / sizeof(char *);
+    }
+    return 1;
 }
 
+/* XXX this is _not_ designed to be fast */
+/* wordexp is also rife with security "challenges": unless you pass it
+  WRDE_NOCMD it *must* support subshell expansion, and even if you
+  don't, because it has to support so much of the standard shell (all
+  the odd little variable expansion options, for example), it is hard
+  to do without a subshell.  It is probably just plain a Bad Idea
+  to call in anything setuid, or executing remotely. */
 
-/*ARGSUSED*/
-int
-wordexp(const char *words, wordexp_t *pwordexp, int flags)
-{
-       const char *ccp;
-       char **argv;
-       const char *buf;
-       size_t argc;
-        enum states {ARGSTART, IN_QUOTE, NOT_IN_QUOTE, DONE};
-        enum classes {EOS, SPACE, QUOTE, OTHER};
-        int state = ARGSTART;
-        char *argbuf;
-        const char *sp;
-        char *dp;
-        int status = 0;
-        
-       /* devour leading white space */
-       for(ccp = words; *ccp != 0 && isspace(*ccp); )
-           ccp++;
-       /* skip comments */
-       if(*ccp == '#')
-       {
-               pwordexp->we_wordc = 0;
-               pwordexp->we_wordv = NULL;
-               return 0;
-       }
+/* Performs shell-style word expansion of `words` into *pwe by running
+  a shell.
+
+  The input is first screened with the regexes set up by re_init():
+  with WRDE_NOCMD anything that is not a clean "no match" on re_cmd
+  yields WRDE_CMDSUB, and anything that is not a clean match on
+  re_goodchars yields WRDE_BADCHAR.  A command line is then built from
+  an optional "set -u; " (WRDE_UNDEF), an optional IFS re-export, a
+  perl one-liner that prints its arguments NUL-terminated, and `words`
+  itself; it is run through popen_oe().  Each NUL-terminated chunk read
+  from the child's stdout becomes one entry of pwe->we_wordv.  Up to
+  1023 bytes of the child's stderr are captured, and echoed to stderr
+  when WRDE_SHOWERR is set.
+
+  WRDE_APPEND, WRDE_DOOFFS and WRDE_REUSE control how the existing
+  contents of *pwe are treated before the new words are stored.
+
+  Returns 0 on success, WRDE_NOSPACE when an allocation or popen_oe()
+  fails, WRDE_BADVAL when the child failed and its stderr mentions
+  " unbound variable", and WRDE_SYNTAX for any other child failure. */
+int wordexp(const char *__restrict__ words,
+  wordexp_t *__restrict__ pwe, int flags) {
+    /* cbuf_l's initial value needs to be big enough for 'cmd' plus
+      about 20 chars */
+    size_t cbuf_l = 1024;
+    char *cbuf = NULL;
+    /* Put a NUL byte between each word, and at the end */
+    char *cmd = "/usr/bin/perl -e 'print join(chr(0), @ARGV), chr(0)' -- ";
+    size_t wordv_l = 0, wordv_i = 0;
+    int rc;
+    wordexp_t save;
 
-/* If every other character was a space ... */
-#define MAXNARGS(str) ((strlen(str) +1)/2 +1)
-       argv = (char **)calloc(MAXNARGS(ccp), sizeof(char *));
-       if(argv == NULL)
-               return WRDE_NOSPACE;
+    /* Some errors require us to leave pwe unchanged, so we save it here */
+    save = *pwe;
+    pthread_once(&re_init_c, re_init);
 
-       buf = ccp;
+    if (flags & WRDE_NOCMD) {
+       /* This attempts to match any backticks or $(...)'s, but there may
+         be other ways to do subshell expansion that the standard doesn't
+         cover, but I don't know of any -- failures here are a potential
+         security risk */
+       rc = regexec(&re_cmd, words, 0, NULL, 0);
+       if (rc != REG_NOMATCH) {
+           /* Technically ==0 is WRDE_CMDSUB, and != REG_NOMATCH is
+             "some internal error", but failing to catch those here
+             could allow a subshell */
+           return WRDE_CMDSUB;
+       }
+    }
+    rc = regexec(&re_goodchars, words, 0, NULL, 0);
+    if (rc != 0) {
+       /* Technically ==REG_NOMATCH is WRDE_BADCHAR, and != is
+         "some internal error", but again failure to notice the
+         internal error could allow unexpected shell commands
+         (allowing an unescaped ;), or file clobbering (unescaped
+         >) */
+       return WRDE_BADCHAR;
+    }
 
-        argbuf = malloc(strlen(words)+1); /* where each arg is built */
-        if (argbuf == NULL)
-        {
-            free(argv);
-            return WRDE_NOSPACE;
-        }
+    if (flags & WRDE_APPEND) {
+       /* continue after the words (and offset slots) already present */
+       wordv_i = wordv_l = pwe->we_wordc;
+       if (flags & WRDE_DOOFFS) {
+           wordv_l = wordv_i += pwe->we_offs;
+       }
+    } else {
+       if (flags & WRDE_REUSE) {
+           wordfree(pwe);
+       }
+       pwe->we_wordc = 0;
+       pwe->we_wordv = NULL;
 
-        sp = buf;
-        dp = argbuf;
-        argc = 0;
-       while(state != DONE)
-       {
-            int class;
+       if (flags & WRDE_DOOFFS) {
+           /* reserve we_offs leading slots and fill them with NULL */
+           size_t wend = wordv_i + pwe->we_offs;
+           word_alloc(wend, pwe, &wordv_l);
+           if (!pwe->we_wordv) {
+               return WRDE_NOSPACE;
+           }
+           bzero(pwe->we_wordv + wordv_i, pwe->we_offs * sizeof(char *));
+           wordv_i = wend;
+       }
+    }
 
-            if (*sp == 0)
-                class = EOS;
-            else if (isspace(*sp))
-                class = SPACE;
-            else if (*sp == '"')
-                class = QUOTE;
-            else
-                class = OTHER;
-            switch (state) {
-                case ARGSTART:
-                    switch(class) {
-                        case EOS:
-                            state = DONE;
-                            break;
-                        case SPACE:
-                            sp++;
-                            break;
-                        case QUOTE:
-                            sp++;
-                            state = IN_QUOTE;
-                            break;
-                        case OTHER:
-                            *dp++ = *sp++;
-                            state = NOT_IN_QUOTE;
-                            break;
-                    }
-                    break;
-                case IN_QUOTE:
-                    switch(class) {
-                        case EOS: /* unmatched quote */
-                            state = DONE;
-                            status = WRDE_SYNTAX;
-                            break;
-                        case QUOTE:
-                            sp++;
-                            state = NOT_IN_QUOTE;
-                            break;
-                        case SPACE:
-                        case OTHER:
-                            *dp++ = *sp++;
-                            break;
-                    }
-                    break;
-                case NOT_IN_QUOTE:
-                    switch(class) {
-                        case EOS:
-                            *dp = 0;
-                            dp = argbuf;
-                            argv[argc++] = strdup(argbuf);
-                            state = DONE;
-                            break;
-                        case SPACE:
-                            *dp = 0;
-                            dp = argbuf;
-                            argv[argc++] = strdup(argbuf);
-                            sp++;
-                            state = ARGSTART;
-                            break;
-                        case QUOTE:
-                            sp++;
-                            state = IN_QUOTE;
-                            break;
-                        case OTHER:
-                            *dp++ = *sp++;
-                            break;
-                    }
-                    break;
-            }
+    /* Build the command line, growing cbuf until it fits.  strlcat
+      returns the length it tried to create, and the result is truncated
+      whenever that is >= the buffer size, so an exact-size result must
+      also be retried */
+    size_t need = 0;
+    while(!cbuf || need >= cbuf_l) {
+       if (need >= cbuf_l) {
+           cbuf_l = malloc_good_size(need +1);
+       }
+       cbuf = reallocf(cbuf, cbuf_l);
+       if (cbuf == NULL) {
+           wordfree(pwe);
+           return WRDE_NOSPACE;
+       }
+       cbuf[0] = '\0';
+       if (flags & WRDE_UNDEF) {
+           strlcat(cbuf, "set -u; ", cbuf_l);
         }
-       argv[argc] = NULL;
+       /* This kludge is needed because /bin/sh seems to set IFS to the
+         default even if you have set it;  We also can't just ignore it
+         because it is hard/unpleasant to code around or even a potential
+         security problem because the test suite explicitly checks
+         to make sure setting IFS "works" */
+       if (getenv("IFS")) {
+           setenv("_IFS", getenv("IFS"), 1);
+           strlcat(cbuf, "export IFS=${_IFS}; ", cbuf_l);
+       }
+       strlcat(cbuf, cmd, cbuf_l);
+       need = strlcat(cbuf, words, cbuf_l);
+    }
 
-       pwordexp->we_wordc = argc;
-       pwordexp->we_wordv = argv;
-       
-        free(argbuf);
-        
-       return status;
-}
+    FILE *out, *err;
+    pid_t pid = popen_oe(cbuf, &out, &err);
+    /* popen_oe() has handed the command to the child (or failed), so
+      the command buffer is no longer needed */
+    free(cbuf);
+    cbuf = NULL;
+    if (pid == 0) {
+       wordfree(pwe);
+       return WRDE_NOSPACE;
+    }
+    
+    /* word collects the bytes of the word currently being read */
+    char *word = NULL;
+    size_t word_l = 0;
+    size_t word_i = 0;
+    int ch;
 
+    while(EOF != (ch = fgetc(out))) {
+       if (word_l <= word_i) {
+           word_l = malloc_good_size(word_l * 2 + 1);
+           word = reallocf(word, word_l);
+           if (!word) {
+               fclose(err);
+               fclose(out);
+               wordfree(pwe);
+               /* reap the child so it is not left behind as a zombie */
+               int status;
+               wait4(pid, &status, 0, NULL);
+               return WRDE_NOSPACE;
+           }
+       }
+       word[word_i++] = ch;
 
-void
-wordfree(wordexp_t *pwordexp)
-{
-       if(pwordexp == NULL || pwordexp->we_wordv == NULL)
-               return;
-       if(*pwordexp->we_wordv)
-               free(*pwordexp->we_wordv);
-       free(pwordexp->we_wordv);
-}
+       if (ch == '\0') {
+           /* a complete word: store a copy and start the next one */
+           word_alloc(wordv_i + 1, pwe, &wordv_l);
+           char *tmp = strdup(word);
+           if (pwe->we_wordv == NULL || tmp == NULL) {
+               fclose(err);
+               fclose(out);
+               wordfree(pwe);
+               free(word);
+               free(tmp);
+               int status;
+               wait4(pid, &status, 0, NULL);
+               return WRDE_NOSPACE;
+           }
+           pwe->we_wordv[wordv_i++] = tmp;
+           pwe->we_wordc++;
+           word_i = 0;
+       }
+    }
 
+    /* the child's output is expected to end right after a NUL */
+    assert(word_i == 0);
+    free(word);
 
-#if TEST
+    char err_buf[1024];
+    /* fread returns at most sizeof(err_buf) -1, so the index is in range */
+    size_t err_sz = fread(err_buf, 1, sizeof(err_buf) -1, err);
+    err_buf[err_sz] = '\0';
+    if (flags & WRDE_SHOWERR) {
+       fputs(err_buf, stderr);
+    }
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+    /* keep the wait4() result in got_pid: both the EINTR retry and the
+      failure test below look at it */
+    pid_t got_pid = 0;
+    int status;
+    do {
+       got_pid = wait4(pid, &status, 0, NULL);
+    } while(got_pid == -1 && errno == EINTR);
 
-int
-main(int argc, char *argv[])
-{
-       char strbuf[1024];
-       wordexp_t wrdexp;
-       int status;
-       char **cpp;
+    fclose(out);
+    fclose(err);
 
-       while(fgets(strbuf, sizeof(strbuf), stdin) != NULL)
-       {
-               {
-                       char *cp = strrchr(strbuf,'\n');
-                       if(cp)
-                               *cp = 0;
-               }
-               fprintf(stdout, "\t%s\n", strbuf);
-               status = wordexp(strbuf, &wrdexp, WRDE_SHOWERR);
-               if(status)
-               {
-                       fprintf(stderr, "wordexp: %s\n", s_wrde_err(status));
-                       continue;
-               }
-               /* else */
-               fprintf(stdout, "\t%d:\n", wrdexp.we_wordc);
-               for(cpp = wrdexp.we_wordv;
-                       cpp < &wrdexp.we_wordv[wrdexp.we_wordc]; cpp++)
-               {
-                       fprintf(stdout, "\t\t%s\n", *cpp);
-               }
-               wordfree(&wrdexp);
-               
+    /* the exit status isn't set for some command syntax errors */
+    if (regexec(&re_subcmd_syntax_err_kludge, err_buf, 0, NULL, 0) == 0
+      || got_pid == -1 || (WIFEXITED(status) && WEXITSTATUS(status))) {
+       if (!(flags & (WRDE_APPEND|WRDE_REUSE))) {
+           /* Restore pwe if possible, can't really do it in the append
+             case, and isn't easy in the reuse case.  Release what this
+             call allocated first: every slot below wordv_i is either a
+             zeroed offset slot or a strdup'd word */
+           if (pwe->we_wordv != NULL) {
+               size_t k;
+               for(k = 0; k < wordv_i; k++) {
+                   free(pwe->we_wordv[k]);
+               }
+               free(pwe->we_wordv);
+           }
+           *pwe = save;
         }
-       exit(EXIT_SUCCESS);
+       if (strstr(err_buf, " unbound variable")) {
+           return WRDE_BADVAL;
+       }
+       return WRDE_SYNTAX;
+    }
+
+    /* make room for, and store, the terminating NULL pointer */
+    if (!word_alloc(wordv_i + 1, pwe, &wordv_l)) {
+       return WRDE_NOSPACE;
+    }
+    pwe->we_wordv[wordv_i] = NULL;
+
+    return 0;
 }
 
-#endif /* TEST */
+/* Releases the word vector held by *pwe: frees each of the first
+  we_wordc + we_offs entries of we_wordv, then the vector itself, and
+  sets we_wordv to NULL.  Does nothing when pwe or pwe->we_wordv is
+  NULL, so it is safe to call twice.  we_wordc is left as it was.
+  NOTE(review): we_offs is read unconditionally, so this presumably
+  expects it to be 0 when WRDE_DOOFFS was not used -- confirm with
+  callers. */
+void wordfree(wordexp_t *pwe) {
+    if (pwe == NULL || pwe->we_wordv == NULL) {
+       return;
+    }
+
+    /* count in size_t, the type of we_wordc and we_offs, so that a
+      large vector is not truncated to int */
+    size_t i, e = pwe->we_wordc + pwe->we_offs;
+    for(i = 0; i < e; i++) {
+       free(pwe->we_wordv[i]);
+    }
+    free(pwe->we_wordv);
+    pwe->we_wordv = NULL;
+}