]> git.saurik.com Git - apple/shell_cmds.git/blob - sh/expand.c
193d65177e74c81fb64fa7f5d04a00ed9bbeb7eb
[apple/shell_cmds.git] / sh / expand.c
1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1997-2005
5 * Herbert Xu <herbert@gondor.apana.org.au>. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Kenneth Almquist.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #ifndef lint
36 #if 0
37 static char sccsid[] = "@(#)expand.c 8.5 (Berkeley) 5/15/95";
38 #endif
39 #endif /* not lint */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 #include <sys/types.h>
44 #include <sys/time.h>
45 #include <sys/stat.h>
46 #include <dirent.h>
47 #include <errno.h>
48 #include <inttypes.h>
49 #include <limits.h>
50 #include <pwd.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <wchar.h>
56 #include <wctype.h>
57
58 /*
59 * Routines to expand arguments to commands. We have to deal with
60 * backquotes, shell variables, and file metacharacters.
61 */
62
63 #include "shell.h"
64 #include "main.h"
65 #include "nodes.h"
66 #include "eval.h"
67 #include "expand.h"
68 #include "syntax.h"
69 #include "parser.h"
70 #include "jobs.h"
71 #include "options.h"
72 #include "var.h"
73 #include "input.h"
74 #include "output.h"
75 #include "memalloc.h"
76 #include "error.h"
77 #include "mystring.h"
78 #include "arith.h"
79 #include "show.h"
80 #include "builtins.h"
81
/*
 * One span of the expanded string that has to be scanned for field
 * separators.  Spans form a singly linked list; the offsets are measured
 * from the start of the string being built and endoff is exclusive.
 */

struct ifsregion {
	struct ifsregion *next;	/* following span, NULL at the tail */
	int begoff;		/* offset where the span starts */
	int endoff;		/* offset just past the span */
	int inquotes;		/* nonzero: only nul bytes split here */
};
93
94
95 static char *expdest; /* output of current string */
96 static struct nodelist *argbackq; /* list of back quote expressions */
97 static struct ifsregion ifsfirst; /* first struct in list of ifs regions */
98 static struct ifsregion *ifslastp; /* last struct in list */
99 static struct arglist exparg; /* holds expanded arg list */
100
101 static char *argstr(char *, int);
102 static char *exptilde(char *, int);
103 static char *expari(char *);
104 static void expbackq(union node *, int, int);
105 static int subevalvar(char *, char *, int, int, int, int, int);
106 static char *evalvar(char *, int);
107 static int varisset(const char *, int);
108 static void strtodest(const char *, int, int, int);
109 static void varvalue(const char *, int, int, int);
110 static void recordregion(int, int, int);
111 static void removerecordregions(int);
112 static void ifsbreakup(char *, struct arglist *);
113 static void expandmeta(struct strlist *);
114 static void expmeta(char *, char *);
115 static void addfname(char *);
116 static struct strlist *expsort(struct strlist *);
117 static struct strlist *msort(struct strlist *, int);
118 static int patmatch(const char *, const char *, int);
119 static char *cvtnum(int, char *);
120 static int collate_range_cmp(wchar_t, wchar_t);
121
/*
 * Compare two wide characters according to the collation order of the
 * current locale.  The result has the same sign convention as wcscoll().
 */
static int
collate_range_cmp(wchar_t c1, wchar_t c2)
{
	/* wcscoll() takes strings, so wrap each character in one. */
	wchar_t lhs[2] = { c1, L'\0' };
	wchar_t rhs[2] = { c2, L'\0' };

	return (wcscoll(lhs, rhs));
}
131
132 static char *
133 stputs_quotes(const char *data, const char *syntax, char *p)
134 {
135 while (*data) {
136 CHECKSTRSPACE(2, p);
137 if (syntax[(int)*data] == CCTL)
138 USTPUTC(CTLESC, p);
139 USTPUTC(*data++, p);
140 }
141 return (p);
142 }
143 #define STPUTS_QUOTES(data, syntax, p) p = stputs_quotes((data), syntax, p)
144
145 /*
146 * Perform expansions on an argument, placing the resulting list of arguments
147 * in arglist. Parameter expansion, command substitution and arithmetic
148 * expansion are always performed; additional expansions can be requested
149 * via flag (EXP_*).
150 * The result is left in the stack string.
151 * When arglist is NULL, perform here document expansion.
152 *
153 * Caution: this function uses global state and is not reentrant.
154 * However, a new invocation after an interrupted invocation is safe
155 * and will reset the global state for the new call.
156 */
157 void
158 expandarg(union node *arg, struct arglist *arglist, int flag)
159 {
160 struct strlist *sp;
161 char *p;
162
163 argbackq = arg->narg.backquote;
164 STARTSTACKSTR(expdest);
165 ifsfirst.next = NULL;
166 ifslastp = NULL;
167 argstr(arg->narg.text, flag);
168 if (arglist == NULL) {
169 STACKSTRNUL(expdest);
170 return; /* here document expanded */
171 }
172 STPUTC('\0', expdest);
173 p = grabstackstr(expdest);
174 exparg.lastp = &exparg.list;
175 if (flag & EXP_FULL) {
176 ifsbreakup(p, &exparg);
177 *exparg.lastp = NULL;
178 exparg.lastp = &exparg.list;
179 expandmeta(exparg.list);
180 } else {
181 sp = (struct strlist *)stalloc(sizeof (struct strlist));
182 sp->text = p;
183 *exparg.lastp = sp;
184 exparg.lastp = &sp->next;
185 }
186 while (ifsfirst.next != NULL) {
187 struct ifsregion *ifsp;
188 INTOFF;
189 ifsp = ifsfirst.next->next;
190 ckfree(ifsfirst.next);
191 ifsfirst.next = ifsp;
192 INTON;
193 }
194 *exparg.lastp = NULL;
195 if (exparg.list) {
196 *arglist->lastp = exparg.list;
197 arglist->lastp = exparg.lastp;
198 }
199 }
200
201
202
203 /*
204 * Perform parameter expansion, command substitution and arithmetic
205 * expansion, and tilde expansion if requested via EXP_TILDE/EXP_VARTILDE.
206 * Processing ends at a CTLENDVAR or CTLENDARI character as well as '\0'.
207 * This is used to expand word in ${var+word} etc.
208 * If EXP_FULL or EXP_CASE are set, keep and/or generate CTLESC
209 * characters to allow for further processing.
210 * If EXP_FULL is set, also preserve CTLQUOTEMARK characters.
211 */
212 static char *
213 argstr(char *p, int flag)
214 {
215 char c;
216 int quotes = flag & (EXP_FULL | EXP_CASE); /* do CTLESC */
217 int firsteq = 1;
218 int split_lit;
219 int lit_quoted;
220
221 split_lit = flag & EXP_SPLIT_LIT;
222 lit_quoted = flag & EXP_LIT_QUOTED;
223 flag &= ~(EXP_SPLIT_LIT | EXP_LIT_QUOTED);
224 if (*p == '~' && (flag & (EXP_TILDE | EXP_VARTILDE)))
225 p = exptilde(p, flag);
226 for (;;) {
227 CHECKSTRSPACE(2, expdest);
228 switch (c = *p++) {
229 case '\0':
230 return (p - 1);
231 case CTLENDVAR:
232 case CTLENDARI:
233 return (p);
234 case CTLQUOTEMARK:
235 lit_quoted = 1;
236 /* "$@" syntax adherence hack */
237 if (p[0] == CTLVAR && p[2] == '@' && p[3] == '=')
238 break;
239 if ((flag & EXP_FULL) != 0)
240 USTPUTC(c, expdest);
241 break;
242 case CTLQUOTEEND:
243 lit_quoted = 0;
244 break;
245 case CTLESC:
246 if (quotes)
247 USTPUTC(c, expdest);
248 c = *p++;
249 USTPUTC(c, expdest);
250 if (split_lit && !lit_quoted)
251 recordregion(expdest - stackblock() -
252 (quotes ? 2 : 1),
253 expdest - stackblock(), 0);
254 break;
255 case CTLVAR:
256 p = evalvar(p, flag);
257 break;
258 case CTLBACKQ:
259 case CTLBACKQ|CTLQUOTE:
260 expbackq(argbackq->n, c & CTLQUOTE, flag);
261 argbackq = argbackq->next;
262 break;
263 case CTLARI:
264 p = expari(p);
265 break;
266 case ':':
267 case '=':
268 /*
269 * sort of a hack - expand tildes in variable
270 * assignments (after the first '=' and after ':'s).
271 */
272 USTPUTC(c, expdest);
273 if (split_lit && !lit_quoted)
274 recordregion(expdest - stackblock() - 1,
275 expdest - stackblock(), 0);
276 if (flag & EXP_VARTILDE && *p == '~' &&
277 (c != '=' || firsteq)) {
278 if (c == '=')
279 firsteq = 0;
280 p = exptilde(p, flag);
281 }
282 break;
283 default:
284 USTPUTC(c, expdest);
285 if (split_lit && !lit_quoted)
286 recordregion(expdest - stackblock() - 1,
287 expdest - stackblock(), 0);
288 }
289 }
290 }
291
292 /*
293 * Perform tilde expansion, placing the result in the stack string and
294 * returning the next position in the input string to process.
295 */
296 static char *
297 exptilde(char *p, int flag)
298 {
299 char c, *startp = p;
300 struct passwd *pw;
301 char *home;
302
303 for (;;) {
304 c = *p;
305 switch(c) {
306 case CTLESC: /* This means CTL* are always considered quoted. */
307 case CTLVAR:
308 case CTLBACKQ:
309 case CTLBACKQ | CTLQUOTE:
310 case CTLARI:
311 case CTLENDARI:
312 case CTLQUOTEMARK:
313 return (startp);
314 case ':':
315 if ((flag & EXP_VARTILDE) == 0)
316 break;
317 /* FALLTHROUGH */
318 case '\0':
319 case '/':
320 case CTLENDVAR:
321 *p = '\0';
322 if (*(startp+1) == '\0') {
323 home = lookupvar("HOME");
324 } else {
325 pw = getpwnam(startp+1);
326 home = pw != NULL ? pw->pw_dir : NULL;
327 }
328 *p = c;
329 if (home == NULL || *home == '\0')
330 return (startp);
331 strtodest(home, flag, VSNORMAL, 1);
332 return (p);
333 }
334 p++;
335 }
336 }
337
338
339 static void
340 removerecordregions(int endoff)
341 {
342 if (ifslastp == NULL)
343 return;
344
345 if (ifsfirst.endoff > endoff) {
346 while (ifsfirst.next != NULL) {
347 struct ifsregion *ifsp;
348 INTOFF;
349 ifsp = ifsfirst.next->next;
350 ckfree(ifsfirst.next);
351 ifsfirst.next = ifsp;
352 INTON;
353 }
354 if (ifsfirst.begoff > endoff)
355 ifslastp = NULL;
356 else {
357 ifslastp = &ifsfirst;
358 ifsfirst.endoff = endoff;
359 }
360 return;
361 }
362
363 ifslastp = &ifsfirst;
364 while (ifslastp->next && ifslastp->next->begoff < endoff)
365 ifslastp=ifslastp->next;
366 while (ifslastp->next != NULL) {
367 struct ifsregion *ifsp;
368 INTOFF;
369 ifsp = ifslastp->next->next;
370 ckfree(ifslastp->next);
371 ifslastp->next = ifsp;
372 INTON;
373 }
374 if (ifslastp->endoff > endoff)
375 ifslastp->endoff = endoff;
376 }
377
378 /*
379 * Expand arithmetic expression.
380 * Note that flag is not required as digits never require CTLESC characters.
381 */
382 static char *
383 expari(char *p)
384 {
385 char *q, *start;
386 arith_t result;
387 int begoff;
388 int quoted;
389 int adj;
390
391 quoted = *p++ == '"';
392 begoff = expdest - stackblock();
393 p = argstr(p, 0);
394 removerecordregions(begoff);
395 STPUTC('\0', expdest);
396 start = stackblock() + begoff;
397
398 q = grabstackstr(expdest);
399 result = arith(start);
400 ungrabstackstr(q, expdest);
401
402 start = stackblock() + begoff;
403 adj = start - expdest;
404 STADJUST(adj, expdest);
405
406 CHECKSTRSPACE((int)(DIGITS(result) + 1), expdest);
407 fmtstr(expdest, DIGITS(result), ARITH_FORMAT_STR, result);
408 adj = strlen(expdest);
409 STADJUST(adj, expdest);
410 if (!quoted)
411 recordregion(begoff, expdest - stackblock(), 0);
412 return p;
413 }
414
415
416 /*
417 * Perform command substitution.
418 */
419 static void
420 expbackq(union node *cmd, int quoted, int flag)
421 {
422 struct backcmd in;
423 int i;
424 char buf[128];
425 char *p;
426 char *dest = expdest;
427 struct ifsregion saveifs, *savelastp;
428 struct nodelist *saveargbackq;
429 char lastc;
430 int startloc = dest - stackblock();
431 char const *syntax = quoted? DQSYNTAX : BASESYNTAX;
432 int quotes = flag & (EXP_FULL | EXP_CASE);
433 size_t nnl;
434
435 INTOFF;
436 saveifs = ifsfirst;
437 savelastp = ifslastp;
438 saveargbackq = argbackq;
439 p = grabstackstr(dest);
440 evalbackcmd(cmd, &in);
441 ungrabstackstr(p, dest);
442 ifsfirst = saveifs;
443 ifslastp = savelastp;
444 argbackq = saveargbackq;
445
446 p = in.buf;
447 lastc = '\0';
448 nnl = 0;
449 /* Don't copy trailing newlines */
450 for (;;) {
451 if (--in.nleft < 0) {
452 if (in.fd < 0)
453 break;
454 while ((i = read(in.fd, buf, sizeof buf)) < 0 && errno == EINTR);
455 TRACE(("expbackq: read returns %d\n", i));
456 if (i <= 0)
457 break;
458 p = buf;
459 in.nleft = i - 1;
460 }
461 lastc = *p++;
462 if (lastc != '\0') {
463 if (lastc == '\n') {
464 nnl++;
465 } else {
466 CHECKSTRSPACE(nnl + 2, dest);
467 while (nnl > 0) {
468 nnl--;
469 USTPUTC('\n', dest);
470 }
471 if (quotes && syntax[(int)lastc] == CCTL)
472 USTPUTC(CTLESC, dest);
473 USTPUTC(lastc, dest);
474 }
475 }
476 }
477
478 if (in.fd >= 0)
479 close(in.fd);
480 if (in.buf)
481 ckfree(in.buf);
482 if (in.jp)
483 exitstatus = waitforjob(in.jp, (int *)NULL);
484 if (quoted == 0)
485 recordregion(startloc, dest - stackblock(), 0);
486 TRACE(("expbackq: size=%td: \"%.*s\"\n",
487 ((dest - stackblock()) - startloc),
488 (int)((dest - stackblock()) - startloc),
489 stackblock() + startloc));
490 expdest = dest;
491 INTON;
492 }
493
494
495
496 static void
497 recordleft(const char *str, const char *loc, char *startp)
498 {
499 int amount;
500
501 amount = ((str - 1) - (loc - startp)) - expdest;
502 STADJUST(amount, expdest);
503 while (loc != str - 1)
504 *startp++ = *loc++;
505 }
506
507 static int
508 subevalvar(char *p, char *str, int strloc, int subtype, int startloc,
509 int varflags, int quotes)
510 {
511 char *startp;
512 char *loc = NULL;
513 char *q;
514 int c = 0;
515 struct nodelist *saveargbackq = argbackq;
516 int amount;
517
518 argstr(p, (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX ||
519 subtype == VSTRIMRIGHT || subtype == VSTRIMRIGHTMAX ?
520 EXP_CASE : 0) | EXP_TILDE);
521 STACKSTRNUL(expdest);
522 argbackq = saveargbackq;
523 startp = stackblock() + startloc;
524 if (str == NULL)
525 str = stackblock() + strloc;
526
527 switch (subtype) {
528 case VSASSIGN:
529 setvar(str, startp, 0);
530 amount = startp - expdest;
531 STADJUST(amount, expdest);
532 varflags &= ~VSNUL;
533 return 1;
534
535 case VSQUESTION:
536 if (*p != CTLENDVAR) {
537 outfmt(out2, "%s\n", startp);
538 error((char *)NULL);
539 }
540 error("%.*s: parameter %snot set", (int)(p - str - 1),
541 str, (varflags & VSNUL) ? "null or " : "");
542 return 0;
543
544 case VSTRIMLEFT:
545 for (loc = startp; loc < str; loc++) {
546 c = *loc;
547 *loc = '\0';
548 if (patmatch(str, startp, quotes)) {
549 *loc = c;
550 recordleft(str, loc, startp);
551 return 1;
552 }
553 *loc = c;
554 if (quotes && *loc == CTLESC)
555 loc++;
556 }
557 return 0;
558
559 case VSTRIMLEFTMAX:
560 for (loc = str - 1; loc >= startp;) {
561 c = *loc;
562 *loc = '\0';
563 if (patmatch(str, startp, quotes)) {
564 *loc = c;
565 recordleft(str, loc, startp);
566 return 1;
567 }
568 *loc = c;
569 loc--;
570 if (quotes && loc > startp && *(loc - 1) == CTLESC) {
571 for (q = startp; q < loc; q++)
572 if (*q == CTLESC)
573 q++;
574 if (q > loc)
575 loc--;
576 }
577 }
578 return 0;
579
580 case VSTRIMRIGHT:
581 for (loc = str - 1; loc >= startp;) {
582 if (patmatch(str, loc, quotes)) {
583 amount = loc - expdest;
584 STADJUST(amount, expdest);
585 return 1;
586 }
587 loc--;
588 if (quotes && loc > startp && *(loc - 1) == CTLESC) {
589 for (q = startp; q < loc; q++)
590 if (*q == CTLESC)
591 q++;
592 if (q > loc)
593 loc--;
594 }
595 }
596 return 0;
597
598 case VSTRIMRIGHTMAX:
599 for (loc = startp; loc < str - 1; loc++) {
600 if (patmatch(str, loc, quotes)) {
601 amount = loc - expdest;
602 STADJUST(amount, expdest);
603 return 1;
604 }
605 if (quotes && *loc == CTLESC)
606 loc++;
607 }
608 return 0;
609
610
611 default:
612 abort();
613 }
614 }
615
616
617 /*
618 * Expand a variable, and return a pointer to the next character in the
619 * input string.
620 */
621
622 static char *
623 evalvar(char *p, int flag)
624 {
625 int subtype;
626 int varflags;
627 char *var;
628 const char *val;
629 int patloc;
630 int c;
631 int set;
632 int special;
633 int startloc;
634 int varlen;
635 int varlenb;
636 int easy;
637 int quotes = flag & (EXP_FULL | EXP_CASE);
638 int record = 0;
639
640 varflags = (unsigned char)*p++;
641 subtype = varflags & VSTYPE;
642 var = p;
643 special = 0;
644 if (! is_name(*p))
645 special = 1;
646 p = strchr(p, '=') + 1;
647 again: /* jump here after setting a variable with ${var=text} */
648 if (varflags & VSLINENO) {
649 set = 1;
650 special = 1;
651 val = NULL;
652 } else if (special) {
653 set = varisset(var, varflags & VSNUL);
654 val = NULL;
655 } else {
656 val = bltinlookup(var, 1);
657 if (val == NULL || ((varflags & VSNUL) && val[0] == '\0')) {
658 val = NULL;
659 set = 0;
660 } else
661 set = 1;
662 }
663 varlen = 0;
664 startloc = expdest - stackblock();
665 if (!set && uflag && *var != '@' && *var != '*') {
666 switch (subtype) {
667 case VSNORMAL:
668 case VSTRIMLEFT:
669 case VSTRIMLEFTMAX:
670 case VSTRIMRIGHT:
671 case VSTRIMRIGHTMAX:
672 case VSLENGTH:
673 error("%.*s: parameter not set", (int)(p - var - 1),
674 var);
675 }
676 }
677 if (set && subtype != VSPLUS) {
678 /* insert the value of the variable */
679 if (special) {
680 if (varflags & VSLINENO)
681 STPUTBIN(var, p - var - 1, expdest);
682 else
683 varvalue(var, varflags & VSQUOTE, subtype, flag);
684 if (subtype == VSLENGTH) {
685 varlenb = expdest - stackblock() - startloc;
686 varlen = varlenb;
687 if (localeisutf8) {
688 val = stackblock() + startloc;
689 for (;val != expdest; val++)
690 if ((*val & 0xC0) == 0x80)
691 varlen--;
692 }
693 STADJUST(-varlenb, expdest);
694 }
695 } else {
696 if (subtype == VSLENGTH) {
697 for (;*val; val++)
698 if (!localeisutf8 ||
699 (*val & 0xC0) != 0x80)
700 varlen++;
701 }
702 else
703 strtodest(val, flag, subtype,
704 varflags & VSQUOTE);
705 }
706 }
707
708 if (subtype == VSPLUS)
709 set = ! set;
710
711 easy = ((varflags & VSQUOTE) == 0 ||
712 (*var == '@' && shellparam.nparam != 1));
713
714
715 switch (subtype) {
716 case VSLENGTH:
717 expdest = cvtnum(varlen, expdest);
718 record = 1;
719 break;
720
721 case VSNORMAL:
722 record = easy;
723 break;
724
725 case VSPLUS:
726 case VSMINUS:
727 if (!set) {
728 argstr(p, flag | (flag & EXP_FULL ? EXP_SPLIT_LIT : 0) |
729 (varflags & VSQUOTE ? EXP_LIT_QUOTED : 0));
730 break;
731 }
732 record = easy;
733 break;
734
735 case VSTRIMLEFT:
736 case VSTRIMLEFTMAX:
737 case VSTRIMRIGHT:
738 case VSTRIMRIGHTMAX:
739 if (!set)
740 break;
741 /*
742 * Terminate the string and start recording the pattern
743 * right after it
744 */
745 STPUTC('\0', expdest);
746 patloc = expdest - stackblock();
747 if (subevalvar(p, NULL, patloc, subtype,
748 startloc, varflags, quotes) == 0) {
749 int amount = (expdest - stackblock() - patloc) + 1;
750 STADJUST(-amount, expdest);
751 }
752 /* Remove any recorded regions beyond start of variable */
753 removerecordregions(startloc);
754 record = 1;
755 break;
756
757 case VSASSIGN:
758 case VSQUESTION:
759 if (!set) {
760 if (subevalvar(p, var, 0, subtype, startloc, varflags,
761 quotes)) {
762 varflags &= ~VSNUL;
763 /*
764 * Remove any recorded regions beyond
765 * start of variable
766 */
767 removerecordregions(startloc);
768 goto again;
769 }
770 break;
771 }
772 record = easy;
773 break;
774
775 case VSERROR:
776 c = p - var - 1;
777 error("${%.*s%s}: Bad substitution", c, var,
778 (c > 0 && *p != CTLENDVAR) ? "..." : "");
779
780 default:
781 abort();
782 }
783
784 if (record)
785 recordregion(startloc, expdest - stackblock(),
786 varflags & VSQUOTE || (ifsset() && ifsval()[0] == '\0' &&
787 (*var == '@' || *var == '*')));
788
789 if (subtype != VSNORMAL) { /* skip to end of alternative */
790 int nesting = 1;
791 for (;;) {
792 if ((c = *p++) == CTLESC)
793 p++;
794 else if (c == CTLBACKQ || c == (CTLBACKQ|CTLQUOTE)) {
795 if (set)
796 argbackq = argbackq->next;
797 } else if (c == CTLVAR) {
798 if ((*p++ & VSTYPE) != VSNORMAL)
799 nesting++;
800 } else if (c == CTLENDVAR) {
801 if (--nesting == 0)
802 break;
803 }
804 }
805 }
806 return p;
807 }
808
809
810
811 /*
812 * Test whether a specialized variable is set.
813 */
814
815 static int
816 varisset(const char *name, int nulok)
817 {
818
819 if (*name == '!')
820 return backgndpidset();
821 else if (*name == '@' || *name == '*') {
822 if (*shellparam.p == NULL)
823 return 0;
824
825 if (nulok) {
826 char **av;
827
828 for (av = shellparam.p; *av; av++)
829 if (**av != '\0')
830 return 1;
831 return 0;
832 }
833 } else if (is_digit(*name)) {
834 char *ap;
835 long num;
836
837 errno = 0;
838 num = strtol(name, NULL, 10);
839 if (errno != 0 || num > shellparam.nparam)
840 return 0;
841
842 if (num == 0)
843 ap = arg0;
844 else
845 ap = shellparam.p[num - 1];
846
847 if (nulok && (ap == NULL || *ap == '\0'))
848 return 0;
849 }
850 return 1;
851 }
852
853 static void
854 strtodest(const char *p, int flag, int subtype, int quoted)
855 {
856 if (flag & (EXP_FULL | EXP_CASE) && subtype != VSLENGTH)
857 STPUTS_QUOTES(p, quoted ? DQSYNTAX : BASESYNTAX, expdest);
858 else
859 STPUTS(p, expdest);
860 }
861
862 /*
863 * Add the value of a specialized variable to the stack string.
864 */
865
866 static void
867 varvalue(const char *name, int quoted, int subtype, int flag)
868 {
869 int num;
870 char *p;
871 int i;
872 char sep[2];
873 char **ap;
874
875 switch (*name) {
876 case '$':
877 num = rootpid;
878 break;
879 case '?':
880 num = oexitstatus;
881 break;
882 case '#':
883 num = shellparam.nparam;
884 break;
885 case '!':
886 num = backgndpidval();
887 break;
888 case '-':
889 for (i = 0 ; i < NOPTS ; i++) {
890 if (optlist[i].val)
891 STPUTC(optlist[i].letter, expdest);
892 }
893 return;
894 case '@':
895 if (flag & EXP_FULL && quoted) {
896 for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
897 strtodest(p, flag, subtype, quoted);
898 if (*ap)
899 STPUTC('\0', expdest);
900 }
901 return;
902 }
903 /* FALLTHROUGH */
904 case '*':
905 if (ifsset())
906 sep[0] = ifsval()[0];
907 else
908 sep[0] = ' ';
909 sep[1] = '\0';
910 for (ap = shellparam.p ; (p = *ap++) != NULL ; ) {
911 strtodest(p, flag, subtype, quoted);
912 if (!*ap)
913 break;
914 if (sep[0])
915 strtodest(sep, flag, subtype, quoted);
916 else if (flag & EXP_FULL && !quoted && **ap != '\0')
917 STPUTC('\0', expdest);
918 }
919 return;
920 default:
921 if (is_digit(*name)) {
922 num = atoi(name);
923 if (num == 0)
924 p = arg0;
925 else if (num > 0 && num <= shellparam.nparam)
926 p = shellparam.p[num - 1];
927 else
928 return;
929 strtodest(p, flag, subtype, quoted);
930 }
931 return;
932 }
933 expdest = cvtnum(num, expdest);
934 }
935
936
937
938 /*
939 * Record the fact that we have to scan this region of the
940 * string for IFS characters.
941 */
942
943 static void
944 recordregion(int start, int end, int inquotes)
945 {
946 struct ifsregion *ifsp;
947
948 INTOFF;
949 if (ifslastp == NULL) {
950 ifsp = &ifsfirst;
951 } else {
952 if (ifslastp->endoff == start
953 && ifslastp->inquotes == inquotes) {
954 /* extend previous area */
955 ifslastp->endoff = end;
956 INTON;
957 return;
958 }
959 ifsp = (struct ifsregion *)ckmalloc(sizeof (struct ifsregion));
960 ifslastp->next = ifsp;
961 }
962 ifslastp = ifsp;
963 ifslastp->next = NULL;
964 ifslastp->begoff = start;
965 ifslastp->endoff = end;
966 ifslastp->inquotes = inquotes;
967 INTON;
968 }
969
970
971
972 /*
973 * Break the argument string into pieces based upon IFS and add the
974 * strings to the argument list. The regions of the string to be
975 * searched for IFS characters have been stored by recordregion.
976 * CTLESC characters are preserved but have little effect in this pass
977 * other than escaping CTL* characters. In particular, they do not escape
978 * IFS characters: that should be done with the ifsregion mechanism.
979 * CTLQUOTEMARK characters are used to preserve empty quoted strings.
980 * This pass treats them as a regular character, making the string non-empty.
981 * Later, they are removed along with the other CTL* characters.
982 */
983 static void
984 ifsbreakup(char *string, struct arglist *arglist)
985 {
986 struct ifsregion *ifsp;
987 struct strlist *sp;
988 char *start;
989 char *p;
990 char *q;
991 const char *ifs;
992 const char *ifsspc;
993 int had_param_ch = 0;
994
995 start = string;
996
997 if (ifslastp == NULL) {
998 /* Return entire argument, IFS doesn't apply to any of it */
999 sp = (struct strlist *)stalloc(sizeof *sp);
1000 sp->text = start;
1001 *arglist->lastp = sp;
1002 arglist->lastp = &sp->next;
1003 return;
1004 }
1005
1006 ifs = ifsset() ? ifsval() : " \t\n";
1007
1008 for (ifsp = &ifsfirst; ifsp != NULL; ifsp = ifsp->next) {
1009 p = string + ifsp->begoff;
1010 while (p < string + ifsp->endoff) {
1011 q = p;
1012 if (*p == CTLESC)
1013 p++;
1014 if (ifsp->inquotes) {
1015 /* Only NULs (should be from "$@") end args */
1016 had_param_ch = 1;
1017 if (*p != 0) {
1018 p++;
1019 continue;
1020 }
1021 ifsspc = NULL;
1022 } else {
1023 if (!strchr(ifs, *p)) {
1024 had_param_ch = 1;
1025 p++;
1026 continue;
1027 }
1028 ifsspc = strchr(" \t\n", *p);
1029
1030 /* Ignore IFS whitespace at start */
1031 if (q == start && ifsspc != NULL) {
1032 p++;
1033 start = p;
1034 continue;
1035 }
1036 had_param_ch = 0;
1037 }
1038
1039 /* Save this argument... */
1040 *q = '\0';
1041 sp = (struct strlist *)stalloc(sizeof *sp);
1042 sp->text = start;
1043 *arglist->lastp = sp;
1044 arglist->lastp = &sp->next;
1045 p++;
1046
1047 if (ifsspc != NULL) {
1048 /* Ignore further trailing IFS whitespace */
1049 for (; p < string + ifsp->endoff; p++) {
1050 q = p;
1051 if (*p == CTLESC)
1052 p++;
1053 if (strchr(ifs, *p) == NULL) {
1054 p = q;
1055 break;
1056 }
1057 if (strchr(" \t\n", *p) == NULL) {
1058 p++;
1059 break;
1060 }
1061 }
1062 }
1063 start = p;
1064 }
1065 }
1066
1067 /*
1068 * Save anything left as an argument.
1069 * Traditionally we have treated 'IFS=':'; set -- x$IFS' as
1070 * generating 2 arguments, the second of which is empty.
1071 * Some recent clarification of the Posix spec say that it
1072 * should only generate one....
1073 */
1074 if (had_param_ch || *start != 0) {
1075 sp = (struct strlist *)stalloc(sizeof *sp);
1076 sp->text = start;
1077 *arglist->lastp = sp;
1078 arglist->lastp = &sp->next;
1079 }
1080 }
1081
1082
/* Buffer in which pathname generation assembles candidate file names. */
static char expdir[PATH_MAX];
/* One past the last byte of expdir. */
#define expdir_end (&expdir[sizeof(expdir)])
1085
1086 /*
1087 * Perform pathname generation and remove control characters.
1088 * At this point, the only control characters should be CTLESC and CTLQUOTEMARK.
1089 * The results are stored in the list exparg.
1090 */
1091 static void
1092 expandmeta(struct strlist *str)
1093 {
1094 char *p;
1095 struct strlist **savelastp;
1096 struct strlist *sp;
1097 char c;
1098
1099 while (str) {
1100 savelastp = exparg.lastp;
1101 if (!fflag) {
1102 p = str->text;
1103 for (; (c = *p) != '\0'; p++) {
1104 /* fast check for meta chars */
1105 if (c == '*' || c == '?' || c == '[') {
1106 INTOFF;
1107 expmeta(expdir, str->text);
1108 INTON;
1109 break;
1110 }
1111 }
1112 }
1113 if (exparg.lastp == savelastp) {
1114 /*
1115 * no matches
1116 */
1117 *exparg.lastp = str;
1118 rmescapes(str->text);
1119 exparg.lastp = &str->next;
1120 } else {
1121 *exparg.lastp = NULL;
1122 *savelastp = sp = expsort(*savelastp);
1123 while (sp->next != NULL)
1124 sp = sp->next;
1125 exparg.lastp = &sp->next;
1126 }
1127 str = str->next;
1128 }
1129 }
1130
1131
1132 /*
1133 * Do metacharacter (i.e. *, ?, [...]) expansion.
1134 */
1135
1136 static void
1137 expmeta(char *enddir, char *name)
1138 {
1139 const char *p;
1140 const char *q;
1141 const char *start;
1142 char *endname;
1143 int metaflag;
1144 struct stat statb;
1145 DIR *dirp;
1146 struct dirent *dp;
1147 int atend;
1148 int matchdot;
1149 int esc;
1150 int namlen;
1151
1152 metaflag = 0;
1153 start = name;
1154 for (p = name; esc = 0, *p; p += esc + 1) {
1155 if (*p == '*' || *p == '?')
1156 metaflag = 1;
1157 else if (*p == '[') {
1158 q = p + 1;
1159 if (*q == '!' || *q == '^')
1160 q++;
1161 for (;;) {
1162 while (*q == CTLQUOTEMARK)
1163 q++;
1164 if (*q == CTLESC)
1165 q++;
1166 if (*q == '/' || *q == '\0')
1167 break;
1168 if (*++q == ']') {
1169 metaflag = 1;
1170 break;
1171 }
1172 }
1173 } else if (*p == '\0')
1174 break;
1175 else if (*p == CTLQUOTEMARK)
1176 continue;
1177 else {
1178 if (*p == CTLESC)
1179 esc++;
1180 if (p[esc] == '/') {
1181 if (metaflag)
1182 break;
1183 start = p + esc + 1;
1184 }
1185 }
1186 }
1187 if (metaflag == 0) { /* we've reached the end of the file name */
1188 if (enddir != expdir)
1189 metaflag++;
1190 for (p = name ; ; p++) {
1191 if (*p == CTLQUOTEMARK)
1192 continue;
1193 if (*p == CTLESC)
1194 p++;
1195 *enddir++ = *p;
1196 if (*p == '\0')
1197 break;
1198 if (enddir == expdir_end)
1199 return;
1200 }
1201 if (metaflag == 0 || lstat(expdir, &statb) >= 0)
1202 addfname(expdir);
1203 return;
1204 }
1205 endname = name + (p - name);
1206 if (start != name) {
1207 p = name;
1208 while (p < start) {
1209 while (*p == CTLQUOTEMARK)
1210 p++;
1211 if (*p == CTLESC)
1212 p++;
1213 *enddir++ = *p++;
1214 if (enddir == expdir_end)
1215 return;
1216 }
1217 }
1218 if (enddir == expdir) {
1219 p = ".";
1220 } else if (enddir == expdir + 1 && *expdir == '/') {
1221 p = "/";
1222 } else {
1223 p = expdir;
1224 enddir[-1] = '\0';
1225 }
1226 if ((dirp = opendir(p)) == NULL)
1227 return;
1228 if (enddir != expdir)
1229 enddir[-1] = '/';
1230 if (*endname == 0) {
1231 atend = 1;
1232 } else {
1233 atend = 0;
1234 *endname = '\0';
1235 endname += esc + 1;
1236 }
1237 matchdot = 0;
1238 p = start;
1239 while (*p == CTLQUOTEMARK)
1240 p++;
1241 if (*p == CTLESC)
1242 p++;
1243 if (*p == '.')
1244 matchdot++;
1245 while (! int_pending() && (dp = readdir(dirp)) != NULL) {
1246 if (dp->d_name[0] == '.' && ! matchdot)
1247 continue;
1248 if (patmatch(start, dp->d_name, 0)) {
1249 namlen = dp->d_namlen;
1250 if (enddir + namlen + 1 > expdir_end)
1251 continue;
1252 memcpy(enddir, dp->d_name, namlen + 1);
1253 if (atend)
1254 addfname(expdir);
1255 else {
1256 if (dp->d_type != DT_UNKNOWN &&
1257 dp->d_type != DT_DIR &&
1258 dp->d_type != DT_LNK)
1259 continue;
1260 if (enddir + namlen + 2 > expdir_end)
1261 continue;
1262 enddir[namlen] = '/';
1263 enddir[namlen + 1] = '\0';
1264 expmeta(enddir + namlen + 1, endname);
1265 }
1266 }
1267 }
1268 closedir(dirp);
1269 if (! atend)
1270 endname[-esc - 1] = esc ? CTLESC : '/';
1271 }
1272
1273
1274 /*
1275 * Add a file name to the list.
1276 */
1277
1278 static void
1279 addfname(char *name)
1280 {
1281 char *p;
1282 struct strlist *sp;
1283
1284 p = stsavestr(name);
1285 sp = (struct strlist *)stalloc(sizeof *sp);
1286 sp->text = p;
1287 *exparg.lastp = sp;
1288 exparg.lastp = &sp->next;
1289 }
1290
1291
1292 /*
1293 * Sort the results of file name expansion. It calculates the number of
1294 * strings to sort and then calls msort (short for merge sort) to do the
1295 * work.
1296 */
1297
1298 static struct strlist *
1299 expsort(struct strlist *str)
1300 {
1301 int len;
1302 struct strlist *sp;
1303
1304 len = 0;
1305 for (sp = str ; sp ; sp = sp->next)
1306 len++;
1307 return msort(str, len);
1308 }
1309
1310
1311 static struct strlist *
1312 msort(struct strlist *list, int len)
1313 {
1314 struct strlist *p, *q = NULL;
1315 struct strlist **lpp;
1316 int half;
1317 int n;
1318
1319 if (len <= 1)
1320 return list;
1321 half = len >> 1;
1322 p = list;
1323 for (n = half ; --n >= 0 ; ) {
1324 q = p;
1325 p = p->next;
1326 }
1327 q->next = NULL; /* terminate first half of list */
1328 q = msort(list, half); /* sort first half of list */
1329 p = msort(p, len - half); /* sort second half */
1330 lpp = &list;
1331 for (;;) {
1332 if (strcmp(p->text, q->text) < 0) {
1333 *lpp = p;
1334 lpp = &p->next;
1335 if ((p = *lpp) == NULL) {
1336 *lpp = q;
1337 break;
1338 }
1339 } else {
1340 *lpp = q;
1341 lpp = &q->next;
1342 if ((q = *lpp) == NULL) {
1343 *lpp = p;
1344 break;
1345 }
1346 }
1347 }
1348 return list;
1349 }
1350
1351
1352
/*
 * Decode one multibyte character at *p using mbtowc(), looking at no
 * more than 4 bytes.
 *
 * On success the wide character is returned and *p is advanced past it.
 * At the terminating NUL, or when mbtowc() reports an invalid sequence,
 * 0 is returned and *p is left unchanged.
 */
static wchar_t
get_wc(const char **p)
{
	wchar_t wch;
	int nbytes;

	nbytes = mbtowc(&wch, *p, 4);
	if (nbytes == 0 || nbytes == -1)
		return 0;
	*p += nbytes;
	return wch;
}
1368
1369
/*
 * See if a character matches a character class, starting at the first colon
 * of "[:class:]".
 *
 * p	points at the ':' that follows the opening '['.
 * chr	is the character to test.
 * end	receives a pointer to the character after the closing ":]" when a
 *	well-formed class (non-empty name shorter than the local buffer,
 *	terminated by ":]") is recognized; otherwise it receives NULL.
 *
 * Returns 1 if chr belongs to the class and 0 otherwise.  The result is
 * normalized to 0/1 because iswctype() is only specified to return
 * "nonzero" on a match, while the caller combines the result with 0/1
 * flags.
 */
static int
match_charclass(const char *p, wchar_t chr, const char **end)
{
	char name[20];
	const char *nameend;
	size_t namelen;
	wctype_t cclass;

	*end = NULL;
	p++;				/* step over the leading ':' */
	nameend = strstr(p, ":]");
	if (nameend == NULL || nameend == p)
		return 0;
	namelen = (size_t)(nameend - p);
	if (namelen >= sizeof(name))
		return 0;
	memcpy(name, p, namelen);
	name[namelen] = '\0';
	*end = nameend + 2;
	cclass = wctype(name);
	/* An unknown class matches nothing but is valid nevertheless. */
	if (cclass == 0)
		return 0;
	return iswctype(chr, cclass) != 0;
}
1399
1400
1401 /*
1402 * Returns true if the pattern matches the string.
1403 */
1404
1405 static int
1406 patmatch(const char *pattern, const char *string, int squoted)
1407 {
1408 const char *p, *q, *end;
1409 const char *bt_p, *bt_q;
1410 char c;
1411 wchar_t wc, wc2;
1412
1413 p = pattern;
1414 q = string;
1415 bt_p = NULL;
1416 bt_q = NULL;
1417 for (;;) {
1418 switch (c = *p++) {
1419 case '\0':
1420 if (*q != '\0')
1421 goto backtrack;
1422 return 1;
1423 case CTLESC:
1424 if (squoted && *q == CTLESC)
1425 q++;
1426 if (*q++ != *p++)
1427 goto backtrack;
1428 break;
1429 case CTLQUOTEMARK:
1430 continue;
1431 case '?':
1432 if (squoted && *q == CTLESC)
1433 q++;
1434 if (*q == '\0')
1435 return 0;
1436 if (localeisutf8) {
1437 wc = get_wc(&q);
1438 /*
1439 * A '?' does not match invalid UTF-8 but a
1440 * '*' does, so backtrack.
1441 */
1442 if (wc == 0)
1443 goto backtrack;
1444 } else
1445 wc = (unsigned char)*q++;
1446 break;
1447 case '*':
1448 c = *p;
1449 while (c == CTLQUOTEMARK || c == '*')
1450 c = *++p;
1451 /*
1452 * If the pattern ends here, we know the string
1453 * matches without needing to look at the rest of it.
1454 */
1455 if (c == '\0')
1456 return 1;
1457 /*
1458 * First try the shortest match for the '*' that
1459 * could work. We can forget any earlier '*' since
1460 * there is no way having it match more characters
1461 * can help us, given that we are already here.
1462 */
1463 bt_p = p;
1464 bt_q = q;
1465 break;
1466 case '[': {
1467 const char *endp;
1468 int invert, found;
1469 wchar_t chr;
1470
1471 endp = p;
1472 if (*endp == '!' || *endp == '^')
1473 endp++;
1474 do {
1475 while (*endp == CTLQUOTEMARK)
1476 endp++;
1477 if (*endp == 0)
1478 goto dft; /* no matching ] */
1479 if (*endp == CTLESC)
1480 endp++;
1481 } while (*++endp != ']');
1482 invert = 0;
1483 if (*p == '!' || *p == '^') {
1484 invert++;
1485 p++;
1486 }
1487 found = 0;
1488 if (squoted && *q == CTLESC)
1489 q++;
1490 if (*q == '\0')
1491 return 0;
1492 if (localeisutf8) {
1493 chr = get_wc(&q);
1494 if (chr == 0)
1495 goto backtrack;
1496 } else
1497 chr = (unsigned char)*q++;
1498 c = *p++;
1499 do {
1500 if (c == CTLQUOTEMARK)
1501 continue;
1502 if (c == '[' && *p == ':') {
1503 found |= match_charclass(p, chr, &end);
1504 if (end != NULL)
1505 p = end;
1506 }
1507 if (c == CTLESC)
1508 c = *p++;
1509 if (localeisutf8 && c & 0x80) {
1510 p--;
1511 wc = get_wc(&p);
1512 if (wc == 0) /* bad utf-8 */
1513 return 0;
1514 } else
1515 wc = (unsigned char)c;
1516 if (*p == '-' && p[1] != ']') {
1517 p++;
1518 while (*p == CTLQUOTEMARK)
1519 p++;
1520 if (*p == CTLESC)
1521 p++;
1522 if (localeisutf8) {
1523 wc2 = get_wc(&p);
1524 if (wc2 == 0) /* bad utf-8 */
1525 return 0;
1526 } else
1527 wc2 = (unsigned char)*p++;
1528 if ( collate_range_cmp(chr, wc) >= 0
1529 && collate_range_cmp(chr, wc2) <= 0
1530 )
1531 found = 1;
1532 } else {
1533 if (chr == wc)
1534 found = 1;
1535 }
1536 } while ((c = *p++) != ']');
1537 if (found == invert)
1538 goto backtrack;
1539 break;
1540 }
1541 dft: default:
1542 if (squoted && *q == CTLESC)
1543 q++;
1544 if (*q == '\0')
1545 return 0;
1546 if (*q++ == c)
1547 break;
1548 backtrack:
1549 /*
1550 * If we have a mismatch (other than hitting the end
1551 * of the string), go back to the last '*' seen and
1552 * have it match one additional character.
1553 */
1554 if (bt_p == NULL)
1555 return 0;
1556 if (squoted && *bt_q == CTLESC)
1557 bt_q++;
1558 if (*bt_q == '\0')
1559 return 0;
1560 bt_q++;
1561 p = bt_p;
1562 q = bt_q;
1563 break;
1564 }
1565 }
1566 }
1567
1568
1569
1570 /*
1571 * Remove any CTLESC and CTLQUOTEMARK characters from a string.
1572 */
1573
1574 void
1575 rmescapes(char *str)
1576 {
1577 char *p, *q;
1578
1579 p = str;
1580 while (*p != CTLESC && *p != CTLQUOTEMARK && *p != CTLQUOTEEND) {
1581 if (*p++ == '\0')
1582 return;
1583 }
1584 q = p;
1585 while (*p) {
1586 if (*p == CTLQUOTEMARK || *p == CTLQUOTEEND) {
1587 p++;
1588 continue;
1589 }
1590 if (*p == CTLESC)
1591 p++;
1592 *q++ = *p++;
1593 }
1594 *q = '\0';
1595 }
1596
1597
1598
1599 /*
1600 * See if a pattern matches in a case statement.
1601 */
1602
1603 int
1604 casematch(union node *pattern, const char *val)
1605 {
1606 struct stackmark smark;
1607 int result;
1608 char *p;
1609
1610 setstackmark(&smark);
1611 argbackq = pattern->narg.backquote;
1612 STARTSTACKSTR(expdest);
1613 ifslastp = NULL;
1614 argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
1615 STPUTC('\0', expdest);
1616 p = grabstackstr(expdest);
1617 result = patmatch(p, val, 0);
1618 popstackmark(&smark);
1619 return result;
1620 }
1621
/*
 * Our own itoa().
 *
 * Formats num in decimal, with a leading '-' when it is negative, and
 * appends the text to buf with STPUTS.  Returns buf as it stands after
 * STPUTS.
 * NOTE(review): STPUTS is a macro defined elsewhere; presumably it
 * advances buf past the copied text -- confirm against its definition.
 */

static char *
cvtnum(int num, char *buf)
{
	char temp[32];
	int neg = num < 0;
	unsigned int mag;
	char *p = temp + 31;

	/*
	 * Convert the magnitude as an unsigned value.  With a negative
	 * num, "num % 10" is negative in C, which would yield characters
	 * below '0' instead of digits; the unsigned negation is also well
	 * defined for INT_MIN.
	 */
	mag = neg ? 0U - (unsigned int)num : (unsigned int)num;

	temp[31] = '\0';

	/* Emit digits right to left; always at least one digit. */
	do {
		*--p = (char)(mag % 10 + '0');
	} while ((mag /= 10) != 0);

	if (neg)
		*--p = '-';

	STPUTS(p, buf);
	return buf;
}
1645
1646 /*
1647 * Do most of the work for wordexp(3).
1648 */
1649
1650 int
1651 wordexpcmd(int argc, char **argv)
1652 {
1653 size_t len;
1654 int i;
1655
1656 out1fmt("%08x", argc - 1);
1657 for (i = 1, len = 0; i < argc; i++)
1658 len += strlen(argv[i]);
1659 out1fmt("%08x", (int)len);
1660 for (i = 1; i < argc; i++)
1661 outbin(argv[i], strlen(argv[i]) + 1, out1);
1662 return (0);
1663 }