]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exec.c
29d2bd13ff95c41bd048ddf39679c34e20146e1d
[apple/xnu.git] / bsd / kern / kern_exec.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Mach Operating System
25 * Copyright (c) 1987 Carnegie-Mellon University
26 * All rights reserved. The CMU software License Agreement specifies
27 * the terms and conditions for use and redistribution.
28 */
29
30 #include <cputypes.h>
31
32 /*-
33 * Copyright (c) 1982, 1986, 1991, 1993
34 * The Regents of the University of California. All rights reserved.
35 * (c) UNIX System Laboratories, Inc.
36 * All or some portions of this file are derived from material licensed
37 * to the University of California by American Telephone and Telegraph
38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
39 * the permission of UNIX System Laboratories, Inc.
40 *
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 * must display the following acknowledgement:
51 * This product includes software developed by the University of
52 * California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 * may be used to endorse or promote products derived from this software
55 * without specific prior written permission.
56 *
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
68 *
69 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
70 */
71 #include <machine/reg.h>
72
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/filedesc.h>
76 #include <sys/kernel.h>
77 #include <sys/proc_internal.h>
78 #include <sys/kauth.h>
79 #include <sys/user.h>
80 #include <sys/socketvar.h>
81 #include <sys/malloc.h>
82 #include <sys/namei.h>
83 #include <sys/mount_internal.h>
84 #include <sys/vnode_internal.h>
85 #include <sys/file_internal.h>
86 #include <sys/stat.h>
87 #include <sys/uio_internal.h>
88 #include <sys/acct.h>
89 #include <sys/exec.h>
90 #include <sys/kdebug.h>
91 #include <sys/signal.h>
92 #include <sys/aio_kern.h>
93 #include <sys/sysproto.h>
94 #include <sys/shm_internal.h> /* shmexec() */
95 #include <sys/ubc_internal.h> /* ubc_map() */
96
97 #include <bsm/audit_kernel.h>
98
99 #include <mach/mach_types.h>
100 #include <mach/task.h>
101 #include <mach/thread_act.h>
102 #include <mach/vm_map.h>
103 #include <mach/mach_vm.h>
104 #include <mach/vm_param.h>
105
106 #include <vm/vm_map.h>
107 #include <vm/vm_kern.h>
108 #include <vm/vm_pager.h>
109 #include <vm/vm_kern.h>
110 #include <vm/task_working_set.h>
111 #include <vm/vm_shared_memory_server.h>
112
113 /*
114 * Mach things for which prototypes are unavailable from Mach headers
115 */
116 void ipc_task_reset(
117 task_t task);
118 void ipc_thread_reset(
119 thread_t thread);
120
121 extern struct savearea *get_user_regs(thread_t);
122
123
124 #include <kern/thread.h>
125 #include <kern/task.h>
126 #include <kern/ast.h>
127 #include <kern/mach_loader.h>
128 #include <mach-o/fat.h>
129 #include <mach-o/loader.h>
130 #include <machine/vmparam.h>
131 #if KTRACE
132 #include <sys/ktrace.h>
133 #endif
134 #include <sys/imgact.h>
135
136
/*
 * SIZE_MAXPTR		Size, in bytes, of the largest user space pointer.
 * SIZE_IMG_STRSPACE	String space available for the image: NCARGS less
 *			room for two pointers.  It is defined in terms of
 *			the maximum pointer size because the real pointer
 *			size is not known until after the executable image
 *			has been parsed.
 */
#define	SIZE_MAXPTR		8	/* 64 bits */
#define	SIZE_IMG_STRSPACE	(NCARGS - (2 * SIZE_MAXPTR))
146
147 int app_profile = 0;
148
149 extern vm_map_t bsd_pageable_map;
150 extern struct fileops vnops;
151
/*
 * ROUND_PTR
 *
 * Round 'addr' up to the next 16 byte boundary and yield the result as a
 * 'type *'.
 *
 * The arithmetic is done in uintptr_t rather than unsigned so that the
 * upper bits of a 64 bit address are not discarded, and the expansion is
 * fully parenthesized so that it can be used inside larger expressions.
 */
#define ROUND_PTR(type, addr)	\
	((type *)(((uintptr_t)(addr) + (uintptr_t)(16 - 1)) \
	    & ~(uintptr_t)(16 - 1)))
155
156 struct image_params; /* Forward */
157 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
158 static int load_return_to_errno(load_return_t lrtn);
159 static int execargs_alloc(struct image_params *imgp);
160 static int execargs_free(struct image_params *imgp);
161 static int exec_check_permissions(struct image_params *imgp);
162 static int exec_extract_strings(struct image_params *imgp);
163 static int exec_handle_sugid(struct image_params *imgp);
164 static int sugid_scripts = 0;
165 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW, &sugid_scripts, 0, "");
166 static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack,
167 int customstack, struct proc *p);
168 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
169
170 /* XXX forward; should be in headers, but can't be for one reason or another */
171 extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype);
172 extern void vfork_return(thread_t th_act,
173 struct proc * p,
174 struct proc *p2,
175 register_t *retval);
176
177
178 extern char classichandler[32];
179 extern uint32_t classichandler_fsid;
180 extern long classichandler_fileid;
181
182
183 /*
184 * exec_add_string
185 *
186 * Add the requested string to the string space area.
187 *
188 * Parameters; struct image_params * image parameter block
189 * user_addr_t string to add to strings area
190 * uio_seg segment where string is located
191 *
192 * Returns: 0 Success
193 * !0 Failure errno from copyinstr()
194 *
195 * Implicit returns:
196 * (imgp->ip_strendp) updated location of next add, if any
197 * (imgp->ip_strspace) updated byte count of space remaining
198 */
199 static int
200 exec_add_string(struct image_params *imgp, user_addr_t str, /*uio_seg*/int seg)
201 {
202 int error = 0;
203
204 do {
205 size_t len = 0;
206 if (imgp->ip_strspace <= 0) {
207 error = E2BIG;
208 break;
209 }
210 if (IS_UIO_SYS_SPACE(seg)) {
211 char *kstr = CAST_DOWN(char *,str); /* SAFE */
212 error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len);
213 } else {
214 error = copyinstr(str, imgp->ip_strendp, imgp->ip_strspace,
215 &len);
216 }
217 imgp->ip_strendp += len;
218 imgp->ip_strspace -= len;
219 } while (error == ENAMETOOLONG);
220
221 return error;
222 }
223
224 /*
225 * exec_save_path
226 *
227 * To support new app package launching for Mac OS X, the dyld needs the
228 * first argument to execve() stored on the user stack.
229 *
230 * Save the executable path name at the top of the strings area and set
231 * the argument vector pointer to the location following that to indicate
232 * the start of the argument and environment tuples, setting the remaining
233 * string space count to the size of the string area minus the path length
234 * and a reserve for two pointers.
235 *
236 * Parameters; struct image_params * image parameter block
237 * char * path used to invoke program
238 * uio_seg segment where path is located
239 *
240 * Returns: int 0 Success
241 * !0 Failure: error number
242 * Implicit returns:
243 * (imgp->ip_strings) saved path
244 * (imgp->ip_strspace) space remaining in ip_strings
245 * (imgp->ip_argv) beginning of argument list
246 * (imgp->ip_strendp) start of remaining copy area
247 *
248 * Note: We have to do this before the initial namei() since in the
249 * path contains symbolic links, namei() will overwrite the
250 * original path buffer contents. If the last symbolic link
251 * resolved was a relative pathname, we would lose the original
252 * "path", which could be an absolute pathname. This might be
253 * unacceptable for dyld.
254 */
255 static int
256 exec_save_path(struct image_params *imgp, user_addr_t path, /*uio_seg*/int seg)
257 {
258 int error;
259 size_t len;
260 char *kpath = CAST_DOWN(char *,path); /* SAFE */
261
262 imgp->ip_strendp = imgp->ip_strings;
263 imgp->ip_strspace = SIZE_IMG_STRSPACE;
264
265 len = MIN(MAXPATHLEN, imgp->ip_strspace);
266
267 switch( seg) {
268 case UIO_USERSPACE32:
269 case UIO_USERSPACE64: /* Same for copyin()... */
270 error = copyinstr(path, imgp->ip_strings, len, &len);
271 break;
272 case UIO_SYSSPACE32:
273 error = copystr(kpath, imgp->ip_strings, len, &len);
274 break;
275 default:
276 error = EFAULT;
277 break;
278 }
279
280 if (!error) {
281 imgp->ip_strendp += len;
282 imgp->ip_strspace -= len;
283 imgp->ip_argv = imgp->ip_strendp;
284 }
285
286 return(error);
287 }
288
289
290
291 /*
292 * exec_shell_imgact
293 *
294 * Image activator for interpreter scripts. If the image begins with the
295 * characters "#!", then it is an interpreter script. Verify that we are
296 * not already executing in Classic mode, and that the length of the script
297 * line indicating the interpreter is not in excess of the maximum allowed
298 * size. If this is the case, then break out the arguments, if any, which
299 * are separated by white space, and copy them into the argument save area
300 * as if they were provided on the command line before all other arguments.
301 * The line ends when we encounter a comment character ('#') or newline.
302 *
303 * Parameters; struct image_params * image parameter block
304 *
305 * Returns: -1 not an interpreter (keep looking)
306 * -3 Success: interpreter: relookup
307 * >0 Failure: interpreter: error number
308 *
309 * A return value other than -1 indicates subsequent image activators should
310 * not be given the opportunity to attempt to activate the image.
311 */
312 static int
313 exec_shell_imgact(struct image_params *imgp)
314 {
315 char *vdata = imgp->ip_vdata;
316 char *ihp;
317 char *line_endp;
318 char *interp;
319
320 /*
321 * Make sure it's a shell script. If we've already redirected
322 * from an interpreted file once, don't do it again.
323 *
324 * Note: We disallow Classic, since the expectation is that we
325 * may run a Classic interpreter, but not an interpret a Classic
326 * image. This is consistent with historical behaviour.
327 */
328 if (vdata[0] != '#' ||
329 vdata[1] != '!' ||
330 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
331 return (-1);
332 }
333
334
335 imgp->ip_flags |= IMGPF_INTERPRET;
336
337 /* Check to see if SUGID scripts are permitted. If they aren't then
338 * clear the SUGID bits.
339 * imgp->ip_vattr is known to be valid.
340 */
341 if (sugid_scripts == 0) {
342 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
343 }
344
345 /* Find the nominal end of the interpreter line */
346 for( ihp = &vdata[2]; *ihp != '\n' && *ihp != '#'; ihp++) {
347 if (ihp >= &vdata[IMG_SHSIZE])
348 return (ENOEXEC);
349 }
350
351 line_endp = ihp;
352 ihp = &vdata[2];
353 /* Skip over leading spaces - until the interpreter name */
354 while ( ihp < line_endp && ((*ihp == ' ') || (*ihp == '\t')))
355 ihp++;
356
357 /*
358 * Find the last non-whitespace character before the end of line or
359 * the beginning of a comment; this is our new end of line.
360 */
361 for (;line_endp > ihp && ((*line_endp == ' ') || (*line_endp == '\t')); line_endp--)
362 continue;
363
364 /* Empty? */
365 if (line_endp == ihp)
366 return (ENOEXEC);
367
368 /* copy the interpreter name */
369 interp = imgp->ip_interp_name;
370 while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t'))
371 *interp++ = *ihp++;
372 *interp = '\0';
373
374 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name),
375 UIO_SYSSPACE32);
376
377 ihp = &vdata[2];
378 while (ihp < line_endp) {
379 /* Skip leading whitespace before each argument */
380 while ((*ihp == ' ') || (*ihp == '\t'))
381 ihp++;
382
383 if (ihp >= line_endp)
384 break;
385
386 /* We have an argument; copy it */
387 while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) {
388 *imgp->ip_strendp++ = *ihp++;
389 imgp->ip_strspace--;
390 }
391 *imgp->ip_strendp++ = 0;
392 imgp->ip_strspace--;
393 imgp->ip_argc++;
394 }
395
396 return (-3);
397 }
398
399
400
401 /*
402 * exec_fat_imgact
403 *
404 * Image activator for fat 1.0 binaries. If the binary is fat, then we
405 * need to select an image from it internally, and make that the image
406 * we are going to attempt to execute. At present, this consists of
407 * reloading the first page for the image with a first page from the
408 * offset location indicated by the fat header.
409 *
410 * Important: This image activator is byte order neutral.
411 *
412 * Note: If we find an encapsulated binary, we make no assertions
413 * about its validity; instead, we leave that up to a rescan
414 * for an activator to claim it, and, if it is claimed by one,
415 * that activator is responsible for determining validity.
416 */
417 static int
418 exec_fat_imgact(struct image_params *imgp)
419 {
420 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
421 kauth_cred_t cred = p->p_ucred;
422 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
423 struct fat_arch fat_arch;
424 int resid, error;
425 load_return_t lret;
426
427 /* Make sure it's a fat binary */
428 if ((fat_header->magic != FAT_MAGIC) &&
429 (fat_header->magic != FAT_CIGAM)) {
430 error = -1;
431 goto bad;
432 }
433
434 /* Look up our preferred architecture in the fat file. */
435 lret = fatfile_getarch_affinity(imgp->ip_vp,
436 (vm_offset_t)fat_header,
437 &fat_arch,
438 (p->p_flag & P_AFFINITY));
439 if (lret != LOAD_SUCCESS) {
440 error = load_return_to_errno(lret);
441 goto bad;
442 }
443
444 /* Read the Mach-O header out of it */
445 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
446 PAGE_SIZE, fat_arch.offset,
447 UIO_SYSSPACE32, (IO_UNIT|IO_NODELOCKED),
448 cred, &resid, p);
449 if (error) {
450 goto bad;
451 }
452
453 /* Did we read a complete header? */
454 if (resid) {
455 error = EBADEXEC;
456 goto bad;
457 }
458
459 /* Success. Indicate we have identified an encapsulated binary */
460 error = -2;
461 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
462 imgp->ip_arch_size = (user_size_t)fat_arch.size;
463
464 bad:
465 return (error);
466 }
467
468 /*
469 * exec_mach_imgact
470 *
471 * Image activator for mach-o 1.0 binaries.
472 *
473 * Important: This image activator is NOT byte order neutral.
474 */
475 static int
476 exec_mach_imgact(struct image_params *imgp)
477 {
478 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
479 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
480 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
481 int error = 0;
482 int vfexec = 0;
483 task_t task;
484 task_t new_task;
485 thread_t thread;
486 struct uthread *uthread;
487 vm_map_t old_map = VM_MAP_NULL;
488 vm_map_t map;
489 boolean_t clean_regions = FALSE;
490 shared_region_mapping_t initial_region = NULL;
491 load_return_t lret;
492 load_result_t load_result;
493
494 /*
495 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
496 * is a reserved field on the end, so for the most part, we can
497 * treat them as if they were identical.
498 */
499 if ((mach_header->magic != MH_MAGIC) &&
500 (mach_header->magic != MH_MAGIC_64)) {
501 error = -1;
502 goto bad;
503 }
504
505 task = current_task();
506 thread = current_thread();
507 uthread = get_bsdthread_info(thread);
508
509 if (uthread->uu_flag & UT_VFORK)
510 vfexec = 1; /* Mark in exec */
511
512 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
513 imgp->ip_flags |= IMGPF_IS_64BIT;
514
515 if (!grade_binary(mach_header->cputype, mach_header->cpusubtype)) {
516 error = EBADARCH;
517 goto bad;
518 }
519
520 /*
521 * Copy in arguments/environment from the old process, if the
522 * vector is non-NULL (i.e. exec is not being called from
523 * load_init_program(), as a special case, at system startup).
524 */
525 if (imgp->ip_user_argv != 0LL) {
526 error = exec_extract_strings(imgp);
527 if (error)
528 goto bad;
529 }
530
531 /*
532 * Hack for binary compatability; put three NULs on the end of the
533 * string area, and round it up to the next word boundary. This
534 * ensures padding with NULs to the boundary.
535 */
536 imgp->ip_strendp[0] = 0;
537 imgp->ip_strendp[1] = 0;
538 imgp->ip_strendp[2] = 0;
539 imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1));
540
541
542 if (vfexec) {
543 kern_return_t result;
544
545 result = task_create_internal(task, FALSE, &new_task);
546 if (result != KERN_SUCCESS)
547 printf("execve: task_create failed. Code: 0x%x\n", result);
548 p->task = new_task;
549 set_bsdtask_info(new_task, p);
550 if (p->p_nice != 0)
551 resetpriority(p);
552 map = get_task_map(new_task);
553 result = thread_create(new_task, &imgp->ip_vfork_thread);
554 if (result != KERN_SUCCESS)
555 printf("execve: thread_create failed. Code: 0x%x\n", result);
556 /* reset local idea of task, thread, uthread */
557 task = new_task;
558 thread = imgp->ip_vfork_thread;
559 uthread = get_bsdthread_info(thread);
560 } else {
561 map = VM_MAP_NULL;
562 }
563
564 /*
565 * We set these flags here; this is OK, since if we fail after
566 * this point, we have already destroyed the parent process anyway.
567 */
568 if (imgp->ip_flags & IMGPF_IS_64BIT) {
569 task_set_64bit(task, TRUE);
570 p->p_flag |= P_LP64;
571 } else {
572 task_set_64bit(task, FALSE);
573 p->p_flag &= ~P_LP64;
574 }
575
576 /*
577 * Load the Mach-O file.
578 */
579 /* LP64 - remove following "if" statement after osfmk/vm/task_working_set.c */
580 if((imgp->ip_flags & IMGPF_IS_64BIT) == 0)
581 if(imgp->ip_tws_cache_name) {
582 tws_handle_startup_file(task, kauth_cred_getuid(cred),
583 imgp->ip_tws_cache_name, imgp->ip_vp, &clean_regions);
584 }
585
586 vm_get_shared_region(task, &initial_region);
587
588
589 /*
590 * NOTE: An error after this point indicates we have potentially
591 * destroyed or overwrote some process state while attempting an
592 * execve() following a vfork(), which is an unrecoverable condition.
593 */
594
595 /*
596 * We reset the task to 64-bit (or not) here. It may have picked up
597 * a new map, and we need that to reflect its true 64-bit nature.
598 */
599 task_set_64bit(task,
600 ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT));
601
602 /*
603 * Actually load the image file we previously decided to load.
604 */
605 lret = load_machfile(imgp, mach_header, thread, map, clean_regions, &load_result);
606
607 if (lret != LOAD_SUCCESS) {
608 error = load_return_to_errno(lret);
609 goto badtoolate;
610 }
611
612 /* load_machfile() maps the vnode */
613 (void)ubc_map(imgp->ip_vp, PROT_EXEC);
614
615 /*
616 * Close file descriptors
617 * which specify close-on-exec.
618 */
619 fdexec(p);
620
621 /*
622 * deal with set[ug]id.
623 */
624 error = exec_handle_sugid(imgp);
625
626 KNOTE(&p->p_klist, NOTE_EXEC);
627
628 if (!vfexec && (p->p_flag & P_TRACED))
629 psignal(p, SIGTRAP);
630
631 if (error) {
632 goto badtoolate;
633 }
634 vnode_put(imgp->ip_vp);
635 imgp->ip_vp = NULL;
636
637 if (load_result.unixproc &&
638 create_unix_stack(get_task_map(task),
639 load_result.user_stack, load_result.customstack, p)) {
640 error = load_return_to_errno(LOAD_NOSPACE);
641 goto badtoolate;
642 }
643
644 if (vfexec) {
645 uthread->uu_ar0 = (void *)get_user_regs(thread);
646 old_map = vm_map_switch(get_task_map(task));
647 }
648
649 if (load_result.unixproc) {
650 user_addr_t ap;
651
652 /*
653 * Copy the strings area out into the new process address
654 * space.
655 */
656 ap = p->user_stack;
657 error = exec_copyout_strings(imgp, &ap);
658 if (error) {
659 if (vfexec)
660 vm_map_switch(old_map);
661 goto badtoolate;
662 }
663 /* Set the stack */
664 thread_setuserstack(thread, ap);
665 }
666
667 if (load_result.dynlinker) {
668 uint64_t ap;
669
670 /* Adjust the stack */
671 if (imgp->ip_flags & IMGPF_IS_64BIT) {
672 ap = thread_adjuserstack(thread, -8);
673 (void)copyoutptr(load_result.mach_header, ap, 8);
674 } else {
675 ap = thread_adjuserstack(thread, -4);
676 (void)suword(ap, load_result.mach_header);
677 }
678 }
679
680 if (vfexec) {
681 vm_map_switch(old_map);
682 }
683 /* Set the entry point */
684 thread_setentrypoint(thread, load_result.entry_point);
685
686 /* Stop profiling */
687 stopprofclock(p);
688
689 /*
690 * Reset signal state.
691 */
692 execsigs(p, thread);
693
694 /*
695 * need to cancel async IO requests that can be cancelled and wait for those
696 * already active. MAY BLOCK!
697 */
698 _aio_exec( p );
699
700 /* FIXME: Till vmspace inherit is fixed: */
701 if (!vfexec && p->vm_shm)
702 shmexec(p);
703 /* Clean up the semaphores */
704 semexit(p);
705
706 /*
707 * Remember file name for accounting.
708 */
709 p->p_acflag &= ~AFORK;
710 /* If the translated name isn't NULL, then we want to use
711 * that translated name as the name we show as the "real" name.
712 * Otherwise, use the name passed into exec.
713 */
714 if (0 != imgp->ip_p_comm[0]) {
715 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
716 sizeof(p->p_comm));
717 } else {
718 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
719 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
720 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
721 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
722 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
723 }
724
725 {
726 /* This is for kdebug */
727 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
728
729 /* Collect the pathname for tracing */
730 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
731
732
733
734 if (vfexec)
735 {
736 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
737 p->p_pid ,0,0,0, (unsigned int)thread);
738 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
739 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread);
740 }
741 else
742 {
743 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
744 p->p_pid ,0,0,0,0);
745 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
746 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
747 }
748 }
749
750 p->p_flag &= ~P_CLASSIC;
751
752 /*
753 * mark as execed, wakeup the process that vforked (if any) and tell
754 * it that it now has it's own resources back
755 */
756 p->p_flag |= P_EXEC;
757 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
758 p->p_flag &= ~P_PPWAIT;
759 wakeup((caddr_t)p->p_pptr);
760 }
761
762 if (vfexec && (p->p_flag & P_TRACED)) {
763 psignal_vfork(p, new_task, thread, SIGTRAP);
764 }
765
766 badtoolate:
767 if (vfexec) {
768 task_deallocate(new_task);
769 thread_deallocate(thread);
770 if (error)
771 error = 0;
772 }
773
774 bad:
775 return(error);
776 }
777
778
779
780
781 /*
782 * Our image activator table; this is the table of the image types we are
783 * capable of loading. We list them in order of preference to ensure the
784 * fastest image load speed.
785 *
786 * XXX hardcoded, for now; should use linker sets
787 */
788 struct execsw {
789 int (*ex_imgact)(struct image_params *);
790 const char *ex_name;
791 } execsw[] = {
792 { exec_mach_imgact, "Mach-o Binary" },
793 { exec_fat_imgact, "Fat Binary" },
794 { exec_shell_imgact, "Interpreter Script" },
795 { NULL, NULL}
796 };
797
798
799 /*
800 * TODO: Dynamic linker header address on stack is copied via suword()
801 */
802 /* ARGSUSED */
803 int
804 execve(struct proc *p, struct execve_args *uap, register_t *retval)
805 {
806 kauth_cred_t cred = p->p_ucred;
807 struct image_params image_params, *imgp;
808 struct vnode_attr va;
809 struct vnode_attr origva;
810 struct nameidata nd;
811 struct uthread *uthread;
812 int i;
813 int resid, error;
814 task_t task;
815 int numthreads;
816 int vfexec=0;
817 int once = 1; /* save SGUID-ness for interpreted files */
818 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for Classic */
819 int is_64 = IS_64BIT_PROCESS(p);
820 int seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
821 struct vfs_context context;
822
823 context.vc_proc = p;
824 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
825
826
827 imgp = &image_params;
828
829 /* Initialize the common data in the image_params structure */
830 bzero(imgp, sizeof(*imgp));
831 imgp->ip_user_fname = uap->fname;
832 imgp->ip_user_argv = uap->argp;
833 imgp->ip_user_envv = uap->envp;
834 imgp->ip_vattr = &va;
835 imgp->ip_origvattr = &origva;
836 imgp->ip_vfs_context = &context;
837 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
838 imgp->ip_tws_cache_name = NULL;
839 imgp->ip_p_comm = alt_p_comm; /* for Classic */
840
841 /*
842 * XXXAUDIT: Currently, we only audit the pathname of the binary.
843 * There may also be poor interaction with dyld.
844 */
845
846 task = current_task();
847 uthread = get_bsdthread_info(current_thread());
848
849 if (uthread->uu_flag & UT_VFORK) {
850 vfexec = 1; /* Mark in exec */
851 } else {
852 if (task != kernel_task) {
853 numthreads = get_task_numacts(task);
854 if (numthreads <= 0 )
855 return(EINVAL);
856 if (numthreads > 1) {
857 return(ENOTSUP);
858 }
859 }
860 }
861
862 error = execargs_alloc(imgp);
863 if (error)
864 return(error);
865
866 /*
867 * XXXAUDIT: Note: the double copyin introduces an audit
868 * race. To correct this race, we must use a single
869 * copyin(), e.g. by passing a flag to namei to indicate an
870 * external path buffer is being used.
871 */
872 error = exec_save_path(imgp, uap->fname, seg);
873 if (error) {
874 execargs_free(imgp);
875 return(error);
876 }
877
878 /*
879 * No app profiles under chroot
880 */
881 if((p->p_fd->fd_rdir == NULLVP) && (app_profile != 0)) {
882
883 /* grab the name of the file out of its path */
884 /* we will need this for lookup within the */
885 /* name file */
886 /* Scan backwards for the first '/' or start of string */
887 imgp->ip_tws_cache_name = imgp->ip_strendp;
888 while (imgp->ip_tws_cache_name[0] != '/') {
889 if(imgp->ip_tws_cache_name == imgp->ip_strings) {
890 imgp->ip_tws_cache_name--;
891 break;
892 }
893 imgp->ip_tws_cache_name--;
894 }
895 imgp->ip_tws_cache_name++;
896 }
897 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
898 seg, uap->fname, imgp->ip_vfs_context);
899
900 again:
901 error = namei(&nd);
902 if (error)
903 goto bad;
904 imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */
905 imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
906
907 error = exec_check_permissions(imgp);
908 if (error)
909 goto bad;
910
911 /* Copy; avoid invocation of an interpreter overwriting the original */
912 if (once) {
913 once = 0;
914 origva = va;
915 }
916
917 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
918 UIO_SYSSPACE32, IO_NODELOCKED, cred, &resid, p);
919 if (error)
920 goto bad;
921
922 encapsulated_binary:
923 error = -1;
924 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
925
926 error = (*execsw[i].ex_imgact)(imgp);
927
928 switch (error) {
929 /* case -1: not claimed: continue */
930 case -2: /* Encapsulated binary */
931 goto encapsulated_binary;
932
933 case -3: /* Interpreter */
934 vnode_put(imgp->ip_vp);
935 imgp->ip_vp = NULL; /* already put */
936 nd.ni_cnd.cn_nameiop = LOOKUP;
937 nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) |
938 (FOLLOW | LOCKLEAF);
939
940
941 nd.ni_segflg = UIO_SYSSPACE32;
942 nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name);
943 goto again;
944
945 default:
946 break;
947 }
948 }
949
950 /* call out to allow 3rd party notification of exec.
951 * Ignore result of kauth_authorize_fileop call.
952 */
953 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
954 kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_EXEC,
955 (uintptr_t)nd.ni_vp, 0);
956 }
957
958 /* Image not claimed by any activator? */
959 if (error == -1)
960 error = ENOEXEC;
961
962 bad:
963 if (imgp->ip_ndp)
964 nameidone(imgp->ip_ndp);
965 if (imgp->ip_vp)
966 vnode_put(imgp->ip_vp);
967 if (imgp->ip_strings)
968 execargs_free(imgp);
969 if (!error && vfexec) {
970 vfork_return(current_thread(), p->p_pptr, p, retval);
971 (void)thread_resume(imgp->ip_vfork_thread);
972 return(0);
973 }
974 return(error);
975 }
976
977
978 static int
979 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
980 {
981 int error;
982
983 if (ptr_size == 4) {
984 /* 64 bit value containing 32 bit address */
985 unsigned int i;
986
987 error = copyin(froma, &i, 4);
988 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
989 } else {
990 error = copyin(froma, toptr, 8);
991 }
992 return (error);
993 }
994
995
996 static int
997 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
998 {
999 int error;
1000
1001 if (ptr_size == 4) {
1002 /* 64 bit value containing 32 bit address */
1003 unsigned int i = CAST_DOWN(unsigned int,ua); /* SAFE */
1004
1005 error = copyout(&i, ptr, 4);
1006 } else {
1007 error = copyout(&ua, ptr, 8);
1008 }
1009 return (error);
1010 }
1011
1012
1013 /*
1014 * exec_copyout_strings
1015 *
1016 * Copy out the strings segment to user space. The strings segment is put
1017 * on a preinitialized stack frame.
1018 *
1019 * Parameters: struct image_params * the image parameter block
1020 * int * a pointer to the stack offset variable
1021 *
1022 * Returns: 0 Success
1023 * !0 Failure: errno
1024 *
1025 * Implicit returns:
1026 * (*stackp) The stack offset, modified
1027 *
1028 * Note: The strings segment layout is backward, from the beginning
1029 * of the top of the stack to consume the minimal amount of
1030 * space possible; the returned stack pointer points to the
1031 * end of the area consumed (stacks grow upward).
1032 *
1033 * argc is an int; arg[i] are pointers; env[i] are pointers;
1034 * exec_path is a pointer; the 0's are (void *)NULL's
1035 *
1036 * The stack frame layout is:
1037 *
1038 * +-------------+
1039 * sp-> | argc |
1040 * +-------------+
1041 * | arg[0] |
1042 * +-------------+
1043 * :
1044 * :
1045 * +-------------+
1046 * | arg[argc-1] |
1047 * +-------------+
1048 * | 0 |
1049 * +-------------+
1050 * | env[0] |
1051 * +-------------+
1052 * :
1053 * :
1054 * +-------------+
1055 * | env[n] |
1056 * +-------------+
1057 * | 0 |
1058 * +-------------+
1059 * | exec_path | In MacOS X PR2 Beaker2E the path passed to exec() is
1060 * +-------------+ passed on the stack just after the trailing 0 of the
1061 * | 0 | the envp[] array as a pointer to a string.
1062 * +-------------+
1063 * | PATH AREA |
1064 * +-------------+
1065 * | STRING AREA |
1066 * :
1067 * :
1068 * | | <- p->user_stack
1069 * +-------------+
1070 *
1071 * Although technically a part of the STRING AREA, we treat the PATH AREA as
1072 * a separate entity. This allows us to align the beginning of the PATH AREA
1073 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
1074 * which preceed it on the stack are properly aligned.
1075 *
1076 * TODO: argc copied with suword(), which takes a 64 bit address
1077 */
1078 static int
1079 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
1080 {
1081 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1082 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
1083 char *argv = imgp->ip_argv; /* modifiable copy of argv */
1084 user_addr_t string_area; /* *argv[], *env[] */
1085 user_addr_t path_area; /* package launch path */
1086 user_addr_t ptr_area; /* argv[], env[], exec_path */
1087 user_addr_t stack;
1088 int stringc = imgp->ip_argc + imgp->ip_envc;
1089 int len;
1090 int error;
1091 int strspace;
1092
1093 stack = *stackp;
1094
1095 /*
1096 * Set up pointers to the beginning of the string area, the beginning
1097 * of the path area, and the beginning of the pointer area (actually,
1098 * the location of argc, an int, which may be smaller than a pointer,
1099 * but we use ptr_size worth of space for it, for alignment).
1100 */
1101 string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size;
1102 path_area = string_area - (((imgp->ip_argv - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1));
1103 ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4) * ptr_size) - ptr_size /*argc*/;
1104
1105 /* Return the initial stack address: the location of argc */
1106 *stackp = ptr_area;
1107
1108 /*
1109 * Record the size of the arguments area so that sysctl_procargs()
1110 * can return the argument area without having to parse the arguments.
1111 */
1112 p->p_argc = imgp->ip_argc;
1113 p->p_argslen = (int)(stack - path_area);
1114
1115
1116 /*
1117 * Support for new app package launching for Mac OS X allocates
1118 * the "path" at the begining of the imgp->ip_strings buffer.
1119 * copy it just before the string area.
1120 */
1121 len = 0;
1122 error = copyoutstr(imgp->ip_strings, path_area,
1123 (unsigned)(imgp->ip_argv - imgp->ip_strings),
1124 (size_t *)&len);
1125 if (error)
1126 goto bad;
1127
1128
1129 /* Save a NULL pointer below it */
1130 (void)copyoutptr(0LL, path_area - ptr_size, ptr_size);
1131
1132 /* Save the pointer to "path" just below it */
1133 (void)copyoutptr(path_area, path_area - 2*ptr_size, ptr_size);
1134
1135 /*
1136 * ptr_size for 2 NULL one each ofter arg[argc -1] and env[n]
1137 * ptr_size for argc
1138 * skip over saved path, ptr_size for pointer to path,
1139 * and ptr_size for the NULL after pointer to path.
1140 */
1141
1142 /* argc (int32, stored in a ptr_size area) */
1143 (void)suword(ptr_area, imgp->ip_argc);
1144 ptr_area += sizeof(int);
1145 /* pad to ptr_size, if 64 bit image, to ensure user stack alignment */
1146 if (imgp->ip_flags & IMGPF_IS_64BIT) {
1147 (void)suword(ptr_area, 0); /* int, not long: ignored */
1148 ptr_area += sizeof(int);
1149 }
1150
1151
1152 /*
1153 * We use (string_area - path_area) here rather than the more
1154 * intuitive (imgp->ip_argv - imgp->ip_strings) because we are
1155 * interested in the length of the PATH_AREA in user space,
1156 * rather than the actual length of the execution path, since
1157 * it includes alignment padding of the PATH_AREA + STRING_AREA
1158 * to a ptr_size boundary.
1159 */
1160 strspace = SIZE_IMG_STRSPACE - (string_area - path_area);
1161 for (;;) {
1162 if (stringc == imgp->ip_envc) {
1163 /* argv[n] = NULL */
1164 (void)copyoutptr(0LL, ptr_area, ptr_size);
1165 ptr_area += ptr_size;
1166 }
1167 if (--stringc < 0)
1168 break;
1169
1170 /* pointer: argv[n]/env[n] */
1171 (void)copyoutptr(string_area, ptr_area, ptr_size);
1172
1173 /* string : argv[n][]/env[n][] */
1174 do {
1175 if (strspace <= 0) {
1176 error = E2BIG;
1177 break;
1178 }
1179 error = copyoutstr(argv, string_area,
1180 (unsigned)strspace,
1181 (size_t *)&len);
1182 string_area += len;
1183 argv += len;
1184 strspace -= len;
1185 } while (error == ENAMETOOLONG);
1186 if (error == EFAULT || error == E2BIG)
1187 break; /* bad stack - user's problem */
1188 ptr_area += ptr_size;
1189 }
1190 /* env[n] = NULL */
1191 (void)copyoutptr(0LL, ptr_area, ptr_size);
1192
1193 bad:
1194 return(error);
1195 }
1196
1197
1198 /*
1199 * exec_extract_strings
1200 *
1201 * Copy arguments and environment from user space into work area; we may
1202 * have already copied some early arguments into the work area, and if
1203 * so, any arguments opied in are appended to those already there.
1204 *
1205 * Parameters: struct image_params * the image parameter block
1206 *
1207 * Returns: 0 Success
1208 * !0 Failure: errno
1209 *
1210 * Implicit returns;
1211 * (imgp->ip_argc) Count of arguments, updated
1212 * (imgp->ip_envc) Count of environment strings, updated
1213 *
1214 *
1215 * Notes: The argument and environment vectors are user space pointers
1216 * to arrays of user space pointers.
1217 */
1218 static int
1219 exec_extract_strings(struct image_params *imgp)
1220 {
1221 int error = 0;
1222 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1223 int seg = (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32);
1224 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
1225 user_addr_t argv = imgp->ip_user_argv;
1226 user_addr_t envv = imgp->ip_user_envv;
1227
1228 /* Now, get rest of arguments */
1229
1230 /*
1231 * If we are running an interpreter, replace the av[0] that was
1232 * passed to execve() with the fully qualified path name that was
1233 * passed to execve() for interpreters which do not use the PATH
1234 * to locate their script arguments.
1235 */
1236 if((imgp->ip_flags & IMGPF_INTERPRET) != 0 && argv != 0LL) {
1237 user_addr_t arg;
1238
1239 error = copyinptr(argv, &arg, ptr_size);
1240 if (error)
1241 goto bad;
1242 if (arg != 0LL && arg != (user_addr_t)-1) {
1243 argv += ptr_size;
1244 error = exec_add_string(imgp, imgp->ip_user_fname, seg);
1245 if (error)
1246 goto bad;
1247 imgp->ip_argc++;
1248 }
1249 }
1250
1251 while (argv != 0LL) {
1252 user_addr_t arg;
1253
1254 error = copyinptr(argv, &arg, ptr_size);
1255 if (error)
1256 goto bad;
1257
1258 argv += ptr_size;
1259 if (arg == 0LL) {
1260 break;
1261 } else if (arg == (user_addr_t)-1) {
1262 /* Um... why would it be -1? */
1263 error = EFAULT;
1264 goto bad;
1265 }
1266 /*
1267 * av[n...] = arg[n]
1268 */
1269 error = exec_add_string(imgp, arg, seg);
1270 if (error)
1271 goto bad;
1272 imgp->ip_argc++;
1273 }
1274
1275 /* Now, get the environment */
1276 while (envv != 0LL) {
1277 user_addr_t env;
1278
1279 error = copyinptr(envv, &env, ptr_size);
1280 if (error)
1281 goto bad;
1282
1283 envv += ptr_size;
1284 if (env == 0LL) {
1285 break;
1286 } else if (env == (user_addr_t)-1) {
1287 error = EFAULT;
1288 goto bad;
1289 }
1290 /*
1291 * av[n...] = env[n]
1292 */
1293 error = exec_add_string(imgp, env, seg);
1294 if (error)
1295 goto bad;
1296 imgp->ip_envc++;
1297 }
1298 bad:
1299 return error;
1300 }
1301
1302
/*
 * rlim_cur of the RLIMIT_STACK entry of process `p'.  The argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define	unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
1304
1305 static int
1306 exec_check_permissions(struct image_params *imgp)
1307 {
1308 struct vnode *vp = imgp->ip_vp;
1309 struct vnode_attr *vap = imgp->ip_vattr;
1310 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1311 int error;
1312 kauth_action_t action;
1313
1314 /* Only allow execution of regular files */
1315 if (!vnode_isreg(vp))
1316 return (EACCES);
1317
1318 /* Get the file attributes that we will be using here and elsewhere */
1319 VATTR_INIT(vap);
1320 VATTR_WANTED(vap, va_uid);
1321 VATTR_WANTED(vap, va_gid);
1322 VATTR_WANTED(vap, va_mode);
1323 VATTR_WANTED(vap, va_fsid);
1324 VATTR_WANTED(vap, va_fileid);
1325 VATTR_WANTED(vap, va_data_size);
1326 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
1327 return (error);
1328
1329 /*
1330 * Ensure that at least one execute bit is on - otherwise root
1331 * will always succeed, and we don't want to happen unless the
1332 * file really is executable.
1333 */
1334 if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
1335 return (EACCES);
1336
1337 /* Disallow zero length files */
1338 if (vap->va_data_size == 0)
1339 return (ENOEXEC);
1340
1341 imgp->ip_arch_offset = (user_size_t)0;
1342 imgp->ip_arch_size = vap->va_data_size;
1343
1344 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
1345 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED))
1346 vap->va_mode &= ~(VSUID | VSGID);
1347
1348 /* Check for execute permission */
1349 action = KAUTH_VNODE_EXECUTE;
1350 /* Traced images must also be readable */
1351 if (p->p_flag & P_TRACED)
1352 action |= KAUTH_VNODE_READ_DATA;
1353 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
1354 return (error);
1355
1356 /* Don't let it run if anyone had it open for writing */
1357 if (vp->v_writecount)
1358 return (ETXTBSY);
1359
1360
1361 /* XXX May want to indicate to underlying FS that vnode is open */
1362
1363 return (error);
1364 }
1365
1366 /*
1367 * exec_handle_sugid
1368 *
1369 * Initially clear the P_SUGID in the process flags; if an SUGID process is
1370 * exec'ing a non-SUGID image, then this is the point of no return.
1371 *
1372 * If the image being activated is SUGI, then replace the credential with a
1373 * copy, disable tracing (unless the tracing process is root), reset the
1374 * mach task port to revoke it, set the P_SUGID bit,
1375 *
1376 * If the saved user and group ID will be changing, then make sure it happens
1377 * to a new credential, rather than a shared one.
1378 *
1379 * Set the security token (this is probably obsolete, given that the token
1380 * should not technically be separate from the credential itself).
1381 *
1382 * Parameters: struct image_params * the image parameter block
1383 *
1384 * Returns: void No failure indication
1385 *
1386 * Implicit returns:
1387 * <process credential> Potentially modified/replaced
1388 * <task port> Potentially revoked
1389 * <process flags> P_SUGID bit potentially modified
1390 * <security token> Potentially modified
1391 */
1392 static int
1393 exec_handle_sugid(struct image_params *imgp)
1394 {
1395 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
1396 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1397 int i;
1398 int error = 0;
1399 static struct vnode *dev_null = NULLVP;
1400
1401 p->p_flag &= ~P_SUGID;
1402
1403 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
1404 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
1405 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
1406 cred->cr_gid != imgp->ip_origvattr->va_gid)) {
1407 #if KTRACE
1408 /*
1409 * If process is being ktraced, turn off - unless
1410 * root set it.
1411 */
1412 if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
1413 struct vnode *tvp = p->p_tracep;
1414 p->p_tracep = NULL;
1415 p->p_traceflag = 0;
1416 vnode_rele(tvp);
1417 }
1418 #endif
1419 /*
1420 * Replace the credential with a copy of itself if euid or egid change.
1421 */
1422 if (imgp->ip_origvattr->va_mode & VSUID) {
1423 p->p_ucred = kauth_cred_seteuid(p->p_ucred, imgp->ip_origvattr->va_uid);
1424 }
1425 if (imgp->ip_origvattr->va_mode & VSGID) {
1426 p->p_ucred = kauth_cred_setegid(p->p_ucred, imgp->ip_origvattr->va_gid);
1427 }
1428
1429 /*
1430 * Have mach reset the task and thread ports.
1431 * We don't want anyone who had the ports before
1432 * a setuid exec to be able to access/control the
1433 * task/thread after.
1434 */
1435 if (current_task() == p->task) {
1436 ipc_task_reset(p->task);
1437 ipc_thread_reset(current_thread());
1438 }
1439
1440 p->p_flag |= P_SUGID;
1441
1442 /* Cache the vnode for /dev/null the first time around */
1443 if (dev_null == NULLVP) {
1444 struct nameidata nd1;
1445
1446 NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32,
1447 CAST_USER_ADDR_T("/dev/null"),
1448 imgp->ip_vfs_context);
1449
1450 if ((error = vn_open(&nd1, FREAD, 0)) == 0) {
1451 dev_null = nd1.ni_vp;
1452 /*
1453 * vn_open returns with both a use_count
1454 * and an io_count on the found vnode
1455 * drop the io_count, but keep the use_count
1456 */
1457 vnode_put(nd1.ni_vp);
1458 }
1459 }
1460
1461 /* Radar 2261856; setuid security hole fix */
1462 /* Patch from OpenBSD: A. Ramesh */
1463 /*
1464 * XXX For setuid processes, attempt to ensure that
1465 * stdin, stdout, and stderr are already allocated.
1466 * We do not want userland to accidentally allocate
1467 * descriptors in this range which has implied meaning
1468 * to libc.
1469 */
1470 if (dev_null != NULLVP) {
1471 for (i = 0; i < 3; i++) {
1472 struct fileproc *fp;
1473 int indx;
1474
1475 if (p->p_fd->fd_ofiles[i] != NULL)
1476 continue;
1477
1478 if ((error = falloc(p, &fp, &indx)) != 0)
1479 continue;
1480
1481 if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) {
1482 fp_free(p, indx, fp);
1483 break;
1484 }
1485
1486 fp->f_fglob->fg_flag = FREAD;
1487 fp->f_fglob->fg_type = DTYPE_VNODE;
1488 fp->f_fglob->fg_ops = &vnops;
1489 fp->f_fglob->fg_data = (caddr_t)dev_null;
1490
1491 proc_fdlock(p);
1492 procfdtbl_releasefd(p, indx, NULL);
1493 fp_drop(p, indx, fp, 1);
1494 proc_fdunlock(p);
1495 }
1496 /*
1497 * for now we need to drop the reference immediately
1498 * since we don't have any mechanism in place to
1499 * release it before starting to unmount "/dev"
1500 * during a reboot/shutdown
1501 */
1502 vnode_rele(dev_null);
1503 dev_null = NULLVP;
1504 }
1505 }
1506
1507 /*
1508 * Implement the semantic where the effective user and group become
1509 * the saved user and group in exec'ed programs.
1510 */
1511 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), p->p_ucred->cr_gid);
1512
1513 /* XXX Obsolete; security token should not be separate from cred */
1514 set_security_token(p);
1515
1516 return(error);
1517 }
1518
1519 static kern_return_t
1520 create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack,
1521 struct proc *p)
1522 {
1523 mach_vm_size_t size;
1524 mach_vm_offset_t addr;
1525
1526 p->user_stack = user_stack;
1527 if (!customstack) {
1528 size = mach_vm_round_page(unix_stack_size(p));
1529 addr = mach_vm_trunc_page(user_stack - size);
1530 return (mach_vm_allocate(map, &addr, size,
1531 VM_MAKE_TAG(VM_MEMORY_STACK) |
1532 VM_FLAGS_FIXED));
1533 } else
1534 return(KERN_SUCCESS);
1535 }
1536
1537 #include <sys/reboot.h>
1538
1539 static char init_program_name[128] = "/sbin/launchd";
1540 static const char * other_init = "/sbin/mach_init";
1541
1542 char init_args[128] = "";
1543
1544 struct execve_args init_exec_args;
1545 int init_attempts = 0;
1546
1547
1548 void
1549 load_init_program(struct proc *p)
1550 {
1551 vm_offset_t init_addr;
1552 char *argv[3];
1553 int error;
1554 register_t retval[2];
1555
1556 error = 0;
1557
1558 /* init_args are copied in string form directly from bootstrap */
1559
1560 do {
1561 if (boothowto & RB_INITNAME) {
1562 printf("init program? ");
1563 #if FIXME /* [ */
1564 gets(init_program_name, init_program_name);
1565 #endif /* FIXME ] */
1566 }
1567
1568 if (error && ((boothowto & RB_INITNAME) == 0) &&
1569 (init_attempts == 1)) {
1570 printf("Load of %s, errno %d, trying %s\n",
1571 init_program_name, error, other_init);
1572 error = 0;
1573 bcopy(other_init, init_program_name,
1574 sizeof(other_init));
1575 }
1576
1577 init_attempts++;
1578
1579 if (error) {
1580 printf("Load of %s failed, errno %d\n",
1581 init_program_name, error);
1582 error = 0;
1583 boothowto |= RB_INITNAME;
1584 continue;
1585 }
1586
1587 /*
1588 * Copy out program name.
1589 */
1590
1591 init_addr = VM_MIN_ADDRESS;
1592 (void) vm_allocate(current_map(), &init_addr,
1593 PAGE_SIZE, VM_FLAGS_ANYWHERE);
1594 if (init_addr == 0)
1595 init_addr++;
1596
1597 (void) copyout((caddr_t) init_program_name,
1598 CAST_USER_ADDR_T(init_addr),
1599 (unsigned) sizeof(init_program_name)+1);
1600
1601 argv[0] = (char *) init_addr;
1602 init_addr += sizeof(init_program_name);
1603 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1604
1605 /*
1606 * Put out first (and only) argument, similarly.
1607 * Assumes everything fits in a page as allocated
1608 * above.
1609 */
1610
1611 (void) copyout((caddr_t) init_args,
1612 CAST_USER_ADDR_T(init_addr),
1613 (unsigned) sizeof(init_args));
1614
1615 argv[1] = (char *) init_addr;
1616 init_addr += sizeof(init_args);
1617 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1618
1619 /*
1620 * Null-end the argument list
1621 */
1622
1623 argv[2] = (char *) 0;
1624
1625 /*
1626 * Copy out the argument list.
1627 */
1628
1629 (void) copyout((caddr_t) argv,
1630 CAST_USER_ADDR_T(init_addr),
1631 (unsigned) sizeof(argv));
1632
1633 /*
1634 * Set up argument block for fake call to execve.
1635 */
1636
1637 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
1638 init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
1639 init_exec_args.envp = CAST_USER_ADDR_T(0);
1640
1641 /* So that mach_init task
1642 * is set with uid,gid 0 token
1643 */
1644 set_security_token(p);
1645
1646 error = execve(p,&init_exec_args,retval);
1647 } while (error);
1648 }
1649
1650 /*
1651 * Convert a load_return_t to an errno.
1652 */
1653 static int
1654 load_return_to_errno(load_return_t lrtn)
1655 {
1656 switch (lrtn) {
1657 case LOAD_SUCCESS:
1658 return 0;
1659 case LOAD_BADARCH:
1660 return EBADARCH;
1661 case LOAD_BADMACHO:
1662 return EBADMACHO;
1663 case LOAD_SHLIB:
1664 return ESHLIBVERS;
1665 case LOAD_NOSPACE:
1666 case LOAD_RESOURCE:
1667 return ENOMEM;
1668 case LOAD_PROTECT:
1669 return EACCES;
1670 case LOAD_ENOENT:
1671 return ENOENT;
1672 case LOAD_IOERROR:
1673 return EIO;
1674 case LOAD_FAILURE:
1675 default:
1676 return EBADEXEC;
1677 }
1678 }
1679
1680 #include <mach/mach_types.h>
1681 #include <mach/vm_prot.h>
1682 #include <mach/semaphore.h>
1683 #include <mach/sync_policy.h>
1684 #include <kern/clock.h>
1685 #include <mach/kern_return.h>
1686
1687 extern semaphore_t execve_semaphore;
1688
1689 /*
1690 * The block of memory used by the execve arguments. At the same time,
1691 * we allocate a page so that we can read in the first page of the image.
1692 */
1693 static int
1694 execargs_alloc(struct image_params *imgp)
1695 {
1696 kern_return_t kret;
1697
1698 kret = semaphore_wait(execve_semaphore);
1699 if (kret != KERN_SUCCESS)
1700 switch (kret) {
1701 default:
1702 return (EINVAL);
1703 case KERN_INVALID_ADDRESS:
1704 case KERN_PROTECTION_FAILURE:
1705 return (EACCES);
1706 case KERN_ABORTED:
1707 case KERN_OPERATION_TIMED_OUT:
1708 return (EINTR);
1709 }
1710
1711 kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE);
1712 imgp->ip_vdata = imgp->ip_strings + NCARGS;
1713 if (kret != KERN_SUCCESS) {
1714 semaphore_signal(execve_semaphore);
1715 return (ENOMEM);
1716 }
1717 return (0);
1718 }
1719
1720 static int
1721 execargs_free(struct image_params *imgp)
1722 {
1723 kern_return_t kret;
1724
1725 kmem_free(bsd_pageable_map, (vm_offset_t)imgp->ip_strings, NCARGS + PAGE_SIZE);
1726 imgp->ip_strings = NULL;
1727
1728 kret = semaphore_signal(execve_semaphore);
1729 switch (kret) {
1730 case KERN_INVALID_ADDRESS:
1731 case KERN_PROTECTION_FAILURE:
1732 return (EINVAL);
1733 case KERN_ABORTED:
1734 case KERN_OPERATION_TIMED_OUT:
1735 return (EINTR);
1736 case KERN_SUCCESS:
1737 return(0);
1738 default:
1739 return (EINVAL);
1740 }
1741 }