]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exec.c
15cac8d966199802ecca8bffc39454b6cb5e326d
[apple/xnu.git] / bsd / kern / kern_exec.c
1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1987 Carnegie-Mellon University
34 * All rights reserved. The CMU software License Agreement specifies
35 * the terms and conditions for use and redistribution.
36 */
37
38 #include <cputypes.h>
39
40 /*-
41 * Copyright (c) 1982, 1986, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Redistribution and use in source and binary forms, with or without
50 * modification, are permitted provided that the following conditions
51 * are met:
52 * 1. Redistributions of source code must retain the above copyright
53 * notice, this list of conditions and the following disclaimer.
54 * 2. Redistributions in binary form must reproduce the above copyright
55 * notice, this list of conditions and the following disclaimer in the
56 * documentation and/or other materials provided with the distribution.
57 * 3. All advertising materials mentioning features or use of this software
58 * must display the following acknowledgement:
59 * This product includes software developed by the University of
60 * California, Berkeley and its contributors.
61 * 4. Neither the name of the University nor the names of its contributors
62 * may be used to endorse or promote products derived from this software
63 * without specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75 * SUCH DAMAGE.
76 *
77 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
78 */
79 #include <machine/reg.h>
80
81 #include <sys/param.h>
82 #include <sys/systm.h>
83 #include <sys/filedesc.h>
84 #include <sys/kernel.h>
85 #include <sys/proc_internal.h>
86 #include <sys/kauth.h>
87 #include <sys/user.h>
88 #include <sys/socketvar.h>
89 #include <sys/malloc.h>
90 #include <sys/namei.h>
91 #include <sys/mount_internal.h>
92 #include <sys/vnode_internal.h>
93 #include <sys/file_internal.h>
94 #include <sys/stat.h>
95 #include <sys/uio_internal.h>
96 #include <sys/acct.h>
97 #include <sys/exec.h>
98 #include <sys/kdebug.h>
99 #include <sys/signal.h>
100 #include <sys/aio_kern.h>
101 #include <sys/sysproto.h>
102 #include <sys/shm_internal.h> /* shmexec() */
103 #include <sys/ubc_internal.h> /* ubc_map() */
104
105 #include <bsm/audit_kernel.h>
106
107 #include <mach/mach_types.h>
108 #include <mach/task.h>
109 #include <mach/thread_act.h>
110 #include <mach/vm_map.h>
111 #include <mach/mach_vm.h>
112 #include <mach/vm_param.h>
113
114 #include <vm/vm_map.h>
115 #include <vm/vm_kern.h>
116 #include <vm/vm_pager.h>
117 #include <vm/vm_kern.h>
118 #include <vm/task_working_set.h>
119 #include <vm/vm_shared_memory_server.h>
120
/*
 * Mach things for which prototypes are unavailable from Mach headers.
 * They are declared by hand here so that this file can call them.
 */
void		ipc_task_reset(
			task_t		task);

/*
 * Returns the saved user register state for a thread; used by
 * exec_mach_imgact() to point uu_ar0 at the new thread's registers when
 * exec'ing after a vfork().
 */
extern struct savearea *get_user_regs(thread_t);
128
129
130 #include <kern/thread.h>
131 #include <kern/task.h>
132 #include <kern/ast.h>
133 #include <kern/mach_loader.h>
134 #include <mach-o/fat.h>
135 #include <mach-o/loader.h>
136 #include <machine/vmparam.h>
137 #if KTRACE
138 #include <sys/ktrace.h>
139 #endif
140 #include <sys/imgact.h>
141
142
/*
 * SIZE_MAXPTR		The maximum size of a user space pointer, in bytes
 * SIZE_IMG_STRSPACE	The available string space, minus two pointers; we
 *			define it in terms of the maximum, since we don't
 *			know the pointer size going in, until after we've
 *			parsed the executable image.
 */
#define	SIZE_MAXPTR		8				/* 64 bits */
#define	SIZE_IMG_STRSPACE	(NCARGS - 2 * SIZE_MAXPTR)
152
/*
 * When non-zero, and the calling process has no chroot directory, execve()
 * derives a working-set cache name (ip_tws_cache_name) from the last
 * component of the executable path.
 */
int	app_profile = 0;

/* Defined elsewhere; not referenced by the functions visible in this part of the file. */
extern vm_map_t bsd_pageable_map;
extern struct fileops vnops;
157
/*
 * ROUND_PTR	Round 'addr' up to the next 16-byte boundary and yield the
 *		result as a (type *).  An address that is already 16-byte
 *		aligned is returned unchanged.
 *
 * The arithmetic is done in uintptr_t so that addresses wider than
 * 'unsigned' are not truncated, and the whole expansion is parenthesised
 * so it can be used safely inside larger expressions.
 */
#define	ROUND_PTR(type, addr)	\
	((type *)(((uintptr_t)(addr) + 16 - 1)	\
		  & ~(uintptr_t)(16 - 1)))
161
struct image_params;	/* Forward */

/*
 * File-local helpers used by the image activators and execve() before
 * their definitions appear.
 */
static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
static int load_return_to_errno(load_return_t lrtn);
static int execargs_alloc(struct image_params *imgp);
static int execargs_free(struct image_params *imgp);
static int exec_check_permissions(struct image_params *imgp);
static int exec_extract_strings(struct image_params *imgp);
static int exec_handle_sugid(struct image_params *imgp);

/*
 * kern.sugid_scripts (read/write sysctl).  While it is 0, which is the
 * initial value, exec_shell_imgact() clears the VSUID/VSGID mode bits
 * recorded for an interpreter script.
 */
static int sugid_scripts = 0;
SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW, &sugid_scripts, 0, "");
static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack,
					int customstack, struct proc *p);
/* Note the argument order: 'ua' is the value stored, 'ptr' is the destination. */
static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
175
/* XXX forward; should be in headers, but can't be for one reason or another */
/* grade_binary(): a zero result makes exec_mach_imgact() fail with EBADARCH */
extern int grade_binary(cpu_type_t exectype, cpu_subtype_t execsubtype);
/* vfork_return(): called by execve() after a successful exec in a vfork() child */
extern void vfork_return(thread_t th_act,
				struct proc * p,
				struct proc *p2,
				register_t *retval);


/*
 * NOTE(review): presumably these identify the Classic environment handler;
 * they are not referenced in the portion of the file visible here.
 */
extern char classichandler[32];
extern uint32_t classichandler_fsid;
extern long classichandler_fileid;
187
188
189 /*
190 * exec_add_string
191 *
192 * Add the requested string to the string space area.
193 *
194 * Parameters; struct image_params * image parameter block
195 * user_addr_t string to add to strings area
196 * uio_seg segment where string is located
197 *
198 * Returns: 0 Success
199 * !0 Failure errno from copyinstr()
200 *
201 * Implicit returns:
202 * (imgp->ip_strendp) updated location of next add, if any
203 * (imgp->ip_strspace) updated byte count of space remaining
204 */
205 static int
206 exec_add_string(struct image_params *imgp, user_addr_t str, /*uio_seg*/int seg)
207 {
208 int error = 0;
209
210 do {
211 size_t len = 0;
212 if (imgp->ip_strspace <= 0) {
213 error = E2BIG;
214 break;
215 }
216 if (IS_UIO_SYS_SPACE(seg)) {
217 char *kstr = CAST_DOWN(char *,str); /* SAFE */
218 error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len);
219 } else {
220 error = copyinstr(str, imgp->ip_strendp, imgp->ip_strspace,
221 &len);
222 }
223 imgp->ip_strendp += len;
224 imgp->ip_strspace -= len;
225 } while (error == ENAMETOOLONG);
226
227 return error;
228 }
229
230 /*
231 * exec_save_path
232 *
233 * To support new app package launching for Mac OS X, the dyld needs the
234 * first argument to execve() stored on the user stack.
235 *
236 * Save the executable path name at the top of the strings area and set
237 * the argument vector pointer to the location following that to indicate
238 * the start of the argument and environment tuples, setting the remaining
239 * string space count to the size of the string area minus the path length
240 * and a reserve for two pointers.
241 *
242 * Parameters; struct image_params * image parameter block
243 * char * path used to invoke program
244 * uio_seg segment where path is located
245 *
246 * Returns: int 0 Success
247 * !0 Failure: error number
248 * Implicit returns:
249 * (imgp->ip_strings) saved path
250 * (imgp->ip_strspace) space remaining in ip_strings
251 * (imgp->ip_argv) beginning of argument list
252 * (imgp->ip_strendp) start of remaining copy area
253 *
254 * Note: We have to do this before the initial namei() since in the
255 * path contains symbolic links, namei() will overwrite the
256 * original path buffer contents. If the last symbolic link
257 * resolved was a relative pathname, we would lose the original
258 * "path", which could be an absolute pathname. This might be
259 * unacceptable for dyld.
260 */
261 static int
262 exec_save_path(struct image_params *imgp, user_addr_t path, /*uio_seg*/int seg)
263 {
264 int error;
265 size_t len;
266 char *kpath = CAST_DOWN(char *,path); /* SAFE */
267
268 imgp->ip_strendp = imgp->ip_strings;
269 imgp->ip_strspace = SIZE_IMG_STRSPACE;
270
271 len = MIN(MAXPATHLEN, imgp->ip_strspace);
272
273 switch( seg) {
274 case UIO_USERSPACE32:
275 case UIO_USERSPACE64: /* Same for copyin()... */
276 error = copyinstr(path, imgp->ip_strings, len, &len);
277 break;
278 case UIO_SYSSPACE32:
279 error = copystr(kpath, imgp->ip_strings, len, &len);
280 break;
281 default:
282 error = EFAULT;
283 break;
284 }
285
286 if (!error) {
287 imgp->ip_strendp += len;
288 imgp->ip_strspace -= len;
289 imgp->ip_argv = imgp->ip_strendp;
290 }
291
292 return(error);
293 }
294
295
296
297 /*
298 * exec_shell_imgact
299 *
300 * Image activator for interpreter scripts. If the image begins with the
301 * characters "#!", then it is an interpreter script. Verify that we are
302 * not already executing in Classic mode, and that the length of the script
303 * line indicating the interpreter is not in excess of the maximum allowed
304 * size. If this is the case, then break out the arguments, if any, which
305 * are separated by white space, and copy them into the argument save area
306 * as if they were provided on the command line before all other arguments.
307 * The line ends when we encounter a comment character ('#') or newline.
308 *
309 * Parameters; struct image_params * image parameter block
310 *
311 * Returns: -1 not an interpreter (keep looking)
312 * -3 Success: interpreter: relookup
313 * >0 Failure: interpreter: error number
314 *
315 * A return value other than -1 indicates subsequent image activators should
316 * not be given the opportunity to attempt to activate the image.
317 */
318 static int
319 exec_shell_imgact(struct image_params *imgp)
320 {
321 char *vdata = imgp->ip_vdata;
322 char *ihp;
323 char *line_endp;
324 char *interp;
325
326 /*
327 * Make sure it's a shell script. If we've already redirected
328 * from an interpreted file once, don't do it again.
329 *
330 * Note: We disallow Classic, since the expectation is that we
331 * may run a Classic interpreter, but not an interpret a Classic
332 * image. This is consistent with historical behaviour.
333 */
334 if (vdata[0] != '#' ||
335 vdata[1] != '!' ||
336 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
337 return (-1);
338 }
339
340
341 imgp->ip_flags |= IMGPF_INTERPRET;
342
343 /* Check to see if SUGID scripts are permitted. If they aren't then
344 * clear the SUGID bits.
345 * imgp->ip_vattr is known to be valid.
346 */
347 if (sugid_scripts == 0) {
348 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
349 }
350
351 /* Find the nominal end of the interpreter line */
352 for( ihp = &vdata[2]; *ihp != '\n' && *ihp != '#'; ihp++) {
353 if (ihp >= &vdata[IMG_SHSIZE])
354 return (ENOEXEC);
355 }
356
357 line_endp = ihp;
358 ihp = &vdata[2];
359 /* Skip over leading spaces - until the interpreter name */
360 while ( ihp < line_endp && ((*ihp == ' ') || (*ihp == '\t')))
361 ihp++;
362
363 /*
364 * Find the last non-whitespace character before the end of line or
365 * the beginning of a comment; this is our new end of line.
366 */
367 for (;line_endp > ihp && ((*line_endp == ' ') || (*line_endp == '\t')); line_endp--)
368 continue;
369
370 /* Empty? */
371 if (line_endp == ihp)
372 return (ENOEXEC);
373
374 /* copy the interpreter name */
375 interp = imgp->ip_interp_name;
376 while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t'))
377 *interp++ = *ihp++;
378 *interp = '\0';
379
380 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name),
381 UIO_SYSSPACE32);
382
383 ihp = &vdata[2];
384 while (ihp < line_endp) {
385 /* Skip leading whitespace before each argument */
386 while ((*ihp == ' ') || (*ihp == '\t'))
387 ihp++;
388
389 if (ihp >= line_endp)
390 break;
391
392 /* We have an argument; copy it */
393 while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) {
394 *imgp->ip_strendp++ = *ihp++;
395 imgp->ip_strspace--;
396 }
397 *imgp->ip_strendp++ = 0;
398 imgp->ip_strspace--;
399 imgp->ip_argc++;
400 }
401
402 return (-3);
403 }
404
405
406
407 /*
408 * exec_fat_imgact
409 *
410 * Image activator for fat 1.0 binaries. If the binary is fat, then we
411 * need to select an image from it internally, and make that the image
412 * we are going to attempt to execute. At present, this consists of
413 * reloading the first page for the image with a first page from the
414 * offset location indicated by the fat header.
415 *
416 * Important: This image activator is byte order neutral.
417 *
418 * Note: If we find an encapsulated binary, we make no assertions
419 * about its validity; instead, we leave that up to a rescan
420 * for an activator to claim it, and, if it is claimed by one,
421 * that activator is responsible for determining validity.
422 */
423 static int
424 exec_fat_imgact(struct image_params *imgp)
425 {
426 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
427 kauth_cred_t cred = p->p_ucred;
428 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
429 struct fat_arch fat_arch;
430 int resid, error;
431 load_return_t lret;
432
433 /* Make sure it's a fat binary */
434 if ((fat_header->magic != FAT_MAGIC) &&
435 (fat_header->magic != FAT_CIGAM)) {
436 error = -1;
437 goto bad;
438 }
439
440 /* Look up our preferred architecture in the fat file. */
441 lret = fatfile_getarch_affinity(imgp->ip_vp,
442 (vm_offset_t)fat_header,
443 &fat_arch,
444 (p->p_flag & P_AFFINITY));
445 if (lret != LOAD_SUCCESS) {
446 error = load_return_to_errno(lret);
447 goto bad;
448 }
449
450 /* Read the Mach-O header out of it */
451 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
452 PAGE_SIZE, fat_arch.offset,
453 UIO_SYSSPACE32, (IO_UNIT|IO_NODELOCKED),
454 cred, &resid, p);
455 if (error) {
456 goto bad;
457 }
458
459 /* Did we read a complete header? */
460 if (resid) {
461 error = EBADEXEC;
462 goto bad;
463 }
464
465 /* Success. Indicate we have identified an encapsulated binary */
466 error = -2;
467 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
468 imgp->ip_arch_size = (user_size_t)fat_arch.size;
469
470 bad:
471 return (error);
472 }
473
474 /*
475 * exec_mach_imgact
476 *
477 * Image activator for mach-o 1.0 binaries.
478 *
479 * Important: This image activator is NOT byte order neutral.
480 */
481 static int
482 exec_mach_imgact(struct image_params *imgp)
483 {
484 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
485 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
486 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
487 int error = 0;
488 int vfexec = 0;
489 task_t task;
490 task_t new_task;
491 thread_t thread;
492 struct uthread *uthread;
493 vm_map_t old_map = VM_MAP_NULL;
494 vm_map_t map;
495 boolean_t clean_regions = FALSE;
496 shared_region_mapping_t initial_region = NULL;
497 load_return_t lret;
498 load_result_t load_result;
499
500 /*
501 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
502 * is a reserved field on the end, so for the most part, we can
503 * treat them as if they were identical.
504 */
505 if ((mach_header->magic != MH_MAGIC) &&
506 (mach_header->magic != MH_MAGIC_64)) {
507 error = -1;
508 goto bad;
509 }
510
511 task = current_task();
512 thread = current_thread();
513 uthread = get_bsdthread_info(thread);
514
515 if (uthread->uu_flag & UT_VFORK)
516 vfexec = 1; /* Mark in exec */
517
518 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
519 imgp->ip_flags |= IMGPF_IS_64BIT;
520
521 if (!grade_binary(mach_header->cputype, mach_header->cpusubtype)) {
522 error = EBADARCH;
523 goto bad;
524 }
525
526 /*
527 * Copy in arguments/environment from the old process, if the
528 * vector is non-NULL (i.e. exec is not being called from
529 * load_init_program(), as a special case, at system startup).
530 */
531 if (imgp->ip_user_argv != 0LL) {
532 error = exec_extract_strings(imgp);
533 if (error)
534 goto bad;
535 }
536
537 /*
538 * Hack for binary compatability; put three NULs on the end of the
539 * string area, and round it up to the next word boundary. This
540 * ensures padding with NULs to the boundary.
541 */
542 imgp->ip_strendp[0] = 0;
543 imgp->ip_strendp[1] = 0;
544 imgp->ip_strendp[2] = 0;
545 imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1));
546
547
548 if (vfexec) {
549 kern_return_t result;
550
551 result = task_create_internal(task, FALSE, &new_task);
552 if (result != KERN_SUCCESS)
553 printf("execve: task_create failed. Code: 0x%x\n", result);
554 p->task = new_task;
555 set_bsdtask_info(new_task, p);
556 if (p->p_nice != 0)
557 resetpriority(p);
558 map = get_task_map(new_task);
559 result = thread_create(new_task, &imgp->ip_vfork_thread);
560 if (result != KERN_SUCCESS)
561 printf("execve: thread_create failed. Code: 0x%x\n", result);
562 /* reset local idea of task, thread, uthread */
563 task = new_task;
564 thread = imgp->ip_vfork_thread;
565 uthread = get_bsdthread_info(thread);
566 } else {
567 map = VM_MAP_NULL;
568 }
569
570 /*
571 * We set these flags here; this is OK, since if we fail after
572 * this point, we have already destroyed the parent process anyway.
573 */
574 if (imgp->ip_flags & IMGPF_IS_64BIT) {
575 task_set_64bit(task, TRUE);
576 p->p_flag |= P_LP64;
577 } else {
578 task_set_64bit(task, FALSE);
579 p->p_flag &= ~P_LP64;
580 }
581
582 /*
583 * Load the Mach-O file.
584 */
585 /* LP64 - remove following "if" statement after osfmk/vm/task_working_set.c */
586 if((imgp->ip_flags & IMGPF_IS_64BIT) == 0)
587 if(imgp->ip_tws_cache_name) {
588 tws_handle_startup_file(task, kauth_cred_getuid(cred),
589 imgp->ip_tws_cache_name, imgp->ip_vp, &clean_regions);
590 }
591
592 vm_get_shared_region(task, &initial_region);
593
594
595 /*
596 * NOTE: An error after this point indicates we have potentially
597 * destroyed or overwrote some process state while attempting an
598 * execve() following a vfork(), which is an unrecoverable condition.
599 */
600
601 /*
602 * We reset the task to 64-bit (or not) here. It may have picked up
603 * a new map, and we need that to reflect its true 64-bit nature.
604 */
605 task_set_64bit(task,
606 ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT));
607
608 /*
609 * Actually load the image file we previously decided to load.
610 */
611 lret = load_machfile(imgp, mach_header, thread, map, clean_regions, &load_result);
612
613 if (lret != LOAD_SUCCESS) {
614 error = load_return_to_errno(lret);
615 goto badtoolate;
616 }
617
618 /* load_machfile() maps the vnode */
619 (void)ubc_map(imgp->ip_vp, PROT_EXEC);
620
621 /*
622 * deal with set[ug]id.
623 */
624 error = exec_handle_sugid(imgp);
625
626 KNOTE(&p->p_klist, NOTE_EXEC);
627
628 if (!vfexec && (p->p_flag & P_TRACED))
629 psignal(p, SIGTRAP);
630
631 if (error) {
632 goto badtoolate;
633 }
634 vnode_put(imgp->ip_vp);
635 imgp->ip_vp = NULL;
636
637 if (load_result.unixproc &&
638 create_unix_stack(get_task_map(task),
639 load_result.user_stack, load_result.customstack, p)) {
640 error = load_return_to_errno(LOAD_NOSPACE);
641 goto badtoolate;
642 }
643
644 if (vfexec) {
645 uthread->uu_ar0 = (void *)get_user_regs(thread);
646 old_map = vm_map_switch(get_task_map(task));
647 }
648
649 if (load_result.unixproc) {
650 user_addr_t ap;
651
652 /*
653 * Copy the strings area out into the new process address
654 * space.
655 */
656 ap = p->user_stack;
657 error = exec_copyout_strings(imgp, &ap);
658 if (error) {
659 if (vfexec)
660 vm_map_switch(old_map);
661 goto badtoolate;
662 }
663 /* Set the stack */
664 thread_setuserstack(thread, ap);
665 }
666
667 if (load_result.dynlinker) {
668 uint64_t ap;
669
670 /* Adjust the stack */
671 if (imgp->ip_flags & IMGPF_IS_64BIT) {
672 ap = thread_adjuserstack(thread, -8);
673 (void)copyoutptr(load_result.mach_header, ap, 8);
674 } else {
675 ap = thread_adjuserstack(thread, -4);
676 (void)suword(ap, load_result.mach_header);
677 }
678 }
679
680 if (vfexec) {
681 vm_map_switch(old_map);
682 }
683 /* Set the entry point */
684 thread_setentrypoint(thread, load_result.entry_point);
685
686 /* Stop profiling */
687 stopprofclock(p);
688
689 /*
690 * Reset signal state.
691 */
692 execsigs(p, thread);
693
694 /*
695 * Close file descriptors
696 * which specify close-on-exec.
697 */
698 fdexec(p);
699
700 /*
701 * need to cancel async IO requests that can be cancelled and wait for those
702 * already active. MAY BLOCK!
703 */
704 _aio_exec( p );
705
706 /* FIXME: Till vmspace inherit is fixed: */
707 if (!vfexec && p->vm_shm)
708 shmexec(p);
709 /* Clean up the semaphores */
710 semexit(p);
711
712 /*
713 * Remember file name for accounting.
714 */
715 p->p_acflag &= ~AFORK;
716 /* If the translated name isn't NULL, then we want to use
717 * that translated name as the name we show as the "real" name.
718 * Otherwise, use the name passed into exec.
719 */
720 if (0 != imgp->ip_p_comm[0]) {
721 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
722 sizeof(p->p_comm));
723 } else {
724 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
725 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
726 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
727 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
728 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
729 }
730
731 {
732 /* This is for kdebug */
733 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
734
735 /* Collect the pathname for tracing */
736 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
737
738
739
740 if (vfexec)
741 {
742 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
743 p->p_pid ,0,0,0, (unsigned int)thread);
744 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
745 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread);
746 }
747 else
748 {
749 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
750 p->p_pid ,0,0,0,0);
751 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
752 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
753 }
754 }
755
756 p->p_flag &= ~P_CLASSIC;
757
758 /*
759 * mark as execed, wakeup the process that vforked (if any) and tell
760 * it that it now has it's own resources back
761 */
762 p->p_flag |= P_EXEC;
763 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
764 p->p_flag &= ~P_PPWAIT;
765 wakeup((caddr_t)p->p_pptr);
766 }
767
768 if (vfexec && (p->p_flag & P_TRACED)) {
769 psignal_vfork(p, new_task, thread, SIGTRAP);
770 }
771
772 badtoolate:
773 if (vfexec) {
774 task_deallocate(new_task);
775 thread_deallocate(thread);
776 if (error)
777 error = 0;
778 }
779
780 bad:
781 return(error);
782 }
783
784
785
786
787 /*
788 * Our image activator table; this is the table of the image types we are
789 * capable of loading. We list them in order of preference to ensure the
790 * fastest image load speed.
791 *
792 * XXX hardcoded, for now; should use linker sets
793 */
794 struct execsw {
795 int (*ex_imgact)(struct image_params *);
796 const char *ex_name;
797 } execsw[] = {
798 { exec_mach_imgact, "Mach-o Binary" },
799 { exec_fat_imgact, "Fat Binary" },
800 { exec_shell_imgact, "Interpreter Script" },
801 { NULL, NULL}
802 };
803
804
805 /*
806 * TODO: Dynamic linker header address on stack is copied via suword()
807 */
808 /* ARGSUSED */
809 int
810 execve(struct proc *p, struct execve_args *uap, register_t *retval)
811 {
812 kauth_cred_t cred = p->p_ucred;
813 struct image_params image_params, *imgp;
814 struct vnode_attr va;
815 struct vnode_attr origva;
816 struct nameidata nd;
817 struct uthread *uthread;
818 int i;
819 int resid, error;
820 task_t task;
821 int numthreads;
822 int vfexec=0;
823 int once = 1; /* save SGUID-ness for interpreted files */
824 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for Classic */
825 int is_64 = IS_64BIT_PROCESS(p);
826 int seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
827 struct vfs_context context;
828
829 context.vc_proc = p;
830 context.vc_ucred = p->p_ucred; /* XXX must NOT be kauth_cred_get() */
831
832
833 imgp = &image_params;
834
835 /* Initialize the common data in the image_params structure */
836 bzero(imgp, sizeof(*imgp));
837 imgp->ip_user_fname = uap->fname;
838 imgp->ip_user_argv = uap->argp;
839 imgp->ip_user_envv = uap->envp;
840 imgp->ip_vattr = &va;
841 imgp->ip_origvattr = &origva;
842 imgp->ip_vfs_context = &context;
843 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
844 imgp->ip_tws_cache_name = NULL;
845 imgp->ip_p_comm = alt_p_comm; /* for Classic */
846
847 /*
848 * XXXAUDIT: Currently, we only audit the pathname of the binary.
849 * There may also be poor interaction with dyld.
850 */
851
852 task = current_task();
853 uthread = get_bsdthread_info(current_thread());
854
855 if (uthread->uu_flag & UT_VFORK) {
856 vfexec = 1; /* Mark in exec */
857 } else {
858 if (task != kernel_task) {
859 numthreads = get_task_numacts(task);
860 if (numthreads <= 0 )
861 return(EINVAL);
862 if (numthreads > 1) {
863 return(ENOTSUP);
864 }
865 }
866 }
867
868 error = execargs_alloc(imgp);
869 if (error)
870 return(error);
871
872 /*
873 * XXXAUDIT: Note: the double copyin introduces an audit
874 * race. To correct this race, we must use a single
875 * copyin(), e.g. by passing a flag to namei to indicate an
876 * external path buffer is being used.
877 */
878 error = exec_save_path(imgp, uap->fname, seg);
879 if (error) {
880 execargs_free(imgp);
881 return(error);
882 }
883
884 /*
885 * No app profiles under chroot
886 */
887 if((p->p_fd->fd_rdir == NULLVP) && (app_profile != 0)) {
888
889 /* grab the name of the file out of its path */
890 /* we will need this for lookup within the */
891 /* name file */
892 /* Scan backwards for the first '/' or start of string */
893 imgp->ip_tws_cache_name = imgp->ip_strendp;
894 while (imgp->ip_tws_cache_name[0] != '/') {
895 if(imgp->ip_tws_cache_name == imgp->ip_strings) {
896 imgp->ip_tws_cache_name--;
897 break;
898 }
899 imgp->ip_tws_cache_name--;
900 }
901 imgp->ip_tws_cache_name++;
902 }
903 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
904 seg, uap->fname, imgp->ip_vfs_context);
905
906 again:
907 error = namei(&nd);
908 if (error)
909 goto bad;
910 imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */
911 imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
912
913 error = exec_check_permissions(imgp);
914 if (error)
915 goto bad;
916
917 /* Copy; avoid invocation of an interpreter overwriting the original */
918 if (once) {
919 once = 0;
920 origva = va;
921 }
922
923 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
924 UIO_SYSSPACE32, IO_NODELOCKED, cred, &resid, p);
925 if (error)
926 goto bad;
927
928 encapsulated_binary:
929 error = -1;
930 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
931
932 error = (*execsw[i].ex_imgact)(imgp);
933
934 switch (error) {
935 /* case -1: not claimed: continue */
936 case -2: /* Encapsulated binary */
937 goto encapsulated_binary;
938
939 case -3: /* Interpreter */
940 vnode_put(imgp->ip_vp);
941 imgp->ip_vp = NULL; /* already put */
942 nd.ni_cnd.cn_nameiop = LOOKUP;
943 nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) |
944 (FOLLOW | LOCKLEAF);
945
946
947 nd.ni_segflg = UIO_SYSSPACE32;
948 nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name);
949 goto again;
950
951 default:
952 break;
953 }
954 }
955
956 /* call out to allow 3rd party notification of exec.
957 * Ignore result of kauth_authorize_fileop call.
958 */
959 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
960 kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_EXEC,
961 (uintptr_t)nd.ni_vp, 0);
962 }
963
964 /* Image not claimed by any activator? */
965 if (error == -1)
966 error = ENOEXEC;
967
968 bad:
969 if (imgp->ip_ndp)
970 nameidone(imgp->ip_ndp);
971 if (imgp->ip_vp)
972 vnode_put(imgp->ip_vp);
973 if (imgp->ip_strings)
974 execargs_free(imgp);
975 if (!error && vfexec) {
976 vfork_return(current_thread(), p->p_pptr, p, retval);
977 (void)thread_resume(imgp->ip_vfork_thread);
978 return(0);
979 }
980 return(error);
981 }
982
983
984 static int
985 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
986 {
987 int error;
988
989 if (ptr_size == 4) {
990 /* 64 bit value containing 32 bit address */
991 unsigned int i;
992
993 error = copyin(froma, &i, 4);
994 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
995 } else {
996 error = copyin(froma, toptr, 8);
997 }
998 return (error);
999 }
1000
1001
1002 static int
1003 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
1004 {
1005 int error;
1006
1007 if (ptr_size == 4) {
1008 /* 64 bit value containing 32 bit address */
1009 unsigned int i = CAST_DOWN(unsigned int,ua); /* SAFE */
1010
1011 error = copyout(&i, ptr, 4);
1012 } else {
1013 error = copyout(&ua, ptr, 8);
1014 }
1015 return (error);
1016 }
1017
1018
1019 /*
1020 * exec_copyout_strings
1021 *
1022 * Copy out the strings segment to user space. The strings segment is put
1023 * on a preinitialized stack frame.
1024 *
1025 * Parameters: struct image_params * the image parameter block
1026 * int * a pointer to the stack offset variable
1027 *
1028 * Returns: 0 Success
1029 * !0 Failure: errno
1030 *
1031 * Implicit returns:
1032 * (*stackp) The stack offset, modified
1033 *
1034 * Note: The strings segment layout is backward, from the beginning
1035 * of the top of the stack to consume the minimal amount of
1036 * space possible; the returned stack pointer points to the
1037 * end of the area consumed (stacks grow upward).
1038 *
1039 * argc is an int; arg[i] are pointers; env[i] are pointers;
1040 * exec_path is a pointer; the 0's are (void *)NULL's
1041 *
1042 * The stack frame layout is:
1043 *
1044 * +-------------+
1045 * sp-> | argc |
1046 * +-------------+
1047 * | arg[0] |
1048 * +-------------+
1049 * :
1050 * :
1051 * +-------------+
1052 * | arg[argc-1] |
1053 * +-------------+
1054 * | 0 |
1055 * +-------------+
1056 * | env[0] |
1057 * +-------------+
1058 * :
1059 * :
1060 * +-------------+
1061 * | env[n] |
1062 * +-------------+
1063 * | 0 |
1064 * +-------------+
1065 * | exec_path | In MacOS X PR2 Beaker2E the path passed to exec() is
1066 * +-------------+ passed on the stack just after the trailing 0 of the
1067 * | 0 | the envp[] array as a pointer to a string.
1068 * +-------------+
1069 * | PATH AREA |
1070 * +-------------+
1071 * | STRING AREA |
1072 * :
1073 * :
1074 * | | <- p->user_stack
1075 * +-------------+
1076 *
1077 * Although technically a part of the STRING AREA, we treat the PATH AREA as
1078 * a separate entity. This allows us to align the beginning of the PATH AREA
1079 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
1080 * which preceed it on the stack are properly aligned.
1081 *
1082 * TODO: argc copied with suword(), which takes a 64 bit address
1083 */
1084 static int
1085 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
1086 {
1087 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1088 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
1089 char *argv = imgp->ip_argv; /* modifiable copy of argv */
1090 user_addr_t string_area; /* *argv[], *env[] */
1091 user_addr_t path_area; /* package launch path */
1092 user_addr_t ptr_area; /* argv[], env[], exec_path */
1093 user_addr_t stack;
1094 int stringc = imgp->ip_argc + imgp->ip_envc;
1095 int len;
1096 int error;
1097 int strspace;
1098
1099 stack = *stackp;
1100
1101 /*
1102 * Set up pointers to the beginning of the string area, the beginning
1103 * of the path area, and the beginning of the pointer area (actually,
1104 * the location of argc, an int, which may be smaller than a pointer,
1105 * but we use ptr_size worth of space for it, for alignment).
1106 */
1107 string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size;
1108 path_area = string_area - (((imgp->ip_argv - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1));
1109 ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4) * ptr_size) - ptr_size /*argc*/;
1110
1111 /* Return the initial stack address: the location of argc */
1112 *stackp = ptr_area;
1113
1114 /*
1115 * Record the size of the arguments area so that sysctl_procargs()
1116 * can return the argument area without having to parse the arguments.
1117 */
1118 p->p_argc = imgp->ip_argc;
1119 p->p_argslen = (int)(stack - path_area);
1120
1121
1122 /*
1123 * Support for new app package launching for Mac OS X allocates
1124 * the "path" at the begining of the imgp->ip_strings buffer.
1125 * copy it just before the string area.
1126 */
1127 len = 0;
1128 error = copyoutstr(imgp->ip_strings, path_area,
1129 (unsigned)(imgp->ip_argv - imgp->ip_strings),
1130 (size_t *)&len);
1131 if (error)
1132 goto bad;
1133
1134
1135 /* Save a NULL pointer below it */
1136 (void)copyoutptr(0LL, path_area - ptr_size, ptr_size);
1137
1138 /* Save the pointer to "path" just below it */
1139 (void)copyoutptr(path_area, path_area - 2*ptr_size, ptr_size);
1140
1141 /*
1142 * ptr_size for 2 NULL one each ofter arg[argc -1] and env[n]
1143 * ptr_size for argc
1144 * skip over saved path, ptr_size for pointer to path,
1145 * and ptr_size for the NULL after pointer to path.
1146 */
1147
1148 /* argc (int32, stored in a ptr_size area) */
1149 (void)suword(ptr_area, imgp->ip_argc);
1150 ptr_area += sizeof(int);
1151 /* pad to ptr_size, if 64 bit image, to ensure user stack alignment */
1152 if (imgp->ip_flags & IMGPF_IS_64BIT) {
1153 (void)suword(ptr_area, 0); /* int, not long: ignored */
1154 ptr_area += sizeof(int);
1155 }
1156
1157
1158 /*
1159 * We use (string_area - path_area) here rather than the more
1160 * intuitive (imgp->ip_argv - imgp->ip_strings) because we are
1161 * interested in the length of the PATH_AREA in user space,
1162 * rather than the actual length of the execution path, since
1163 * it includes alignment padding of the PATH_AREA + STRING_AREA
1164 * to a ptr_size boundary.
1165 */
1166 strspace = SIZE_IMG_STRSPACE - (string_area - path_area);
1167 for (;;) {
1168 if (stringc == imgp->ip_envc) {
1169 /* argv[n] = NULL */
1170 (void)copyoutptr(0LL, ptr_area, ptr_size);
1171 ptr_area += ptr_size;
1172 }
1173 if (--stringc < 0)
1174 break;
1175
1176 /* pointer: argv[n]/env[n] */
1177 (void)copyoutptr(string_area, ptr_area, ptr_size);
1178
1179 /* string : argv[n][]/env[n][] */
1180 do {
1181 if (strspace <= 0) {
1182 error = E2BIG;
1183 break;
1184 }
1185 error = copyoutstr(argv, string_area,
1186 (unsigned)strspace,
1187 (size_t *)&len);
1188 string_area += len;
1189 argv += len;
1190 strspace -= len;
1191 } while (error == ENAMETOOLONG);
1192 if (error == EFAULT || error == E2BIG)
1193 break; /* bad stack - user's problem */
1194 ptr_area += ptr_size;
1195 }
1196 /* env[n] = NULL */
1197 (void)copyoutptr(0LL, ptr_area, ptr_size);
1198
1199 bad:
1200 return(error);
1201 }
1202
1203
1204 /*
1205 * exec_extract_strings
1206 *
1207 * Copy arguments and environment from user space into work area; we may
1208 * have already copied some early arguments into the work area, and if
1209 * so, any arguments opied in are appended to those already there.
1210 *
1211 * Parameters: struct image_params * the image parameter block
1212 *
1213 * Returns: 0 Success
1214 * !0 Failure: errno
1215 *
1216 * Implicit returns;
1217 * (imgp->ip_argc) Count of arguments, updated
1218 * (imgp->ip_envc) Count of environment strings, updated
1219 *
1220 *
1221 * Notes: The argument and environment vectors are user space pointers
1222 * to arrays of user space pointers.
1223 */
1224 static int
1225 exec_extract_strings(struct image_params *imgp)
1226 {
1227 int error = 0;
1228 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1229 int seg = (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32);
1230 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
1231 user_addr_t argv = imgp->ip_user_argv;
1232 user_addr_t envv = imgp->ip_user_envv;
1233
1234 /* Now, get rest of arguments */
1235
1236 /*
1237 * If we are running an interpreter, replace the av[0] that was
1238 * passed to execve() with the fully qualified path name that was
1239 * passed to execve() for interpreters which do not use the PATH
1240 * to locate their script arguments.
1241 */
1242 if((imgp->ip_flags & IMGPF_INTERPRET) != 0 && argv != 0LL) {
1243 user_addr_t arg;
1244
1245 error = copyinptr(argv, &arg, ptr_size);
1246 if (error)
1247 goto bad;
1248 if (arg != 0LL && arg != (user_addr_t)-1) {
1249 argv += ptr_size;
1250 error = exec_add_string(imgp, imgp->ip_user_fname, seg);
1251 if (error)
1252 goto bad;
1253 imgp->ip_argc++;
1254 }
1255 }
1256
1257 while (argv != 0LL) {
1258 user_addr_t arg;
1259
1260 error = copyinptr(argv, &arg, ptr_size);
1261 if (error)
1262 goto bad;
1263
1264 argv += ptr_size;
1265 if (arg == 0LL) {
1266 break;
1267 } else if (arg == (user_addr_t)-1) {
1268 /* Um... why would it be -1? */
1269 error = EFAULT;
1270 goto bad;
1271 }
1272 /*
1273 * av[n...] = arg[n]
1274 */
1275 error = exec_add_string(imgp, arg, seg);
1276 if (error)
1277 goto bad;
1278 imgp->ip_argc++;
1279 }
1280
1281 /* Now, get the environment */
1282 while (envv != 0LL) {
1283 user_addr_t env;
1284
1285 error = copyinptr(envv, &env, ptr_size);
1286 if (error)
1287 goto bad;
1288
1289 envv += ptr_size;
1290 if (env == 0LL) {
1291 break;
1292 } else if (env == (user_addr_t)-1) {
1293 error = EFAULT;
1294 goto bad;
1295 }
1296 /*
1297 * av[n...] = env[n]
1298 */
1299 error = exec_add_string(imgp, env, seg);
1300 if (error)
1301 goto bad;
1302 imgp->ip_envc++;
1303 }
1304 bad:
1305 return error;
1306 }
1307
1308
/*
 * Current (rlim_cur) RLIMIT_STACK value of process 'p'.  The argument is
 * parenthesized so that any pointer expression may be passed, not just a
 * plain identifier.
 */
#define	unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
1310
1311 static int
1312 exec_check_permissions(struct image_params *imgp)
1313 {
1314 struct vnode *vp = imgp->ip_vp;
1315 struct vnode_attr *vap = imgp->ip_vattr;
1316 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1317 int error;
1318 kauth_action_t action;
1319
1320 /* Only allow execution of regular files */
1321 if (!vnode_isreg(vp))
1322 return (EACCES);
1323
1324 /* Get the file attributes that we will be using here and elsewhere */
1325 VATTR_INIT(vap);
1326 VATTR_WANTED(vap, va_uid);
1327 VATTR_WANTED(vap, va_gid);
1328 VATTR_WANTED(vap, va_mode);
1329 VATTR_WANTED(vap, va_fsid);
1330 VATTR_WANTED(vap, va_fileid);
1331 VATTR_WANTED(vap, va_data_size);
1332 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
1333 return (error);
1334
1335 /*
1336 * Ensure that at least one execute bit is on - otherwise root
1337 * will always succeed, and we don't want to happen unless the
1338 * file really is executable.
1339 */
1340 if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
1341 return (EACCES);
1342
1343 /* Disallow zero length files */
1344 if (vap->va_data_size == 0)
1345 return (ENOEXEC);
1346
1347 imgp->ip_arch_offset = (user_size_t)0;
1348 imgp->ip_arch_size = vap->va_data_size;
1349
1350 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
1351 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED))
1352 vap->va_mode &= ~(VSUID | VSGID);
1353
1354 /* Check for execute permission */
1355 action = KAUTH_VNODE_EXECUTE;
1356 /* Traced images must also be readable */
1357 if (p->p_flag & P_TRACED)
1358 action |= KAUTH_VNODE_READ_DATA;
1359 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
1360 return (error);
1361
1362 /* Don't let it run if anyone had it open for writing */
1363 if (vp->v_writecount)
1364 return (ETXTBSY);
1365
1366
1367 /* XXX May want to indicate to underlying FS that vnode is open */
1368
1369 return (error);
1370 }
1371
1372 /*
1373 * exec_handle_sugid
1374 *
1375 * Initially clear the P_SUGID in the process flags; if an SUGID process is
1376 * exec'ing a non-SUGID image, then this is the point of no return.
1377 *
1378 * If the image being activated is SUGI, then replace the credential with a
1379 * copy, disable tracing (unless the tracing process is root), reset the
1380 * mach task port to revoke it, set the P_SUGID bit,
1381 *
1382 * If the saved user and group ID will be changing, then make sure it happens
1383 * to a new credential, rather than a shared one.
1384 *
1385 * Set the security token (this is probably obsolete, given that the token
1386 * should not technically be separate from the credential itself).
1387 *
1388 * Parameters: struct image_params * the image parameter block
1389 *
1390 * Returns: void No failure indication
1391 *
1392 * Implicit returns:
1393 * <process credential> Potentially modified/replaced
1394 * <task port> Potentially revoked
1395 * <process flags> P_SUGID bit potentially modified
1396 * <security token> Potentially modified
1397 */
1398 static int
1399 exec_handle_sugid(struct image_params *imgp)
1400 {
1401 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
1402 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1403 int i;
1404 int error = 0;
1405 static struct vnode *dev_null = NULLVP;
1406
1407 p->p_flag &= ~P_SUGID;
1408
1409 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
1410 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
1411 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
1412 cred->cr_gid != imgp->ip_origvattr->va_gid)) {
1413 #if KTRACE
1414 /*
1415 * If process is being ktraced, turn off - unless
1416 * root set it.
1417 */
1418 if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
1419 struct vnode *tvp = p->p_tracep;
1420 p->p_tracep = NULL;
1421 p->p_traceflag = 0;
1422 vnode_rele(tvp);
1423 }
1424 #endif
1425 /*
1426 * Replace the credential with a copy of itself if euid or egid change.
1427 */
1428 if (imgp->ip_origvattr->va_mode & VSUID) {
1429 p->p_ucred = kauth_cred_seteuid(p->p_ucred, imgp->ip_origvattr->va_uid);
1430 }
1431 if (imgp->ip_origvattr->va_mode & VSGID) {
1432 p->p_ucred = kauth_cred_setegid(p->p_ucred, imgp->ip_origvattr->va_gid);
1433 }
1434
1435 /*
1436 * Have mach reset the task port. We don't want
1437 * anyone who had the task port before a setuid
1438 * exec to be able to access/control the task
1439 * after.
1440 */
1441 if (current_task() == p->task)
1442 ipc_task_reset(p->task);
1443
1444 p->p_flag |= P_SUGID;
1445
1446 /* Cache the vnode for /dev/null the first time around */
1447 if (dev_null == NULLVP) {
1448 struct nameidata nd1;
1449
1450 NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32,
1451 CAST_USER_ADDR_T("/dev/null"),
1452 imgp->ip_vfs_context);
1453
1454 if ((error = vn_open(&nd1, FREAD, 0)) == 0) {
1455 dev_null = nd1.ni_vp;
1456 /*
1457 * vn_open returns with both a use_count
1458 * and an io_count on the found vnode
1459 * drop the io_count, but keep the use_count
1460 */
1461 vnode_put(nd1.ni_vp);
1462 }
1463 }
1464
1465 /* Radar 2261856; setuid security hole fix */
1466 /* Patch from OpenBSD: A. Ramesh */
1467 /*
1468 * XXX For setuid processes, attempt to ensure that
1469 * stdin, stdout, and stderr are already allocated.
1470 * We do not want userland to accidentally allocate
1471 * descriptors in this range which has implied meaning
1472 * to libc.
1473 */
1474 if (dev_null != NULLVP) {
1475 for (i = 0; i < 3; i++) {
1476 struct fileproc *fp;
1477 int indx;
1478
1479 if (p->p_fd->fd_ofiles[i] != NULL)
1480 continue;
1481
1482 if ((error = falloc(p, &fp, &indx)) != 0)
1483 continue;
1484
1485 if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) {
1486 fp_free(p, indx, fp);
1487 break;
1488 }
1489
1490 fp->f_fglob->fg_flag = FREAD;
1491 fp->f_fglob->fg_type = DTYPE_VNODE;
1492 fp->f_fglob->fg_ops = &vnops;
1493 fp->f_fglob->fg_data = (caddr_t)dev_null;
1494
1495 proc_fdlock(p);
1496 *fdflags(p, indx) &= ~UF_RESERVED;
1497 fp_drop(p, indx, fp, 1);
1498 proc_fdunlock(p);
1499 }
1500 /*
1501 * for now we need to drop the reference immediately
1502 * since we don't have any mechanism in place to
1503 * release it before starting to unmount "/dev"
1504 * during a reboot/shutdown
1505 */
1506 vnode_rele(dev_null);
1507 dev_null = NULLVP;
1508 }
1509 }
1510
1511 /*
1512 * Implement the semantic where the effective user and group become
1513 * the saved user and group in exec'ed programs.
1514 */
1515 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), p->p_ucred->cr_gid);
1516
1517 /* XXX Obsolete; security token should not be separate from cred */
1518 set_security_token(p);
1519
1520 return(error);
1521 }
1522
1523 static kern_return_t
1524 create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack,
1525 struct proc *p)
1526 {
1527 mach_vm_size_t size;
1528 mach_vm_offset_t addr;
1529
1530 p->user_stack = user_stack;
1531 if (!customstack) {
1532 size = mach_vm_round_page(unix_stack_size(p));
1533 addr = mach_vm_trunc_page(user_stack - size);
1534 return (mach_vm_allocate(map, &addr, size,
1535 VM_MAKE_TAG(VM_MEMORY_STACK) |
1536 VM_FLAGS_FIXED));
1537 } else
1538 return(KERN_SUCCESS);
1539 }
1540
1541 #include <sys/reboot.h>
1542
1543 static char init_program_name[128] = "/sbin/launchd";
1544 static const char * other_init = "/sbin/mach_init";
1545
1546 char init_args[128] = "";
1547
1548 struct execve_args init_exec_args;
1549 int init_attempts = 0;
1550
1551
1552 void
1553 load_init_program(struct proc *p)
1554 {
1555 vm_offset_t init_addr;
1556 char *argv[3];
1557 int error;
1558 register_t retval[2];
1559
1560 error = 0;
1561
1562 /* init_args are copied in string form directly from bootstrap */
1563
1564 do {
1565 if (boothowto & RB_INITNAME) {
1566 printf("init program? ");
1567 #if FIXME /* [ */
1568 gets(init_program_name, init_program_name);
1569 #endif /* FIXME ] */
1570 }
1571
1572 if (error && ((boothowto & RB_INITNAME) == 0) &&
1573 (init_attempts == 1)) {
1574 printf("Load of %s, errno %d, trying %s\n",
1575 init_program_name, error, other_init);
1576 error = 0;
1577 bcopy(other_init, init_program_name,
1578 sizeof(other_init));
1579 }
1580
1581 init_attempts++;
1582
1583 if (error) {
1584 printf("Load of %s failed, errno %d\n",
1585 init_program_name, error);
1586 error = 0;
1587 boothowto |= RB_INITNAME;
1588 continue;
1589 }
1590
1591 /*
1592 * Copy out program name.
1593 */
1594
1595 init_addr = VM_MIN_ADDRESS;
1596 (void) vm_allocate(current_map(), &init_addr,
1597 PAGE_SIZE, VM_FLAGS_ANYWHERE);
1598 if (init_addr == 0)
1599 init_addr++;
1600
1601 (void) copyout((caddr_t) init_program_name,
1602 CAST_USER_ADDR_T(init_addr),
1603 (unsigned) sizeof(init_program_name)+1);
1604
1605 argv[0] = (char *) init_addr;
1606 init_addr += sizeof(init_program_name);
1607 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1608
1609 /*
1610 * Put out first (and only) argument, similarly.
1611 * Assumes everything fits in a page as allocated
1612 * above.
1613 */
1614
1615 (void) copyout((caddr_t) init_args,
1616 CAST_USER_ADDR_T(init_addr),
1617 (unsigned) sizeof(init_args));
1618
1619 argv[1] = (char *) init_addr;
1620 init_addr += sizeof(init_args);
1621 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1622
1623 /*
1624 * Null-end the argument list
1625 */
1626
1627 argv[2] = (char *) 0;
1628
1629 /*
1630 * Copy out the argument list.
1631 */
1632
1633 (void) copyout((caddr_t) argv,
1634 CAST_USER_ADDR_T(init_addr),
1635 (unsigned) sizeof(argv));
1636
1637 /*
1638 * Set up argument block for fake call to execve.
1639 */
1640
1641 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
1642 init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
1643 init_exec_args.envp = CAST_USER_ADDR_T(0);
1644
1645 /* So that mach_init task
1646 * is set with uid,gid 0 token
1647 */
1648 set_security_token(p);
1649
1650 error = execve(p,&init_exec_args,retval);
1651 } while (error);
1652 }
1653
1654 /*
1655 * Convert a load_return_t to an errno.
1656 */
1657 static int
1658 load_return_to_errno(load_return_t lrtn)
1659 {
1660 switch (lrtn) {
1661 case LOAD_SUCCESS:
1662 return 0;
1663 case LOAD_BADARCH:
1664 return EBADARCH;
1665 case LOAD_BADMACHO:
1666 return EBADMACHO;
1667 case LOAD_SHLIB:
1668 return ESHLIBVERS;
1669 case LOAD_NOSPACE:
1670 case LOAD_RESOURCE:
1671 return ENOMEM;
1672 case LOAD_PROTECT:
1673 return EACCES;
1674 case LOAD_ENOENT:
1675 return ENOENT;
1676 case LOAD_IOERROR:
1677 return EIO;
1678 case LOAD_FAILURE:
1679 default:
1680 return EBADEXEC;
1681 }
1682 }
1683
1684 #include <mach/mach_types.h>
1685 #include <mach/vm_prot.h>
1686 #include <mach/semaphore.h>
1687 #include <mach/sync_policy.h>
1688 #include <kern/clock.h>
1689 #include <mach/kern_return.h>
1690
1691 extern semaphore_t execve_semaphore;
1692
1693 /*
1694 * The block of memory used by the execve arguments. At the same time,
1695 * we allocate a page so that we can read in the first page of the image.
1696 */
1697 static int
1698 execargs_alloc(struct image_params *imgp)
1699 {
1700 kern_return_t kret;
1701
1702 kret = semaphore_wait(execve_semaphore);
1703 if (kret != KERN_SUCCESS)
1704 switch (kret) {
1705 default:
1706 return (EINVAL);
1707 case KERN_INVALID_ADDRESS:
1708 case KERN_PROTECTION_FAILURE:
1709 return (EACCES);
1710 case KERN_ABORTED:
1711 case KERN_OPERATION_TIMED_OUT:
1712 return (EINTR);
1713 }
1714
1715 kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE);
1716 imgp->ip_vdata = imgp->ip_strings + NCARGS;
1717 if (kret != KERN_SUCCESS) {
1718 semaphore_signal(execve_semaphore);
1719 return (ENOMEM);
1720 }
1721 return (0);
1722 }
1723
1724 static int
1725 execargs_free(struct image_params *imgp)
1726 {
1727 kern_return_t kret;
1728
1729 kmem_free(bsd_pageable_map, (vm_offset_t)imgp->ip_strings, NCARGS + PAGE_SIZE);
1730 imgp->ip_strings = NULL;
1731
1732 kret = semaphore_signal(execve_semaphore);
1733 switch (kret) {
1734 case KERN_INVALID_ADDRESS:
1735 case KERN_PROTECTION_FAILURE:
1736 return (EINVAL);
1737 case KERN_ABORTED:
1738 case KERN_OPERATION_TIMED_OUT:
1739 return (EINTR);
1740 case KERN_SUCCESS:
1741 return(0);
1742 default:
1743 return (EINVAL);
1744 }
1745 }