]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_exec.c
xnu-792.18.15.tar.gz
[apple/xnu.git] / bsd / kern / kern_exec.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
29 /*
30 * Mach Operating System
31 * Copyright (c) 1987 Carnegie-Mellon University
32 * All rights reserved. The CMU software License Agreement specifies
33 * the terms and conditions for use and redistribution.
34 */
35
36 #include <cputypes.h>
37
38 /*-
39 * Copyright (c) 1982, 1986, 1991, 1993
40 * The Regents of the University of California. All rights reserved.
41 * (c) UNIX System Laboratories, Inc.
42 * All or some portions of this file are derived from material licensed
43 * to the University of California by American Telephone and Telegraph
44 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
45 * the permission of UNIX System Laboratories, Inc.
46 *
47 * Redistribution and use in source and binary forms, with or without
48 * modification, are permitted provided that the following conditions
49 * are met:
50 * 1. Redistributions of source code must retain the above copyright
51 * notice, this list of conditions and the following disclaimer.
52 * 2. Redistributions in binary form must reproduce the above copyright
53 * notice, this list of conditions and the following disclaimer in the
54 * documentation and/or other materials provided with the distribution.
55 * 3. All advertising materials mentioning features or use of this software
56 * must display the following acknowledgement:
57 * This product includes software developed by the University of
58 * California, Berkeley and its contributors.
59 * 4. Neither the name of the University nor the names of its contributors
60 * may be used to endorse or promote products derived from this software
61 * without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73 * SUCH DAMAGE.
74 *
75 * from: @(#)kern_exec.c 8.1 (Berkeley) 6/10/93
76 */
77 #include <machine/reg.h>
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/filedesc.h>
82 #include <sys/kernel.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/user.h>
86 #include <sys/socketvar.h>
87 #include <sys/malloc.h>
88 #include <sys/namei.h>
89 #include <sys/mount_internal.h>
90 #include <sys/vnode_internal.h>
91 #include <sys/file_internal.h>
92 #include <sys/stat.h>
93 #include <sys/uio_internal.h>
94 #include <sys/acct.h>
95 #include <sys/exec.h>
96 #include <sys/kdebug.h>
97 #include <sys/signal.h>
98 #include <sys/aio_kern.h>
99 #include <sys/sysproto.h>
100 #include <sys/shm_internal.h> /* shmexec() */
101 #include <sys/ubc_internal.h> /* ubc_map() */
102
103 #include <bsm/audit_kernel.h>
104
105 #include <mach/mach_types.h>
106 #include <mach/task.h>
107 #include <mach/thread_act.h>
108 #include <mach/vm_map.h>
109 #include <mach/mach_vm.h>
110 #include <mach/vm_param.h>
111
112 #include <vm/vm_map.h>
113 #include <vm/vm_kern.h>
114 #include <vm/vm_pager.h>
115 #include <vm/vm_kern.h>
116 #include <vm/task_working_set.h>
117 #include <vm/vm_shared_memory_server.h>
118
119 /*
120 * Mach things for which prototypes are unavailable from Mach headers
121 */
122 void ipc_task_reset(
123 task_t task);
124
125 extern struct savearea *get_user_regs(thread_t);
126
127
128 #include <kern/thread.h>
129 #include <kern/task.h>
130 #include <kern/ast.h>
131 #include <kern/mach_loader.h>
132 #include <mach-o/fat.h>
133 #include <mach-o/loader.h>
134 #include <machine/vmparam.h>
135 #if KTRACE
136 #include <sys/ktrace.h>
137 #endif
138 #include <sys/imgact.h>
139
140
141 /*
142 * SIZE_MAXPTR The maximum size of a user space pointer, in bytes
143 * SIZE_IMG_STRSPACE The available string space, minus two pointers; we
144 * define it in terms of the maximum, since we don't
145 * know the pointer size going in, until after we've
146 * parsed the executable image.
147 */
148 #define SIZE_MAXPTR 8 /* 64 bits */
149 #define SIZE_IMG_STRSPACE (NCARGS - 2 * SIZE_MAXPTR)
150
151 int app_profile = 0;
152
153 extern vm_map_t bsd_pageable_map;
154 extern struct fileops vnops;
155
/*
 * ROUND_PTR	Round the address 'addr' up to the next 16 byte boundary and
 *		yield the result as a (type *).  An address that is already
 *		16 byte aligned is returned unchanged.
 *
 * Note:	The address is widened through uintptr_t rather than unsigned,
 *		so that a pointer which is wider than an int (e.g. LP64) is
 *		not truncated by the rounding arithmetic.  The whole expansion
 *		is parenthesized so that it can be used inside a larger
 *		expression.
 */
#define ROUND_PTR(type, addr)	\
	((type *)(((uintptr_t)(addr) + 16 - 1) & ~(uintptr_t)(16 - 1)))
159
/*
 * Forward declarations of file-local (static) helpers that are referenced
 * by the image activators below before their definitions appear.
 */
160 struct image_params; /* Forward */
161 static int exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp);
162 static int load_return_to_errno(load_return_t lrtn);
163 static int execargs_alloc(struct image_params *imgp);
164 static int execargs_free(struct image_params *imgp);
165 static int exec_check_permissions(struct image_params *imgp);
166 static int exec_extract_strings(struct image_params *imgp);
167 static int exec_handle_sugid(struct image_params *imgp);
/*
 * sugid_scripts: when zero (the initial value), exec_shell_imgact() clears
 * the VSUID and VSGID bits from the original vnode attributes of an
 * interpreter script.  Registered read/write under the _kern sysctl node.
 */
168 static int sugid_scripts = 0;
169 SYSCTL_INT (_kern, OID_AUTO, sugid_scripts, CTLFLAG_RW, &sugid_scripts, 0, "");
170 static kern_return_t create_unix_stack(vm_map_t map, user_addr_t user_stack,
171 int customstack, struct proc *p);
172 static int copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size);
173
174 /* XXX forward; should be in headers, but can't be for one reason or another */
175 extern void vfork_return(thread_t th_act,
176 struct proc * p,
177 struct proc *p2,
178 register_t *retval);
179
180 /*
181 * exec_add_string
182 *
183 * Add the requested string to the string space area.
184 *
185 * Parameters; struct image_params * image parameter block
186 * user_addr_t string to add to strings area
187 * uio_seg segment where string is located
188 *
189 * Returns: 0 Success
190 * !0 Failure errno from copyinstr()
191 *
192 * Implicit returns:
193 * (imgp->ip_strendp) updated location of next add, if any
194 * (imgp->ip_strspace) updated byte count of space remaining
195 */
196 static int
197 exec_add_string(struct image_params *imgp, user_addr_t str, /*uio_seg*/int seg)
198 {
199 int error = 0;
200
201 do {
202 size_t len = 0;
203 if (imgp->ip_strspace <= 0) {
204 error = E2BIG;
205 break;
206 }
207 if (IS_UIO_SYS_SPACE(seg)) {
208 char *kstr = CAST_DOWN(char *,str); /* SAFE */
209 error = copystr(kstr, imgp->ip_strendp, imgp->ip_strspace, &len);
210 } else {
211 error = copyinstr(str, imgp->ip_strendp, imgp->ip_strspace,
212 &len);
213 }
214 imgp->ip_strendp += len;
215 imgp->ip_strspace -= len;
216 } while (error == ENAMETOOLONG);
217
218 return error;
219 }
220
221 /*
222 * exec_save_path
223 *
224 * To support new app package launching for Mac OS X, the dyld needs the
225 * first argument to execve() stored on the user stack.
226 *
227 * Save the executable path name at the top of the strings area and set
228 * the argument vector pointer to the location following that to indicate
229 * the start of the argument and environment tuples, setting the remaining
230 * string space count to the size of the string area minus the path length
231 * and a reserve for two pointers.
232 *
233 * Parameters; struct image_params * image parameter block
234 * char * path used to invoke program
235 * uio_seg segment where path is located
236 *
237 * Returns: int 0 Success
238 * !0 Failure: error number
239 * Implicit returns:
240 * (imgp->ip_strings) saved path
241 * (imgp->ip_strspace) space remaining in ip_strings
242 * (imgp->ip_argv) beginning of argument list
243 * (imgp->ip_strendp) start of remaining copy area
244 *
245 * Note: We have to do this before the initial namei() since in the
246 * path contains symbolic links, namei() will overwrite the
247 * original path buffer contents. If the last symbolic link
248 * resolved was a relative pathname, we would lose the original
249 * "path", which could be an absolute pathname. This might be
250 * unacceptable for dyld.
251 */
252 static int
253 exec_save_path(struct image_params *imgp, user_addr_t path, /*uio_seg*/int seg)
254 {
255 int error;
256 size_t len;
257 char *kpath = CAST_DOWN(char *,path); /* SAFE */
258
259 imgp->ip_strendp = imgp->ip_strings;
260 imgp->ip_strspace = SIZE_IMG_STRSPACE;
261
262 len = MIN(MAXPATHLEN, imgp->ip_strspace);
263
264 switch( seg) {
265 case UIO_USERSPACE32:
266 case UIO_USERSPACE64: /* Same for copyin()... */
267 error = copyinstr(path, imgp->ip_strings, len, &len);
268 break;
269 case UIO_SYSSPACE32:
270 error = copystr(kpath, imgp->ip_strings, len, &len);
271 break;
272 default:
273 error = EFAULT;
274 break;
275 }
276
277 if (!error) {
278 imgp->ip_strendp += len;
279 imgp->ip_strspace -= len;
280 imgp->ip_argv = imgp->ip_strendp;
281 }
282
283 return(error);
284 }
285
#ifdef IMGPF_POWERPC
/*
 * exec_powerpc32_imgact
 *
 * Implicitly invoke the PowerPC handler for a byte-swapped image magic
 * number.  This may happen either as a result of an attempt to invoke a
 * PowerPC image directly, or indirectly as the interpreter used in an
 * interpreter script.
 *
 * Parameters;	struct image_params *	image parameter block
 *
 * Returns:	-1		not a PowerPC image (keep looking)
 *		-3		Success: exec_archhandler_ppc: relookup
 *		EBADARCH	Failure: no exec_archhandler_ppc is set
 *		>0		Failure: error number from copystr()
 *
 * Implicit returns:
 *		(imgp->ip_flags)	IMGPF_POWERPC is set
 *		(imgp->ip_interp_name)	path of the exec_archhandler_ppc
 *		(imgp->ip_p_comm)	replacement for p->p_comm
 *
 * Note:	This image activator does not handle the case of a direct
 *		invocation of the exec_archhandler_ppc, since in that case,
 *		the exec_archhandler_ppc itself is not a PowerPC binary;
 *		instead, binary image activators must recognize the
 *		exec_archhandler_ppc; this is managed in
 *		exec_check_permissions().
 *
 * Note:	This image activator is limited to 32 bit powerpc images;
 *		if support for 64 bit powerpc images is desired, it would
 *		be more in line with this design to write a separate 64 bit
 *		image activator.
 */
static int
exec_powerpc32_imgact(struct image_params *imgp)
{
	struct mach_header *hdr = (struct mach_header *)imgp->ip_vdata;
	size_t copied = 0;
	int error;

	/* Only a byte-swapped Mach-O magic number is claimed here */
	if (hdr->magic != MH_CIGAM)
		return (-1);

	/* Without an exec_archhandler_ppc, the image can not be run */
	if (exec_archhandler_ppc.path[0] == 0)
		return (EBADARCH);

	/*
	 * exec_check_permissions() will set the PowerPC flag in any case;
	 * it is set early here so that, as a side effect, the relookup in
	 * execve() does not follow symbolic links.
	 */
	imgp->ip_flags |= IMGPF_POWERPC;

	/* impute an interpreter */
	error = copystr(exec_archhandler_ppc.path, imgp->ip_interp_name,
			IMG_SHSIZE, &copied);
	if (error == 0) {
		/*
		 * The replacement for p->p_comm goes into an alternate
		 * buffer instead of p->p_comm itself: the exec may still
		 * fail and return to the parent, whose p->p_comm would
		 * otherwise have been changed erroneously.
		 */
		strncpy(imgp->ip_p_comm, imgp->ip_ndp->ni_cnd.cn_nameptr,
			MAXCOMLEN);
		imgp->ip_p_comm[MAXCOMLEN] = '\0';

		error = -3;
	}

	return (error);
}
#endif	/* IMGPF_POWERPC */
355
356
357 /*
358 * exec_shell_imgact
359 *
360 * Image activator for interpreter scripts. If the image begins with the
361 * characters "#!", then it is an interpreter script. Verify that we are
362 * not already executing in PowerPC mode, and that the length of the script
363 * line indicating the interpreter is not in excess of the maximum allowed
364 * size. If this is the case, then break out the arguments, if any, which
365 * are separated by white space, and copy them into the argument save area
366 * as if they were provided on the command line before all other arguments.
367 * The line ends when we encounter a comment character ('#') or newline.
368 *
369 * Parameters; struct image_params * image parameter block
370 *
371 * Returns: -1 not an interpreter (keep looking)
372 * -3 Success: interpreter: relookup
373 * >0 Failure: interpreter: error number
374 *
375 * A return value other than -1 indicates subsequent image activators should
376 * not be given the opportunity to attempt to activate the image.
377 */
378 static int
379 exec_shell_imgact(struct image_params *imgp)
380 {
381 char *vdata = imgp->ip_vdata;
382 char *ihp;
383 char *line_endp;
384 char *interp;
385
386 /*
387 * Make sure it's a shell script. If we've already redirected
388 * from an interpreted file once, don't do it again.
389 *
390 * Note: We disallow PowerPC, since the expectation is that we
391 * may run a PowerPC interpreter, but not an interpret a PowerPC
392 * image. This is consistent with historical behaviour.
393 */
394 if (vdata[0] != '#' ||
395 vdata[1] != '!' ||
396 (imgp->ip_flags & IMGPF_INTERPRET) != 0) {
397 return (-1);
398 }
399
400 #ifdef IMGPF_POWERPC
401 if ((imgp->ip_flags & IMGPF_POWERPC) != 0)
402 return (EBADARCH);
403 #endif /* IMGPF_POWERPC */
404
405 imgp->ip_flags |= IMGPF_INTERPRET;
406
407 /* Check to see if SUGID scripts are permitted. If they aren't then
408 * clear the SUGID bits.
409 * imgp->ip_vattr is known to be valid.
410 */
411 if (sugid_scripts == 0) {
412 imgp->ip_origvattr->va_mode &= ~(VSUID | VSGID);
413 }
414
415 /* Find the nominal end of the interpreter line */
416 for( ihp = &vdata[2]; *ihp != '\n' && *ihp != '#'; ihp++) {
417 if (ihp >= &vdata[IMG_SHSIZE])
418 return (ENOEXEC);
419 }
420
421 line_endp = ihp;
422 ihp = &vdata[2];
423 /* Skip over leading spaces - until the interpreter name */
424 while ( ihp < line_endp && ((*ihp == ' ') || (*ihp == '\t')))
425 ihp++;
426
427 /*
428 * Find the last non-whitespace character before the end of line or
429 * the beginning of a comment; this is our new end of line.
430 */
431 for (;line_endp > ihp && ((*line_endp == ' ') || (*line_endp == '\t')); line_endp--)
432 continue;
433
434 /* Empty? */
435 if (line_endp == ihp)
436 return (ENOEXEC);
437
438 /* copy the interpreter name */
439 interp = imgp->ip_interp_name;
440 while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t'))
441 *interp++ = *ihp++;
442 *interp = '\0';
443
444 exec_save_path(imgp, CAST_USER_ADDR_T(imgp->ip_interp_name),
445 UIO_SYSSPACE32);
446
447 ihp = &vdata[2];
448 while (ihp < line_endp) {
449 /* Skip leading whitespace before each argument */
450 while ((*ihp == ' ') || (*ihp == '\t'))
451 ihp++;
452
453 if (ihp >= line_endp)
454 break;
455
456 /* We have an argument; copy it */
457 while ((ihp < line_endp) && (*ihp != ' ') && (*ihp != '\t')) {
458 *imgp->ip_strendp++ = *ihp++;
459 imgp->ip_strspace--;
460 }
461 *imgp->ip_strendp++ = 0;
462 imgp->ip_strspace--;
463 imgp->ip_argc++;
464 }
465
466 return (-3);
467 }
468
469
470
471 /*
472 * exec_fat_imgact
473 *
474 * Image activator for fat 1.0 binaries. If the binary is fat, then we
475 * need to select an image from it internally, and make that the image
476 * we are going to attempt to execute. At present, this consists of
477 * reloading the first page for the image with a first page from the
478 * offset location indicated by the fat header.
479 *
480 * Important: This image activator is byte order neutral.
481 *
482 * Note: If we find an encapsulated binary, we make no assertions
483 * about its validity; instead, we leave that up to a rescan
484 * for an activator to claim it, and, if it is claimed by one,
485 * that activator is responsible for determining validity.
486 */
487 static int
488 exec_fat_imgact(struct image_params *imgp)
489 {
490 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
491 kauth_cred_t cred = kauth_cred_proc_ref(p);
492 struct fat_header *fat_header = (struct fat_header *)imgp->ip_vdata;
493 struct fat_arch fat_arch;
494 int resid, error;
495 load_return_t lret;
496
497 /* Make sure it's a fat binary */
498 if ((fat_header->magic != FAT_MAGIC) &&
499 (fat_header->magic != FAT_CIGAM)) {
500 error = -1;
501 goto bad;
502 }
503
504 /* Look up our preferred architecture in the fat file. */
505 lret = fatfile_getarch_affinity(imgp->ip_vp,
506 (vm_offset_t)fat_header,
507 &fat_arch,
508 (p->p_flag & P_AFFINITY));
509 if (lret != LOAD_SUCCESS) {
510 error = load_return_to_errno(lret);
511 goto bad;
512 }
513
514 /* Read the Mach-O header out of it */
515 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata,
516 PAGE_SIZE, fat_arch.offset,
517 UIO_SYSSPACE32, (IO_UNIT|IO_NODELOCKED),
518 cred, &resid, p);
519 if (error) {
520 goto bad;
521 }
522
523 /* Did we read a complete header? */
524 if (resid) {
525 error = EBADEXEC;
526 goto bad;
527 }
528
529 /* Success. Indicate we have identified an encapsulated binary */
530 error = -2;
531 imgp->ip_arch_offset = (user_size_t)fat_arch.offset;
532 imgp->ip_arch_size = (user_size_t)fat_arch.size;
533
534 bad:
535 kauth_cred_unref(&cred);
536 return (error);
537 }
538
539 /*
540 * exec_mach_imgact
541 *
542 * Image activator for mach-o 1.0 binaries.
543 *
 * Parameters;	struct image_params *	image parameter block
 *
 * Returns:	-1		not a Mach-O image (keep looking)
 *		0		Success
 *		EBADARCH	grade_binary() rejected the cputype/cpusubtype
 *		!0		Failure: error number from
 *				exec_extract_strings(), load_machfile() (via
 *				load_return_to_errno()), exec_handle_sugid(),
 *				create_unix_stack(), exec_copyout_strings(),
 *				or the copy out of the mach header address
 *
 * Note:	When the calling thread is marked UT_VFORK, the image is
 *		loaded into a newly created task and thread rather than the
 *		current ones.  In that case, any error detected once the
 *		"badtoolate" label can be reached is replaced with 0 before
 *		returning.
 *
544 * Important: This image activator is NOT byte order neutral.
545 */
546 static int
547 exec_mach_imgact(struct image_params *imgp)
548 {
549 struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
550 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
551 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
552 int error = 0;
553 int vfexec = 0;
554 task_t task;
555 task_t new_task;
556 thread_t thread;
557 struct uthread *uthread;
558 vm_map_t old_map = VM_MAP_NULL;
559 vm_map_t map;
560 boolean_t clean_regions = FALSE;
561 load_return_t lret;
562 load_result_t load_result;
563 shared_region_mapping_t shared_region, initial_region;
564 #ifdef IMGPF_POWERPC
565 int powerpcParent, powerpcImage;
566 #endif /* IMGPF_POWERPC */
567
568 /*
569 * make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
570 * is a reserved field on the end, so for the most part, we can
571 * treat them as if they were identical.
572 */
573 if ((mach_header->magic != MH_MAGIC) &&
574 (mach_header->magic != MH_MAGIC_64)) {
575 error = -1;
576 goto bad;
577 }
578
579 task = current_task();
580 thread = current_thread();
581 uthread = get_bsdthread_info(thread);
582
583 if (uthread->uu_flag & UT_VFORK)
584 vfexec = 1; /* Mark in exec */
585
/* Remember whether the image uses the 64 bit ABI */
586 if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
587 imgp->ip_flags |= IMGPF_IS_64BIT;
588
589 if (!grade_binary(mach_header->cputype, mach_header->cpusubtype)) {
590 error = EBADARCH;
591 goto bad;
592 }
593
594 /*
595 * Copy in arguments/environment from the old process, if the
596 * vector is non-NULL (i.e. exec is not being called from
597 * load_init_program(), as a special case, at system startup).
598 */
599 if (imgp->ip_user_argv != 0LL) {
600 error = exec_extract_strings(imgp);
601 if (error)
602 goto bad;
603 }
604
605 /*
606 * Hack for binary compatibility; put three NULs on the end of the
607 * string area, and round it up to the next word boundary. This
608 * ensures padding with NULs to the boundary.
609 */
610 imgp->ip_strendp[0] = 0;
611 imgp->ip_strendp[1] = 0;
612 imgp->ip_strendp[2] = 0;
/*
 * NOTE(review): the expression below ADDS the rounded-up offset of
 * ip_strendp (relative to ip_strings) to ip_strendp, rather than setting
 * ip_strendp to ip_strings plus that rounded offset; confirm this is what
 * the consumers of ip_strendp expect.
 */
613 imgp->ip_strendp += (((imgp->ip_strendp - imgp->ip_strings) + NBPW-1) & ~(NBPW-1));
614
615 #ifdef IMGPF_POWERPC
616 /*
617 * XXX
618 *
619 * Should be factored out; this is here because we might be getting
620 * invoked this way as the result of a shell script, and the check
621 * in exec_check_permissions() is not interior to the jump back up
622 * to the "encapsulated_binary:" label in execve().
623 */
624 if (imgp->ip_vattr->va_fsid == exec_archhandler_ppc.fsid &&
625 imgp->ip_vattr->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) {
626 imgp->ip_flags |= IMGPF_POWERPC;
627 }
628 #endif /* IMGPF_POWERPC */
629
/*
 * exec after vfork: build a new task and thread to receive the image, and
 * attach the task to the proc.  Otherwise no map is passed to the loader.
 */
630 if (vfexec) {
631 kern_return_t result;
632
/*
 * NOTE(review): a task_create_internal() failure is only logged; new_task
 * is used below regardless of the result.
 */
633 result = task_create_internal(task, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT), &new_task);
634 if (result != KERN_SUCCESS)
635 printf("execve: task_create failed. Code: 0x%x\n", result);
636 p->task = new_task;
637 set_bsdtask_info(new_task, p);
638 if (p->p_nice != 0)
639 resetpriority(p);
640 map = get_task_map(new_task);
641
642 if (imgp->ip_flags & IMGPF_IS_64BIT)
643 vm_map_set_64bit(map);
644 else
645 vm_map_set_32bit(map);
646
/*
 * NOTE(review): likewise, a thread_create() failure is only logged;
 * imgp->ip_vfork_thread is used below regardless of the result.
 */
647 result = thread_create(new_task, &imgp->ip_vfork_thread);
648 if (result != KERN_SUCCESS)
649 printf("execve: thread_create failed. Code: 0x%x\n", result);
650 /* reset local idea of task, thread, uthread */
651 task = new_task;
652 thread = imgp->ip_vfork_thread;
653 uthread = get_bsdthread_info(thread);
654 } else {
655 map = VM_MAP_NULL;
656 }
657
658 /*
659 * We set these flags here; this is OK, since if we fail after
660 * this point, we have already destroyed the parent process anyway.
661 */
662 if (imgp->ip_flags & IMGPF_IS_64BIT) {
663 task_set_64bit(task, TRUE);
664 p->p_flag |= P_LP64;
665 } else {
666 task_set_64bit(task, FALSE);
667 p->p_flag &= ~P_LP64;
668 }
669
670 /*
671 * Load the Mach-O file.
672 */
673 /* LP64 - remove following "if" statement after osfmk/vm/task_working_set.c */
674 if((imgp->ip_flags & IMGPF_IS_64BIT) == 0)
675 if(imgp->ip_tws_cache_name) {
676 tws_handle_startup_file(task, kauth_cred_getuid(cred),
677 imgp->ip_tws_cache_name, imgp->ip_vp, &clean_regions);
678 }
679
/* Fetch the task's current shared region; it is examined or replaced below */
680 vm_get_shared_region(task, &initial_region);
681
682 #ifdef IMGPF_POWERPC
683 /*
684 * If we are transitioning to/from powerpc, then we need to do extra
685 * work here.
686 */
687 powerpcParent = (p->p_flag & P_TRANSLATED) ? 1 : 0;
688 powerpcImage = (imgp->ip_flags & IMGPF_POWERPC) ? 1 : 0;
689
690 if (powerpcParent ^ powerpcImage) {
691 cpu_type_t cpu = (powerpcImage ? CPU_TYPE_POWERPC : cpu_type());
692 struct vnode *rootDir = p->p_fd->fd_rdir;
693
694 shared_region = lookup_default_shared_region((int)rootDir, cpu);
695 if (shared_region == NULL) {
696 shared_region_mapping_t old_region;
697 shared_region_mapping_t new_region;
698 vm_get_shared_region(current_task(), &old_region);
699 /* grrrr... this sets current_task(), not task
700 * -- they're different (usually)
701 */
702 shared_file_boot_time_init((int)rootDir,cpu);
703 if ( current_task() != task ) {
704 vm_get_shared_region(current_task(),&new_region);
705 vm_set_shared_region(task,new_region);
706 vm_set_shared_region(current_task(),old_region);
707 }
708 } else {
709 vm_set_shared_region(task, shared_region);
710 }
711 shared_region_mapping_dealloc(initial_region);
712 } else
713 #endif /* IMGPF_POWERPC */
714
/*
 * This block is the "else" arm of the PowerPC transition test above when
 * IMGPF_POWERPC is defined, and is unconditional otherwise.
 */
715 {
716 struct shared_region_task_mappings map_info;
717 shared_region_mapping_t next;
718
719 shared_region_mapping_info(initial_region,
720 &map_info.text_region,
721 &map_info.text_size,
722 &map_info.data_region,
723 &map_info.data_size,
724 &map_info.region_mappings,
725 &map_info.client_base,
726 &map_info.alternate_base,
727 &map_info.alternate_next,
728 &map_info.fs_base,
729 &map_info.system,
730 &map_info.flags,
731 &next);
732 if (map_info.flags & SHARED_REGION_STANDALONE) {
733 /*
734 * We were using a private shared region.
735 * Try and get back to a system-wide shared region
736 * with matching "fs_base" (for chroot) and "system"
737 * (for CPU type).
738 */
739 shared_region = lookup_default_shared_region(
740 map_info.fs_base,
741 map_info.system);
742 if (shared_region == NULL) {
743 /*
744 * No system-wide default regions, stick to
745 * our private region...
746 */
747 } else {
748 SHARED_REGION_TRACE(
749 SHARED_REGION_TRACE_INFO,
750 ("shared_region: %p [%d(%s)] "
751 "exec(\"%s\"): "
752 "moving from private %p[%x,%x,%x] "
753 "to default %p\n",
754 current_thread(),
755 p->p_pid, p->p_comm,
756 (imgp->ip_p_comm[0] ?
757 imgp->ip_p_comm :
758 imgp->ip_ndp->ni_cnd.cn_nameptr),
759 initial_region,
760 map_info.fs_base,
761 map_info.system,
762 map_info.flags,
763 shared_region));
764 vm_set_shared_region(task, shared_region);
765 shared_region_mapping_dealloc(initial_region);
766 }
767 }
768 }
769
770 /*
771 * NOTE: An error after this point indicates we have potentially
772 * destroyed or overwritten some process state while attempting an
773 * execve() following a vfork(), which is an unrecoverable condition.
774 */
775
776 /*
777 * We reset the task to 64-bit (or not) here. It may have picked up
778 * a new map, and we need that to reflect its true 64-bit nature.
779 */
780
781 task_set_64bit(task,
782 ((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT));
783
784 /*
785 * Actually load the image file we previously decided to load.
786 */
787 lret = load_machfile(imgp, mach_header, thread, map, clean_regions, &load_result);
788
789 if (lret != LOAD_SUCCESS) {
790 error = load_return_to_errno(lret);
791 goto badtoolate;
792 }
793
794 /* load_machfile() maps the vnode */
795 (void)ubc_map(imgp->ip_vp, PROT_EXEC);
796
797 /*
798 * deal with set[ug]id.
799 */
800 error = exec_handle_sugid(imgp);
801
/*
 * Note: the NOTE_EXEC knote and, for a traced process that is not in a
 * vfork exec, the SIGTRAP are posted before the exec_handle_sugid()
 * result is examined.
 */
802 KNOTE(&p->p_klist, NOTE_EXEC);
803
804 if (!vfexec && (p->p_flag & P_TRACED))
805 psignal(p, SIGTRAP);
806
807 if (error) {
808 goto badtoolate;
809 }
/* Drop our reference on the image vnode; it is not used again below */
810 vnode_put(imgp->ip_vp);
811 imgp->ip_vp = NULL;
812
813 if (load_result.unixproc &&
814 create_unix_stack(get_task_map(task),
815 load_result.user_stack, load_result.customstack, p)) {
816 error = load_return_to_errno(LOAD_NOSPACE);
817 goto badtoolate;
818 }
819
/*
 * In the vfork case, switch to the new task's map for the copy outs below;
 * the previous map is restored on every exit path from here on.
 */
820 if (vfexec) {
821 old_map = vm_map_switch(get_task_map(task));
822 }
823
824 if (load_result.unixproc) {
825 user_addr_t ap;
826
827 /*
828 * Copy the strings area out into the new process address
829 * space.
830 */
831 ap = p->user_stack;
832 error = exec_copyout_strings(imgp, &ap);
833 if (error) {
834 if (vfexec)
835 vm_map_switch(old_map);
836 goto badtoolate;
837 }
838 /* Set the stack */
839 thread_setuserstack(thread, ap);
840 }
841
/*
 * For a dynamically linked image, push load_result.mach_header onto the
 * user stack, as a pointer sized for the image (8 or 4 bytes).
 */
842 if (load_result.dynlinker) {
843 uint64_t ap;
844
845 /* Adjust the stack */
846 if (imgp->ip_flags & IMGPF_IS_64BIT) {
847 ap = thread_adjuserstack(thread, -8);
848 error = copyoutptr(load_result.mach_header, ap, 8);
849 } else {
850 ap = thread_adjuserstack(thread, -4);
851 error = suword(ap, load_result.mach_header);
852 }
853 if (error) {
854 if (vfexec)
855 vm_map_switch(old_map);
856 goto badtoolate;
857 }
858 }
859
860 if (vfexec) {
861 vm_map_switch(old_map);
862 }
863 /* Set the entry point */
864 thread_setentrypoint(thread, load_result.entry_point);
865
866 /* Stop profiling */
867 stopprofclock(p);
868
869 /*
870 * Reset signal state.
871 */
872 execsigs(p, thread);
873
874 /*
875 * Close file descriptors
876 * which specify close-on-exec.
877 */
878 fdexec(p);
879
880 /*
881 * need to cancel async IO requests that can be cancelled and wait for those
882 * already active. MAY BLOCK!
883 */
884 _aio_exec( p );
885
886 /* FIXME: Till vmspace inherit is fixed: */
887 if (!vfexec && p->vm_shm)
888 shmexec(p);
889 /* Clean up the semaphores */
890 semexit(p);
891
892 /*
893 * Remember file name for accounting.
894 */
895 p->p_acflag &= ~AFORK;
896 /* If the translated name isn't NULL, then we want to use
897 * that translated name as the name we show as the "real" name.
898 * Otherwise, use the name passed into exec.
899 */
900 if (0 != imgp->ip_p_comm[0]) {
901 bcopy((caddr_t)imgp->ip_p_comm, (caddr_t)p->p_comm,
902 sizeof(p->p_comm));
903 } else {
/* Clamp the component name to MAXCOMLEN before copying it into p_comm */
904 if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
905 imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
906 bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
907 (unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
908 p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
909 }
910
911 if (kdebug_enable) {
912 long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
913
914 /*
915 * Collect the pathname for tracing
916 */
917 kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
918
/* In the vfork case, the new thread is passed as the final trace argument */
919 if (vfexec)
920 {
921 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
922 p->p_pid ,0,0,0, (unsigned int)thread);
923 KERNEL_DEBUG_CONSTANT1((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
924 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (unsigned int)thread);
925 }
926 else
927 {
928 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_DATA, 2)) | DBG_FUNC_NONE,
929 p->p_pid ,0,0,0,0);
930 KERNEL_DEBUG_CONSTANT((TRACEDBG_CODE(DBG_TRACE_STRING, 2)) | DBG_FUNC_NONE,
931 dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
932 }
933 }
934
935 #ifdef IMGPF_POWERPC
936 /*
937 * Mark the process as powerpc or not. If powerpc, set the affinity
938 * flag, which will be used for grading binaries in future exec's
939 * from the process.
940 */
941 if (((imgp->ip_flags & IMGPF_POWERPC) != 0))
942 p->p_flag |= P_TRANSLATED;
943 else
944 #endif /* IMGPF_POWERPC */
945 p->p_flag &= ~P_TRANSLATED;
/*
 * NOTE(review): P_AFFINITY is cleared unconditionally here, while the
 * comment above speaks of setting the affinity flag for powerpc; confirm
 * which is intended.
 */
946 p->p_flag &= ~P_AFFINITY;
947
948 /*
949 * mark as execed, wakeup the process that vforked (if any) and tell
950 * it that it now has its own resources back
951 */
952 p->p_flag |= P_EXEC;
953 if (p->p_pptr && (p->p_flag & P_PPWAIT)) {
954 p->p_flag &= ~P_PPWAIT;
955 wakeup((caddr_t)p->p_pptr);
956 }
957
958 if (vfexec && (p->p_flag & P_TRACED)) {
959 psignal_vfork(p, new_task, thread, SIGTRAP);
960 }
961
/*
 * Common exit once the new task/thread may exist: in the vfork case, drop
 * the references on the new task and thread, and report success to the
 * caller even if an error was recorded above.
 */
962 badtoolate:
963 if (vfexec) {
964 task_deallocate(new_task);
965 thread_deallocate(thread);
966 if (error)
967 error = 0;
968 }
969
970 bad:
971 return(error);
972 }
973
974
975
976
977 /*
978 * Our image activator table; this is the table of the image types we are
979 * capable of loading. We list them in order of preference to ensure the
980 * fastest image load speed.
981 *
982 * XXX hardcoded, for now; should use linker sets
983 */
984 struct execsw {
985 int (*ex_imgact)(struct image_params *);
986 const char *ex_name;
987 } execsw[] = {
988 { exec_mach_imgact, "Mach-o Binary" },
989 { exec_fat_imgact, "Fat Binary" },
990 #ifdef IMGPF_POWERPC
991 { exec_powerpc32_imgact, "PowerPC binary" },
992 #endif /* IMGPF_POWERPC */
993 { exec_shell_imgact, "Interpreter Script" },
994 { NULL, NULL}
995 };
996
997
998 /*
999 * TODO: Dynamic linker header address on stack is copied via suword()
1000 */
1001 /* ARGSUSED */
1002 int
1003 execve(struct proc *p, struct execve_args *uap, register_t *retval)
1004 {
1005 kauth_cred_t cred = kauth_cred_proc_ref(p);
1006 struct image_params image_params, *imgp;
1007 struct vnode_attr va;
1008 struct vnode_attr origva;
1009 struct nameidata nd;
1010 struct uthread *uthread;
1011 int i;
1012 int resid, error;
1013 task_t task;
1014 int numthreads;
1015 int vfexec=0;
1016 int once = 1; /* save SGUID-ness for interpreted files */
1017 char alt_p_comm[sizeof(p->p_comm)] = {0}; /* for PowerPC */
1018 int is_64 = IS_64BIT_PROCESS(p);
1019 int seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
1020 struct vfs_context context;
1021
1022 context.vc_proc = p;
1023 context.vc_ucred = cred; /* XXX must NOT be kauth_cred_get() */
1024
1025
1026 imgp = &image_params;
1027
1028 /* Initialize the common data in the image_params structure */
1029 bzero(imgp, sizeof(*imgp));
1030 imgp->ip_user_fname = uap->fname;
1031 imgp->ip_user_argv = uap->argp;
1032 imgp->ip_user_envv = uap->envp;
1033 imgp->ip_vattr = &va;
1034 imgp->ip_origvattr = &origva;
1035 imgp->ip_vfs_context = &context;
1036 imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE);
1037 imgp->ip_tws_cache_name = NULL;
1038 imgp->ip_p_comm = alt_p_comm; /* for PowerPC */
1039
1040 /*
1041 * XXXAUDIT: Currently, we only audit the pathname of the binary.
1042 * There may also be poor interaction with dyld.
1043 */
1044
1045 task = current_task();
1046 uthread = get_bsdthread_info(current_thread());
1047
1048 if (uthread->uu_flag & UT_VFORK) {
1049 vfexec = 1; /* Mark in exec */
1050 } else {
1051 if (task != kernel_task) {
1052 numthreads = get_task_numacts(task);
1053 if (numthreads <= 0 ) {
1054 kauth_cred_unref(&cred);
1055 return(EINVAL);
1056 }
1057 if (numthreads > 1) {
1058 kauth_cred_unref(&cred);
1059 return(ENOTSUP);
1060 }
1061 }
1062 }
1063
1064 error = execargs_alloc(imgp);
1065 if (error) {
1066 kauth_cred_unref(&cred);
1067 return(error);
1068 }
1069 /*
1070 * XXXAUDIT: Note: the double copyin introduces an audit
1071 * race. To correct this race, we must use a single
1072 * copyin(), e.g. by passing a flag to namei to indicate an
1073 * external path buffer is being used.
1074 */
1075 error = exec_save_path(imgp, uap->fname, seg);
1076 if (error) {
1077 execargs_free(imgp);
1078 kauth_cred_unref(&cred);
1079 return(error);
1080 }
1081
1082 /*
1083 * No app profiles under chroot
1084 */
1085 if((p->p_fd->fd_rdir == NULLVP) && (app_profile != 0)) {
1086
1087 /* grab the name of the file out of its path */
1088 /* we will need this for lookup within the */
1089 /* name file */
1090 /* Scan backwards for the first '/' or start of string */
1091 imgp->ip_tws_cache_name = imgp->ip_strendp;
1092 while (imgp->ip_tws_cache_name[0] != '/') {
1093 if(imgp->ip_tws_cache_name == imgp->ip_strings) {
1094 imgp->ip_tws_cache_name--;
1095 break;
1096 }
1097 imgp->ip_tws_cache_name--;
1098 }
1099 imgp->ip_tws_cache_name++;
1100 }
1101 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
1102 seg, uap->fname, imgp->ip_vfs_context);
1103
1104 again:
1105 error = namei(&nd);
1106 if (error)
1107 goto bad;
1108 imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */
1109 imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
1110
1111 error = exec_check_permissions(imgp);
1112 if (error)
1113 goto bad;
1114
1115 /* Copy; avoid invocation of an interpreter overwriting the original */
1116 if (once) {
1117 once = 0;
1118 origva = va;
1119 }
1120
1121 error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
1122 UIO_SYSSPACE32, IO_NODELOCKED, cred, &resid, p);
1123 if (error)
1124 goto bad;
1125
1126 encapsulated_binary:
1127 error = -1;
1128 for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
1129
1130 error = (*execsw[i].ex_imgact)(imgp);
1131
1132 switch (error) {
1133 /* case -1: not claimed: continue */
1134 case -2: /* Encapsulated binary */
1135 goto encapsulated_binary;
1136
1137 case -3: /* Interpreter */
1138 vnode_put(imgp->ip_vp);
1139 imgp->ip_vp = NULL; /* already put */
1140 nd.ni_cnd.cn_nameiop = LOOKUP;
1141 nd.ni_cnd.cn_flags = (nd.ni_cnd.cn_flags & HASBUF) |
1142 (FOLLOW | LOCKLEAF);
1143
1144 #ifdef IMGPF_POWERPC
1145 /*
1146 * PowerPC does not follow symlinks because the
1147 * code which sets exec_archhandler_ppc.fsid and
1148 * exec_archhandler_ppc.fileid doesn't follow them.
1149 */
1150 if (imgp->ip_flags & IMGPF_POWERPC)
1151 nd.ni_cnd.cn_flags &= ~FOLLOW;
1152 #endif /* IMGPF_POWERPC */
1153
1154 nd.ni_segflg = UIO_SYSSPACE32;
1155 nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name);
1156 goto again;
1157
1158 default:
1159 break;
1160 }
1161 }
1162
1163 /* call out to allow 3rd party notification of exec.
1164 * Ignore result of kauth_authorize_fileop call.
1165 */
1166 if (error == 0 && kauth_authorize_fileop_has_listeners()) {
1167 kauth_authorize_fileop(vfs_context_ucred(&context), KAUTH_FILEOP_EXEC,
1168 (uintptr_t)nd.ni_vp, 0);
1169 }
1170
1171 /* Image not claimed by any activator? */
1172 if (error == -1)
1173 error = ENOEXEC;
1174
1175 bad:
1176 kauth_cred_unref(&cred);
1177
1178 if (imgp->ip_ndp)
1179 nameidone(imgp->ip_ndp);
1180 if (imgp->ip_vp)
1181 vnode_put(imgp->ip_vp);
1182 if (imgp->ip_strings)
1183 execargs_free(imgp);
1184 if (!error && vfexec) {
1185 vfork_return(current_thread(), p->p_pptr, p, retval);
1186 (void)thread_resume(imgp->ip_vfork_thread);
1187 return(0);
1188 }
1189 return(error);
1190 }
1191
1192
1193 static int
1194 copyinptr(user_addr_t froma, user_addr_t *toptr, int ptr_size)
1195 {
1196 int error;
1197
1198 if (ptr_size == 4) {
1199 /* 64 bit value containing 32 bit address */
1200 unsigned int i;
1201
1202 error = copyin(froma, &i, 4);
1203 *toptr = CAST_USER_ADDR_T(i); /* SAFE */
1204 } else {
1205 error = copyin(froma, toptr, 8);
1206 }
1207 return (error);
1208 }
1209
1210
1211 static int
1212 copyoutptr(user_addr_t ua, user_addr_t ptr, int ptr_size)
1213 {
1214 int error;
1215
1216 if (ptr_size == 4) {
1217 /* 64 bit value containing 32 bit address */
1218 unsigned int i = CAST_DOWN(unsigned int,ua); /* SAFE */
1219
1220 error = copyout(&i, ptr, 4);
1221 } else {
1222 error = copyout(&ua, ptr, 8);
1223 }
1224 return (error);
1225 }
1226
1227
1228 /*
1229 * exec_copyout_strings
1230 *
1231 * Copy out the strings segment to user space. The strings segment is put
1232 * on a preinitialized stack frame.
1233 *
1234 * Parameters: struct image_params * the image parameter block
1235 * int * a pointer to the stack offset variable
1236 *
1237 * Returns: 0 Success
1238 * !0 Faiure: errno
1239 *
1240 * Implicit returns:
1241 * (*stackp) The stack offset, modified
1242 *
1243 * Note: The strings segment layout is backward, from the beginning
1244 * of the top of the stack to consume the minimal amount of
1245 * space possible; the returned stack pointer points to the
1246 * end of the area consumed (stacks grow upward).
1247 *
1248 * argc is an int; arg[i] are pointers; env[i] are pointers;
1249 * exec_path is a pointer; the 0's are (void *)NULL's
1250 *
1251 * The stack frame layout is:
1252 *
1253 * +-------------+
1254 * sp-> | argc |
1255 * +-------------+
1256 * | arg[0] |
1257 * +-------------+
1258 * :
1259 * :
1260 * +-------------+
1261 * | arg[argc-1] |
1262 * +-------------+
1263 * | 0 |
1264 * +-------------+
1265 * | env[0] |
1266 * +-------------+
1267 * :
1268 * :
1269 * +-------------+
1270 * | env[n] |
1271 * +-------------+
1272 * | 0 |
1273 * +-------------+
1274 * | exec_path | In MacOS X PR2 Beaker2E the path passed to exec() is
1275 * +-------------+ passed on the stack just after the trailing 0 of the
1276 * | 0 | the envp[] array as a pointer to a string.
1277 * +-------------+
1278 * | PATH AREA |
1279 * +-------------+
1280 * | STRING AREA |
1281 * :
1282 * :
1283 * | | <- p->user_stack
1284 * +-------------+
1285 *
1286 * Although technically a part of the STRING AREA, we treat the PATH AREA as
1287 * a separate entity. This allows us to align the beginning of the PATH AREA
1288 * to a pointer boundary so that the exec_path, env[i], and argv[i] pointers
1289 * which preceed it on the stack are properly aligned.
1290 *
1291 * TODO: argc copied with suword(), which takes a 64 bit address
1292 */
1293 static int
1294 exec_copyout_strings(struct image_params *imgp, user_addr_t *stackp)
1295 {
1296 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1297 int ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
1298 char *argv = imgp->ip_argv; /* modifiable copy of argv */
1299 user_addr_t string_area; /* *argv[], *env[] */
1300 user_addr_t path_area; /* package launch path */
1301 user_addr_t ptr_area; /* argv[], env[], exec_path */
1302 user_addr_t stack;
1303 int stringc = imgp->ip_argc + imgp->ip_envc;
1304 int len;
1305 int error;
1306 int strspace;
1307
1308 stack = *stackp;
1309
1310 unsigned patharea_len = imgp->ip_argv - imgp->ip_strings;
1311 int envc_add = 0;
1312
1313 #ifdef IMGPF_POWERPC
1314 /*
1315 * oah750 expects /usr/lib/dyld\0 as the start of the program name.
1316 * It also expects to have a certain environment variable set to 0.
1317 * 50 bytes for each to ensure we have enough space without having
1318 * to count every byte.
1319 */
1320 char *progname, *envvar;
1321 char progname_str[] = "/usr/lib/dyld";
1322 char envvar_str[] = "OAH750_CFG_FU_STACK_SIZE=0";
1323
1324 if (imgp->ip_flags & IMGPF_POWERPC) {
1325 progname = progname_str;
1326 envvar = envvar_str;
1327 patharea_len += strlen(progname) + strlen(envvar) + 2;
1328 envc_add = 1;
1329 }
1330 #endif /* IMGPF_POWERPC */
1331 /*
1332 * Set up pointers to the beginning of the string area, the beginning
1333 * of the path area, and the beginning of the pointer area (actually,
1334 * the location of argc, an int, which may be smaller than a pointer,
1335 * but we use ptr_size worth of space for it, for alignment).
1336 */
1337 string_area = stack - (((imgp->ip_strendp - imgp->ip_strings) + ptr_size-1) & ~(ptr_size-1)) - ptr_size;
1338 path_area = string_area - ((patharea_len + ptr_size-1) & ~(ptr_size-1));
1339 ptr_area = path_area - ((imgp->ip_argc + imgp->ip_envc + 4 + envc_add) * ptr_size) - ptr_size /*argc*/;
1340
1341 /* Return the initial stack address: the location of argc */
1342 *stackp = ptr_area;
1343
1344 /*
1345 * Record the size of the arguments area so that sysctl_procargs()
1346 * can return the argument area without having to parse the arguments.
1347 */
1348 p->p_argc = imgp->ip_argc;
1349 p->p_argslen = (int)(stack - path_area);
1350
1351
1352 /*
1353 * Support for new app package launching for Mac OS X allocates
1354 * the "path" at the begining of the imgp->ip_strings buffer.
1355 * copy it just before the string area.
1356 */
1357 len = 0;
1358 #ifdef IMGPF_POWERPC
1359 if (imgp->ip_flags & IMGPF_POWERPC) {
1360 error = copyoutstr(progname, path_area,
1361 patharea_len,
1362 (size_t *)&len);
1363 if (error)
1364 goto bad;
1365 error = copyoutstr(imgp->ip_strings, path_area + strlen(progname) + 1,
1366 patharea_len,
1367 (size_t *)&len);
1368 } else
1369 #endif /* IMGPF_POWERPC */
1370 error = copyoutstr(imgp->ip_strings, path_area,
1371 patharea_len,
1372 (size_t *)&len);
1373 if (error)
1374 goto bad;
1375
1376
1377 /* Save a NULL pointer below it */
1378 (void)copyoutptr(0LL, path_area - ptr_size, ptr_size);
1379
1380 /* Save the pointer to "path" just below it */
1381 (void)copyoutptr(path_area, path_area - 2*ptr_size, ptr_size);
1382
1383 /*
1384 * ptr_size for 2 NULL one each ofter arg[argc -1] and env[n]
1385 * ptr_size for argc
1386 * skip over saved path, ptr_size for pointer to path,
1387 * and ptr_size for the NULL after pointer to path.
1388 */
1389
1390 /* argc (int32, stored in a ptr_size area) */
1391 (void)suword(ptr_area, imgp->ip_argc);
1392 ptr_area += sizeof(int);
1393 /* pad to ptr_size, if 64 bit image, to ensure user stack alignment */
1394 if (imgp->ip_flags & IMGPF_IS_64BIT) {
1395 (void)suword(ptr_area, 0); /* int, not long: ignored */
1396 ptr_area += sizeof(int);
1397 }
1398
1399
1400 /*
1401 * We use (string_area - path_area) here rather than the more
1402 * intuitive (imgp->ip_argv - imgp->ip_strings) because we are
1403 * interested in the length of the PATH_AREA in user space,
1404 * rather than the actual length of the execution path, since
1405 * it includes alignment padding of the PATH_AREA + STRING_AREA
1406 * to a ptr_size boundary.
1407 */
1408 strspace = SIZE_IMG_STRSPACE - (string_area - path_area);
1409 for (;;) {
1410 if (stringc == imgp->ip_envc) {
1411 /* argv[n] = NULL */
1412 (void)copyoutptr(0LL, ptr_area, ptr_size);
1413 ptr_area += ptr_size;
1414 #ifdef IMGPF_POWERPC
1415 if (envc_add) {
1416 (void)copyoutptr(string_area, ptr_area, ptr_size);
1417
1418 do {
1419 if (strspace <= 0) {
1420 error = E2BIG;
1421 break;
1422 }
1423 error = copyoutstr(envvar, string_area,
1424 (unsigned)strspace,
1425 (size_t *)&len);
1426 string_area += len;
1427 envvar += len;
1428 strspace -= len;
1429 } while (error == ENAMETOOLONG);
1430 if (error == EFAULT || error == E2BIG)
1431 break;
1432 ptr_area += ptr_size;
1433 }
1434 #endif /* IMGPF_POWERPC */
1435 }
1436 if (--stringc < 0)
1437 break;
1438
1439 /* pointer: argv[n]/env[n] */
1440 (void)copyoutptr(string_area, ptr_area, ptr_size);
1441
1442 /* string : argv[n][]/env[n][] */
1443 do {
1444 if (strspace <= 0) {
1445 error = E2BIG;
1446 break;
1447 }
1448 error = copyoutstr(argv, string_area,
1449 (unsigned)strspace,
1450 (size_t *)&len);
1451 string_area += len;
1452 argv += len;
1453 strspace -= len;
1454 } while (error == ENAMETOOLONG);
1455 if (error == EFAULT || error == E2BIG)
1456 break; /* bad stack - user's problem */
1457 ptr_area += ptr_size;
1458 }
1459 /* env[n] = NULL */
1460 (void)copyoutptr(0LL, ptr_area, ptr_size);
1461
1462 bad:
1463 return(error);
1464 }
1465
1466
1467 /*
1468 * exec_extract_strings
1469 *
1470 * Copy arguments and environment from user space into work area; we may
1471 * have already copied some early arguments into the work area, and if
1472 * so, any arguments opied in are appended to those already there.
1473 *
1474 * Parameters: struct image_params * the image parameter block
1475 *
1476 * Returns: 0 Success
1477 * !0 Failure: errno
1478 *
1479 * Implicit returns;
1480 * (imgp->ip_argc) Count of arguments, updated
1481 * (imgp->ip_envc) Count of environment strings, updated
1482 *
1483 *
1484 * Notes: The argument and environment vectors are user space pointers
1485 * to arrays of user space pointers.
1486 */
1487 static int
1488 exec_extract_strings(struct image_params *imgp)
1489 {
1490 int error = 0;
1491 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1492 int seg = (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32);
1493 int ptr_size = (imgp->ip_flags & IMGPF_WAS_64BIT) ? 8 : 4;
1494 user_addr_t argv = imgp->ip_user_argv;
1495 user_addr_t envv = imgp->ip_user_envv;
1496
1497 /* Now, get rest of arguments */
1498
1499 /*
1500 * If we are running an interpreter, replace the av[0] that was
1501 * passed to execve() with the fully qualified path name that was
1502 * passed to execve() for interpreters which do not use the PATH
1503 * to locate their script arguments.
1504 */
1505 if((imgp->ip_flags & IMGPF_INTERPRET) != 0 && argv != 0LL) {
1506 user_addr_t arg;
1507
1508 error = copyinptr(argv, &arg, ptr_size);
1509 if (error)
1510 goto bad;
1511 if (arg != 0LL && arg != (user_addr_t)-1) {
1512 argv += ptr_size;
1513 error = exec_add_string(imgp, imgp->ip_user_fname, seg);
1514 if (error)
1515 goto bad;
1516 imgp->ip_argc++;
1517 }
1518 }
1519
1520 while (argv != 0LL) {
1521 user_addr_t arg;
1522
1523 error = copyinptr(argv, &arg, ptr_size);
1524 if (error)
1525 goto bad;
1526
1527 argv += ptr_size;
1528 if (arg == 0LL) {
1529 break;
1530 } else if (arg == (user_addr_t)-1) {
1531 /* Um... why would it be -1? */
1532 error = EFAULT;
1533 goto bad;
1534 }
1535 /*
1536 * av[n...] = arg[n]
1537 */
1538 error = exec_add_string(imgp, arg, seg);
1539 if (error)
1540 goto bad;
1541 imgp->ip_argc++;
1542 }
1543
1544 /* Now, get the environment */
1545 while (envv != 0LL) {
1546 user_addr_t env;
1547
1548 error = copyinptr(envv, &env, ptr_size);
1549 if (error)
1550 goto bad;
1551
1552 envv += ptr_size;
1553 if (env == 0LL) {
1554 break;
1555 } else if (env == (user_addr_t)-1) {
1556 error = EFAULT;
1557 goto bad;
1558 }
1559 /*
1560 * av[n...] = env[n]
1561 */
1562 error = exec_add_string(imgp, env, seg);
1563 if (error)
1564 goto bad;
1565 imgp->ip_envc++;
1566 }
1567 bad:
1568 return error;
1569 }
1570
1571
/*
 * Current (soft) stack size limit of process p.  The argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define	unix_stack_size(p)	((p)->p_rlimit[RLIMIT_STACK].rlim_cur)
1573
1574 static int
1575 exec_check_permissions(struct image_params *imgp)
1576 {
1577 struct vnode *vp = imgp->ip_vp;
1578 struct vnode_attr *vap = imgp->ip_vattr;
1579 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1580 int error;
1581 kauth_action_t action;
1582
1583 /* Only allow execution of regular files */
1584 if (!vnode_isreg(vp))
1585 return (EACCES);
1586
1587 /* Get the file attributes that we will be using here and elsewhere */
1588 VATTR_INIT(vap);
1589 VATTR_WANTED(vap, va_uid);
1590 VATTR_WANTED(vap, va_gid);
1591 VATTR_WANTED(vap, va_mode);
1592 VATTR_WANTED(vap, va_fsid);
1593 VATTR_WANTED(vap, va_fileid);
1594 VATTR_WANTED(vap, va_data_size);
1595 if ((error = vnode_getattr(vp, vap, imgp->ip_vfs_context)) != 0)
1596 return (error);
1597
1598 /*
1599 * Ensure that at least one execute bit is on - otherwise root
1600 * will always succeed, and we don't want to happen unless the
1601 * file really is executable.
1602 */
1603 if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
1604 return (EACCES);
1605
1606 /* Disallow zero length files */
1607 if (vap->va_data_size == 0)
1608 return (ENOEXEC);
1609
1610 imgp->ip_arch_offset = (user_size_t)0;
1611 imgp->ip_arch_size = vap->va_data_size;
1612
1613 /* Disable setuid-ness for traced programs or if MNT_NOSUID */
1614 if ((vp->v_mount->mnt_flag & MNT_NOSUID) || (p->p_flag & P_TRACED))
1615 vap->va_mode &= ~(VSUID | VSGID);
1616
1617 /* Check for execute permission */
1618 action = KAUTH_VNODE_EXECUTE;
1619 /* Traced images must also be readable */
1620 if (p->p_flag & P_TRACED)
1621 action |= KAUTH_VNODE_READ_DATA;
1622 if ((error = vnode_authorize(vp, NULL, action, imgp->ip_vfs_context)) != 0)
1623 return (error);
1624
1625 /* Don't let it run if anyone had it open for writing */
1626 if (vp->v_writecount)
1627 return (ETXTBSY);
1628
1629 #ifdef IMGPF_POWERPC
1630 /*
1631 * If the file we are about to attempt to load is the exec_handler_ppc,
1632 * which is determined by matching the vattr fields against previously
1633 * cached values, then we set the PowerPC environment flag.
1634 */
1635 if (vap->va_fsid == exec_archhandler_ppc.fsid &&
1636 vap->va_fileid == (uint64_t)((u_long)exec_archhandler_ppc.fileid)) {
1637 imgp->ip_flags |= IMGPF_POWERPC;
1638 }
1639 #endif /* IMGPF_POWERPC */
1640
1641 /* XXX May want to indicate to underlying FS that vnode is open */
1642
1643 return (error);
1644 }
1645
1646 /*
1647 * exec_handle_sugid
1648 *
1649 * Initially clear the P_SUGID in the process flags; if an SUGID process is
1650 * exec'ing a non-SUGID image, then this is the point of no return.
1651 *
1652 * If the image being activated is SUGI, then replace the credential with a
1653 * copy, disable tracing (unless the tracing process is root), reset the
1654 * mach task port to revoke it, set the P_SUGID bit,
1655 *
1656 * If the saved user and group ID will be changing, then make sure it happens
1657 * to a new credential, rather than a shared one.
1658 *
1659 * Set the security token (this is probably obsolete, given that the token
1660 * should not technically be separate from the credential itself).
1661 *
1662 * Parameters: struct image_params * the image parameter block
1663 *
1664 * Returns: void No failure indication
1665 *
1666 * Implicit returns:
1667 * <process credential> Potentially modified/replaced
1668 * <task port> Potentially revoked
1669 * <process flags> P_SUGID bit potentially modified
1670 * <security token> Potentially modified
1671 */
1672 static int
1673 exec_handle_sugid(struct image_params *imgp)
1674 {
1675 kauth_cred_t cred = vfs_context_ucred(imgp->ip_vfs_context);
1676 struct proc *p = vfs_context_proc(imgp->ip_vfs_context);
1677 int i;
1678 int error = 0;
1679 static struct vnode *dev_null = NULLVP;
1680
1681 p->p_flag &= ~P_SUGID;
1682
1683 if (((imgp->ip_origvattr->va_mode & VSUID) != 0 &&
1684 kauth_cred_getuid(cred) != imgp->ip_origvattr->va_uid) ||
1685 ((imgp->ip_origvattr->va_mode & VSGID) != 0 &&
1686 cred->cr_gid != imgp->ip_origvattr->va_gid)) {
1687 #if KTRACE
1688 /*
1689 * If process is being ktraced, turn off - unless
1690 * root set it.
1691 */
1692 if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) {
1693 struct vnode *tvp = p->p_tracep;
1694 p->p_tracep = NULL;
1695 p->p_traceflag = 0;
1696 vnode_rele(tvp);
1697 }
1698 #endif
1699 /*
1700 * Replace the credential with a copy of itself if euid or egid change.
1701 */
1702 if (imgp->ip_origvattr->va_mode & VSUID) {
1703 p->p_ucred = kauth_cred_seteuid(p->p_ucred, imgp->ip_origvattr->va_uid);
1704 }
1705 if (imgp->ip_origvattr->va_mode & VSGID) {
1706 p->p_ucred = kauth_cred_setegid(p->p_ucred, imgp->ip_origvattr->va_gid);
1707 }
1708
1709 /*
1710 * Have mach reset the task port. We don't want
1711 * anyone who had the task port before a setuid
1712 * exec to be able to access/control the task
1713 * after.
1714 */
1715 if (current_task() == p->task)
1716 ipc_task_reset(p->task);
1717
1718 p->p_flag |= P_SUGID;
1719
1720 /* Cache the vnode for /dev/null the first time around */
1721 if (dev_null == NULLVP) {
1722 struct nameidata nd1;
1723
1724 NDINIT(&nd1, LOOKUP, FOLLOW, UIO_SYSSPACE32,
1725 CAST_USER_ADDR_T("/dev/null"),
1726 imgp->ip_vfs_context);
1727
1728 if ((error = vn_open(&nd1, FREAD, 0)) == 0) {
1729 dev_null = nd1.ni_vp;
1730 /*
1731 * vn_open returns with both a use_count
1732 * and an io_count on the found vnode
1733 * drop the io_count, but keep the use_count
1734 */
1735 vnode_put(nd1.ni_vp);
1736 }
1737 }
1738
1739 /* Radar 2261856; setuid security hole fix */
1740 /* Patch from OpenBSD: A. Ramesh */
1741 /*
1742 * XXX For setuid processes, attempt to ensure that
1743 * stdin, stdout, and stderr are already allocated.
1744 * We do not want userland to accidentally allocate
1745 * descriptors in this range which has implied meaning
1746 * to libc.
1747 */
1748 if (dev_null != NULLVP) {
1749 for (i = 0; i < 3; i++) {
1750 struct fileproc *fp;
1751 int indx;
1752
1753 if (p->p_fd->fd_ofiles[i] != NULL)
1754 continue;
1755
1756 if ((error = falloc(p, &fp, &indx)) != 0)
1757 continue;
1758
1759 if ((error = vnode_ref_ext(dev_null, FREAD)) != 0) {
1760 fp_free(p, indx, fp);
1761 break;
1762 }
1763
1764 fp->f_fglob->fg_flag = FREAD;
1765 fp->f_fglob->fg_type = DTYPE_VNODE;
1766 fp->f_fglob->fg_ops = &vnops;
1767 fp->f_fglob->fg_data = (caddr_t)dev_null;
1768
1769 proc_fdlock(p);
1770 *fdflags(p, indx) &= ~UF_RESERVED;
1771 fp_drop(p, indx, fp, 1);
1772 proc_fdunlock(p);
1773 }
1774 /*
1775 * for now we need to drop the reference immediately
1776 * since we don't have any mechanism in place to
1777 * release it before starting to unmount "/dev"
1778 * during a reboot/shutdown
1779 */
1780 vnode_rele(dev_null);
1781 dev_null = NULLVP;
1782 }
1783 }
1784
1785 /*
1786 * Implement the semantic where the effective user and group become
1787 * the saved user and group in exec'ed programs.
1788 */
1789 p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), p->p_ucred->cr_gid);
1790
1791 /* XXX Obsolete; security token should not be separate from cred */
1792 set_security_token(p);
1793
1794 return(error);
1795 }
1796
1797 static kern_return_t
1798 create_unix_stack(vm_map_t map, user_addr_t user_stack, int customstack,
1799 struct proc *p)
1800 {
1801 mach_vm_size_t size;
1802 mach_vm_offset_t addr;
1803
1804 p->user_stack = user_stack;
1805 if (!customstack) {
1806 size = mach_vm_round_page(unix_stack_size(p));
1807 addr = mach_vm_trunc_page(user_stack - size);
1808 return (mach_vm_allocate(map, &addr, size,
1809 VM_MAKE_TAG(VM_MEMORY_STACK) |
1810 VM_FLAGS_FIXED));
1811 } else
1812 return(KERN_SUCCESS);
1813 }
1814
1815 #include <sys/reboot.h>
1816
1817 static char init_program_name[128] = "/sbin/launchd";
1818 static const char * other_init = "/sbin/mach_init";
1819
1820 char init_args[128] = "";
1821
1822 struct execve_args init_exec_args;
1823 int init_attempts = 0;
1824
1825
1826 void
1827 load_init_program(struct proc *p)
1828 {
1829 vm_offset_t init_addr;
1830 char *argv[3];
1831 int error;
1832 register_t retval[2];
1833
1834 error = 0;
1835
1836 /* init_args are copied in string form directly from bootstrap */
1837
1838 do {
1839 if (boothowto & RB_INITNAME) {
1840 printf("init program? ");
1841 #if FIXME /* [ */
1842 gets(init_program_name, init_program_name);
1843 #endif /* FIXME ] */
1844 }
1845
1846 if (error && ((boothowto & RB_INITNAME) == 0) &&
1847 (init_attempts == 1)) {
1848 printf("Load of %s, errno %d, trying %s\n",
1849 init_program_name, error, other_init);
1850 error = 0;
1851 bcopy(other_init, init_program_name,
1852 sizeof(other_init));
1853 }
1854
1855 init_attempts++;
1856
1857 if (error) {
1858 printf("Load of %s failed, errno %d\n",
1859 init_program_name, error);
1860 error = 0;
1861 boothowto |= RB_INITNAME;
1862 continue;
1863 }
1864
1865 /*
1866 * Copy out program name.
1867 */
1868
1869 init_addr = VM_MIN_ADDRESS;
1870 (void) vm_allocate(current_map(), &init_addr,
1871 PAGE_SIZE, VM_FLAGS_ANYWHERE);
1872 if (init_addr == 0)
1873 init_addr++;
1874
1875 (void) copyout((caddr_t) init_program_name,
1876 CAST_USER_ADDR_T(init_addr),
1877 (unsigned) sizeof(init_program_name)+1);
1878
1879 argv[0] = (char *) init_addr;
1880 init_addr += sizeof(init_program_name);
1881 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1882
1883 /*
1884 * Put out first (and only) argument, similarly.
1885 * Assumes everything fits in a page as allocated
1886 * above.
1887 */
1888
1889 (void) copyout((caddr_t) init_args,
1890 CAST_USER_ADDR_T(init_addr),
1891 (unsigned) sizeof(init_args));
1892
1893 argv[1] = (char *) init_addr;
1894 init_addr += sizeof(init_args);
1895 init_addr = (vm_offset_t)ROUND_PTR(char, init_addr);
1896
1897 /*
1898 * Null-end the argument list
1899 */
1900
1901 argv[2] = (char *) 0;
1902
1903 /*
1904 * Copy out the argument list.
1905 */
1906
1907 (void) copyout((caddr_t) argv,
1908 CAST_USER_ADDR_T(init_addr),
1909 (unsigned) sizeof(argv));
1910
1911 /*
1912 * Set up argument block for fake call to execve.
1913 */
1914
1915 init_exec_args.fname = CAST_USER_ADDR_T(argv[0]);
1916 init_exec_args.argp = CAST_USER_ADDR_T((char **)init_addr);
1917 init_exec_args.envp = CAST_USER_ADDR_T(0);
1918
1919 /* So that mach_init task
1920 * is set with uid,gid 0 token
1921 */
1922 set_security_token(p);
1923
1924 error = execve(p,&init_exec_args,retval);
1925 } while (error);
1926 }
1927
1928 /*
1929 * Convert a load_return_t to an errno.
1930 */
1931 static int
1932 load_return_to_errno(load_return_t lrtn)
1933 {
1934 switch (lrtn) {
1935 case LOAD_SUCCESS:
1936 return 0;
1937 case LOAD_BADARCH:
1938 return EBADARCH;
1939 case LOAD_BADMACHO:
1940 return EBADMACHO;
1941 case LOAD_SHLIB:
1942 return ESHLIBVERS;
1943 case LOAD_NOSPACE:
1944 case LOAD_RESOURCE:
1945 return ENOMEM;
1946 case LOAD_PROTECT:
1947 return EACCES;
1948 case LOAD_ENOENT:
1949 return ENOENT;
1950 case LOAD_IOERROR:
1951 return EIO;
1952 case LOAD_FAILURE:
1953 default:
1954 return EBADEXEC;
1955 }
1956 }
1957
1958 #include <mach/mach_types.h>
1959 #include <mach/vm_prot.h>
1960 #include <mach/semaphore.h>
1961 #include <mach/sync_policy.h>
1962 #include <kern/clock.h>
1963 #include <mach/kern_return.h>
1964
1965 extern semaphore_t execve_semaphore;
1966
1967 /*
1968 * The block of memory used by the execve arguments. At the same time,
1969 * we allocate a page so that we can read in the first page of the image.
1970 */
1971 static int
1972 execargs_alloc(struct image_params *imgp)
1973 {
1974 kern_return_t kret;
1975
1976 kret = semaphore_wait(execve_semaphore);
1977 if (kret != KERN_SUCCESS)
1978 switch (kret) {
1979 default:
1980 return (EINVAL);
1981 case KERN_INVALID_ADDRESS:
1982 case KERN_PROTECTION_FAILURE:
1983 return (EACCES);
1984 case KERN_ABORTED:
1985 case KERN_OPERATION_TIMED_OUT:
1986 return (EINTR);
1987 }
1988
1989 kret = kmem_alloc_pageable(bsd_pageable_map, (vm_offset_t *)&imgp->ip_strings, NCARGS + PAGE_SIZE);
1990 imgp->ip_vdata = imgp->ip_strings + NCARGS;
1991 if (kret != KERN_SUCCESS) {
1992 semaphore_signal(execve_semaphore);
1993 return (ENOMEM);
1994 }
1995 return (0);
1996 }
1997
1998 static int
1999 execargs_free(struct image_params *imgp)
2000 {
2001 kern_return_t kret;
2002
2003 kmem_free(bsd_pageable_map, (vm_offset_t)imgp->ip_strings, NCARGS + PAGE_SIZE);
2004 imgp->ip_strings = NULL;
2005
2006 kret = semaphore_signal(execve_semaphore);
2007 switch (kret) {
2008 case KERN_INVALID_ADDRESS:
2009 case KERN_PROTECTION_FAILURE:
2010 return (EINVAL);
2011 case KERN_ABORTED:
2012 case KERN_OPERATION_TIMED_OUT:
2013 return (EINTR);
2014 case KERN_SUCCESS:
2015 return(0);
2016 default:
2017 return (EINVAL);
2018 }
2019 }