apple/xnu (xnu-7195.81.3) - bsd/kern/kern_mman.c
1 /*
2 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988 University of Utah.
30 * Copyright (c) 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 *
33 * This code is derived from software contributed to Berkeley by
34 * the Systems Programming Group of the University of Utah Computer
35 * Science Department.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
66 *
67 * @(#)vm_mmap.c 8.10 (Berkeley) 2/19/95
68 */
69 /*
70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
71 * support for mandatory and extensible security protections. This notice
72 * is included in support of clause 2.2 (b) of the Apple Public License,
73 * Version 2.0.
74 */
75
76 /*
77 * Mapped file (mmap) interface to VM
78 */
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/filedesc.h>
83 #include <sys/proc_internal.h>
84 #include <sys/kauth.h>
85 #include <sys/resourcevar.h>
86 #include <sys/vnode_internal.h>
87 #include <sys/acct.h>
88 #include <sys/wait.h>
89 #include <sys/file_internal.h>
90 #include <sys/vadvise.h>
91 #include <sys/trace.h>
92 #include <sys/mman.h>
93 #include <sys/conf.h>
94 #include <sys/stat.h>
95 #include <sys/ubc.h>
96 #include <sys/ubc_internal.h>
97 #include <sys/sysproto.h>
98
99 #include <sys/syscall.h>
100 #include <sys/kdebug.h>
101 #include <sys/bsdtask_info.h>
102
103 #include <security/audit/audit.h>
104 #include <bsm/audit_kevents.h>
105
106 #include <mach/mach_types.h>
107 #include <mach/mach_traps.h>
108 #include <mach/vm_sync.h>
109 #include <mach/vm_behavior.h>
110 #include <mach/vm_inherit.h>
111 #include <mach/vm_statistics.h>
112 #include <mach/mach_vm.h>
113 #include <mach/vm_map.h>
114 #include <mach/host_priv.h>
115 #include <mach/sdt.h>
116 #include <mach-o/loader.h>
117
118 #include <machine/machine_routines.h>
119
120 #include <kern/cpu_number.h>
121 #include <kern/host.h>
122 #include <kern/task.h>
123 #include <kern/page_decrypt.h>
124
125 #include <IOKit/IOReturn.h>
126
127 #include <vm/vm_map.h>
128 #include <vm/vm_kern.h>
129 #include <vm/vm_pager.h>
130 #include <vm/vm_protos.h>
131
132 #if CONFIG_MACF
133 #include <security/mac_framework.h>
134 #endif
135 #include <os/overflow.h>
136
137 /*
138  * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
139  * from dyld_priv.h, so we can consistently deny / allow allocations based on
140  * whether the SDK is at the fall 2020 level. Compare its output to proc_sdk(current_proc()).
141 */
142 static uint32_t
143 proc_2020_fall_os_sdk(void)
144 {
145 switch (current_proc()->p_platform) {
146 case PLATFORM_MACOS:
147 return 0x000a1000; // DYLD_MACOSX_VERSION_10_16
148 case PLATFORM_IOS:
149 case PLATFORM_IOSSIMULATOR:
150 case PLATFORM_MACCATALYST:
151 return 0x000e0000; // DYLD_IOS_VERSION_14_0
152 case PLATFORM_BRIDGEOS:
153 return 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
154 case PLATFORM_TVOS:
155 case PLATFORM_TVOSSIMULATOR:
156 return 0x000e0000; // DYLD_TVOS_VERSION_14_0
157 case PLATFORM_WATCHOS:
158 case PLATFORM_WATCHOSSIMULATOR:
159 return 0x00070000; // DYLD_WATCHOS_VERSION_7_0
160 default:
161 return 0;
162 }
163 }
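/*
 * Editor's sketch (illustrative, not part of the original source): the constants
 * above appear to follow dyld's 0xMMMMmmpp version packing, i.e. major version in
 * the upper 16 bits, then one byte each for minor and patch:
 *
 *     // assumed packing, shown for clarity only
 *     #define PACK_OS_VERSION(maj, min, patch) \
 *         ((uint32_t)(((maj) << 16) | ((min) << 8) | (patch)))
 *
 *     PACK_OS_VERSION(10, 16, 0);   // 0x000a1000, DYLD_MACOSX_VERSION_10_16
 *     PACK_OS_VERSION(14,  0, 0);   // 0x000e0000, DYLD_IOS_VERSION_14_0
 *
 * With that packing, "proc_sdk(current_proc()) >= proc_2020_fall_os_sdk()" is a
 * plain numeric comparison of packed SDK versions.
 */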
164
165 /*
166 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
167 * XXX usage is PROT_* from an interface perspective. Thus the values of
168 * XXX VM_PROT_* and PROT_* need to correspond.
169 */
170 int
171 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
172 {
173 /*
174 * Map in special device (must be SHARED) or file
175 */
176 struct fileproc *fp;
177 struct vnode *vp;
178 int flags;
179 int prot;
180 int err = 0;
181 vm_map_t user_map;
182 kern_return_t result;
183 vm_map_offset_t user_addr;
184 vm_map_offset_t sum;
185 vm_map_size_t user_size;
186 vm_object_offset_t pageoff;
187 vm_object_offset_t file_pos;
188 int alloc_flags = 0;
189 vm_tag_t tag = VM_KERN_MEMORY_NONE;
190 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
191 boolean_t docow;
192 vm_prot_t maxprot;
193 void *handle;
194 memory_object_t pager = MEMORY_OBJECT_NULL;
195 memory_object_control_t control;
196 int mapanon = 0;
197 int fpref = 0;
198 int error = 0;
199 int fd = uap->fd;
200 int num_retries = 0;
201
202 /*
203  * Note that for UNIX03 conformance, additional parameter checking for the
204  * mmap() system call is done in libsyscall prior to entering the kernel.
205  * The sanity checks and argument validation done in this function are
206  * therefore not the only sources of returned errnos.
207 */
208
209 user_map = current_map();
210 user_addr = (vm_map_offset_t)uap->addr;
211 user_size = (vm_map_size_t) uap->len;
212
213 AUDIT_ARG(addr, user_addr);
214 AUDIT_ARG(len, user_size);
215 AUDIT_ARG(fd, uap->fd);
216
217 if (vm_map_range_overflows(user_addr, user_size)) {
218 return EINVAL;
219 }
220 prot = (uap->prot & VM_PROT_ALL);
221 #if 3777787
222 /*
223 * Since the hardware currently does not support writing without
224 * read-before-write, or execution-without-read, if the request is
225 * for write or execute access, we must imply read access as well;
226 * otherwise programs expecting this to work will fail to operate.
227 */
228 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
229 prot |= VM_PROT_READ;
230 }
231 #endif /* radar 3777787 */
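/*
 * Editor's illustration of the rule above (not part of the original source):
 * a request such as
 *
 *     mmap(NULL, len, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 * is treated as if PROT_READ | PROT_WRITE had been requested.
 */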
232
233 flags = uap->flags;
234 vp = NULLVP;
235
236 /*
237 * verify no unknown flags are passed in, and if any are,
238 * fail out early to make sure the logic below never has to deal
239 * with invalid flag values
240 */
241 if (flags & ~(MAP_SHARED |
242 MAP_PRIVATE |
243 MAP_COPY |
244 MAP_FIXED |
245 MAP_RENAME |
246 MAP_NORESERVE |
247 MAP_RESERVED0080 | //grandfathered in as accepted and ignored
248 MAP_NOEXTEND |
249 MAP_HASSEMAPHORE |
250 MAP_NOCACHE |
251 MAP_JIT |
252 MAP_FILE |
253 MAP_ANON |
254 MAP_RESILIENT_CODESIGN |
255 MAP_RESILIENT_MEDIA |
256 #if XNU_TARGET_OS_OSX
257 MAP_32BIT |
258 #endif
259 MAP_TRANSLATED_ALLOW_EXECUTE |
260 MAP_UNIX03)) {
261 if (proc_sdk(current_proc()) >= proc_2020_fall_os_sdk()) {
262 return EINVAL;
263 }
264 }
265
266
267 /*
268  * The VM code does not have prototypes, and the compiler doesn't do
269  * the right thing when you cast a 64-bit value and pass it in a
270  * function call. So here it is.
271 */
272 file_pos = (vm_object_offset_t)uap->pos;
273
274
275 /* make sure mapping fits into numeric range etc */
276 if (os_add3_overflow(file_pos, user_size, PAGE_SIZE_64 - 1, &sum)) {
277 return EINVAL;
278 }
279
280 if (flags & MAP_UNIX03) {
281 vm_map_offset_t offset_alignment_mask;
282
283 /*
284 * Enforce UNIX03 compliance.
285 */
286
287 if (vm_map_is_exotic(current_map())) {
288 offset_alignment_mask = 0xFFF;
289 } else {
290 offset_alignment_mask = vm_map_page_mask(current_map());
291 }
292 if (file_pos & offset_alignment_mask) {
293 /* file offset should be page-aligned */
294 return EINVAL;
295 }
296 if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
297 /* need either MAP_PRIVATE or MAP_SHARED */
298 return EINVAL;
299 }
300 if (user_size == 0) {
301 /* mapping length should not be 0 */
302 return EINVAL;
303 }
304 }
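/*
 * Editor's sketch of what the MAP_UNIX03 checks above reject (illustrative
 * only; "fd" is a hypothetical open file descriptor):
 *
 *     mmap(NULL, len, PROT_READ, MAP_UNIX03 | MAP_SHARED, fd, 100);  // EINVAL: offset not page-aligned
 *     mmap(NULL, len, PROT_READ, MAP_UNIX03, fd, 0);                 // EINVAL: neither MAP_PRIVATE nor MAP_SHARED
 *     mmap(NULL, 0,   PROT_READ, MAP_UNIX03 | MAP_SHARED, fd, 0);    // EINVAL: zero length
 */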
305
306 /*
307 * Align the file position to a page boundary,
308 * and save its page offset component.
309 */
310 pageoff = (file_pos & vm_map_page_mask(user_map));
311 file_pos -= (vm_object_offset_t)pageoff;
312
313
314 /* Adjust size for rounding (on both ends). */
315 user_size += pageoff; /* low end... */
316 user_size = vm_map_round_page(user_size,
317 vm_map_page_mask(user_map)); /* hi end */
318
319
320 if (flags & MAP_JIT) {
321 if ((flags & MAP_FIXED) ||
322 (flags & MAP_SHARED) ||
323 !(flags & MAP_ANON) ||
324 (flags & MAP_RESILIENT_CODESIGN) ||
325 (flags & MAP_RESILIENT_MEDIA)) {
326 return EINVAL;
327 }
328 }
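/*
 * Editor's sketch (illustrative, not part of the original source): the only
 * shape of MAP_JIT request the checks above allow is an anonymous, private,
 * non-fixed mapping, e.g. the usual JIT region setup:
 *
 *     void *jit = mmap(NULL, jit_size, PROT_READ | PROT_WRITE | PROT_EXEC,
 *                      MAP_JIT | MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 * (On hardened runtimes the caller additionally needs the appropriate JIT
 * entitlement; that policy is enforced elsewhere, not in this check.)
 */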
329
330 if ((flags & MAP_RESILIENT_CODESIGN) ||
331 (flags & MAP_RESILIENT_MEDIA)) {
332 if ((flags & MAP_ANON) ||
333 (flags & MAP_JIT)) {
334 return EINVAL;
335 }
336 }
337 if (flags & MAP_RESILIENT_CODESIGN) {
338 int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
339 if (prot & reject_prot) {
340 /*
341 * Quick sanity check. maxprot is calculated below and
342 * we will test it again.
343 */
344 return EPERM;
345 }
346 }
347 if (flags & MAP_SHARED) {
348 /*
349 * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
350 * there is no place to inject zero-filled pages without
351 * actually adding them to the file.
352 * Since we didn't reject that combination before, there might
353 * already be callers using it and getting a valid MAP_SHARED
354 * mapping but without the resilience.
355 * For backwards compatibility's sake, let's keep ignoring
356 * MAP_RESILIENT_MEDIA in that case.
357 */
358 flags &= ~MAP_RESILIENT_MEDIA;
359 }
360 if (flags & MAP_RESILIENT_MEDIA) {
361 if ((flags & MAP_ANON) ||
362 (flags & MAP_SHARED)) {
363 return EINVAL;
364 }
365 }
366
367 /*
368 * Check for illegal addresses. Watch out for address wrap... Note
369 * that VM_*_ADDRESS are not constants due to casts (argh).
370 */
371 if (flags & MAP_FIXED) {
372 /*
373 * The specified address must have the same remainder
374 * as the file offset taken modulo PAGE_SIZE, so it
375 * should be aligned after adjustment by pageoff.
376 */
377 user_addr -= pageoff;
378 if (user_addr & vm_map_page_mask(user_map)) {
379 return EINVAL;
380 }
381 }
382 #ifdef notyet
383 /* Do not have APIs to get this info; need to wait till then */
384 /*
385 * XXX for non-fixed mappings where no hint is provided or
386 * the hint would fall in the potential heap space,
387 * place it after the end of the largest possible heap.
388 *
389 * There should really be a pmap call to determine a reasonable
390 * location.
391 */
392 else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
393 vm_map_page_mask(user_map))) {
394 addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
395 vm_map_page_mask(user_map));
396 }
397
398 #endif
399
400 alloc_flags = 0;
401
402 if (flags & MAP_ANON) {
403 maxprot = VM_PROT_ALL;
404 #if CONFIG_MACF
405 /*
406 * Entitlement check.
407 */
408 error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);
409 if (error) {
410 return EINVAL;
411 }
412 #endif /* MAC */
413
414 /*
415 * Mapping blank space is trivial. Use positive fds as the alias
416 * value for memory tracking.
417 */
418 if (fd != -1) {
419 /*
420 * Use "fd" to pass (some) Mach VM allocation flags,
421 * (see the VM_FLAGS_* definitions).
422 */
423 alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
424 VM_FLAGS_SUPERPAGE_MASK |
425 VM_FLAGS_PURGABLE |
426 VM_FLAGS_4GB_CHUNK);
427 if (alloc_flags != fd) {
428 /* reject if there are any extra flags */
429 return EINVAL;
430 }
431 VM_GET_FLAGS_ALIAS(alloc_flags, tag);
432 alloc_flags &= ~VM_FLAGS_ALIAS_MASK;
433 }
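/*
 * Editor's sketch of the user-space side (illustrative assumption, not part
 * of the original source): with MAP_ANON, the "fd" argument can carry a Mach
 * VM memory tag and a few allocation flags instead of a file descriptor:
 *
 *     #include <mach/vm_statistics.h>
 *
 *     void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
 *                      MAP_ANON | MAP_PRIVATE,
 *                      VM_MAKE_TAG(VM_MEMORY_APPLICATION_SPECIFIC_1), 0);
 *
 * Bits outside the masks accepted above make the call fail with EINVAL.
 */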
434
435 handle = NULL;
436 file_pos = 0;
437 pageoff = 0;
438 mapanon = 1;
439 } else {
440 struct vnode_attr va;
441 vfs_context_t ctx = vfs_context_current();
442
443 if (flags & MAP_JIT) {
444 return EINVAL;
445 }
446
447 /*
448 * Mapping file, get fp for validation. Obtain vnode and make
449 * sure it is of appropriate type.
450 */
451 err = fp_lookup(p, fd, &fp, 0);
452 if (err) {
453 return err;
454 }
455 fpref = 1;
456 switch (FILEGLOB_DTYPE(fp->fp_glob)) {
457 case DTYPE_PSXSHM:
458 uap->addr = (user_addr_t)user_addr;
459 uap->len = (user_size_t)user_size;
460 uap->prot = prot;
461 uap->flags = flags;
462 uap->pos = file_pos;
463 error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
464 goto bad;
465 case DTYPE_VNODE:
466 break;
467 default:
468 error = EINVAL;
469 goto bad;
470 }
471 vp = (struct vnode *)fp->fp_glob->fg_data;
472 error = vnode_getwithref(vp);
473 if (error != 0) {
474 goto bad;
475 }
476
477 if (vp->v_type != VREG && vp->v_type != VCHR) {
478 (void)vnode_put(vp);
479 error = EINVAL;
480 goto bad;
481 }
482
483 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
484
485 /*
486 * POSIX: mmap needs to update access time for mapped files
487 */
488 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
489 VATTR_INIT(&va);
490 nanotime(&va.va_access_time);
491 VATTR_SET_ACTIVE(&va, va_access_time);
492 vnode_setattr(vp, &va, ctx);
493 }
494
495 /*
496 * XXX hack to handle use of /dev/zero to map anon memory (ala
497 * SunOS).
498 */
499 if (vp->v_type == VCHR || vp->v_type == VSTR) {
500 (void)vnode_put(vp);
501 error = ENODEV;
502 goto bad;
503 } else {
504 /*
505 * Ensure that file and memory protections are
506 * compatible. Note that we only worry about
507 * writability if mapping is shared; in this case,
508 * current and max prot are dictated by the open file.
509 * XXX use the vnode instead? Problem is: what
510 * credentials do we use for determination? What if
511 * proc does a setuid?
512 */
513 maxprot = VM_PROT_EXECUTE; /* TODO: Remove this and restrict maxprot? */
514 if (fp->fp_glob->fg_flag & FREAD) {
515 maxprot |= VM_PROT_READ;
516 } else if (prot & PROT_READ) {
517 (void)vnode_put(vp);
518 error = EACCES;
519 goto bad;
520 }
521 /*
522 * If we are sharing potential changes (either via
523 * MAP_SHARED or via the implicit sharing of character
524 * device mappings), and we are trying to get write
525 * permission although we opened it without asking
526 * for it, bail out.
527 */
528
529 if ((flags & MAP_SHARED) != 0) {
530 if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
531 /*
532 * Do not allow writable mappings of
533 * swap files (see vm_swapfile_pager.c).
534 */
535 !vnode_isswap(vp)) {
536 /*
537 * check for write access
538 *
539 * Note that we already made this check when granting FWRITE
540 * against the file, so it seems redundant here.
541 */
542 error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
543
544 /* if not granted for any reason, but we wanted it, bad */
545 if ((prot & PROT_WRITE) && (error != 0)) {
546 vnode_put(vp);
547 goto bad;
548 }
549
550 /* if writable, remember */
551 if (error == 0) {
552 maxprot |= VM_PROT_WRITE;
553 }
554 } else if ((prot & PROT_WRITE) != 0) {
555 (void)vnode_put(vp);
556 error = EACCES;
557 goto bad;
558 }
559 } else {
560 maxprot |= VM_PROT_WRITE;
561 }
562
563 handle = (void *)vp;
564 #if CONFIG_MACF
565 error = mac_file_check_mmap(vfs_context_ucred(ctx),
566 fp->fp_glob, prot, flags, file_pos + pageoff,
567 &maxprot);
568 if (error) {
569 (void)vnode_put(vp);
570 goto bad;
571 }
572 #endif /* MAC */
573 /*
574 * Consult the file system to determine if this
575 * particular file object can be mapped.
576 *
577 * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
578 * then we don't check for writeability on the file
579 * object, because it will only ever see reads.
580 */
581 error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
582 (prot & ~PROT_WRITE) : prot, ctx);
583 if (error) {
584 (void)vnode_put(vp);
585 goto bad;
586 }
587 }
588
589 /*
590 * No copy-on-read for mmap() mappings themselves.
591 */
592 vmk_flags.vmkf_no_copy_on_read = 1;
593 }
594
595 if (user_size == 0) {
596 if (!mapanon) {
597 (void)vnode_put(vp);
598 }
599 error = 0;
600 goto bad;
601 }
602
603 /*
604 * We bend a little - round the start and end addresses
605 * to the nearest page boundary.
606 */
607 user_size = vm_map_round_page(user_size,
608 vm_map_page_mask(user_map));
609
610 if (file_pos & vm_map_page_mask(user_map)) {
611 if (!mapanon) {
612 (void)vnode_put(vp);
613 }
614 error = EINVAL;
615 goto bad;
616 }
617
618 if ((flags & MAP_FIXED) == 0) {
619 alloc_flags |= VM_FLAGS_ANYWHERE;
620 user_addr = vm_map_round_page(user_addr,
621 vm_map_page_mask(user_map));
622 } else {
623 if (user_addr != vm_map_trunc_page(user_addr,
624 vm_map_page_mask(user_map))) {
625 if (!mapanon) {
626 (void)vnode_put(vp);
627 }
628 error = EINVAL;
629 goto bad;
630 }
631 /*
632 * mmap(MAP_FIXED) will replace any existing mappings in the
633 * specified range, if the new mapping is successful.
634 * If we just deallocate the specified address range here,
635 * another thread might jump in and allocate memory in that
636 * range before we get a chance to establish the new mapping,
637 * and we won't have a chance to restore the old mappings.
638 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
639 * has to deallocate the existing mappings and establish the
640 * new ones atomically.
641 */
642 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
643 }
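/*
 * Editor's sketch (illustrative, not part of the original source): because of
 * VM_FLAGS_OVERWRITE, a successful mmap(MAP_FIXED) replaces anything already
 * mapped in the target range atomically, e.g. carving a file view out of a
 * previously reserved anonymous region ("fd" is hypothetical):
 *
 *     size_t pg = (size_t)getpagesize();
 *     char *base = mmap(NULL, 4 * pg, PROT_NONE,
 *                       MAP_ANON | MAP_PRIVATE, -1, 0);
 *     (void)mmap(base + pg, pg, PROT_READ,
 *                MAP_FILE | MAP_SHARED | MAP_FIXED, fd, 0);
 *
 * There is no window in which the replaced pages are unmapped.
 */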
644
645 if (flags & MAP_NOCACHE) {
646 alloc_flags |= VM_FLAGS_NO_CACHE;
647 }
648
649 if (flags & MAP_JIT) {
650 vmk_flags.vmkf_map_jit = TRUE;
651 }
652
653
654 if (flags & MAP_RESILIENT_CODESIGN) {
655 alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
656 }
657 if (flags & MAP_RESILIENT_MEDIA) {
658 alloc_flags |= VM_FLAGS_RESILIENT_MEDIA;
659 }
660
661 #if XNU_TARGET_OS_OSX
662 /* macOS-specific MAP_32BIT flag handling */
663 if (flags & MAP_32BIT) {
664 vmk_flags.vmkf_32bit_map_va = TRUE;
665 }
666 #endif
667
668 /*
669 * Lookup/allocate object.
670 */
671 if (handle == NULL) {
672 control = NULL;
673 #ifdef notyet
674 /* Hmm .. */
675 #if defined(VM_PROT_READ_IS_EXEC)
676 if (prot & VM_PROT_READ) {
677 prot |= VM_PROT_EXECUTE;
678 }
679 if (maxprot & VM_PROT_READ) {
680 maxprot |= VM_PROT_EXECUTE;
681 }
682 #endif
683 #endif
684
685 #if 3777787
686 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
687 prot |= VM_PROT_READ;
688 }
689 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
690 maxprot |= VM_PROT_READ;
691 }
692 #endif /* radar 3777787 */
693 map_anon_retry:
694
695 result = vm_map_enter_mem_object(user_map,
696 &user_addr, user_size,
697 0, alloc_flags, vmk_flags,
698 tag,
699 IPC_PORT_NULL, 0, FALSE,
700 prot, maxprot,
701 (flags & MAP_SHARED) ?
702 VM_INHERIT_SHARE :
703 VM_INHERIT_DEFAULT);
704
705 /* If a non-binding address was specified for this anonymous
706 * mapping, retry the mapping with a zero base
707 * in the event the mapping operation failed due to
708 * lack of space between the address and the map's maximum.
709 */
710 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
711 user_addr = vm_map_page_size(user_map);
712 goto map_anon_retry;
713 }
714 } else {
715 if (vnode_isswap(vp)) {
716 /*
717 * Map swap files with a special pager
718 * that returns obfuscated contents.
719 */
720 control = NULL;
721 pager = swapfile_pager_setup(vp);
722 if (pager != MEMORY_OBJECT_NULL) {
723 control = swapfile_pager_control(pager);
724 }
725 } else {
726 control = ubc_getobject(vp, UBC_FLAGS_NONE);
727 }
728
729 if (control == NULL) {
730 (void)vnode_put(vp);
731 error = ENOMEM;
732 goto bad;
733 }
734
735 /*
736 * Set credentials:
737 * FIXME: if we're writing the file we need a way to
738 * ensure that someone doesn't replace our R/W creds
739 * with ones that only work for read.
740 */
741
742 ubc_setthreadcred(vp, p, current_thread());
743 docow = FALSE;
744 if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {
745 docow = TRUE;
746 }
747
748 #ifdef notyet
749 /* Hmm .. */
750 #if defined(VM_PROT_READ_IS_EXEC)
751 if (prot & VM_PROT_READ) {
752 prot |= VM_PROT_EXECUTE;
753 }
754 if (maxprot & VM_PROT_READ) {
755 maxprot |= VM_PROT_EXECUTE;
756 }
757 #endif
758 #endif /* notyet */
759
760 #if 3777787
761 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
762 prot |= VM_PROT_READ;
763 }
764 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
765 maxprot |= VM_PROT_READ;
766 }
767 #endif /* radar 3777787 */
768
769 map_file_retry:
770 if (flags & MAP_RESILIENT_CODESIGN) {
771 int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
772 if (prot & reject_prot) {
773 /*
774 * Would like to use (prot | maxprot) here
775 * but the assignment of VM_PROT_EXECUTE
776 * to maxprot above would always fail the test.
777 *
778 * Skipping the check is ok, however, because we
779 * restrict maxprot to prot just below in this
780 * block.
781 */
782 assert(!mapanon);
783 vnode_put(vp);
784 error = EPERM;
785 goto bad;
786 }
787 /* strictly limit access to "prot" */
788 maxprot &= prot;
789 }
790
791 vm_object_offset_t end_pos = 0;
792 if (os_add_overflow(user_size, file_pos, &end_pos)) {
793 vnode_put(vp);
794 error = EINVAL;
795 goto bad;
796 }
797
798 result = vm_map_enter_mem_object_control(user_map,
799 &user_addr, user_size,
800 0, alloc_flags, vmk_flags,
801 tag,
802 control, file_pos,
803 docow, prot, maxprot,
804 (flags & MAP_SHARED) ?
805 VM_INHERIT_SHARE :
806 VM_INHERIT_DEFAULT);
807
808 /* If a non-binding address was specified for this file backed
809 * mapping, retry the mapping with a zero base
810 * in the event the mapping operation failed due to
811 * lack of space between the address and the map's maximum.
812 */
813 if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
814 user_addr = vm_map_page_size(user_map);
815 goto map_file_retry;
816 }
817 }
818
819 if (!mapanon) {
820 (void)vnode_put(vp);
821 }
822
823 switch (result) {
824 case KERN_SUCCESS:
825 *retval = user_addr + pageoff;
826 error = 0;
827 break;
828 case KERN_INVALID_ADDRESS:
829 case KERN_NO_SPACE:
830 error = ENOMEM;
831 break;
832 case KERN_PROTECTION_FAILURE:
833 error = EACCES;
834 break;
835 default:
836 error = EINVAL;
837 break;
838 }
839 bad:
840 if (pager != MEMORY_OBJECT_NULL) {
841 /*
842 * Release the reference on the pager.
843 * If the mapping was successful, it now holds
844 * an extra reference.
845 */
846 memory_object_deallocate(pager);
847 }
848 if (fpref) {
849 fp_drop(p, fd, fp, 0);
850 }
851
852 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
853 #ifndef CONFIG_EMBEDDED
854 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
855 (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
856 #endif
857 return error;
858 }
859
860 int
861 msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
862 {
863 __pthread_testcancel(1);
864 return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);
865 }
866
867 int
868 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
869 {
870 mach_vm_offset_t addr;
871 mach_vm_size_t size;
872 int flags;
873 vm_map_t user_map;
874 int rv;
875 vm_sync_t sync_flags = 0;
876
877 user_map = current_map();
878 addr = (mach_vm_offset_t) uap->addr;
879 size = (mach_vm_size_t) uap->len;
880 #ifndef CONFIG_EMBEDDED
881 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
882 #endif
883 if (mach_vm_range_overflows(addr, size)) {
884 return EINVAL;
885 }
886 if (addr & vm_map_page_mask(user_map)) {
887 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
888 return EINVAL;
889 }
890 if (size == 0) {
891 /*
892  * We cannot support this properly without maintaining a
893  * list of all mmaps done. We cannot use vm_map_entry, as entries could be
894  * split or coalesced by independent actions. So instead of returning
895  * inaccurate results, let's just return an error for the invalid size
896  * specified.
897 */
898 return EINVAL; /* XXX breaks posix apps */
899 }
900
901 flags = uap->flags;
902 /* disallow contradictory flags */
903 if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {
904 return EINVAL;
905 }
906
907 if (flags & MS_KILLPAGES) {
908 sync_flags |= VM_SYNC_KILLPAGES;
909 }
910 if (flags & MS_DEACTIVATE) {
911 sync_flags |= VM_SYNC_DEACTIVATE;
912 }
913 if (flags & MS_INVALIDATE) {
914 sync_flags |= VM_SYNC_INVALIDATE;
915 }
916
917 if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
918 if (flags & MS_ASYNC) {
919 sync_flags |= VM_SYNC_ASYNCHRONOUS;
920 } else {
921 sync_flags |= VM_SYNC_SYNCHRONOUS;
922 }
923 }
924
925 sync_flags |= VM_SYNC_CONTIGUOUS; /* complain if holes */
926
927 rv = mach_vm_msync(user_map, addr, size, sync_flags);
928
929 switch (rv) {
930 case KERN_SUCCESS:
931 break;
932 case KERN_INVALID_ADDRESS: /* hole in region being sync'ed */
933 return ENOMEM;
934 case KERN_FAILURE:
935 return EIO;
936 default:
937 return EINVAL;
938 }
939 return 0;
940 }
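/*
 * Editor's sketch of typical user-space usage against the rules enforced
 * above (illustrative only; "addr" is a page-aligned MAP_SHARED file mapping
 * of length "len"):
 *
 *     if (msync(addr, len, MS_SYNC) == -1) {
 *         // EINVAL: unaligned addr, len == 0, or MS_SYNC combined with MS_ASYNC
 *         // ENOMEM: a hole in the range (VM_SYNC_CONTIGUOUS)
 *         // EIO:    the underlying write-back failed
 *     }
 */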
941
942
943 int
944 munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
945 {
946 mach_vm_offset_t user_addr;
947 mach_vm_size_t user_size;
948 kern_return_t result;
949 vm_map_t user_map;
950
951 user_map = current_map();
952 user_addr = (mach_vm_offset_t) uap->addr;
953 user_size = (mach_vm_size_t) uap->len;
954
955 AUDIT_ARG(addr, user_addr);
956 AUDIT_ARG(len, user_size);
957
958 if (user_addr & vm_map_page_mask(user_map)) {
959 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
960 return EINVAL;
961 }
962
963 if (mach_vm_range_overflows(user_addr, user_size)) {
964 return EINVAL;
965 }
966
967 if (user_size == 0) {
968 /* UNIX SPEC: size is 0, return EINVAL */
969 return EINVAL;
970 }
971
972 result = mach_vm_deallocate(user_map, user_addr, user_size);
973 if (result != KERN_SUCCESS) {
974 return EINVAL;
975 }
976 return 0;
977 }
978
979 int
980 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
981 {
982 vm_prot_t prot;
983 mach_vm_offset_t user_addr;
984 mach_vm_size_t user_size;
985 kern_return_t result;
986 vm_map_t user_map;
987 #if CONFIG_MACF
988 int error;
989 #endif
990
991 AUDIT_ARG(addr, uap->addr);
992 AUDIT_ARG(len, uap->len);
993 AUDIT_ARG(value32, uap->prot);
994
995 user_map = current_map();
996 user_addr = (mach_vm_offset_t) uap->addr;
997 user_size = (mach_vm_size_t) uap->len;
998 prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));
999
1000 if (mach_vm_range_overflows(user_addr, user_size)) {
1001 return EINVAL;
1002 }
1003 if (user_addr & vm_map_page_mask(user_map)) {
1004 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
1005 return EINVAL;
1006 }
1007
1008 #ifdef notyet
1009 /* Hmm .. */
1010 #if defined(VM_PROT_READ_IS_EXEC)
1011 if (prot & VM_PROT_READ) {
1012 prot |= VM_PROT_EXECUTE;
1013 }
1014 #endif
1015 #endif /* notyet */
1016
1017 #if 3936456
1018 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
1019 prot |= VM_PROT_READ;
1020 }
1021 #endif /* 3936456 */
1022
1023 #if defined(__arm64__)
1024 if (prot & VM_PROT_STRIP_READ) {
1025 prot &= ~(VM_PROT_READ | VM_PROT_STRIP_READ);
1026 }
1027 #endif
1028
1029 #if CONFIG_MACF
1030 /*
1031  * The MAC check for mprotect is of limited use for two reasons:
1032  * (1) Without mmap revocation, the caller could have asked for the max
1033  * protections initially instead of a reduced set, so an mprotect
1034  * check would offer no new security.
1035  * (2) It is not possible to extract the vnode from the pager object(s)
1036  * of the target memory range.
1037  * However, the MAC check may be used to prevent a process from,
1038  * e.g., making the stack executable.
1039 */
1040 error = mac_proc_check_mprotect(p, user_addr,
1041 user_size, prot);
1042 if (error) {
1043 return error;
1044 }
1045 #endif
1046
1047 if (prot & VM_PROT_TRUSTED) {
1048 #if CONFIG_DYNAMIC_CODE_SIGNING
1049 /* CODE SIGNING ENFORCEMENT - JIT support */
1050 /* The special protection value VM_PROT_TRUSTED requests that we treat
1051 * this page as if it had a valid code signature.
1052 * If this is enabled, there MUST be a MAC policy implementing the
1053  * mac_proc_check_mprotect() hook above. Otherwise, code signing will be
1054  * compromised because the check would always succeed and thus any
1055 * process could sign dynamically. */
1056 result = vm_map_sign(
1057 user_map,
1058 vm_map_trunc_page(user_addr,
1059 vm_map_page_mask(user_map)),
1060 vm_map_round_page(user_addr + user_size,
1061 vm_map_page_mask(user_map)));
1062 switch (result) {
1063 case KERN_SUCCESS:
1064 break;
1065 case KERN_INVALID_ADDRESS:
1066 /* UNIX SPEC: for an invalid address range, return ENOMEM */
1067 return ENOMEM;
1068 default:
1069 return EINVAL;
1070 }
1071 #else
1072 return ENOTSUP;
1073 #endif
1074 }
1075 prot &= ~VM_PROT_TRUSTED;
1076
1077 result = mach_vm_protect(user_map, user_addr, user_size,
1078 FALSE, prot);
1079 switch (result) {
1080 case KERN_SUCCESS:
1081 return 0;
1082 case KERN_PROTECTION_FAILURE:
1083 return EACCES;
1084 case KERN_INVALID_ADDRESS:
1085 /* UNIX SPEC: for an invalid address range, return ENOMEM */
1086 return ENOMEM;
1087 }
1088 return EINVAL;
1089 }
1090
1091
1092 int
1093 minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
1094 {
1095 mach_vm_offset_t addr;
1096 mach_vm_size_t size;
1097 vm_inherit_t inherit;
1098 vm_map_t user_map;
1099 kern_return_t result;
1100
1101 AUDIT_ARG(addr, uap->addr);
1102 AUDIT_ARG(len, uap->len);
1103 AUDIT_ARG(value32, uap->inherit);
1104
1105 addr = (mach_vm_offset_t)uap->addr;
1106 size = (mach_vm_size_t)uap->len;
1107 inherit = uap->inherit;
1108 if (mach_vm_range_overflows(addr, size)) {
1109 return EINVAL;
1110 }
1111 user_map = current_map();
1112 result = mach_vm_inherit(user_map, addr, size,
1113 inherit);
1114 switch (result) {
1115 case KERN_SUCCESS:
1116 return 0;
1117 case KERN_PROTECTION_FAILURE:
1118 return EACCES;
1119 }
1120 return EINVAL;
1121 }
1122
1123 int
1124 madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
1125 {
1126 vm_map_t user_map;
1127 mach_vm_offset_t start;
1128 mach_vm_size_t size;
1129 vm_behavior_t new_behavior;
1130 kern_return_t result;
1131
1132 /*
1133 * Since this routine is only advisory, we default to conservative
1134 * behavior.
1135 */
1136 switch (uap->behav) {
1137 case MADV_RANDOM:
1138 new_behavior = VM_BEHAVIOR_RANDOM;
1139 break;
1140 case MADV_SEQUENTIAL:
1141 new_behavior = VM_BEHAVIOR_SEQUENTIAL;
1142 break;
1143 case MADV_NORMAL:
1144 new_behavior = VM_BEHAVIOR_DEFAULT;
1145 break;
1146 case MADV_WILLNEED:
1147 new_behavior = VM_BEHAVIOR_WILLNEED;
1148 break;
1149 case MADV_DONTNEED:
1150 new_behavior = VM_BEHAVIOR_DONTNEED;
1151 break;
1152 case MADV_FREE:
1153 new_behavior = VM_BEHAVIOR_FREE;
1154 break;
1155 case MADV_ZERO_WIRED_PAGES:
1156 new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
1157 break;
1158 case MADV_FREE_REUSABLE:
1159 new_behavior = VM_BEHAVIOR_REUSABLE;
1160 break;
1161 case MADV_FREE_REUSE:
1162 new_behavior = VM_BEHAVIOR_REUSE;
1163 break;
1164 case MADV_CAN_REUSE:
1165 new_behavior = VM_BEHAVIOR_CAN_REUSE;
1166 break;
1167 case MADV_PAGEOUT:
1168 #if MACH_ASSERT
1169 new_behavior = VM_BEHAVIOR_PAGEOUT;
1170 break;
1171 #else /* MACH_ASSERT */
1172 return ENOTSUP;
1173 #endif /* MACH_ASSERT */
1174 default:
1175 return EINVAL;
1176 }
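/*
 * Editor's sketch (illustrative, not part of the original source): the
 * Darwin-specific reuse hints above are typically used in pairs by
 * allocators, e.g. to return clean pages while keeping the address range:
 *
 *     madvise(chunk, chunk_size, MADV_FREE_REUSABLE);  // pages may be reclaimed
 *     ...
 *     madvise(chunk, chunk_size, MADV_FREE_REUSE);     // about to touch them again
 *
 * where "chunk" / "chunk_size" are a page-aligned range owned by the caller.
 */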
1177
1178 start = (mach_vm_offset_t) uap->addr;
1179 size = (mach_vm_size_t) uap->len;
1180 if (mach_vm_range_overflows(start, size)) {
1181 return EINVAL;
1182 }
1183 #if __arm64__
1184 if (start == 0 &&
1185 size != 0 &&
1186 (uap->behav == MADV_FREE ||
1187 uap->behav == MADV_FREE_REUSABLE)) {
1188 printf("** FOURK_COMPAT: %d[%s] "
1189 "failing madvise(0x%llx,0x%llx,%s)\n",
1190 p->p_pid, p->p_comm, start, size,
1191 ((uap->behav == MADV_FREE_REUSABLE)
1192 ? "MADV_FREE_REUSABLE"
1193 : "MADV_FREE"));
1194 DTRACE_VM3(fourk_compat_madvise,
1195 uint64_t, start,
1196 uint64_t, size,
1197 int, uap->behav);
1198 return EINVAL;
1199 }
1200 #endif /* __arm64__ */
1201
1202 user_map = current_map();
1203
1204 result = mach_vm_behavior_set(user_map, start, size, new_behavior);
1205 switch (result) {
1206 case KERN_SUCCESS:
1207 return 0;
1208 case KERN_INVALID_ADDRESS:
1209 return EINVAL;
1210 case KERN_NO_SPACE:
1211 return ENOMEM;
1212 }
1213
1214 return EINVAL;
1215 }
1216
1217 int
1218 mincore(__unused proc_t p, struct mincore_args *uap, __unused int32_t *retval)
1219 {
1220 mach_vm_offset_t addr = 0, first_addr = 0, end = 0, cur_end = 0;
1221 vm_map_t map = VM_MAP_NULL;
1222 user_addr_t vec = 0;
1223 int error = 0;
1224 int64_t lastvecindex = 0;
1225 int mincoreinfo = 0;
1226 int pqueryinfo = 0;
1227 uint64_t pqueryinfo_vec_size = 0;
1228 vm_page_info_basic_t info = NULL;
1229 mach_msg_type_number_t count = 0;
1230 char *kernel_vec = NULL;
1231 uint64_t req_vec_size_pages = 0, cur_vec_size_pages = 0, vecindex = 0;
1232 kern_return_t kr = KERN_SUCCESS;
1233 int effective_page_shift, effective_page_size;
1234
1235 map = current_map();
1236
1237 /*
1238 * On systems with 4k kernel space and 16k user space, we will
1239 * use the kernel page size to report back the residency information.
1240 * This is for backwards compatibility since we already have
1241 * processes that depend on this behavior.
1242 */
1243 if (vm_map_page_shift(map) < PAGE_SHIFT) {
1244 effective_page_shift = vm_map_page_shift(map);
1245 effective_page_size = vm_map_page_size(map);
1246 } else {
1247 effective_page_shift = PAGE_SHIFT;
1248 effective_page_size = PAGE_SIZE;
1249 }
1250
1251 /*
1252 * Make sure that the addresses presented are valid for user
1253 * mode.
1254 */
1255 first_addr = addr = vm_map_trunc_page(uap->addr,
1256 vm_map_page_mask(map));
1257 end = vm_map_round_page(uap->addr + uap->len,
1258 vm_map_page_mask(map));
1259
1260 if (end < addr) {
1261 return EINVAL;
1262 }
1263
1264 if (end == addr) {
1265 return 0;
1266 }
1267
1268 /*
1269 * We are going to loop through the whole 'req_vec_size' pages
1270 * range in chunks of 'cur_vec_size'.
1271 */
1272
1273 req_vec_size_pages = (end - addr) >> effective_page_shift;
1274 cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
1275
1276 kernel_vec = (void*) _MALLOC(cur_vec_size_pages * sizeof(char), M_TEMP, M_WAITOK | M_ZERO);
1277
1278 if (kernel_vec == NULL) {
1279 return ENOMEM;
1280 }
1281
1282 /*
1283 * Address of byte vector
1284 */
1285 vec = uap->vec;
1286
1287 pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);
1288 info = (void*) _MALLOC(pqueryinfo_vec_size, M_TEMP, M_WAITOK);
1289
1290 if (info == NULL) {
1291 FREE(kernel_vec, M_TEMP);
1292 return ENOMEM;
1293 }
1294
1295 while (addr < end) {
1296 cur_end = addr + (cur_vec_size_pages * effective_page_size);
1297
1298 count = VM_PAGE_INFO_BASIC_COUNT;
1299 kr = vm_map_page_range_info_internal(map,
1300 addr,
1301 cur_end,
1302 effective_page_shift,
1303 VM_PAGE_INFO_BASIC,
1304 (vm_page_info_t) info,
1305 &count);
1306
1307 assert(kr == KERN_SUCCESS);
1308
1309 /*
1310 * Do this on a map entry basis so that if the pages are not
1311  * in the current process's address space, we can easily look
1312 * up the pages elsewhere.
1313 */
1314 lastvecindex = -1;
1315
1316 for (; addr < cur_end; addr += effective_page_size) {
1317 pqueryinfo = info[lastvecindex + 1].disposition;
1318
1319 mincoreinfo = 0;
1320
1321 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
1322 mincoreinfo |= MINCORE_INCORE;
1323 }
1324 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
1325 mincoreinfo |= MINCORE_REFERENCED;
1326 }
1327 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
1328 mincoreinfo |= MINCORE_MODIFIED;
1329 }
1330 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
1331 mincoreinfo |= MINCORE_PAGED_OUT;
1332 }
1333 if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
1334 mincoreinfo |= MINCORE_COPIED;
1335 }
1336 if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
1337 mincoreinfo |= MINCORE_ANONYMOUS;
1338 }
1339 /*
1340 * calculate index into user supplied byte vector
1341 */
1342 vecindex = (addr - first_addr) >> effective_page_shift;
1343 kernel_vec[vecindex] = (char)mincoreinfo;
1344 lastvecindex = vecindex;
1345 }
1346
1347
1348 assert(vecindex == (cur_vec_size_pages - 1));
1349
1350 error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);
1351
1352 if (error) {
1353 break;
1354 }
1355
1356 /*
1357  * For the next chunk, we'll need to:
1358  * - bump the location in the user buffer for the next dispositions,
1359  * - compute the new length, and
1360  * - compute the new starting address.
1361 */
1362 vec += cur_vec_size_pages * sizeof(char);
1363 req_vec_size_pages = (end - addr) >> effective_page_shift;
1364 cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));
1365
1366 first_addr = addr;
1367 }
1368
1369 FREE(kernel_vec, M_TEMP);
1370 FREE(info, M_TEMP);
1371
1372 if (error) {
1373 return EFAULT;
1374 }
1375
1376 return 0;
1377 }
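/*
 * Editor's sketch of the user-facing contract implemented above (illustrative
 * only): one status byte is reported per page, e.g.
 *
 *     size_t npages = ...;            // pages covered by [addr, addr + len)
 *     char *vec = malloc(npages);
 *     if (mincore(addr, len, vec) == 0 && (vec[0] & MINCORE_INCORE)) {
 *         // first page is resident
 *     }
 *
 * with the remaining MINCORE_* bits (REFERENCED, MODIFIED, PAGED_OUT, COPIED,
 * ANONYMOUS) filled in as computed in the loop above. Note that, per the
 * comment at the top of this function, residency may be reported at the
 * kernel's page granularity when it is smaller than the map's.
 */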
1378
1379 int
1380 mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
1381 {
1382 vm_map_t user_map;
1383 vm_map_offset_t addr;
1384 vm_map_size_t size, pageoff;
1385 kern_return_t result;
1386
1387 AUDIT_ARG(addr, uap->addr);
1388 AUDIT_ARG(len, uap->len);
1389
1390 addr = (vm_map_offset_t) uap->addr;
1391 size = (vm_map_size_t)uap->len;
1392
1393 if (vm_map_range_overflows(addr, size)) {
1394 return EINVAL;
1395 }
1396
1397 if (size == 0) {
1398 return 0;
1399 }
1400
1401 user_map = current_map();
1402 pageoff = (addr & vm_map_page_mask(user_map));
1403 addr -= pageoff;
1404 size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));
1405
1406 /* have to call vm_map_wire directly to pass "I don't know" protections */
1407 result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);
1408
1409 if (result == KERN_RESOURCE_SHORTAGE) {
1410 return EAGAIN;
1411 } else if (result == KERN_PROTECTION_FAILURE) {
1412 return EACCES;
1413 } else if (result != KERN_SUCCESS) {
1414 return ENOMEM;
1415 }
1416
1417 return 0; /* KERN_SUCCESS */
1418 }
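/*
 * Editor's note (illustrative, not part of the original source): mlock()
 * operates on whole pages, so an unaligned request is expanded to cover
 * every page it touches, e.g.
 *
 *     mlock(base + 100, 1);   // wires the entire page containing base + 100
 *
 * and the error mappings above give EAGAIN when the wired-memory limit is
 * hit, EACCES on a protection failure, and ENOMEM otherwise.
 */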
1419
1420 int
1421 munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
1422 {
1423 mach_vm_offset_t addr;
1424 mach_vm_size_t size;
1425 vm_map_t user_map;
1426 kern_return_t result;
1427
1428 AUDIT_ARG(addr, uap->addr);
1429 AUDIT_ARG(len, uap->len);
1430
1431 addr = (mach_vm_offset_t) uap->addr;
1432 size = (mach_vm_size_t)uap->len;
1433 user_map = current_map();
1434 if (mach_vm_range_overflows(addr, size)) {
1435 return EINVAL;
1436 }
1437 /* JMM - need to remove all wirings by spec - this just removes one */
1438 result = mach_vm_wire_kernel(host_priv_self(), user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
1439 return result == KERN_SUCCESS ? 0 : ENOMEM;
1440 }
1441
1442
1443 int
1444 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)
1445 {
1446 return ENOSYS;
1447 }
1448
1449 int
1450 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
1451 {
1452 return ENOSYS;
1453 }
1454
1455 #if CONFIG_CODE_DECRYPTION
1456 int
1457 mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
1458 {
1459 mach_vm_offset_t user_addr;
1460 mach_vm_size_t user_size;
1461 kern_return_t result;
1462 vm_map_t user_map;
1463 uint32_t cryptid;
1464 cpu_type_t cputype;
1465 cpu_subtype_t cpusubtype;
1466 pager_crypt_info_t crypt_info;
1467 const char * cryptname = 0;
1468 char *vpath;
1469 int len, ret;
1470 struct proc_regioninfo_internal pinfo;
1471 vnode_t vp;
1472 uintptr_t vnodeaddr;
1473 uint32_t vid;
1474
1475 AUDIT_ARG(addr, uap->addr);
1476 AUDIT_ARG(len, uap->len);
1477
1478 user_map = current_map();
1479 user_addr = (mach_vm_offset_t) uap->addr;
1480 user_size = (mach_vm_size_t) uap->len;
1481
1482 cryptid = uap->cryptid;
1483 cputype = uap->cputype;
1484 cpusubtype = uap->cpusubtype;
1485
1486 if (mach_vm_range_overflows(user_addr, user_size)) {
1487 return EINVAL;
1488 }
1489 if (user_addr & vm_map_page_mask(user_map)) {
1490 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
1491 return EINVAL;
1492 }
1493
1494 switch (cryptid) {
1495 case CRYPTID_NO_ENCRYPTION:
1496 /* not encrypted, just an empty load command */
1497 return 0;
1498 case CRYPTID_APP_ENCRYPTION:
1499 case CRYPTID_MODEL_ENCRYPTION:
1500 cryptname = "com.apple.unfree";
1501 break;
1502 case 0x10:
1503 /* some random cryptid that you could manually put into
1504 * your binary if you want NULL */
1505 cryptname = "com.apple.null";
1506 break;
1507 default:
1508 return EINVAL;
1509 }
1510
1511 if (NULL == text_crypter_create) {
1512 return ENOTSUP;
1513 }
1514
1515 ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
1516 if (ret == 0 || !vnodeaddr) {
1517 /* No really, this returns 0 if the memory address is not backed by a file */
1518 return EINVAL;
1519 }
1520
1521 vp = (vnode_t)vnodeaddr;
1522 if ((vnode_getwithvid(vp, vid)) == 0) {
1523 vpath = zalloc(ZV_NAMEI);
1524
1525 len = MAXPATHLEN;
1526 ret = vn_getpath(vp, vpath, &len);
1527 if (ret) {
1528 zfree(ZV_NAMEI, vpath);
1529 vnode_put(vp);
1530 return ret;
1531 }
1532
1533 vnode_put(vp);
1534 } else {
1535 return EINVAL;
1536 }
1537
1538 #if 0
1539 kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
1540 __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);
1541 #endif
1542
1543 /* set up decrypter first */
1544 crypt_file_data_t crypt_data = {
1545 .filename = vpath,
1546 .cputype = cputype,
1547 .cpusubtype = cpusubtype
1548 };
1549 result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
1550 #if VM_MAP_DEBUG_APPLE_PROTECT
1551 if (vm_map_debug_apple_protect) {
1552 printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
1553 p->p_pid, p->p_comm,
1554 user_map,
1555 (uint64_t) user_addr,
1556 (uint64_t) (user_addr + user_size),
1557 __FUNCTION__, vpath, result);
1558 }
1559 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1560 zfree(ZV_NAMEI, vpath);
1561
1562 if (result) {
1563 printf("%s: unable to create decrypter %s, kr=%d\n",
1564 __FUNCTION__, cryptname, result);
1565 if (result == kIOReturnNotPrivileged) {
1566 /* text encryption returned decryption failure */
1567 return EPERM;
1568 } else {
1569 return ENOMEM;
1570 }
1571 }
1572
1573 /* now remap using the decrypter */
1574 vm_object_offset_t crypto_backing_offset;
1575 crypto_backing_offset = -1; /* i.e. use map entry's offset */
1576 result = vm_map_apple_protected(user_map,
1577 user_addr,
1578 user_addr + user_size,
1579 crypto_backing_offset,
1580 &crypt_info,
1581 cryptid);
1582 if (result) {
1583 printf("%s: mapping failed with %d\n", __FUNCTION__, result);
1584 }
1585
1586 if (result) {
1587 return EPERM;
1588 }
1589 return 0;
1590 }
1591 #endif /* CONFIG_CODE_DECRYPTION */