/*
 * Copyright (c) 2007-2020 Apple Inc. All Rights Reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 *
 * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
 *
 *      @(#)vm_mmap.c   8.10 (Berkeley) 2/19/95
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 */
/*
 * Mapped file (mmap) interface to VM
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/vnode_internal.h>
#include <sys/file_internal.h>
#include <sys/vadvise.h>
#include <sys/trace.h>
#include <sys/ubc_internal.h>
#include <sys/sysproto.h>
#include <sys/syscall.h>
#include <sys/kdebug.h>
#include <sys/bsdtask_info.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/vm_sync.h>
#include <mach/vm_behavior.h>
#include <mach/vm_inherit.h>
#include <mach/vm_statistics.h>
#include <mach/mach_vm.h>
#include <mach/vm_map.h>
#include <mach/host_priv.h>
#include <mach/sdt.h>
#include <mach-o/loader.h>

#include <machine/machine_routines.h>

#include <kern/cpu_number.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/page_decrypt.h>

#include <IOKit/IOReturn.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/vm_protos.h>

#include <security/mac_framework.h>

#include <os/overflow.h>
/*
 * This function implements the same logic as dyld's "dyld_fall_2020_os_versions"
 * from dyld_priv.h.  This way we can consistently deny / allow allocations based
 * on the SDK version at the fall 2020 level.  Compare output to proc_sdk(current_proc()).
 */
proc_2020_fall_os_sdk(void)
    switch (current_proc()->p_platform) {
        return 0x000a1000; // DYLD_MACOSX_VERSION_10_16
    case PLATFORM_IOSSIMULATOR:
    case PLATFORM_MACCATALYST:
        return 0x000e0000; // DYLD_IOS_VERSION_14_0
    case PLATFORM_BRIDGEOS:
        return 0x00050000; // DYLD_BRIDGEOS_VERSION_5_0
    case PLATFORM_TVOSSIMULATOR:
        return 0x000e0000; // DYLD_TVOS_VERSION_14_0
    case PLATFORM_WATCHOS:
    case PLATFORM_WATCHOSSIMULATOR:
        return 0x00070000; // DYLD_WATCHOS_VERSION_7_0
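    /*
     * Note (illustrative, not from the original source): the dyld version
     * constants above appear to use the usual 0x00MMmmpp packing, i.e.
     * (major << 16) | (minor << 8) | patch.  For example,
     * 0x000a1000 == 10.16.0 and 0x000e0000 == 14.0.0.
     */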
/*
 * XXX  Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX  usage is PROT_* from an interface perspective.  Thus the values of
 * XXX  VM_PROT_* and PROT_* need to correspond.
 */
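/*
 * Illustrative sketch (an assumption for clarity, not part of the original
 * source): the low permission bits line up between the two namespaces, e.g.
 * PROT_READ/VM_PROT_READ == 0x01, PROT_WRITE/VM_PROT_WRITE == 0x02 and
 * PROT_EXEC/VM_PROT_EXECUTE == 0x04, which is what lets this file mask
 * uap->prot with VM_PROT_ALL directly.
 */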
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
    /*
     * Map in special device (must be SHARED) or file
     */
    kern_return_t           result;
    vm_map_offset_t         user_addr;
    vm_map_size_t           user_size;
    vm_object_offset_t      pageoff;
    vm_object_offset_t      file_pos;
    vm_tag_t                tag = VM_KERN_MEMORY_NONE;
    vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
    memory_object_t         pager = MEMORY_OBJECT_NULL;
    memory_object_control_t control;
    /*
     * Note that for UNIX03 conformance, there is additional parameter checking for
     * mmap() system call in libsyscall prior to entering the kernel.  The sanity
     * checks and argument validation done in this function are not the only places
     * one can get returned errnos.
     */
    user_map = current_map();
    user_addr = (vm_map_offset_t)uap->addr;
    user_size = (vm_map_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);
    AUDIT_ARG(fd, uap->fd);

    if (vm_map_range_overflows(user_addr, user_size)) {

    prot = (uap->prot & VM_PROT_ALL);

    /*
     * Since the hardware currently does not support writing without
     * read-before-write, or execution-without-read, if the request is
     * for write or execute access, we must imply read access as well;
     * otherwise programs expecting this to work will fail to operate.
     */
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
#endif  /* radar 3777787 */
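    /*
     * Example of the effect (an illustration added here, not original text):
     * a caller asking for mmap(..., PROT_WRITE, ...) on affected hardware
     * ends up with an effective PROT_READ | PROT_WRITE mapping, and
     * PROT_EXEC alone becomes PROT_READ | PROT_EXEC.
     */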
    /*
     * verify no unknown flags are passed in, and if any are,
     * fail out early to make sure the logic below never has to deal
     * with invalid flag values
     */
    if (flags & ~(MAP_SHARED |
        MAP_RESERVED0080 |              // grandfathered in as accepted and ignored
        MAP_RESILIENT_CODESIGN |
        MAP_RESILIENT_MEDIA |
#if XNU_TARGET_OS_OSX
        MAP_TRANSLATED_ALLOW_EXECUTE |
        if (proc_sdk(current_proc()) >= proc_2020_fall_os_sdk()) {
    /*
     * The vm code does not have prototypes & compiler doesn't do
     * the right thing when you cast 64bit value and pass it in function
     * call.  So here it is.
     */
    file_pos = (vm_object_offset_t)uap->pos;

    /* make sure mapping fits into numeric range etc */
    if (os_add3_overflow(file_pos, user_size, PAGE_SIZE_64 - 1, &sum)) {
    if (flags & MAP_UNIX03) {
        vm_map_offset_t offset_alignment_mask;

        /*
         * Enforce UNIX03 compliance.
         */
        if (vm_map_is_exotic(current_map())) {
            offset_alignment_mask = 0xFFF;
            offset_alignment_mask = vm_map_page_mask(current_map());
        if (file_pos & offset_alignment_mask) {
            /* file offset should be page-aligned */
        if (!(flags & (MAP_PRIVATE | MAP_SHARED))) {
            /* need either MAP_PRIVATE or MAP_SHARED */
        if (user_size == 0) {
            /* mapping length should not be 0 */
    /*
     * Align the file position to a page boundary,
     * and save its page offset component.
     */
    pageoff = (file_pos & vm_map_page_mask(user_map));
    file_pos -= (vm_object_offset_t)pageoff;

    /* Adjust size for rounding (on both ends). */
    user_size += pageoff;                   /* low end... */
    user_size = vm_map_round_page(user_size,
        vm_map_page_mask(user_map));        /* hi end */
    if (flags & MAP_JIT) {
        if ((flags & MAP_FIXED) ||
            (flags & MAP_SHARED) ||
            !(flags & MAP_ANON) ||
            (flags & MAP_RESILIENT_CODESIGN) ||
            (flags & MAP_RESILIENT_MEDIA)) {

    if ((flags & MAP_RESILIENT_CODESIGN) ||
        (flags & MAP_RESILIENT_MEDIA)) {
        if ((flags & MAP_ANON) ||
    if (flags & MAP_RESILIENT_CODESIGN) {
        int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
        if (prot & reject_prot) {
    /*
     * Quick sanity check.  maxprot is calculated below and
     * we will test it again.
     */
    if (flags & MAP_SHARED) {
        /*
         * MAP_RESILIENT_MEDIA is not valid with MAP_SHARED because
         * there is no place to inject zero-filled pages without
         * actually adding them to the file.
         * Since we didn't reject that combination before, there might
         * already be callers using it and getting a valid MAP_SHARED
         * mapping but without the resilience.
         * For backwards compatibility's sake, let's keep ignoring
         * MAP_RESILIENT_MEDIA in that case.
         */
        flags &= ~MAP_RESILIENT_MEDIA;
    if (flags & MAP_RESILIENT_MEDIA) {
        if ((flags & MAP_ANON) ||
            (flags & MAP_SHARED)) {
    /*
     * Check for illegal addresses.  Watch out for address wrap...  Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & MAP_FIXED) {
        /*
         * The specified address must have the same remainder
         * as the file offset taken modulo PAGE_SIZE, so it
         * should be aligned after adjustment by pageoff.
         */
        user_addr -= pageoff;
        if (user_addr & vm_map_page_mask(user_map)) {
    /* Do not have APIs to get this info; need to wait till then. */
    /*
     * XXX  for non-fixed mappings where no hint is provided or
     *      the hint would fall in the potential heap space,
     *      place it after the end of the largest possible heap.
     *
     *      There should really be a pmap call to determine a reasonable
     */
    else if (addr < vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
        vm_map_page_mask(user_map))) {
        addr = vm_map_round_page(p->p_vmspace->vm_daddr + MAXDSIZ,
            vm_map_page_mask(user_map));
    if (flags & MAP_ANON) {
        maxprot = VM_PROT_ALL;

        error = mac_proc_check_map_anon(p, user_addr, user_size, prot, flags, &maxprot);

        /*
         * Mapping blank space is trivial.  Use positive fds as the alias
         * value for memory tracking.
         */
        /*
         * Use "fd" to pass (some) Mach VM allocation flags,
         * (see the VM_FLAGS_* definitions).
         */
        alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
            VM_FLAGS_SUPERPAGE_MASK |
        if (alloc_flags != fd) {
            /* reject if there are any extra flags */
        VM_GET_FLAGS_ALIAS(alloc_flags, tag);
        alloc_flags &= ~VM_FLAGS_ALIAS_MASK;
        struct vnode_attr va;
        vfs_context_t ctx = vfs_context_current();

        if (flags & MAP_JIT) {

        /*
         * Mapping file, get fp for validation.  Obtain vnode and make
         * sure it is of appropriate type.
         */
        err = fp_lookup(p, fd, &fp, 0);

        switch (FILEGLOB_DTYPE(fp->fp_glob)) {
            uap->addr = (user_addr_t)user_addr;
            uap->len = (user_size_t)user_size;

            error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
        vp = (struct vnode *)fp->fp_glob->fg_data;
        error = vnode_getwithref(vp);

        if (vp->v_type != VREG && vp->v_type != VCHR) {

        AUDIT_ARG(vnpath, vp, ARG_VNODE1);

        /*
         * POSIX: mmap needs to update access time for mapped files
         */
        if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
            nanotime(&va.va_access_time);
            VATTR_SET_ACTIVE(&va, va_access_time);
            vnode_setattr(vp, &va, ctx);
        /*
         * XXX hack to handle use of /dev/zero to map anon memory (ala
         */
        if (vp->v_type == VCHR || vp->v_type == VSTR) {
            /*
             * Ensure that file and memory protections are
             * compatible.  Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead?  Problem is: what
             * credentials do we use for determination?  What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE;      /* TODO: Remove this and restrict maxprot? */
            if (fp->fp_glob->fg_flag & FREAD) {
                maxprot |= VM_PROT_READ;
            } else if (prot & PROT_READ) {

            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             */
            if ((flags & MAP_SHARED) != 0) {
                if ((fp->fp_glob->fg_flag & FWRITE) != 0 &&
                    /*
                     * Do not allow writable mappings of
                     * swap files (see vm_swapfile_pager.c).
                     */
                    /*
                     * check for write access
                     *
                     * Note that we already made this check when granting FWRITE
                     * against the file, so it seems redundant here.
                     */
                    error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

                    /* if not granted for any reason, but we wanted it, bad */
                    if ((prot & PROT_WRITE) && (error != 0)) {

                    /* if writable, remember */
                    maxprot |= VM_PROT_WRITE;
            } else if ((prot & PROT_WRITE) != 0) {

                maxprot |= VM_PROT_WRITE;
            error = mac_file_check_mmap(vfs_context_ucred(ctx),
                fp->fp_glob, prot, flags, file_pos + pageoff,

            /*
             * Consult the file system to determine if this
             * particular file object can be mapped.
             *
             * N.B. If MAP_PRIVATE (i.e. CoW) has been specified,
             * then we don't check for writeability on the file
             * object, because it will only ever see reads.
             */
            error = VNOP_MMAP_CHECK(vp, (flags & MAP_PRIVATE) ?
                (prot & ~PROT_WRITE) : prot, ctx);

        /*
         * No copy-on-read for mmap() mappings themselves.
         */
        vmk_flags.vmkf_no_copy_on_read = 1;
    if (user_size == 0) {

    /*
     * We bend a little - round the start and end addresses
     * to the nearest page boundary.
     */
    user_size = vm_map_round_page(user_size,
        vm_map_page_mask(user_map));

    if (file_pos & vm_map_page_mask(user_map)) {

    if ((flags & MAP_FIXED) == 0) {
        alloc_flags |= VM_FLAGS_ANYWHERE;
        user_addr = vm_map_round_page(user_addr,
            vm_map_page_mask(user_map));
    if (user_addr != vm_map_trunc_page(user_addr,
        vm_map_page_mask(user_map))) {
        /*
         * mmap(MAP_FIXED) will replace any existing mappings in the
         * specified range, if the new mapping is successful.
         * If we just deallocate the specified address range here,
         * another thread might jump in and allocate memory in that
         * range before we get a chance to establish the new mapping,
         * and we won't have a chance to restore the old mappings.
         * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
         * has to deallocate the existing mappings and establish the
         * new ones atomically.
         */
        alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
    if (flags & MAP_NOCACHE) {
        alloc_flags |= VM_FLAGS_NO_CACHE;

    if (flags & MAP_JIT) {
        vmk_flags.vmkf_map_jit = TRUE;

    if (flags & MAP_RESILIENT_CODESIGN) {
        alloc_flags |= VM_FLAGS_RESILIENT_CODESIGN;
    if (flags & MAP_RESILIENT_MEDIA) {
        alloc_flags |= VM_FLAGS_RESILIENT_MEDIA;

#if XNU_TARGET_OS_OSX
    /* macOS-specific MAP_32BIT flag handling */
    if (flags & MAP_32BIT) {
        vmk_flags.vmkf_32bit_map_va = TRUE;
    /*
     * Lookup/allocate object.
     */
    if (handle == NULL) {
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;

        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
#endif  /* radar 3777787 */

        result = vm_map_enter_mem_object(user_map,
            &user_addr, user_size,
            0, alloc_flags, vmk_flags,
            IPC_PORT_NULL, 0, FALSE,
            (flags & MAP_SHARED) ?

        /* If a non-binding address was specified for this anonymous
         * mapping, retry the mapping with a zero base
         * in the event the mapping operation failed due to
         * lack of space between the address and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);
        if (vnode_isswap(vp)) {
            /*
             * Map swap files with a special pager
             * that returns obfuscated contents.
             */
            pager = swapfile_pager_setup(vp);
            if (pager != MEMORY_OBJECT_NULL) {
                control = swapfile_pager_control(pager);
            control = ubc_getobject(vp, UBC_FLAGS_NONE);

        if (control == NULL) {

        /*
         *  FIXME: if we're writing the file we need a way to
         *      ensure that someone doesn't replace our R/W creds
         *      with ones that only work for read.
         */
        ubc_setthreadcred(vp, p, current_thread());
        if ((flags & (MAP_ANON | MAP_SHARED)) == 0) {

#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ) {
            prot |= VM_PROT_EXECUTE;
        if (maxprot & VM_PROT_READ) {
            maxprot |= VM_PROT_EXECUTE;

        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            prot |= VM_PROT_READ;
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
            maxprot |= VM_PROT_READ;
#endif  /* radar 3777787 */

        if (flags & MAP_RESILIENT_CODESIGN) {
            int reject_prot = ((flags & MAP_PRIVATE) ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
            if (prot & reject_prot) {
            /*
             * Would like to use (prot | maxprot) here
             * but the assignment of VM_PROT_EXECUTE
             * to maxprot above would always fail the test.
             *
             * Skipping the check is ok, however, because we
             * restrict maxprot to prot just below in this
             */
            /* strictly limit access to "prot" */
        vm_object_offset_t end_pos = 0;
        if (os_add_overflow(user_size, file_pos, &end_pos)) {

        result = vm_map_enter_mem_object_control(user_map,
            &user_addr, user_size,
            0, alloc_flags, vmk_flags,
            docow, prot, maxprot,
            (flags & MAP_SHARED) ?

        /* If a non-binding address was specified for this file backed
         * mapping, retry the mapping with a zero base
         * in the event the mapping operation failed due to
         * lack of space between the address and the map's maximum.
         */
        if ((result == KERN_NO_SPACE) && ((flags & MAP_FIXED) == 0) && user_addr && (num_retries++ == 0)) {
            user_addr = vm_map_page_size(user_map);

    *retval = user_addr + pageoff;
    case KERN_INVALID_ADDRESS:
    case KERN_PROTECTION_FAILURE:

    if (pager != MEMORY_OBJECT_NULL) {
        /*
         * Release the reference on the pager.
         * If the mapping was successful, it now holds
         * an extra reference.
         */
        memory_object_deallocate(pager);
    fp_drop(p, fd, fp, 0);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
#ifndef CONFIG_EMBEDDED
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
        (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
msync(__unused proc_t p, struct msync_args *uap, int32_t *retval)
    __pthread_testcancel(1);
    return msync_nocancel(p, (struct msync_nocancel_args *)uap, retval);

msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused int32_t *retval)
    mach_vm_offset_t addr;
    vm_sync_t sync_flags = 0;

    user_map = current_map();
    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
#ifndef CONFIG_EMBEDDED
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
    if (mach_vm_range_overflows(addr, size)) {
    if (addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
        /*
         * We cannot support this properly without maintaining
         * a list of all mmaps done.  Cannot use vm_map_entry as they could be
         * split or coalesced by independent actions.  So instead of
         * inaccurate results, let's just return error as invalid size
         */
        return EINVAL; /* XXX breaks posix apps */
    /* disallow contradictory flags */
    if ((flags & (MS_SYNC | MS_ASYNC)) == (MS_SYNC | MS_ASYNC)) {

    if (flags & MS_KILLPAGES) {
        sync_flags |= VM_SYNC_KILLPAGES;
    if (flags & MS_DEACTIVATE) {
        sync_flags |= VM_SYNC_DEACTIVATE;
    if (flags & MS_INVALIDATE) {
        sync_flags |= VM_SYNC_INVALIDATE;

    if (!(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
        if (flags & MS_ASYNC) {
            sync_flags |= VM_SYNC_ASYNCHRONOUS;
            sync_flags |= VM_SYNC_SYNCHRONOUS;

    sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */
    rv = mach_vm_msync(user_map, addr, size, sync_flags);

    case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
munmap(__unused proc_t p, struct munmap_args *uap, __unused int32_t *retval)
    mach_vm_offset_t        user_addr;
    mach_vm_size_t          user_size;
    kern_return_t           result;

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);

    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */

    if (mach_vm_range_overflows(user_addr, user_size)) {

    if (user_size == 0) {
        /* UNIX SPEC: size is 0, return EINVAL */

    result = mach_vm_deallocate(user_map, user_addr, user_size);
    if (result != KERN_SUCCESS) {
mprotect(__unused proc_t p, struct mprotect_args *uap, __unused int32_t *retval)
    mach_vm_offset_t        user_addr;
    mach_vm_size_t          user_size;
    kern_return_t           result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->prot);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;
    prot = (vm_prot_t)(uap->prot & (VM_PROT_ALL | VM_PROT_TRUSTED | VM_PROT_STRIP_READ));

    if (mach_vm_range_overflows(user_addr, user_size)) {
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */

#if defined(VM_PROT_READ_IS_EXEC)
    if (prot & VM_PROT_READ) {
        prot |= VM_PROT_EXECUTE;

    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE)) {
        prot |= VM_PROT_READ;
#endif  /* 3936456 */
#if defined(__arm64__)
    if (prot & VM_PROT_STRIP_READ) {
        prot &= ~(VM_PROT_READ | VM_PROT_STRIP_READ);
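    /*
     * Illustrative note (not from the original source): on arm64 a caller
     * can pass VM_PROT_STRIP_READ together with PROT_EXEC to undo the
     * implied-read widening above and end up requesting execute-only
     * protection for the range.
     */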
    /*
     * The MAC check for mprotect is of limited use for 2 reasons:
     * Without mmap revocation, the caller could have asked for the max
     * protections initially instead of a reduced set, so a mprotect
     * check would offer no new security.
     * It is not possible to extract the vnode from the pager object(s)
     * of the target memory range.
     * However, the MAC check may be used to prevent a process from,
     * e.g., making the stack executable.
     */
    error = mac_proc_check_mprotect(p, user_addr,

    if (prot & VM_PROT_TRUSTED) {
#if CONFIG_DYNAMIC_CODE_SIGNING
        /* CODE SIGNING ENFORCEMENT - JIT support */
        /* The special protection value VM_PROT_TRUSTED requests that we treat
         * this page as if it had a valid code signature.
         * If this is enabled, there MUST be a MAC policy implementing the
         * mac_proc_check_mprotect() hook above.  Otherwise, code signing will be
         * compromised because the check would always succeed and thus any
         * process could sign dynamically. */
        result = vm_map_sign(
            vm_map_trunc_page(user_addr,
            vm_map_page_mask(user_map)),
            vm_map_round_page(user_addr + user_size,
            vm_map_page_mask(user_map)));
        case KERN_INVALID_ADDRESS:
            /* UNIX SPEC: for an invalid address range, return ENOMEM */

    prot &= ~VM_PROT_TRUSTED;

    result = mach_vm_protect(user_map, user_addr, user_size,

    case KERN_PROTECTION_FAILURE:
    case KERN_INVALID_ADDRESS:
        /* UNIX SPEC: for an invalid address range, return ENOMEM */
minherit(__unused proc_t p, struct minherit_args *uap, __unused int32_t *retval)
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    vm_inherit_t inherit;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);
    AUDIT_ARG(value32, uap->inherit);

    addr = (mach_vm_offset_t)uap->addr;
    size = (mach_vm_size_t)uap->len;
    inherit = uap->inherit;
    if (mach_vm_range_overflows(addr, size)) {

    user_map = current_map();
    result = mach_vm_inherit(user_map, addr, size,

    case KERN_PROTECTION_FAILURE:
madvise(__unused proc_t p, struct madvise_args *uap, __unused int32_t *retval)
    mach_vm_offset_t start;
    mach_vm_size_t size;
    vm_behavior_t new_behavior;
    kern_return_t result;

    /*
     * Since this routine is only advisory, we default to conservative
     */
    switch (uap->behav) {
        new_behavior = VM_BEHAVIOR_RANDOM;
    case MADV_SEQUENTIAL:
        new_behavior = VM_BEHAVIOR_SEQUENTIAL;
        new_behavior = VM_BEHAVIOR_DEFAULT;
        new_behavior = VM_BEHAVIOR_WILLNEED;
        new_behavior = VM_BEHAVIOR_DONTNEED;
        new_behavior = VM_BEHAVIOR_FREE;
    case MADV_ZERO_WIRED_PAGES:
        new_behavior = VM_BEHAVIOR_ZERO_WIRED_PAGES;
    case MADV_FREE_REUSABLE:
        new_behavior = VM_BEHAVIOR_REUSABLE;
    case MADV_FREE_REUSE:
        new_behavior = VM_BEHAVIOR_REUSE;
    case MADV_CAN_REUSE:
        new_behavior = VM_BEHAVIOR_CAN_REUSE;
        new_behavior = VM_BEHAVIOR_PAGEOUT;
#else   /* MACH_ASSERT */
#endif  /* MACH_ASSERT */
    start = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t) uap->len;
    if (mach_vm_range_overflows(start, size)) {

        (uap->behav == MADV_FREE ||
        uap->behav == MADV_FREE_REUSABLE)) {
        printf("** FOURK_COMPAT: %d[%s] "
            "failing madvise(0x%llx,0x%llx,%s)\n",
            p->p_pid, p->p_comm, start, size,
            ((uap->behav == MADV_FREE_REUSABLE)
            ? "MADV_FREE_REUSABLE"
        DTRACE_VM3(fourk_compat_madvise,
#endif /* __arm64__ */
    user_map = current_map();

    result = mach_vm_behavior_set(user_map, start, size, new_behavior);

    case KERN_INVALID_ADDRESS:
, struct mincore_args
*uap
, __unused
int32_t *retval
)
1220 mach_vm_offset_t addr
= 0, first_addr
= 0, end
= 0, cur_end
= 0;
1221 vm_map_t map
= VM_MAP_NULL
;
1222 user_addr_t vec
= 0;
1224 int64_t lastvecindex
= 0;
1225 int mincoreinfo
= 0;
1227 uint64_t pqueryinfo_vec_size
= 0;
1228 vm_page_info_basic_t info
= NULL
;
1229 mach_msg_type_number_t count
= 0;
1230 char *kernel_vec
= NULL
;
1231 uint64_t req_vec_size_pages
= 0, cur_vec_size_pages
= 0, vecindex
= 0;
1232 kern_return_t kr
= KERN_SUCCESS
;
1233 int effective_page_shift
, effective_page_size
;
1235 map
= current_map();
1238 * On systems with 4k kernel space and 16k user space, we will
1239 * use the kernel page size to report back the residency information.
1240 * This is for backwards compatibility since we already have
1241 * processes that depend on this behavior.
1243 if (vm_map_page_shift(map
) < PAGE_SHIFT
) {
1244 effective_page_shift
= vm_map_page_shift(map
);
1245 effective_page_size
= vm_map_page_size(map
);
1247 effective_page_shift
= PAGE_SHIFT
;
1248 effective_page_size
= PAGE_SIZE
;
    /*
     * Make sure that the addresses presented are valid for user
     */
    first_addr = addr = vm_map_trunc_page(uap->addr,
        vm_map_page_mask(map));
    end = vm_map_round_page(uap->addr + uap->len,
        vm_map_page_mask(map));

    /*
     * We are going to loop through the whole 'req_vec_size' pages
     * range in chunks of 'cur_vec_size'.
     */
    req_vec_size_pages = (end - addr) >> effective_page_shift;
    cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

    kernel_vec = (void *) _MALLOC(cur_vec_size_pages * sizeof(char), M_TEMP, M_WAITOK | M_ZERO);

    if (kernel_vec == NULL) {

    /*
     * Address of byte vector
     */
    pqueryinfo_vec_size = cur_vec_size_pages * sizeof(struct vm_page_info_basic);
    info = (void *) _MALLOC(pqueryinfo_vec_size, M_TEMP, M_WAITOK);
        FREE(kernel_vec, M_TEMP);
    while (addr < end) {
        cur_end = addr + (cur_vec_size_pages * effective_page_size);

        count = VM_PAGE_INFO_BASIC_COUNT;
        kr = vm_map_page_range_info_internal(map,
            effective_page_shift,
            (vm_page_info_t) info,

        assert(kr == KERN_SUCCESS);

        /*
         * Do this on a map entry basis so that if the pages are not
         * in the current process's address space, we can easily look
         * up the pages elsewhere.
         */
        for (; addr < cur_end; addr += effective_page_size) {
            pqueryinfo = info[lastvecindex + 1].disposition;

            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT) {
                mincoreinfo |= MINCORE_INCORE;
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF) {
                mincoreinfo |= MINCORE_REFERENCED;
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY) {
                mincoreinfo |= MINCORE_MODIFIED;
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_PAGED_OUT) {
                mincoreinfo |= MINCORE_PAGED_OUT;
            if (pqueryinfo & VM_PAGE_QUERY_PAGE_COPIED) {
                mincoreinfo |= MINCORE_COPIED;
            if ((pqueryinfo & VM_PAGE_QUERY_PAGE_EXTERNAL) == 0) {
                mincoreinfo |= MINCORE_ANONYMOUS;

            /*
             * calculate index into user supplied byte vector
             */
            vecindex = (addr - first_addr) >> effective_page_shift;
            kernel_vec[vecindex] = (char)mincoreinfo;
            lastvecindex = vecindex;

        assert(vecindex == (cur_vec_size_pages - 1));

        error = copyout(kernel_vec, vec, cur_vec_size_pages * sizeof(char) /* a char per page */);

        /*
         * For the next chunk, we'll need:
         * - bump the location in the user buffer for our next disposition.
         * - starting address
         */
        vec += cur_vec_size_pages * sizeof(char);
        req_vec_size_pages = (end - addr) >> effective_page_shift;
        cur_vec_size_pages = MIN(req_vec_size_pages, (MAX_PAGE_RANGE_QUERY >> effective_page_shift));

    FREE(kernel_vec, M_TEMP);
mlock(__unused proc_t p, struct mlock_args *uap, __unused int32_t *retvalval)
    vm_map_offset_t addr;
    vm_map_size_t size, pageoff;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    addr = (vm_map_offset_t) uap->addr;
    size = (vm_map_size_t)uap->len;

    if (vm_map_range_overflows(addr, size)) {

    user_map = current_map();
    pageoff = (addr & vm_map_page_mask(user_map));

    size = vm_map_round_page(size + pageoff, vm_map_page_mask(user_map));

    /* have to call vm_map_wire directly to pass "I don't know" protections */
    result = vm_map_wire_kernel(user_map, addr, addr + size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK, TRUE);

    if (result == KERN_RESOURCE_SHORTAGE) {
    } else if (result == KERN_PROTECTION_FAILURE) {
    } else if (result != KERN_SUCCESS) {

    return 0;       /* KERN_SUCCESS */
munlock(__unused proc_t p, struct munlock_args *uap, __unused int32_t *retval)
    mach_vm_offset_t addr;
    mach_vm_size_t size;
    kern_return_t result;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    addr = (mach_vm_offset_t) uap->addr;
    size = (mach_vm_size_t)uap->len;
    user_map = current_map();
    if (mach_vm_range_overflows(addr, size)) {

    /* JMM - need to remove all wirings by spec - this just removes one */
    result = mach_vm_wire_kernel(host_priv_self(), user_map, addr, size, VM_PROT_NONE, VM_KERN_MEMORY_MLOCK);
    return result == KERN_SUCCESS ? 0 : ENOMEM;

mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused int32_t *retval)

munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused int32_t *retval)
#if CONFIG_CODE_DECRYPTION

mremap_encrypted(__unused struct proc *p, struct mremap_encrypted_args *uap, __unused int32_t *retval)
    mach_vm_offset_t        user_addr;
    mach_vm_size_t          user_size;
    kern_return_t           result;
    cpu_subtype_t           cpusubtype;
    pager_crypt_info_t      crypt_info;
    const char * cryptname = 0;
    struct proc_regioninfo_internal pinfo;
    uintptr_t vnodeaddr;

    AUDIT_ARG(addr, uap->addr);
    AUDIT_ARG(len, uap->len);

    user_map = current_map();
    user_addr = (mach_vm_offset_t) uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    cryptid = uap->cryptid;
    cputype = uap->cputype;
    cpusubtype = uap->cpusubtype;

    if (mach_vm_range_overflows(user_addr, user_size)) {
    if (user_addr & vm_map_page_mask(user_map)) {
        /* UNIX SPEC: user address is not page-aligned, return EINVAL */
    case CRYPTID_NO_ENCRYPTION:
        /* not encrypted, just an empty load command */
    case CRYPTID_APP_ENCRYPTION:
    case CRYPTID_MODEL_ENCRYPTION:
        cryptname = "com.apple.unfree";
        /* some random cryptid that you could manually put into
         * your binary if you want NULL */
        cryptname = "com.apple.null";

    if (NULL == text_crypter_create) {

    ret = fill_procregioninfo_onlymappedvnodes( proc_task(p), user_addr, &pinfo, &vnodeaddr, &vid);
    if (ret == 0 || !vnodeaddr) {
        /* No really, this returns 0 if the memory address is not backed by a file */

    vp = (vnode_t)vnodeaddr;
    if ((vnode_getwithvid(vp, vid)) == 0) {
        vpath = zalloc(ZV_NAMEI);

        ret = vn_getpath(vp, vpath, &len);
            zfree(ZV_NAMEI, vpath);
    kprintf("%s vpath %s cryptid 0x%08x cputype 0x%08x cpusubtype 0x%08x range 0x%016llx size 0x%016llx\n",
        __FUNCTION__, vpath, cryptid, cputype, cpusubtype, (uint64_t)user_addr, (uint64_t)user_size);

    /* set up decrypter first */
    crypt_file_data_t crypt_data = {
        .cpusubtype = cpusubtype
    result = text_crypter_create(&crypt_info, cryptname, (void*)&crypt_data);
#if VM_MAP_DEBUG_APPLE_PROTECT
    if (vm_map_debug_apple_protect) {
        printf("APPLE_PROTECT: %d[%s] map %p [0x%llx:0x%llx] %s(%s) -> 0x%x\n",
            p->p_pid, p->p_comm,
            (uint64_t) user_addr,
            (uint64_t) (user_addr + user_size),
            __FUNCTION__, vpath, result);
#endif /* VM_MAP_DEBUG_APPLE_PROTECT */
        zfree(ZV_NAMEI, vpath);
        printf("%s: unable to create decrypter %s, kr=%d\n",
            __FUNCTION__, cryptname, result);
        if (result == kIOReturnNotPrivileged) {
            /* text encryption returned decryption failure */

    /* now remap using the decrypter */
    vm_object_offset_t crypto_backing_offset;
    crypto_backing_offset = -1;     /* i.e. use map entry's offset */
    result = vm_map_apple_protected(user_map,
        user_addr + user_size,
        crypto_backing_offset,

        printf("%s: mapping failed with %d\n", __FUNCTION__, result);

#endif /* CONFIG_CODE_DECRYPTION */