From 0b4c1975fb5e4eccf1012a35081f7e7799b81046 Mon Sep 17 00:00:00 2001 From: Apple Date: Thu, 11 Nov 2010 17:20:01 +0000 Subject: [PATCH] xnu-1504.9.17.tar.gz --- bsd/dev/unix_startup.c | 14 +- bsd/hfs/hfs.h | 2 + bsd/hfs/hfs_catalog.c | 9 +- bsd/hfs/hfs_vfsops.c | 111 +- bsd/hfs/hfs_vfsutils.c | 32 +- bsd/hfs/hfs_vnops.c | 7 + bsd/hfs/hfs_xattr.c | 6 +- bsd/hfs/hfscommon/Misc/FileExtentMapping.c | 18 +- bsd/hfs/hfscommon/Misc/VolumeAllocation.c | 96 +- bsd/hfs/hfscommon/headers/FileMgrInternal.h | 17 +- bsd/kern/kern_credential.c | 87 +- bsd/kern/kern_descrip.c | 2 +- bsd/kern/kern_fork.c | 2 + bsd/kern/kern_malloc.c | 259 ++--- bsd/kern/kern_symfile.c | 13 +- bsd/kern/kpi_mbuf.c | 2 +- bsd/kern/tty.c | 2 + bsd/kern/tty_ptmx.c | 2 - bsd/kern/ubc_subr.c | 2 + bsd/kern/uipc_socket.c | 1 + bsd/net/raw_usrreq.c | 7 +- bsd/net/route.c | 1 + bsd/netinet/in_pcb.c | 13 +- bsd/netinet/ip_input.c | 25 +- bsd/netinet/ip_mroute.c | 2 +- bsd/netinet/ip_output.c | 115 ++- bsd/netinet6/in6_pcb.c | 2 +- bsd/netinet6/in6_proto.c | 12 +- bsd/netinet6/ip6_input.c | 16 +- bsd/netinet6/ip6_var.h | 2 +- bsd/netinet6/nd6.c | 7 +- bsd/nfs/nfs_socket.c | 5 +- bsd/sys/disk.h | 1 + bsd/sys/kdebug.h | 1 + bsd/sys/mount_internal.h | 1 + bsd/vfs/vfs_bio.c | 10 +- bsd/vfs/vfs_cluster.c | 17 +- bsd/vfs/vfs_subr.c | 229 ++++- bsd/vfs/vfs_xattr.c | 6 +- bsd/vm/dp_backing_file.c | 33 +- config/IOKit.exports | 4 + config/MasterVersion | 2 +- iokit/IOKit/IOBufferMemoryDescriptor.h | 1 + iokit/IOKit/IOHibernatePrivate.h | 47 +- iokit/IOKit/IOMemoryDescriptor.h | 4 + iokit/IOKit/IOMessage.h | 7 + iokit/IOKit/pwr_mgt/IOPM.h | 100 +- iokit/IOKit/pwr_mgt/IOPMLibDefs.h | 3 +- iokit/IOKit/pwr_mgt/IOPMPrivate.h | 28 +- iokit/IOKit/pwr_mgt/RootDomain.h | 139 ++- iokit/Kernel/IOBufferMemoryDescriptor.cpp | 185 +--- iokit/Kernel/IOCopyMapper.cpp | 426 -------- iokit/Kernel/IOCopyMapper.h | 89 -- iokit/Kernel/IODMACommand.cpp | 253 +++-- iokit/Kernel/IOHibernateIO.cpp | 463 +++++---- iokit/Kernel/IOHibernateInternal.h | 4 +- iokit/Kernel/IOHibernateRestoreKernel.c | 37 +- iokit/Kernel/IOKitKernelInternal.h | 20 +- iokit/Kernel/IOLib.cpp | 30 +- iokit/Kernel/IOMapper.cpp | 21 - iokit/Kernel/IOMemoryDescriptor.cpp | 60 +- iokit/Kernel/IOPMPowerStateQueue.cpp | 8 +- iokit/Kernel/IOPMPowerStateQueue.h | 7 +- iokit/Kernel/IOPMrootDomain.cpp | 1025 ++++++++++++++++++- iokit/Kernel/IOServicePM.cpp | 4 +- iokit/Kernel/RootDomainUserClient.cpp | 21 + iokit/Kernel/RootDomainUserClient.h | 3 + iokit/conf/files | 1 - kgmacros | 126 ++- osfmk/console/video_console.c | 2 +- osfmk/default_pager/default_pager.c | 3 +- osfmk/default_pager/dp_backing_store.c | 97 +- osfmk/device/device.defs | 6 + osfmk/i386/acpi.c | 42 +- osfmk/i386/bsd_i386.c | 7 +- osfmk/i386/cpu_data.h | 1 - osfmk/i386/cpuid.h | 1 - osfmk/i386/hibernate_i386.c | 14 + osfmk/i386/i386_init.c | 10 +- osfmk/i386/i386_vm_init.c | 159 ++- osfmk/i386/machine_routines.c | 4 +- osfmk/i386/machine_routines.h | 2 - osfmk/i386/machine_routines_asm.s | 27 +- osfmk/i386/misc_protos.h | 3 +- osfmk/i386/pcb.c | 24 +- osfmk/i386/pmCPU.c | 1 + osfmk/i386/pmCPU.h | 3 +- osfmk/i386/pmap.c | 49 +- osfmk/i386/pmap_internal.h | 1 + osfmk/i386/pmap_x86_common.c | 53 + osfmk/i386/rtclock.c | 21 +- osfmk/i386/rtclock.h | 6 +- osfmk/ipc/ipc_init.c | 5 + osfmk/kern/hibernate.c | 10 +- osfmk/kern/host.c | 4 +- osfmk/kern/mk_timer.c | 2 + osfmk/kern/sched_prim.c | 128 +-- osfmk/kern/sync_sema.c | 1 + osfmk/kern/task.c | 1 + osfmk/kern/thread.c | 1 + osfmk/kern/thread_call.c | 1 + osfmk/kern/wait_queue.c | 5 + 
osfmk/kern/zalloc.c | 30 +- osfmk/kern/zalloc.h | 4 +- osfmk/mach/memory_object_types.h | 3 +- osfmk/ppc/hibernate_ppc.c | 3 + osfmk/ppc/pmap.c | 25 + osfmk/vm/bsd_vm.c | 11 +- osfmk/vm/memory_object.c | 10 +- osfmk/vm/pmap.h | 8 +- osfmk/vm/vm_apple_protect.c | 1 + osfmk/vm/vm_fault.c | 16 +- osfmk/vm/vm_kern.c | 31 +- osfmk/vm/vm_kern.h | 1 + osfmk/vm/vm_map.c | 7 + osfmk/vm/vm_object.c | 28 +- osfmk/vm/vm_object.h | 1 + osfmk/vm/vm_page.h | 24 +- osfmk/vm/vm_pageout.c | 178 ++-- osfmk/vm/vm_pageout.h | 55 +- osfmk/vm/vm_resident.c | 819 ++++++++++++--- osfmk/vm/vm_user.c | 10 +- osfmk/x86_64/machine_routines_asm.s | 19 +- osfmk/x86_64/pmap.c | 6 +- osfmk/x86_64/start.s | 8 +- 125 files changed, 4339 insertions(+), 1960 deletions(-) delete mode 100644 iokit/Kernel/IOCopyMapper.cpp delete mode 100644 iokit/Kernel/IOCopyMapper.h diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c index 9822fbd88..4ec548794 100644 --- a/bsd/dev/unix_startup.c +++ b/bsd/dev/unix_startup.c @@ -61,7 +61,7 @@ extern uint32_t tcp_recvspace; void bsd_bufferinit(void) __attribute__((section("__TEXT, initcode"))); extern void md_prepare_for_shutdown(int, int, char *); -unsigned int bsd_mbuf_cluster_reserve(void); +unsigned int bsd_mbuf_cluster_reserve(boolean_t *); void bsd_srv_setup(int); void bsd_exec_setup(int); @@ -159,7 +159,7 @@ bsd_startupearly(void) #endif int scale; - nmbclusters = bsd_mbuf_cluster_reserve() / MCLBYTES; + nmbclusters = bsd_mbuf_cluster_reserve(NULL) / MCLBYTES; #if INET || INET6 if ((scale = nmbclusters / NMBCLUSTERS) > 1) { @@ -237,9 +237,10 @@ bsd_bufferinit(void) * memory that is present. */ unsigned int -bsd_mbuf_cluster_reserve(void) +bsd_mbuf_cluster_reserve(boolean_t *overridden) { int mbuf_pool = 0; + static boolean_t was_overridden = FALSE; /* If called more than once, return the previously calculated size */ if (mbuf_poolsz != 0) @@ -263,6 +264,10 @@ bsd_mbuf_cluster_reserve(void) ncl = (mbuf_pool << MBSHIFT) >> MCLSHIFT; if (sane_size > (64 * 1024 * 1024) || ncl != 0) { + + if (ncl || srv) + was_overridden = TRUE; + if ((nmbclusters = ncl) == 0) { /* Auto-configure the mbuf pool size */ nmbclusters = mbuf_default_ncl(srv, sane_size); @@ -278,6 +283,9 @@ bsd_mbuf_cluster_reserve(void) } mbuf_poolsz = nmbclusters << MCLSHIFT; done: + if (overridden) + *overridden = was_overridden; + return (mbuf_poolsz); } #if defined(__LP64__) diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index beb10099f..89f97ebc1 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -716,6 +716,8 @@ extern u_int32_t hfs_freeblks(struct hfsmount * hfsmp, int wantreserve); short MacToVFSError(OSErr err); +void hfs_metadatazone_init(struct hfsmount *hfsmp); + /* HFS directory hint functions. */ extern directoryhint_t * hfs_getdirhint(struct cnode *, int, int); extern void hfs_reldirhint(struct cnode *, directoryhint_t *); diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index bb48eda7f..0ae0e2600 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -2111,7 +2111,7 @@ exit: } if (alias_allocated && rsrcforkp->extents[0].startBlock != 0) { (void) BlockDeallocate(hfsmp, rsrcforkp->extents[0].startBlock, - rsrcforkp->extents[0].blockCount); + rsrcforkp->extents[0].blockCount, 0); rsrcforkp->extents[0].startBlock = 0; rsrcforkp->extents[0].blockCount = 0; } @@ -2210,7 +2210,8 @@ cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalog bzero(rsrcforkp, sizeof(HFSPlusForkData)); /* Allocate some disk space for the alias content. 
*/ - result = BlockAllocate(hfsmp, 0, blkcount, blkcount, 1, 1, + result = BlockAllocate(hfsmp, 0, blkcount, blkcount, + HFS_ALLOC_FORCECONTIG | HFS_ALLOC_METAZONE, &rsrcforkp->extents[0].startBlock, &rsrcforkp->extents[0].blockCount); if (result) { @@ -2265,7 +2266,7 @@ cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalog exit: if (result && rsrcforkp->extents[0].startBlock != 0) { - (void) BlockDeallocate(hfsmp, rsrcforkp->extents[0].startBlock, rsrcforkp->extents[0].blockCount); + (void) BlockDeallocate(hfsmp, rsrcforkp->extents[0].startBlock, rsrcforkp->extents[0].blockCount, 0); rsrcforkp->extents[0].startBlock = 0; rsrcforkp->extents[0].blockCount = 0; rsrcforkp->logicalSize = 0; @@ -2329,7 +2330,7 @@ cat_deletelink(struct hfsmount *hfsmp, struct cat_desc *descp) (void) BlockDeallocate(hfsmp, file.resourceFork.extents[i].startBlock, - file.resourceFork.extents[i].blockCount); + file.resourceFork.extents[i].blockCount, 0); totalBlocks -= file.resourceFork.extents[i].blockCount; file.resourceFork.extents[i].startBlock = 0; diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index b85285bdb..aaac6d0df 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -368,7 +368,8 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte /* * Allow hot file clustering if conditions allow. */ - if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && + ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) { (void) hfs_recording_init(hfsmp); } /* Force ACLs on HFS+ file systems. */ @@ -3762,9 +3763,10 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) } } - /* - * TODO: Adjust the size of the metadata zone based on new volume size? + /* + * Update the metadata zone size based on current volume size */ + hfs_metadatazone_init(hfsmp); /* * Adjust the size of hfsmp->hfs_attrdata_vp @@ -3900,13 +3902,26 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) hfsmp->allocLimit = newblkcnt - 2; else hfsmp->allocLimit = newblkcnt - 1; - /* Update the volume free block count to reflect the total number of - * free blocks that will exist after a successful resize. + /* + * Update the volume free block count to reflect the total number + * of free blocks that will exist after a successful resize. + * Relocation of extents will result in no net change in the total + * free space on the disk. Therefore the code that allocates + * space for new extent and deallocates the old extent explicitly + * prevents updating the volume free block count. It will also + * prevent false disk full error when the number of blocks in + * an extent being relocated is more than the free blocks that + * will exist after the volume is resized. */ hfsmp->freeBlocks -= reclaimblks; updateFreeBlocks = true; HFS_MOUNT_UNLOCK(hfsmp, TRUE); + /* + * Update the metadata zone size, and, if required, disable it + */ + hfs_metadatazone_init(hfsmp); + /* * Look for files that have blocks at or beyond the location of the * new alternate volume header @@ -4001,10 +4016,6 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context) if (error) panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error); - /* - * TODO: Adjust the size of the metadata zone based on new volume size? 
- */ - /* * Adjust the size of hfsmp->hfs_attrdata_vp */ @@ -4034,6 +4045,10 @@ out: hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1; hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS; HFS_MOUNT_UNLOCK(hfsmp, TRUE); + /* On error, reset the metadata zone for original volume size */ + if (error && (updateFreeBlocks == true)) { + hfs_metadatazone_init(hfsmp); + } if (lockflags) { hfs_systemfile_unlock(hfsmp, lockflags); @@ -4250,6 +4265,7 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int struct BTreeIterator *iterator = NULL; u_int8_t forktype; u_int32_t fileID; + u_int32_t alloc_flags; /* If there is no vnode for this file, then there's nothing to do. */ if (vp == NULL) @@ -4343,25 +4359,32 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int end_block = oldStartBlock + oldBlockCount; /* Check if the file overlaps the target space */ if (end_block > startblk) { - /* Allocate a new extent */ - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, (is_sysfile ? true : false), &newStartBlock, &newBlockCount); - if (error) { - printf("hfs_reclaim_file: BlockAllocate (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); - goto fail; + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; } - if (newBlockCount != oldBlockCount) { - printf("hfs_reclaim_file: fileID=%u - newBlockCount=%u, oldBlockCount=%u", fileID, newBlockCount, oldBlockCount); - if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) { - hfs_mark_volume_inconsistent(hfsmp); + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) { + /* Try allocating again using the metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + } + if (error) { + printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); + goto fail; + } else { + if (hfs_resize_debug) { + printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount); + } } - goto fail; } /* Copy data from old location to new location */ error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context); if (error) { printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u %u:(%u,%u) to %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount, i, newStartBlock, newBlockCount); - if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) { + if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) { hfs_mark_volume_inconsistent(hfsmp); } goto fail; @@ -4371,7 +4394,7 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int *blks_moved += newBlockCount; /* Deallocate the old extent */ - error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount); + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); if (error) { printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error); hfs_mark_volume_inconsistent(hfsmp); @@ -4445,22 +4468,30 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int oldBlockCount = record[i].blockCount; end_block = oldStartBlock + oldBlockCount; if (end_block > 
startblk) { - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, (is_sysfile ? true : false), &newStartBlock, &newBlockCount); - if (error) { - printf("hfs_reclaim_file: BlockAllocate (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); - goto fail; + alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS; + if (is_sysfile) { + alloc_flags |= HFS_ALLOC_METAZONE; } - if (newBlockCount != oldBlockCount) { - printf("hfs_reclaim_file: fileID=%u - newBlockCount=%u, oldBlockCount=%u", fileID, newBlockCount, oldBlockCount); - if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) { - hfs_mark_volume_inconsistent(hfsmp); + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + if (error) { + if (!is_sysfile && ((error == dskFulErr) || (error == ENOSPC))) { + /* Try allocating again using the metadata zone */ + alloc_flags |= HFS_ALLOC_METAZONE; + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags, &newStartBlock, &newBlockCount); + } + if (error) { + printf("hfs_reclaim_file: BlockAllocate(metazone) (error=%d) for fileID=%u %u:(%u,%u)\n", error, fileID, i, oldStartBlock, oldBlockCount); + goto fail; + } else { + if (hfs_resize_debug) { + printf("hfs_reclaim_file: BlockAllocate(metazone) success for fileID=%u %u:(%u,%u)\n", fileID, i, newStartBlock, newBlockCount); + } } - goto fail; } error = hfs_copy_extent(hfsmp, vp, oldStartBlock, newStartBlock, newBlockCount, context); if (error) { printf("hfs_reclaim_file: hfs_copy_extent error=%d for fileID=%u (%u,%u) to (%u,%u)\n", error, fileID, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount); - if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount)) { + if (BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS)) { hfs_mark_volume_inconsistent(hfsmp); } goto fail; @@ -4483,7 +4514,7 @@ hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_long startblk, int hfs_mark_volume_inconsistent(hfsmp); goto fail; } - error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount); + error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); if (error) { printf("hfs_reclaim_file: BlockDeallocate returned %d\n", error); hfs_mark_volume_inconsistent(hfsmp); @@ -4608,7 +4639,9 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize; /* TODO: Allow the journal to change size based on the new volume size. 
*/ - error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, true, true, &newStartBlock, &newBlockCount); + error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + &newStartBlock, &newBlockCount); if (error) { printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error); goto fail; @@ -4618,7 +4651,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) goto free_fail; } - error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount); + error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS); if (error) { printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error); goto free_fail; @@ -4668,7 +4701,7 @@ hfs_reclaim_journal_file(struct hfsmount *hfsmp, vfs_context_t context) return error; free_fail: - (void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount); + (void) BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS); fail: hfs_systemfile_unlock(hfsmp, lockflags); (void) hfs_end_transaction(hfsmp); @@ -4704,7 +4737,9 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) } lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK); - error = BlockAllocate(hfsmp, 1, 1, 1, true, true, &newBlock, &blockCount); + error = BlockAllocate(hfsmp, 1, 1, 1, + HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS, + &newBlock, &blockCount); if (error) { printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error); goto fail; @@ -4713,7 +4748,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount); goto free_fail; } - error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1); + error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS); if (error) { printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error); goto free_fail; @@ -4788,7 +4823,7 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context) return error; free_fail: - (void) BlockDeallocate(hfsmp, newBlock, blockCount); + (void) BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS); fail: hfs_systemfile_unlock(hfsmp, lockflags); (void) hfs_end_transaction(hfsmp); diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 4251c1db8..eb721483c 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -66,7 +66,6 @@ static void ReleaseMetaFileVNode(struct vnode *vp); static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args); -static void hfs_metadatazone_init(struct hfsmount *); static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *); @@ -733,7 +732,8 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, * Allow hot file clustering if conditions allow. 
*/ if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && - ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)) { + ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) && + ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) { (void) hfs_recording_init(hfsmp); } @@ -2401,7 +2401,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a #define HOTBAND_MINIMUM_SIZE (10*1024*1024) #define HOTBAND_MAXIMUM_SIZE (512*1024*1024) -static void +void hfs_metadatazone_init(struct hfsmount *hfsmp) { ExtendedVCB *vcb; @@ -2413,7 +2413,7 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) int items, really_do_it=1; vcb = HFSTOVCB(hfsmp); - fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->totalBlocks; + fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit; /* * For volumes less than 10 GB, don't bother. @@ -2535,16 +2535,34 @@ hfs_metadatazone_init(struct hfsmount *hfsmp) hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize; /* * If doing the round up for hfs_min_alloc_start would push us past - * totalBlocks, then just reset it back to 0. Though using a value - * bigger than totalBlocks would not cause damage in the block allocator + * allocLimit, then just reset it back to 0. Though using a value + * bigger than allocLimit would not cause damage in the block allocator * code, this value could get stored in the volume header and make it out * to disk, making the volume header technically corrupt. */ - if (hfsmp->hfs_min_alloc_start >= hfsmp->totalBlocks) { + if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) { hfsmp->hfs_min_alloc_start = 0; } if (really_do_it == 0) { + /* If metadata zone needs to be disabled because the + * volume was truncated, clear the bit and zero out + * the values that are no longer needed. + */ + if (hfsmp->hfs_flags & HFS_METADATA_ZONE) { + /* Disable metadata zone */ + hfsmp->hfs_flags &= ~HFS_METADATA_ZONE; + + /* Zero out mount point values that are not required */ + hfsmp->hfs_catalog_maxblks = 0; + hfsmp->hfs_hotfile_maxblks = 0; + hfsmp->hfs_hotfile_start = 0; + hfsmp->hfs_hotfile_end = 0; + hfsmp->hfs_hotfile_freeblks = 0; + hfsmp->hfs_metazone_start = 0; + hfsmp->hfs_metazone_end = 0; + } + return; } diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 0535e6c9b..49973d29c 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -4119,6 +4119,13 @@ hfs_makenode(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, /* set the cnode pointer only after successfully acquiring lock */ dcp = VTOC(dvp); + + /* Don't allow creation of new entries in open-unlinked directories */ + if ((error = hfs_checkdeleted (dcp))) { + hfs_unlock (dcp); + return error; + } + dcp->c_flag |= C_DIR_MODIFICATION; hfsmp = VTOHFS(dvp); diff --git a/bsd/hfs/hfs_xattr.c b/bsd/hfs/hfs_xattr.c index f552b9c75..598f1dd7b 100644 --- a/bsd/hfs/hfs_xattr.c +++ b/bsd/hfs/hfs_xattr.c @@ -2237,7 +2237,7 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK); for (i = 0; (blkcnt > 0) && (i < maxextents); i++) { - result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, 0, 0, + result = BlockAllocate(hfsmp, startblk, blkcnt, blkcnt, 0, &extents[i].startBlock, &extents[i].blockCount); #if HFS_XATTR_VERBOSE printf("hfs: alloc_attr_blks: BA blkcnt %d [%d, %d] (%d)\n", @@ -2262,7 +2262,7 @@ alloc_attr_blks(struct hfsmount *hfsmp, size_t attrsize, size_t extentbufsize, H #endif for (; i <= 0; i--) { if ((blkcnt = extents[i].blockCount) != 0) { - (void) BlockDeallocate(hfsmp, 
extents[i].startBlock, blkcnt); + (void) BlockDeallocate(hfsmp, extents[i].startBlock, blkcnt, 0); extents[i].startBlock = 0; extents[i].blockCount = 0; } @@ -2301,7 +2301,7 @@ free_attr_blks(struct hfsmount *hfsmp, int blkcnt, HFSPlusExtentDescriptor *exte if (extents[i].startBlock == 0) { break; } - (void)BlockDeallocate(hfsmp, extents[i].startBlock, extents[i].blockCount); + (void)BlockDeallocate(hfsmp, extents[i].startBlock, extents[i].blockCount, 0); remblks -= extents[i].blockCount; extents[i].startBlock = 0; extents[i].blockCount = 0; diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index 34c53fe74..5d037026b 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -576,7 +576,7 @@ static OSErr ReleaseExtents( break; } - err = BlockDeallocate( vcb, extentRecord[extentIndex].startBlock, numAllocationBlocks ); + err = BlockDeallocate( vcb, extentRecord[extentIndex].startBlock, numAllocationBlocks , 0); if ( err != noErr ) break; @@ -1128,8 +1128,8 @@ OSErr ExtendFileC ( startBlock, howmany(MIN(bytesToAdd, availbytes), volumeBlockSize), howmany(MIN(maximumBytes, availbytes), volumeBlockSize), - wantContig, - useMetaZone, + (wantContig ? HFS_ALLOC_FORCECONTIG : 0) | + (useMetaZone ? HFS_ALLOC_METAZONE : 0), &actualStartBlock, &actualNumBlocks); } @@ -1175,7 +1175,7 @@ OSErr ExtendFileC ( if (foundIndex == numExtentsPerRecord) { // This record is full. Need to create a new one. if (FTOC(fcb)->c_fileid == kHFSExtentsFileID) { - (void) BlockDeallocate(vcb, actualStartBlock, actualNumBlocks); + (void) BlockDeallocate(vcb, actualStartBlock, actualNumBlocks, 0); err = dskFulErr; // Oops. Can't extend extents file past first record. break; } @@ -1206,7 +1206,7 @@ OSErr ExtendFileC ( // We couldn't create an extent record because extents B-tree // couldn't grow. Dellocate the extent just allocated and // return a disk full error. 
- (void) BlockDeallocate(vcb, actualStartBlock, actualNumBlocks); + (void) BlockDeallocate(vcb, actualStartBlock, actualNumBlocks, 0); err = dskFulErr; } if (err != noErr) break; @@ -1398,7 +1398,7 @@ OSErr TruncateFileC ( // Compute first volume allocation block to free startBlock = extentRecord[extentIndex].startBlock + extentRecord[extentIndex].blockCount - numBlocks; // Free the blocks in bitmap - err = BlockDeallocate(vcb, startBlock, numBlocks); + err = BlockDeallocate(vcb, startBlock, numBlocks, 0); if (err != noErr) goto ErrorExit; // Adjust length of this extent extentRecord[extentIndex].blockCount -= numBlocks; @@ -1422,7 +1422,7 @@ OSErr TruncateFileC ( while (extentIndex < numExtentsPerRecord && extentRecord[extentIndex].blockCount != 0) { numBlocks = extentRecord[extentIndex].blockCount; // Deallocate this extent - err = BlockDeallocate(vcb, extentRecord[extentIndex].startBlock, numBlocks); + err = BlockDeallocate(vcb, extentRecord[extentIndex].startBlock, numBlocks, 0); if (err != noErr) goto ErrorExit; // Update next file allocation block number nextBlock += numBlocks; @@ -1502,7 +1502,7 @@ OSErr HeadTruncateFile ( break; /* end of extents */ if (blksfreed < headblks) { - error = BlockDeallocate(vcb, fcb->fcbExtents[i].startBlock, blkcnt); + error = BlockDeallocate(vcb, fcb->fcbExtents[i].startBlock, blkcnt, 0); /* * Any errors after the first BlockDeallocate * must be ignored so we can put the file in @@ -1560,7 +1560,7 @@ OSErr HeadTruncateFile ( break; /* end of extents */ if (blksfreed < headblks) { - error = BlockDeallocate(vcb, extents[i].startBlock, blkcnt); + error = BlockDeallocate(vcb, extents[i].startBlock, blkcnt, 0); if (error) { printf("hfs: HeadTruncateFile: problems deallocating %s (%d)\n", FTOC(fcb)->c_desc.cd_nameptr ? (const char *)FTOC(fcb)->c_desc.cd_nameptr : "", error); diff --git a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c index cef473b5e..3d8255a9e 100644 --- a/bsd/hfs/hfscommon/Misc/VolumeAllocation.c +++ b/bsd/hfs/hfscommon/Misc/VolumeAllocation.c @@ -238,10 +238,7 @@ OSErr BlockAllocate ( u_int32_t startingBlock, /* preferred starting block, or 0 for no preference */ u_int32_t minBlocks, /* desired number of blocks to allocate */ u_int32_t maxBlocks, /* maximum number of blocks to allocate */ - Boolean forceContiguous, /* non-zero to force contiguous allocation and to force */ - /* minBlocks bytes to actually be allocated */ - - Boolean useMetaZone, + u_int32_t flags, /* option flags */ u_int32_t *actualStartBlock, /* actual first block of allocation */ u_int32_t *actualNumBlocks) /* number of blocks actually allocated; if forceContiguous */ /* was zero, then this may represent fewer than minBlocks */ @@ -249,6 +246,20 @@ OSErr BlockAllocate ( u_int32_t freeBlocks; OSErr err; Boolean updateAllocPtr = false; // true if nextAllocation needs to be updated + Boolean useMetaZone; + Boolean forceContiguous; + + if (flags & HFS_ALLOC_FORCECONTIG) { + forceContiguous = true; + } else { + forceContiguous = false; + } + + if (flags & HFS_ALLOC_METAZONE) { + useMetaZone = true; + } else { + useMetaZone = false; + } // // Initialize outputs in case we get an error @@ -257,25 +268,38 @@ OSErr BlockAllocate ( *actualNumBlocks = 0; freeBlocks = hfs_freeblks(VCBTOHFS(vcb), 0); - // - // If the disk is already full, don't bother. 
- // - if (freeBlocks == 0) { - err = dskFulErr; - goto Exit; - } - if (forceContiguous && freeBlocks < minBlocks) { - err = dskFulErr; - goto Exit; - } - /* - * Clip if necessary so we don't over-subscribe the free blocks. + /* Skip free block check if blocks are being allocated for relocating + * data during truncating a volume. + * + * During hfs_truncatefs(), the volume free block count is updated + * before relocating data to reflect the total number of free blocks + * that will exist on the volume after resize is successful. This + * means that we have reserved allocation blocks required for relocating + * the data and hence there is no need to check the free blocks. + * It will also prevent resize failure when the number of blocks in + * an extent being relocated is more than the free blocks that will + * exist after the volume is resized. */ - if (minBlocks > freeBlocks) { - minBlocks = freeBlocks; - } - if (maxBlocks > freeBlocks) { - maxBlocks = freeBlocks; + if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + // If the disk is already full, don't bother. + if (freeBlocks == 0) { + err = dskFulErr; + goto Exit; + } + if (forceContiguous && freeBlocks < minBlocks) { + err = dskFulErr; + goto Exit; + } + + /* + * Clip if necessary so we don't over-subscribe the free blocks. + */ + if (minBlocks > freeBlocks) { + minBlocks = freeBlocks; + } + if (maxBlocks > freeBlocks) { + maxBlocks = freeBlocks; + } } // @@ -387,11 +411,16 @@ Exit: // than one entry in the array } } - - // - // Update the number of free blocks on the volume - // - vcb->freeBlocks -= *actualNumBlocks; + + /* + * Update the number of free blocks on the volume + * + * Skip updating the free blocks count if the block are + * being allocated to relocate data as part of hfs_truncatefs() + */ + if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + vcb->freeBlocks -= *actualNumBlocks; + } MarkVCBDirty(vcb); HFS_MOUNT_UNLOCK(vcb, TRUE); @@ -428,7 +457,8 @@ __private_extern__ OSErr BlockDeallocate ( ExtendedVCB *vcb, // Which volume to deallocate space on u_int32_t firstBlock, // First block in range to deallocate - u_int32_t numBlocks) // Number of contiguous blocks to deallocate + u_int32_t numBlocks, // Number of contiguous blocks to deallocate + u_int32_t flags) { OSErr err; u_int32_t tempWord; @@ -452,7 +482,15 @@ OSErr BlockDeallocate ( // Update the volume's free block count, and mark the VCB as dirty. // HFS_MOUNT_LOCK(vcb, TRUE); - vcb->freeBlocks += numBlocks; + + /* + * Do not update the free block count. This flags is specified + * when a volume is being truncated. 
+ */ + if ((flags & HFS_ALLOC_SKIPFREEBLKS) == 0) { + vcb->freeBlocks += numBlocks; + } + vcb->hfs_freed_block_count += numBlocks; if (firstBlock < vcb->sparseAllocation) { vcb->sparseAllocation = firstBlock; diff --git a/bsd/hfs/hfscommon/headers/FileMgrInternal.h b/bsd/hfs/hfscommon/headers/FileMgrInternal.h index a2d1552ee..307178907 100644 --- a/bsd/hfs/hfscommon/headers/FileMgrInternal.h +++ b/bsd/hfs/hfscommon/headers/FileMgrInternal.h @@ -205,20 +205,31 @@ ReplaceBTreeRecord (FileReference refNum, /* Prototypes for exported routines in VolumeAllocation.c*/ + +/* + * Flags for BlockAllocate() and BlockDeallocate() + */ +/* Force contiguous block allocation and to force minBlocks to actually be allocated */ +#define HFS_ALLOC_FORCECONTIG 0x1 +/* Can use metadata zone blocks */ +#define HFS_ALLOC_METAZONE 0x2 +/* Skip checking and updating of free blocks during allocation and deallocation */ +#define HFS_ALLOC_SKIPFREEBLKS 0x4 + EXTERN_API_C( OSErr ) BlockAllocate (ExtendedVCB * vcb, u_int32_t startingBlock, u_int32_t minBlocks, u_int32_t maxBlocks, - Boolean forceContiguous, - Boolean useMetaZone, + u_int32_t flags, u_int32_t * startBlock, u_int32_t * actualBlocks); EXTERN_API_C( OSErr ) BlockDeallocate (ExtendedVCB * vcb, u_int32_t firstBlock, - u_int32_t numBlocks); + u_int32_t numBlocks, + u_int32_t flags); EXTERN_API_C ( void ) invalidate_free_extent_cache (ExtendedVCB * vcb); diff --git a/bsd/kern/kern_credential.c b/bsd/kern/kern_credential.c index 4b5ba8450..4ab7311ae 100644 --- a/bsd/kern/kern_credential.c +++ b/bsd/kern/kern_credential.c @@ -145,6 +145,7 @@ static lck_mtx_t *kauth_resolver_mtx; static volatile pid_t kauth_resolver_identity; static int kauth_resolver_registered; static uint32_t kauth_resolver_sequence; +static int kauth_resolver_timeout = 30; /* default: 30 seconds */ struct kauth_resolver_work { TAILQ_ENTRY(kauth_resolver_work) kr_link; @@ -251,8 +252,8 @@ kauth_resolver_init(void) * EINTR Operation interrupted (e.g. by * a signal) * ENOMEM Could not allocate work item - * ??? An error from the user space - * daemon + * workp->kr_result:??? An error from the user space + * daemon (includes ENOENT!) * * Notes: Allocate a work queue entry, submit the work and wait for * the operation to either complete or time out. Outstanding @@ -269,7 +270,9 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp) /* no point actually blocking if the resolver isn't up yet */ if (kauth_resolver_identity == 0) { /* - * We've already waited an initial 30 seconds with no result. + * We've already waited an initial + * seconds with no result. + * * Sleep on a stack address so no one wakes us before timeout; * we sleep a half a second in case we are a high priority * process, so that memberd doesn't starve while we are in a @@ -312,7 +315,7 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp) wakeup_one((caddr_t)&kauth_resolver_unsubmitted); for (;;) { /* we could compute a better timeout here */ - ts.tv_sec = 30; + ts.tv_sec = kauth_resolver_timeout; ts.tv_nsec = 0; error = msleep(workp, kauth_resolver_mtx, PCATCH, "kr_submit", &ts); /* request has been completed? 
*/ @@ -359,12 +362,23 @@ kauth_resolver_submit(struct kauth_identity_extlookup *lkp) */ if ((error == EWOULDBLOCK) && (workp->kr_flags & KAUTH_REQUEST_UNSUBMITTED)) { KAUTH_DEBUG("RESOLVER - request timed out without being collected for processing, resolver dead"); + + /* + * Make the current resolver non-authoritative, and mark it as + * no longer registered to prevent kauth_cred_ismember_gid() + * enqueueing more work until a new one is registered. This + * mitigates the damage a crashing resolver may inflict. + */ kauth_resolver_identity = 0; + kauth_resolver_registered = 0; + /* kill all the other requestes that are waiting as well */ TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link) wakeup(killp); TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link) wakeup(killp); + /* Cause all waiting-for-work threads to return EIO */ + wakeup((caddr_t)&kauth_resolver_unsubmitted); } /* @@ -455,6 +469,14 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int3 workp->kr_flags |= KAUTH_REQUEST_UNSUBMITTED; TAILQ_INSERT_HEAD(&kauth_resolver_unsubmitted, workp, kr_link); } + /* + * Allow user space resolver to override the + * external resolution timeout + */ + if (message >= 30 && message <= 10000) { + kauth_resolver_timeout = message; + KAUTH_DEBUG("RESOLVER - new resolver changes timeout to %d seconds\n", (int)message); + } kauth_resolver_identity = new_id; kauth_resolver_registered = 1; wakeup(&kauth_resolver_unsubmitted); @@ -479,7 +501,15 @@ identitysvc(__unused struct proc *p, struct identitysvc_args *uap, __unused int3 struct kauth_resolver_work *killp; KAUTH_RESOLVER_LOCK(); + + /* + * Clear the identity, but also mark it as unregistered so + * there is no explicit future expectation of us getting a + * new resolver any time soon. + */ kauth_resolver_identity = 0; + kauth_resolver_registered = 0; + TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link) wakeup(killp); TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link) @@ -706,9 +736,14 @@ kauth_resolver_complete(user_addr_t message) KAUTH_DEBUG("RESOLVER - resolver %d died, waiting for a new one", kauth_resolver_identity); /* * Terminate outstanding requests; without an authoritative - * resolver, we are now back on our own authority. + * resolver, we are now back on our own authority. Tag the + * resolver unregistered to prevent kauth_cred_ismember_gid() + * enqueueing more work until a new one is registered. This + * mitigates the damage a crashing resolver may inflict. */ kauth_resolver_identity = 0; + kauth_resolver_registered = 0; + TAILQ_FOREACH(killp, &kauth_resolver_submitted, kr_link) wakeup(killp); TAILQ_FOREACH(killp, &kauth_resolver_unsubmitted, kr_link) @@ -2138,6 +2173,8 @@ kauth_groups_updatecache(struct kauth_identity_extlookup *el) * kauth_resolver_submit:EWOULDBLOCK * kauth_resolver_submit:EINTR * kauth_resolver_submit:ENOMEM + * kauth_resolver_submit:ENOENT User space daemon did not vend + * this credential. * kauth_resolver_submit:??? Unlikely error from user space * * Implicit returns: @@ -2252,6 +2289,8 @@ kauth_cred_ismember_gid(kauth_cred_t cred, gid_t gid, int *resultp) * Returns: 0 Success * kauth_cred_guid2gid:EINVAL * kauth_cred_ismember_gid:ENOENT + * kauth_resolver_submit:ENOENT User space daemon did not vend + * this credential. 
* kauth_cred_ismember_gid:EWOULDBLOCK * kauth_cred_ismember_gid:EINTR * kauth_cred_ismember_gid:ENOMEM @@ -2839,13 +2878,45 @@ kauth_cred_t kauth_cred_create(kauth_cred_t cred) { kauth_cred_t found_cred, new_cred = NULL; + int is_member = 0; KAUTH_CRED_HASH_LOCK_ASSERT(); - if (cred->cr_flags & CRF_NOMEMBERD) + if (cred->cr_flags & CRF_NOMEMBERD) { cred->cr_gmuid = KAUTH_UID_NONE; - else - cred->cr_gmuid = cred->cr_uid; + } else { + /* + * If the template credential is not opting out of external + * group membership resolution, then we need to check that + * the UID we will be using is resolvable by the external + * resolver. If it's not, then we opt it out anyway, since + * all future external resolution requests will be failing + * anyway, and potentially taking a long time to do it. We + * use gid 0 because we always know it will exist and not + * trigger additional lookups. This is OK, because we end up + * precatching the information here as a result. + */ + if (!kauth_cred_ismember_gid(cred, 0, &is_member)) { + /* + * It's a recognized value; we don't really care about + * the answer, so long as it's something the external + * resolver could have vended. + */ + cred->cr_gmuid = cred->cr_uid; + } else { + /* + * It's not something the external resolver could + * have vended, so we don't want to ask it more + * questions about the credential in the future. This + * speeds up future lookups, as long as the caller + * caches results; otherwise, it the same recurring + * cost. Since most credentials are used multiple + * times, we still get some performance win from this. + */ + cred->cr_gmuid = KAUTH_UID_NONE; + cred->cr_flags |= CRF_NOMEMBERD; + } + } /* Caller *must* specify at least the egid in cr_groups[0] */ if (cred->cr_ngroups < 1) diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index d07556877..d3f483d24 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -789,7 +789,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, int32_t *retval) goto out; } if (fp->f_type == DTYPE_PIPE) { - error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context); + error = fo_ioctl(fp, TIOCSPGRP, (caddr_t)&tmp, &context); goto out; } diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 666748b52..1a164a8c8 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -1375,6 +1375,8 @@ uthread_zone_init(void) THREAD_CHUNK * sizeof(struct uthread), "uthreads"); uthread_zone_inited = 1; + + zone_change(uthread_zone, Z_NOENCRYPT, TRUE); } } diff --git a/bsd/kern/kern_malloc.c b/bsd/kern/kern_malloc.c index d2fa09c77..ff86bfff6 100644 --- a/bsd/kern/kern_malloc.c +++ b/bsd/kern/kern_malloc.c @@ -304,169 +304,170 @@ const char *memname[] = { struct kmzones { size_t kz_elemsize; void *kz_zalloczone; + boolean_t kz_noencrypt; } kmzones[M_LAST] = { #define SOS(sname) sizeof (struct sname) #define SOX(sname) -1 - { -1, 0 }, /* 0 M_FREE */ - { MSIZE, KMZ_CREATEZONE }, /* 1 M_MBUF */ - { 0, KMZ_MALLOC }, /* 2 M_DEVBUF */ - { SOS(socket), KMZ_CREATEZONE }, /* 3 M_SOCKET */ - { SOS(inpcb), KMZ_LOOKUPZONE }, /* 4 M_PCB */ - { M_MBUF, KMZ_SHAREZONE }, /* 5 M_RTABLE */ - { M_MBUF, KMZ_SHAREZONE }, /* 6 M_HTABLE */ - { M_MBUF, KMZ_SHAREZONE }, /* 7 M_FTABLE */ - { SOS(rusage), KMZ_CREATEZONE }, /* 8 M_ZOMBIE */ - { 0, KMZ_MALLOC }, /* 9 M_IFADDR */ - { M_MBUF, KMZ_SHAREZONE }, /* 10 M_SOOPTS */ - { 0, KMZ_MALLOC }, /* 11 M_SONAME */ - { MAXPATHLEN, KMZ_CREATEZONE }, /* 12 M_NAMEI */ - { 0, KMZ_MALLOC }, /* 13 M_GPROF */ - { 0, KMZ_MALLOC }, /* 14 M_IOCTLOPS */ - 
{ 0, KMZ_MALLOC }, /* 15 M_MAPMEM */ - { SOS(ucred), KMZ_CREATEZONE }, /* 16 M_CRED */ - { SOS(pgrp), KMZ_CREATEZONE }, /* 17 M_PGRP */ - { SOS(session), KMZ_CREATEZONE }, /* 18 M_SESSION */ - { SOS(user32_iovec), KMZ_LOOKUPZONE }, /* 19 M_IOV32 */ - { SOS(mount), KMZ_CREATEZONE }, /* 20 M_MOUNT */ - { 0, KMZ_MALLOC }, /* 21 M_FHANDLE */ + { -1, 0, FALSE }, /* 0 M_FREE */ + { MSIZE, KMZ_CREATEZONE, FALSE }, /* 1 M_MBUF */ + { 0, KMZ_MALLOC, FALSE }, /* 2 M_DEVBUF */ + { SOS(socket), KMZ_CREATEZONE, TRUE }, /* 3 M_SOCKET */ + { SOS(inpcb), KMZ_LOOKUPZONE, TRUE }, /* 4 M_PCB */ + { M_MBUF, KMZ_SHAREZONE, FALSE }, /* 5 M_RTABLE */ + { M_MBUF, KMZ_SHAREZONE, FALSE }, /* 6 M_HTABLE */ + { M_MBUF, KMZ_SHAREZONE, FALSE }, /* 7 M_FTABLE */ + { SOS(rusage), KMZ_CREATEZONE, TRUE }, /* 8 M_ZOMBIE */ + { 0, KMZ_MALLOC, FALSE }, /* 9 M_IFADDR */ + { M_MBUF, KMZ_SHAREZONE, FALSE }, /* 10 M_SOOPTS */ + { 0, KMZ_MALLOC, FALSE }, /* 11 M_SONAME */ + { MAXPATHLEN, KMZ_CREATEZONE, FALSE }, /* 12 M_NAMEI */ + { 0, KMZ_MALLOC, FALSE }, /* 13 M_GPROF */ + { 0, KMZ_MALLOC, FALSE }, /* 14 M_IOCTLOPS */ + { 0, KMZ_MALLOC, FALSE }, /* 15 M_MAPMEM */ + { SOS(ucred), KMZ_CREATEZONE, FALSE }, /* 16 M_CRED */ + { SOS(pgrp), KMZ_CREATEZONE, FALSE }, /* 17 M_PGRP */ + { SOS(session), KMZ_CREATEZONE, FALSE }, /* 18 M_SESSION */ + { SOS(user32_iovec), KMZ_LOOKUPZONE, FALSE },/* 19 M_IOV32 */ + { SOS(mount), KMZ_CREATEZONE, FALSE }, /* 20 M_MOUNT */ + { 0, KMZ_MALLOC, FALSE }, /* 21 M_FHANDLE */ #if (NFSCLIENT || NFSSERVER) - { SOS(nfsreq), KMZ_CREATEZONE }, /* 22 M_NFSREQ */ - { SOS(nfsmount), KMZ_CREATEZONE }, /* 23 M_NFSMNT */ - { SOS(nfsnode), KMZ_CREATEZONE }, /* 24 M_NFSNODE */ + { SOS(nfsreq), KMZ_CREATEZONE, FALSE }, /* 22 M_NFSREQ */ + { SOS(nfsmount), KMZ_CREATEZONE, FALSE },/* 23 M_NFSMNT */ + { SOS(nfsnode), KMZ_CREATEZONE, FALSE }, /* 24 M_NFSNODE */ #else - { 0, KMZ_MALLOC }, /* 22 M_NFSREQ */ - { 0, KMZ_MALLOC }, /* 23 M_NFSMNT */ - { 0, KMZ_MALLOC }, /* 24 M_NFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 22 M_NFSREQ */ + { 0, KMZ_MALLOC, FALSE }, /* 23 M_NFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 24 M_NFSNODE */ #endif - { SOS(vnode), KMZ_CREATEZONE }, /* 25 M_VNODE */ - { SOS(namecache), KMZ_CREATEZONE }, /* 26 M_CACHE */ + { SOS(vnode), KMZ_CREATEZONE, TRUE }, /* 25 M_VNODE */ + { SOS(namecache),KMZ_CREATEZONE, FALSE }, /* 26 M_CACHE */ #if QUOTA - { SOX(dquot), KMZ_LOOKUPZONE }, /* 27 M_DQUOT */ + { SOX(dquot), KMZ_LOOKUPZONE, FALSE }, /* 27 M_DQUOT */ #else - { 0, KMZ_MALLOC }, /* 27 M_DQUOT */ + { 0, KMZ_MALLOC, FALSE }, /* 27 M_DQUOT */ #endif - { 0, KMZ_MALLOC }, /* 28 M_UFSMNT */ - { 0, KMZ_MALLOC }, /* 29 M_CGSUM */ - { SOS(plimit), KMZ_CREATEZONE }, /* 30 M_PLIMIT */ - { SOS(sigacts), KMZ_CREATEZONE }, /* 31 M_SIGACTS */ - { 0, KMZ_MALLOC }, /* 32 M_VMOBJ */ - { 0, KMZ_MALLOC }, /* 33 M_VMOBJHASH */ - { 0, KMZ_MALLOC }, /* 34 M_VMPMAP */ - { 0, KMZ_MALLOC }, /* 35 M_VMPVENT */ - { 0, KMZ_MALLOC }, /* 36 M_VMPAGER */ - { 0, KMZ_MALLOC }, /* 37 M_VMPGDATA */ - { SOS(fileproc), KMZ_CREATEZONE }, /* 38 M_FILEPROC */ - { SOS(filedesc), KMZ_CREATEZONE }, /* 39 M_FILEDESC */ - { SOX(lockf), KMZ_CREATEZONE }, /* 40 M_LOCKF */ - { SOS(proc), KMZ_CREATEZONE }, /* 41 M_PROC */ - { SOS(pstats), KMZ_CREATEZONE }, /* 42 M_PSTATS */ - { 0, KMZ_MALLOC }, /* 43 M_SEGMENT */ - { M_FFSNODE, KMZ_SHAREZONE }, /* 44 M_LFSNODE */ - { 0, KMZ_MALLOC }, /* 45 M_FFSNODE */ - { M_FFSNODE, KMZ_SHAREZONE }, /* 46 M_MFSNODE */ - { 0, KMZ_MALLOC }, /* 47 M_NQLEASE */ - { 0, KMZ_MALLOC }, /* 48 M_NQMHOST */ - { 0, KMZ_MALLOC }, /* 49 
M_NETADDR */ + { 0, KMZ_MALLOC, FALSE }, /* 28 M_UFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 29 M_CGSUM */ + { SOS(plimit), KMZ_CREATEZONE, TRUE }, /* 30 M_PLIMIT */ + { SOS(sigacts), KMZ_CREATEZONE, TRUE }, /* 31 M_SIGACTS */ + { 0, KMZ_MALLOC, FALSE }, /* 32 M_VMOBJ */ + { 0, KMZ_MALLOC, FALSE }, /* 33 M_VMOBJHASH */ + { 0, KMZ_MALLOC, FALSE }, /* 34 M_VMPMAP */ + { 0, KMZ_MALLOC, FALSE }, /* 35 M_VMPVENT */ + { 0, KMZ_MALLOC, FALSE }, /* 36 M_VMPAGER */ + { 0, KMZ_MALLOC, FALSE }, /* 37 M_VMPGDATA */ + { SOS(fileproc),KMZ_CREATEZONE, TRUE }, /* 38 M_FILEPROC */ + { SOS(filedesc),KMZ_CREATEZONE, TRUE }, /* 39 M_FILEDESC */ + { SOX(lockf), KMZ_CREATEZONE, TRUE }, /* 40 M_LOCKF */ + { SOS(proc), KMZ_CREATEZONE, FALSE }, /* 41 M_PROC */ + { SOS(pstats), KMZ_CREATEZONE, TRUE }, /* 42 M_PSTATS */ + { 0, KMZ_MALLOC, FALSE }, /* 43 M_SEGMENT */ + { M_FFSNODE, KMZ_SHAREZONE, FALSE }, /* 44 M_LFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 45 M_FFSNODE */ + { M_FFSNODE, KMZ_SHAREZONE, FALSE }, /* 46 M_MFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 47 M_NQLEASE */ + { 0, KMZ_MALLOC, FALSE }, /* 48 M_NQMHOST */ + { 0, KMZ_MALLOC, FALSE }, /* 49 M_NETADDR */ #if (NFSCLIENT || NFSSERVER) { SOX(nfsrv_sock), - KMZ_CREATEZONE }, /* 50 M_NFSSVC */ - { 0, KMZ_MALLOC }, /* 51 M_NFSUID */ + KMZ_CREATEZONE, FALSE }, /* 50 M_NFSSVC */ + { 0, KMZ_MALLOC, FALSE }, /* 51 M_NFSUID */ { SOX(nfsrvcache), - KMZ_CREATEZONE }, /* 52 M_NFSD */ + KMZ_CREATEZONE, FALSE }, /* 52 M_NFSD */ #else - { 0, KMZ_MALLOC }, /* 50 M_NFSSVC */ - { 0, KMZ_MALLOC }, /* 51 M_NFSUID */ - { 0, KMZ_MALLOC }, /* 52 M_NFSD */ + { 0, KMZ_MALLOC, FALSE }, /* 50 M_NFSSVC */ + { 0, KMZ_MALLOC, FALSE }, /* 51 M_NFSUID */ + { 0, KMZ_MALLOC, FALSE }, /* 52 M_NFSD */ #endif { SOX(ip_moptions), - KMZ_LOOKUPZONE }, /* 53 M_IPMOPTS */ - { SOX(in_multi), KMZ_LOOKUPZONE }, /* 54 M_IPMADDR */ + KMZ_LOOKUPZONE, FALSE }, /* 53 M_IPMOPTS */ + { SOX(in_multi),KMZ_LOOKUPZONE, FALSE }, /* 54 M_IPMADDR */ { SOX(ether_multi), - KMZ_LOOKUPZONE }, /* 55 M_IFMADDR */ - { SOX(mrt), KMZ_CREATEZONE }, /* 56 M_MRTABLE */ - { 0, KMZ_MALLOC }, /* 57 unused entry */ - { 0, KMZ_MALLOC }, /* 58 unused entry */ + KMZ_LOOKUPZONE, FALSE }, /* 55 M_IFMADDR */ + { SOX(mrt), KMZ_CREATEZONE, TRUE }, /* 56 M_MRTABLE */ + { 0, KMZ_MALLOC, FALSE }, /* 57 unused entry */ + { 0, KMZ_MALLOC, FALSE }, /* 58 unused entry */ #if (NFSCLIENT || NFSSERVER) { SOS(nfsrv_descript), - KMZ_CREATEZONE }, /* 59 M_NFSRVDESC */ - { SOS(nfsdmap), KMZ_CREATEZONE }, /* 60 M_NFSDIROFF */ - { SOS(fhandle), KMZ_LOOKUPZONE }, /* 61 M_NFSBIGFH */ + KMZ_CREATEZONE, FALSE }, /* 59 M_NFSRVDESC */ + { SOS(nfsdmap), KMZ_CREATEZONE, FALSE }, /* 60 M_NFSDIROFF */ + { SOS(fhandle), KMZ_LOOKUPZONE, FALSE }, /* 61 M_NFSBIGFH */ #else - { 0, KMZ_MALLOC }, /* 59 M_NFSRVDESC */ - { 0, KMZ_MALLOC }, /* 60 M_NFSDIROFF */ - { 0, KMZ_MALLOC }, /* 61 M_NFSBIGFH */ + { 0, KMZ_MALLOC, FALSE }, /* 59 M_NFSRVDESC */ + { 0, KMZ_MALLOC, FALSE }, /* 60 M_NFSDIROFF */ + { 0, KMZ_MALLOC, FALSE }, /* 61 M_NFSBIGFH */ #endif - { 0, KMZ_MALLOC }, /* 62 M_MSDOSFSMNT */ - { 0, KMZ_MALLOC }, /* 63 M_MSDOSFSFAT */ - { 0, KMZ_MALLOC }, /* 64 M_MSDOSFSNODE */ - { SOS(tty), KMZ_CREATEZONE }, /* 65 M_TTYS */ - { 0, KMZ_MALLOC }, /* 66 M_EXEC */ - { 0, KMZ_MALLOC }, /* 67 M_MISCFSMNT */ - { 0, KMZ_MALLOC }, /* 68 M_MISCFSNODE */ - { 0, KMZ_MALLOC }, /* 69 M_ADOSFSMNT */ - { 0, KMZ_MALLOC }, /* 70 M_ADOSFSNODE */ - { 0, KMZ_MALLOC }, /* 71 M_ANODE */ - { SOX(buf), KMZ_CREATEZONE }, /* 72 M_BUFHDR */ + { 0, KMZ_MALLOC, FALSE }, /* 62 M_MSDOSFSMNT */ + { 0, 
KMZ_MALLOC, FALSE }, /* 63 M_MSDOSFSFAT */ + { 0, KMZ_MALLOC, FALSE }, /* 64 M_MSDOSFSNODE */ + { SOS(tty), KMZ_CREATEZONE, FALSE }, /* 65 M_TTYS */ + { 0, KMZ_MALLOC, FALSE }, /* 66 M_EXEC */ + { 0, KMZ_MALLOC, FALSE }, /* 67 M_MISCFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 68 M_MISCFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 69 M_ADOSFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 70 M_ADOSFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 71 M_ANODE */ + { SOX(buf), KMZ_CREATEZONE, TRUE }, /* 72 M_BUFHDR */ { (NDFILE * OFILESIZE), - KMZ_CREATEZONE }, /* 73 M_OFILETABL */ - { MCLBYTES, KMZ_CREATEZONE }, /* 74 M_MCLUST */ + KMZ_CREATEZONE, FALSE }, /* 73 M_OFILETABL */ + { MCLBYTES, KMZ_CREATEZONE, FALSE }, /* 74 M_MCLUST */ #if HFS - { SOX(hfsmount), KMZ_LOOKUPZONE }, /* 75 M_HFSMNT */ - { SOS(cnode), KMZ_CREATEZONE }, /* 76 M_HFSNODE */ - { SOS(filefork), KMZ_CREATEZONE }, /* 77 M_HFSFORK */ + { SOX(hfsmount),KMZ_LOOKUPZONE, FALSE }, /* 75 M_HFSMNT */ + { SOS(cnode), KMZ_CREATEZONE, TRUE }, /* 76 M_HFSNODE */ + { SOS(filefork),KMZ_CREATEZONE, TRUE }, /* 77 M_HFSFORK */ #else - { 0, KMZ_MALLOC }, /* 75 M_HFSMNT */ - { 0, KMZ_MALLOC }, /* 76 M_HFSNODE */ - { 0, KMZ_MALLOC }, /* 77 M_HFSFORK */ + { 0, KMZ_MALLOC, FALSE }, /* 75 M_HFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 76 M_HFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 77 M_HFSFORK */ #endif - { 0, KMZ_MALLOC }, /* 78 M_ZFSMNT */ - { 0, KMZ_MALLOC }, /* 79 M_ZFSNODE */ - { 0, KMZ_MALLOC }, /* 80 M_TEMP */ - { 0, KMZ_MALLOC }, /* 81 M_SECA */ - { 0, KMZ_MALLOC }, /* 82 M_DEVFS */ - { 0, KMZ_MALLOC }, /* 83 M_IPFW */ - { 0, KMZ_MALLOC }, /* 84 M_UDFNODE */ - { 0, KMZ_MALLOC }, /* 85 M_UDFMOUNT */ - { 0, KMZ_MALLOC }, /* 86 M_IP6NDP */ - { 0, KMZ_MALLOC }, /* 87 M_IP6OPT */ - { 0, KMZ_MALLOC }, /* 88 M_IP6MISC */ - { 0, KMZ_MALLOC }, /* 89 M_TSEGQ */ - { 0, KMZ_MALLOC }, /* 90 M_IGMP */ + { 0, KMZ_MALLOC, FALSE }, /* 78 M_ZFSMNT */ + { 0, KMZ_MALLOC, FALSE }, /* 79 M_ZFSNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 80 M_TEMP */ + { 0, KMZ_MALLOC, FALSE }, /* 81 M_SECA */ + { 0, KMZ_MALLOC, FALSE }, /* 82 M_DEVFS */ + { 0, KMZ_MALLOC, FALSE }, /* 83 M_IPFW */ + { 0, KMZ_MALLOC, FALSE }, /* 84 M_UDFNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 85 M_UDFMOUNT */ + { 0, KMZ_MALLOC, FALSE }, /* 86 M_IP6NDP */ + { 0, KMZ_MALLOC, FALSE }, /* 87 M_IP6OPT */ + { 0, KMZ_MALLOC, FALSE }, /* 88 M_IP6MISC */ + { 0, KMZ_MALLOC, FALSE }, /* 89 M_TSEGQ */ + { 0, KMZ_MALLOC, FALSE }, /* 90 M_IGMP */ #if JOURNALING - { SOS(journal), KMZ_CREATEZONE }, /* 91 M_JNL_JNL */ - { SOS(transaction), KMZ_CREATEZONE }, /* 92 M_JNL_TR */ + { SOS(journal), KMZ_CREATEZONE, FALSE }, /* 91 M_JNL_JNL */ + { SOS(transaction), KMZ_CREATEZONE, FALSE }, /* 92 M_JNL_TR */ #else - { 0, KMZ_MALLOC }, /* 91 M_JNL_JNL */ - { 0, KMZ_MALLOC }, /* 92 M_JNL_TR */ + { 0, KMZ_MALLOC, FALSE }, /* 91 M_JNL_JNL */ + { 0, KMZ_MALLOC, FALSE }, /* 92 M_JNL_TR */ #endif - { SOS(specinfo), KMZ_CREATEZONE }, /* 93 M_SPECINFO */ - { SOS(kqueue), KMZ_CREATEZONE }, /* 94 M_KQUEUE */ + { SOS(specinfo), KMZ_CREATEZONE, TRUE }, /* 93 M_SPECINFO */ + { SOS(kqueue), KMZ_CREATEZONE, FALSE }, /* 94 M_KQUEUE */ #if HFS - { SOS(directoryhint), KMZ_CREATEZONE }, /* 95 M_HFSDIRHINT */ + { SOS(directoryhint), KMZ_CREATEZONE, FALSE }, /* 95 M_HFSDIRHINT */ #else - { 0, KMZ_MALLOC }, /* 95 M_HFSDIRHINT */ + { 0, KMZ_MALLOC, FALSE }, /* 95 M_HFSDIRHINT */ #endif - { SOS(cl_readahead), KMZ_CREATEZONE }, /* 96 M_CLRDAHEAD */ - { SOS(cl_writebehind),KMZ_CREATEZONE }, /* 97 M_CLWRBEHIND */ - { SOS(user64_iovec), KMZ_LOOKUPZONE }, /* 98 M_IOV64 */ - { 
SOS(fileglob), KMZ_CREATEZONE }, /* 99 M_FILEGLOB */ - { 0, KMZ_MALLOC }, /* 100 M_KAUTH */ - { 0, KMZ_MALLOC }, /* 101 M_DUMMYNET */ + { SOS(cl_readahead), KMZ_CREATEZONE, TRUE }, /* 96 M_CLRDAHEAD */ + { SOS(cl_writebehind),KMZ_CREATEZONE, TRUE }, /* 97 M_CLWRBEHIND */ + { SOS(user64_iovec), KMZ_LOOKUPZONE, FALSE },/* 98 M_IOV64 */ + { SOS(fileglob), KMZ_CREATEZONE, TRUE }, /* 99 M_FILEGLOB */ + { 0, KMZ_MALLOC, FALSE }, /* 100 M_KAUTH */ + { 0, KMZ_MALLOC, FALSE }, /* 101 M_DUMMYNET */ #ifndef __LP64__ - { SOS(unsafe_fsnode),KMZ_CREATEZONE }, /* 102 M_UNSAFEFS */ + { SOS(unsafe_fsnode),KMZ_CREATEZONE, FALSE }, /* 102 M_UNSAFEFS */ #else - { 0, KMZ_MALLOC }, /* 102 M_UNSAFEFS */ + { 0, KMZ_MALLOC, FALSE }, /* 102 M_UNSAFEFS */ #endif /* __LP64__ */ - { 0, KMZ_MALLOC }, /* 103 M_MACPIPELABEL */ - { 0, KMZ_MALLOC }, /* 104 M_MACTEMP */ - { 0, KMZ_MALLOC }, /* 105 M_SBUF */ - { 0, KMZ_MALLOC }, /* 106 M_HFS_EXTATTR */ - { 0, KMZ_MALLOC }, /* 107 M_LCTX */ - { 0, KMZ_MALLOC }, /* 108 M_TRAFFIC_MGT */ + { 0, KMZ_MALLOC, FALSE }, /* 103 M_MACPIPELABEL */ + { 0, KMZ_MALLOC, FALSE }, /* 104 M_MACTEMP */ + { 0, KMZ_MALLOC, FALSE }, /* 105 M_SBUF */ + { 0, KMZ_MALLOC, FALSE }, /* 106 M_HFS_EXTATTR */ + { 0, KMZ_MALLOC, FALSE }, /* 107 M_LCTX */ + { 0, KMZ_MALLOC, FALSE }, /* 108 M_TRAFFIC_MGT */ #if HFS_COMPRESSION - { SOS(decmpfs_cnode),KMZ_CREATEZONE }, /* 109 M_DECMPFS_CNODE */ + { SOS(decmpfs_cnode),KMZ_CREATEZONE, FALSE }, /* 109 M_DECMPFS_CNODE */ #else - { 0, KMZ_MALLOC }, /* 109 M_DECMPFS_CNODE */ + { 0, KMZ_MALLOC, FALSE }, /* 109 M_DECMPFS_CNODE */ #endif /* HFS_COMPRESSION */ #undef SOS #undef SOX @@ -498,6 +499,8 @@ kmeminit(void) kmz->kz_zalloczone = zinit(kmz->kz_elemsize, 1024 * 1024, PAGE_SIZE, memname[kmz - kmzones]); + if (kmz->kz_noencrypt == TRUE) + zone_change(kmz->kz_zalloczone, Z_NOENCRYPT, TRUE); } else if (kmz->kz_zalloczone == KMZ_LOOKUPZONE) kmz->kz_zalloczone = kalloc_zone(kmz->kz_elemsize); diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index 43638af75..ffbff213f 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -96,7 +96,8 @@ kern_open_file_for_direct_io(const char * name, void * callback_ref, dev_t * device_result, uint64_t * partitionbase_result, - uint64_t * maxiocount_result) + uint64_t * maxiocount_result, + boolean_t * solid_state) { struct kern_direct_file_io_ref_t * ref; @@ -225,6 +226,16 @@ kern_open_file_for_direct_io(const char * name, if (maxiocount_result) *maxiocount_result = maxiocount; + if (solid_state) + { + int isssd = 0; + error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd); + if (error) + *solid_state = FALSE; + else + *solid_state = isssd; + } + // generate the block list error = 0; diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index a6de66c8c..568835be1 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -83,7 +83,7 @@ errno_t mbuf_align_32(mbuf_t mbuf, size_t len) addr64_t mbuf_data_to_physical(void* ptr) { - return (addr64_t)(intptr_t)mcl_to_paddr(ptr); + return (addr64_t)(uintptr_t)mcl_to_paddr(ptr); } errno_t mbuf_get(mbuf_how_t how, mbuf_type_t type, mbuf_t *mbuf) diff --git a/bsd/kern/tty.c b/bsd/kern/tty.c index a03e1f437..841781612 100644 --- a/bsd/kern/tty.c +++ b/bsd/kern/tty.c @@ -3022,6 +3022,8 @@ ttymalloc(void) /* output queue doesn't need quoting */ clalloc(&tp->t_outq, TTYCLSIZE, 0); lck_mtx_init(&tp->t_lock, tty_lck_grp, tty_lck_attr); + klist_init(&tp->t_rsel.si_note); + klist_init(&tp->t_wsel.si_note); } return(tp); } diff --git a/bsd/kern/tty_ptmx.c 
b/bsd/kern/tty_ptmx.c index d7c8edbf4..19c8e5bcc 100644 --- a/bsd/kern/tty_ptmx.c +++ b/bsd/kern/tty_ptmx.c @@ -1585,12 +1585,10 @@ ptsd_kqfilter(dev_t dev, struct knote *kn) switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &ptsd_kqops_read; - SLIST_INIT(&tp->t_rsel.si_note); KNOTE_ATTACH(&tp->t_rsel.si_note, kn); break; case EVFILT_WRITE: kn->kn_fop = &ptsd_kqops_write; - SLIST_INIT(&tp->t_wsel.si_note); KNOTE_ATTACH(&tp->t_wsel.si_note, kn); break; default: diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 1f1f99d3e..0fb29a31c 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -374,6 +374,8 @@ ubc_init(void) i = (vm_size_t) sizeof (struct ubc_info); ubc_info_zone = zinit (i, 10000*i, 8192, "ubc_info zone"); + + zone_change(ubc_info_zone, Z_NOENCRYPT, TRUE); } diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index 666f99c46..affd41d8d 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -275,6 +275,7 @@ socketinit(void) get_inpcb_str_size() + 4 + get_tcp_str_size()); so_cache_zone = zinit(str_size, 120000*str_size, 8192, "socache zone"); + zone_change(so_cache_zone, Z_NOENCRYPT, TRUE); #if TEMPDEBUG printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); #endif diff --git a/bsd/net/raw_usrreq.c b/bsd/net/raw_usrreq.c index 4128af9f5..035c50926 100644 --- a/bsd/net/raw_usrreq.c +++ b/bsd/net/raw_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -304,6 +304,11 @@ raw_usend(struct socket *so, int flags, struct mbuf *m, goto release; } + if (so->so_proto->pr_output == NULL) { + error = EOPNOTSUPP; + goto release; + } + if (control && control->m_len) { error = EOPNOTSUPP; goto release; diff --git a/bsd/net/route.c b/bsd/net/route.c index 28f921b0d..5f1580e8f 100644 --- a/bsd/net/route.c +++ b/bsd/net/route.c @@ -624,6 +624,7 @@ route_init(void) panic("route_init: failed allocating rte_zone"); zone_change(rte_zone, Z_EXPAND, TRUE); + zone_change(rte_zone, Z_NOENCRYPT, TRUE); TAILQ_INIT(&rttrash_head); } diff --git a/bsd/netinet/in_pcb.c b/bsd/netinet/in_pcb.c index 10efe37f0..696222176 100644 --- a/bsd/netinet/in_pcb.c +++ b/bsd/netinet/in_pcb.c @@ -776,7 +776,7 @@ in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct proc *p) inp->inp_lport, 0, NULL); socket_lock(inp->inp_socket, 0); if (pcb != NULL) { - in_pcb_checkstate(pcb, WNT_RELEASE, 0); + in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 1 : 0); return (EADDRINUSE); } if (inp->inp_laddr.s_addr == INADDR_ANY) { @@ -1621,9 +1621,14 @@ inp_route_copyout(struct inpcb *inp, struct route *dst) lck_mtx_assert(inp->inpcb_mtx, LCK_MTX_ASSERT_OWNED); - /* Minor sanity check */ - if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) - panic("%s: wrong or corrupted route: %p", __func__, src); + /* + * If the route in the PCB is not for IPv4, blow it away; + * this is possible in the case of IPv4-mapped address case. 
+ */ + if (src->ro_rt != NULL && rt_key(src->ro_rt)->sa_family != AF_INET) { + rtfree(src->ro_rt); + src->ro_rt = NULL; + } /* Copy everything (rt, dst, flags) from PCB */ bcopy(src, dst, sizeof (*dst)); diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index a6c47e084..10156a869 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -559,6 +559,7 @@ ip_proto_dispatch_in( int seen = (inject_ipfref == 0); int changed_header = 0; struct ip *ip; + void (*pr_input)(struct mbuf *, int len); if (!TAILQ_EMPTY(&ipv4_filters)) { ipf_ref(); @@ -598,20 +599,21 @@ ip_proto_dispatch_in( * otherwise let the protocol deal with its own locking */ ip = mtod(m, struct ip *); - + if (changed_header) { ip->ip_len = ntohs(ip->ip_len) - hlen; ip->ip_off = ntohs(ip->ip_off); } - - if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) { + + if ((pr_input = ip_protox[ip->ip_p]->pr_input) == NULL) { + m_freem(m); + } else if (!(ip_protox[ip->ip_p]->pr_flags & PR_PROTOLOCK)) { lck_mtx_lock(inet_domain_mutex); - (*ip_protox[ip->ip_p]->pr_input)(m, hlen); + pr_input(m, hlen); lck_mtx_unlock(inet_domain_mutex); - } - else - (*ip_protox[ip->ip_p]->pr_input)(m, hlen); - + } else { + pr_input(m, hlen); + } } /* @@ -624,15 +626,16 @@ ip_input(struct mbuf *m) struct ip *ip; struct ipq *fp; struct in_ifaddr *ia = NULL; - int i, hlen, checkif; + int hlen, checkif; u_short sum; struct in_addr pkt_dst; - u_int32_t div_info = 0; /* packet divert/tee info */ #if IPFIREWALL + int i; + u_int32_t div_info = 0; /* packet divert/tee info */ struct ip_fw_args args; + struct m_tag *tag; #endif ipfilter_t inject_filter_ref = 0; - struct m_tag *tag; #if IPFIREWALL args.eh = NULL; diff --git a/bsd/netinet/ip_mroute.c b/bsd/netinet/ip_mroute.c index a5884bd1a..e61d2ed64 100644 --- a/bsd/netinet/ip_mroute.c +++ b/bsd/netinet/ip_mroute.c @@ -77,7 +77,7 @@ #endif -#ifndef MROUTING +#if !MROUTING extern u_int32_t _ip_mcast_src(int vifi); extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 2ec224939..07d74f97f 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -3182,7 +3182,7 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) struct in_addr src = ip->ip_src; struct in_addr dst = ip->ip_dst; struct ifnet *rt_ifp; - char s_src[16], s_dst[16]; + char s_src[MAX_IPv4_STR_LEN], s_dst[MAX_IPv4_STR_LEN]; if (ip_select_srcif_debug) { (void) inet_ntop(AF_INET, &src.s_addr, s_src, sizeof (s_src)); @@ -3222,6 +3222,22 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) ifa = (struct ifaddr *)ifa_foraddr_scoped(src.s_addr, scope); + if (ifa == NULL && ip->ip_p != IPPROTO_UDP && + ip->ip_p != IPPROTO_TCP && ipforwarding) { + /* + * If forwarding is enabled, and if the packet isn't + * TCP or UDP, check if the source address belongs + * to one of our own interfaces; if so, demote the + * interface scope and do a route lookup right below. 
+ */ + ifa = (struct ifaddr *)ifa_foraddr(src.s_addr); + if (ifa != NULL) { + ifafree(ifa); + ifa = NULL; + ifscope = IFSCOPE_NONE; + } + } + if (ip_select_srcif_debug && ifa != NULL) { if (ro->ro_rt != NULL) { printf("%s->%s ifscope %d->%d ifa_if %s%d " @@ -3251,6 +3267,103 @@ in_selectsrcif(struct ip *ip, struct route *ro, unsigned int ifscope) if (ifa == NULL && ifscope == IFSCOPE_NONE) { ifa = (struct ifaddr *)ifa_foraddr(src.s_addr); + /* + * If we have the IP address, but not the route, we don't + * really know whether or not it belongs to the correct + * interface (it could be shared across multiple interfaces.) + * The only way to find out is to do a route lookup. + */ + if (ifa != NULL && ro->ro_rt == NULL) { + struct rtentry *rt; + struct sockaddr_in sin; + struct ifaddr *oifa = NULL; + + bzero(&sin, sizeof (sin)); + sin.sin_family = AF_INET; + sin.sin_len = sizeof (sin); + sin.sin_addr = dst; + + lck_mtx_lock(rnh_lock); + if ((rt = rt_lookup(TRUE, (struct sockaddr *)&sin, NULL, + rt_tables[AF_INET], IFSCOPE_NONE)) != NULL) { + RT_LOCK(rt); + /* + * If the route uses a different interface, + * use that one instead. The IP address of + * the ifaddr that we pick up here is not + * relevant. + */ + if (ifa->ifa_ifp != rt->rt_ifp) { + oifa = ifa; + ifa = rt->rt_ifa; + ifaref(ifa); + RT_UNLOCK(rt); + } else { + RT_UNLOCK(rt); + } + rtfree_locked(rt); + } + lck_mtx_unlock(rnh_lock); + + if (oifa != NULL) { + struct ifaddr *iifa; + + /* + * See if the interface pointed to by the + * route is configured with the source IP + * address of the packet. + */ + iifa = (struct ifaddr *)ifa_foraddr_scoped( + src.s_addr, ifa->ifa_ifp->if_index); + + if (iifa != NULL) { + /* + * Found it; drop the original one + * as well as the route interface + * address, and use this instead. + */ + ifafree(oifa); + ifafree(ifa); + ifa = iifa; + } else if (!ipforwarding || + (rt->rt_flags & RTF_GATEWAY)) { + /* + * This interface doesn't have that + * source IP address; drop the route + * interface address and just use the + * original one, and let the caller + * do a scoped route lookup. + */ + ifafree(ifa); + ifa = oifa; + } else { + /* + * Forwarding is enabled and the source + * address belongs to one of our own + * interfaces which isn't the outgoing + * interface, and we have a route, and + * the destination is on a network that + * is directly attached (onlink); drop + * the original one and use the route + * interface address instead. + */ + ifafree(oifa); + } + } + } else if (ifa != NULL && ro->ro_rt != NULL && + !(ro->ro_rt->rt_flags & RTF_GATEWAY) && + ifa->ifa_ifp != ro->ro_rt->rt_ifp && ipforwarding) { + /* + * Forwarding is enabled and the source address belongs + * to one of our own interfaces which isn't the same + * as the interface used by the known route; drop the + * original one and use the route interface address. + */ + ifafree(ifa); + ifa = ro->ro_rt->rt_ifa; + ifaref(ifa); + } + if (ip_select_srcif_debug && ifa != NULL) { printf("%s->%s ifscope %d ifa_if %s%d\n", s_src, s_dst, ifscope, ifa->ifa_ifp->if_name, diff --git a/bsd/netinet6/in6_pcb.c b/bsd/netinet6/in6_pcb.c index 6d2c98b71..20f39a34d 100644 --- a/bsd/netinet6/in6_pcb.c +++ b/bsd/netinet6/in6_pcb.c @@ -464,7 +464,7 @@ in6_pcbconnect(inp, nam, p) inp->inp_lport, 0, NULL); socket_lock(inp->inp_socket, 0); if (pcb != NULL) { - in_pcb_checkstate(pcb, WNT_RELEASE, 0); + in_pcb_checkstate(pcb, WNT_RELEASE, pcb == inp ? 
1 : 0); return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { diff --git a/bsd/netinet6/in6_proto.c b/bsd/netinet6/in6_proto.c index d2621dd30..b7dbae799 100644 --- a/bsd/netinet6/in6_proto.c +++ b/bsd/netinet6/in6_proto.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Apple Inc. All rights reserved. + * Copyright (c) 2008-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -322,16 +322,6 @@ struct ip6protosw inet6sw[] = { 0, rip_unlock, 0, { 0, 0 }, NULL, { 0 } }, -#else -{ SOCK_RAW, &inet6domain, IPPROTO_PIM, PR_ATOMIC|PR_ADDR|PR_LASTHDR, - 0, 0, 0, rip6_ctloutput, - 0, - 0, 0, 0, 0, - 0, - &rip6_usrreqs, - 0, rip_unlock, 0, - { 0, 0 }, NULL, { 0 } -}, #endif /* raw wildcard */ { SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR|PR_LASTHDR, diff --git a/bsd/netinet6/ip6_input.c b/bsd/netinet6/ip6_input.c index cdf3776b2..0a8320298 100644 --- a/bsd/netinet6/ip6_input.c +++ b/bsd/netinet6/ip6_input.c @@ -955,7 +955,8 @@ injectit: while (nxt != IPPROTO_DONE) { struct ipfilter *filter; - + int (*pr_input)(struct mbuf **, int *); + if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { ip6stat.ip6s_toomanyhdr++; goto badunlocked; @@ -1028,13 +1029,18 @@ injectit: } ipf_unref(); } - if (!(ip6_protox[nxt]->pr_flags & PR_PROTOLOCK)) { + + if ((pr_input = ip6_protox[nxt]->pr_input) == NULL) { + m_freem(m); + m = NULL; + nxt = IPPROTO_DONE; + } else if (!(ip6_protox[nxt]->pr_flags & PR_PROTOLOCK)) { lck_mtx_lock(inet6_domain_mutex); - nxt = (*ip6_protox[nxt]->pr_input)(&m, &off); + nxt = pr_input(&m, &off); lck_mtx_unlock(inet6_domain_mutex); + } else { + nxt = pr_input(&m, &off); } - else - nxt = (*ip6_protox[nxt]->pr_input)(&m, &off); } return; bad: diff --git a/bsd/netinet6/ip6_var.h b/bsd/netinet6/ip6_var.h index c5b5bca17..9aa8e0e3f 100644 --- a/bsd/netinet6/ip6_var.h +++ b/bsd/netinet6/ip6_var.h @@ -300,7 +300,7 @@ extern int ip6_neighborgcthresh; /* Threshold # of NDP entries for GC */ extern int ip6_maxifprefixes; /* Max acceptable prefixes via RA per IF */ extern int ip6_maxifdefrouters; /* Max acceptable def routers via RA */ extern int ip6_maxdynroutes; /* Max # of routes created via redirect */ -#ifdef MROUTING +#if MROUTING extern struct socket *ip6_mrouter; /* multicast routing daemon */ #endif extern int ip6_sendredirects; /* send IP redirects when forwarding? */ diff --git a/bsd/netinet6/nd6.c b/bsd/netinet6/nd6.c index 92034fc61..d71746042 100644 --- a/bsd/netinet6/nd6.c +++ b/bsd/netinet6/nd6.c @@ -1193,10 +1193,11 @@ nd6_lookup( * use rt->rt_ifa->ifa_ifp, which would specify the REAL * interface. 
*/ - if (((ifp && (ifp->if_type != IFT_PPP)) && ((ifp->if_eflags & IFEF_NOAUTOIPV6LL) == 0)) && - ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || + if (ifp == NULL || (ifp->if_type == IFT_PPP) || + (ifp->if_eflags & IFEF_NOAUTOIPV6LL) || + (rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 || rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL || - (ifp && rt->rt_ifa->ifa_ifp != ifp))) { + (ifp && rt->rt_ifa->ifa_ifp != ifp)) { RT_REMREF_LOCKED(rt); RT_UNLOCK(rt); if (create) { diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index ee1a61679..8d6747009 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -3420,6 +3420,7 @@ nfs_noremotehang(thread_t thd) int nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocked) { + proc_t p; int error = 0; if (nmp == NULL) @@ -3468,8 +3469,8 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *req, thread_t thd, int nmplocke return (EINTR); /* mask off thread and process blocked signals. */ - if ((nmp->nm_flag & NFSMNT_INT) && - proc_pendingsignals(get_bsdthreadtask_info(thd), NFSINT_SIGMASK)) + if ((nmp->nm_flag & NFSMNT_INT) && ((p = get_bsdthreadtask_info(thd))) && + proc_pendingsignals(p, NFSINT_SIGMASK)) return (EINTR); return (0); } diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 0a3da6952..6013db9df 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -141,6 +141,7 @@ typedef struct #define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, uint32_t) #define DKIOCSETBLOCKSIZE _IOW('d', 24, uint32_t) #define DKIOCGETBSDUNIT _IOR('d', 27, uint32_t) +#define DKIOCISSOLIDSTATE _IOR('d', 79, uint32_t) #define DKIOCISVIRTUAL _IOR('d', 72, uint32_t) #define DKIOCGETBASE _IOR('d', 73, uint64_t) #define DKIOCGETFEATURES _IOR('d', 76, uint32_t) diff --git a/bsd/sys/kdebug.h b/bsd/sys/kdebug.h index 23d982531..6767bad2d 100644 --- a/bsd/sys/kdebug.h +++ b/bsd/sys/kdebug.h @@ -198,6 +198,7 @@ __BEGIN_DECLS #define DBG_IOINFINIBAND 48 /* Infiniband */ #define DBG_IOCPUPM 49 /* CPU Power Management */ #define DBG_IOGRAPHICS 50 /* Graphics */ +#define DBG_HIBERNATE 51 /* hibernation related events */ /* Backwards compatibility */ #define DBG_IOPOINTING DBG_IOHID /* OBSOLETE: Use DBG_IOHID instead */ diff --git a/bsd/sys/mount_internal.h b/bsd/sys/mount_internal.h index cb71406df..d36e8ea74 100644 --- a/bsd/sys/mount_internal.h +++ b/bsd/sys/mount_internal.h @@ -239,6 +239,7 @@ extern struct mount * dead_mountp; #define MNTK_LOCK_LOCAL 0x00100000 /* advisory locking is done above the VFS itself */ #define MNTK_VIRTUALDEV 0x00200000 /* mounted on a virtual device i.e. 
a disk image */ #define MNTK_ROOTDEV 0x00400000 /* this filesystem resides on the same device as the root */ +#define MNTK_SSD 0x00800000 /* underlying device is of the solid state variety */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 269697bd2..69b9e8520 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -125,7 +125,7 @@ static void buf_reassign(buf_t bp, vnode_t newvp); static errno_t buf_acquire_locked(buf_t bp, int flags, int slpflag, int slptimeo); static int buf_iterprepare(vnode_t vp, struct buflists *, int flags); static void buf_itercomplete(vnode_t vp, struct buflists *, int flags); -boolean_t buffer_cache_gc(void); +boolean_t buffer_cache_gc(int); __private_extern__ int bdwrite_internal(buf_t, int); @@ -3676,12 +3676,16 @@ dump_buffer: } boolean_t -buffer_cache_gc(void) +buffer_cache_gc(int all) { buf_t bp; boolean_t did_large_zfree = FALSE; int now = buf_timestamp(); uint32_t count = 0; + int thresh_hold = BUF_STALE_THRESHHOLD; + + if (all) + thresh_hold = 0; lck_mtx_lock_spin(buf_mtxp); @@ -3689,7 +3693,7 @@ buffer_cache_gc(void) bp = TAILQ_FIRST(&bufqueues[BQ_META]); /* Only collect buffers unused in the last N seconds. Note: ordered by timestamp. */ - while ((bp != NULL) && ((now - bp->b_timestamp) > BUF_STALE_THRESHHOLD) && (count < BUF_MAX_GC_COUNT)) { + while ((bp != NULL) && ((now - bp->b_timestamp) > thresh_hold) && (all || (count < BUF_MAX_GC_COUNT))) { int result, size; boolean_t is_zalloc; diff --git a/bsd/vfs/vfs_cluster.c b/bsd/vfs/vfs_cluster.c index d436d781b..f1f9f649b 100644 --- a/bsd/vfs/vfs_cluster.c +++ b/bsd/vfs/vfs_cluster.c @@ -3780,7 +3780,6 @@ cluster_read_direct(vnode_t vp, struct uio *uio, off_t filesize, int *read_type, int force_data_sync; int retval = 0; int no_zero_fill = 0; - int abort_flag = 0; int io_flag = 0; int misaligned = 0; struct clios iostate; @@ -3991,13 +3990,11 @@ next_dread: KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_START, (int)upl_offset, upl_needed_size, (int)iov_base, io_size, 0); - if (upl_offset == 0 && ((io_size & PAGE_MASK) == 0)) { + if (upl_offset == 0 && ((io_size & PAGE_MASK) == 0)) no_zero_fill = 1; - abort_flag = UPL_ABORT_DUMP_PAGES | UPL_ABORT_FREE_ON_EMPTY; - } else { + else no_zero_fill = 0; - abort_flag = UPL_ABORT_FREE_ON_EMPTY; - } + for (force_data_sync = 0; force_data_sync < 3; force_data_sync++) { pages_in_pl = 0; upl_size = upl_needed_size; @@ -4028,13 +4025,13 @@ next_dread: pl = UPL_GET_INTERNAL_PAGE_LIST(upl); for (i = 0; i < pages_in_pl; i++) { - if (!upl_valid_page(pl, i)) + if (!upl_page_present(pl, i)) break; } if (i == pages_in_pl) break; - ubc_upl_abort(upl, abort_flag); + ubc_upl_abort(upl, 0); } if (force_data_sync >= 3) { KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END, @@ -4052,7 +4049,7 @@ next_dread: io_size = 0; } if (io_size == 0) { - ubc_upl_abort(upl, abort_flag); + ubc_upl_abort(upl, 0); goto wait_for_dreads; } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 72)) | DBG_FUNC_END, @@ -4100,7 +4097,7 @@ next_dread: * go wait for any other reads to complete before * returning the error to the caller */ - ubc_upl_abort(upl, abort_flag); + ubc_upl_abort(upl, 0); goto wait_for_dreads; } diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 9f528d32e..f5dffc36e 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -2811,7 +2811,7 @@ vfs_init_io_attributes(vnode_t devvp, 
mount_t mp) u_int64_t temp; u_int32_t features; vfs_context_t ctx = vfs_context_current(); - + int isssd = 0; int isvirtual = 0; /* * determine if this mount point exists on the same device as the root @@ -2860,6 +2860,10 @@ vfs_init_io_attributes(vnode_t devvp, mount_t mp) if (isvirtual) mp->mnt_kern_flag |= MNTK_VIRTUALDEV; } + if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ctx) == 0) { + if (isssd) + mp->mnt_kern_flag |= MNTK_SSD; + } if ((error = VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&features, 0, ctx))) @@ -4927,8 +4931,24 @@ vauth_node_owner(struct vnode_attr *vap, kauth_cred_t cred) return(result); } +/* + * vauth_node_group + * + * Description: Ask if a cred is a member of the group owning the vnode object + * + * Parameters: vap vnode attribute + * vap->va_gid group owner of vnode object + * cred credential to check + * ismember pointer to where to put the answer + * idontknow Return this if we can't get an answer + * + * Returns: 0 Success + * idontknow Can't get information + * kauth_cred_ismember_gid:? Error from kauth subsystem + * kauth_cred_ismember_gid:? Error from kauth subsystem + */ static int -vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember) +vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember, int idontknow) { int error; int result; @@ -4936,11 +4956,43 @@ vauth_node_group(struct vnode_attr *vap, kauth_cred_t cred, int *ismember) error = 0; result = 0; - /* the caller is expected to have asked the filesystem for a group at some point */ + /* + * The caller is expected to have asked the filesystem for a group + * at some point prior to calling this function. The answer may + * have been that there is no group ownership supported for the + * vnode object, in which case we return + */ if (vap && VATTR_IS_SUPPORTED(vap, va_gid)) { error = kauth_cred_ismember_gid(cred, vap->va_gid, &result); + /* + * Credentials which are opted into external group membership + * resolution which are not known to the external resolver + * will result in an ENOENT error. We translate this into + * the appropriate 'idontknow' response for our caller. + * + * XXX We do not make a distinction here between an ENOENT + * XXX arising from a response from the external resolver, + * XXX and an ENOENT which is internally generated. This is + * XXX a deficiency of the published kauth_cred_ismember_gid() + * XXX KPI which can not be overcome without new KPI. For + * XXX all currently known cases, however, this wil result + * XXX in correct behaviour. + */ + if (error == ENOENT) + error = idontknow; } - /* we could test the group UUID here if we had a policy for it */ + /* + * XXX We could test the group UUID here if we had a policy for it, + * XXX but this is problematic from the perspective of synchronizing + * XXX group UUID and POSIX GID ownership of a file and keeping the + * XXX values coherent over time. The problem is that the local + * XXX system will vend transient group UUIDs for unknown POSIX GID + * XXX values, and these are not persistent, whereas storage of values + * XXX is persistent. One potential solution to this is a local + * XXX (persistent) replica of remote directory entries and vended + * XXX local ids in a local directory server (think in terms of a + * XXX caching DNS server). 
+ */ if (!error) *ismember = result; @@ -4968,16 +5020,39 @@ vauth_file_owner(vauth_ctx vcp) return(result); } + +/* + * vauth_file_ingroup + * + * Description: Ask if a user is a member of the group owning the directory + * + * Parameters: vcp The vnode authorization context that + * contains the user and directory info + * vcp->flags_valid Valid flags + * vcp->flags Flags values + * vcp->vap File vnode attributes + * vcp->ctx VFS Context (for user) + * ismember pointer to where to put the answer + * idontknow Return this if we can't get an answer + * + * Returns: 0 Success + * vauth_node_group:? Error from vauth_node_group() + * + * Implicit returns: *ismember 0 The user is not a group member + * 1 The user is a group member + */ static int -vauth_file_ingroup(vauth_ctx vcp, int *ismember) +vauth_file_ingroup(vauth_ctx vcp, int *ismember, int idontknow) { int error; + /* Check for a cached answer first, to avoid the check if possible */ if (vcp->flags_valid & _VAC_IN_GROUP) { *ismember = (vcp->flags & _VAC_IN_GROUP) ? 1 : 0; error = 0; } else { - error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember); + /* Otherwise, go look for it */ + error = vauth_node_group(vcp->vap, vcp->ctx->vc_ucred, ismember, idontknow); if (!error) { /* cache our result */ @@ -5014,16 +5089,38 @@ vauth_dir_owner(vauth_ctx vcp) return(result); } +/* + * vauth_dir_ingroup + * + * Description: Ask if a user is a member of the group owning the directory + * + * Parameters: vcp The vnode authorization context that + * contains the user and directory info + * vcp->flags_valid Valid flags + * vcp->flags Flags values + * vcp->dvap Dir vnode attributes + * vcp->ctx VFS Context (for user) + * ismember pointer to where to put the answer + * idontknow Return this if we can't get an answer + * + * Returns: 0 Success + * vauth_node_group:? Error from vauth_node_group() + * + * Implicit returns: *ismember 0 The user is not a group member + * 1 The user is a group member + */ static int -vauth_dir_ingroup(vauth_ctx vcp, int *ismember) +vauth_dir_ingroup(vauth_ctx vcp, int *ismember, int idontknow) { int error; + /* Check for a cached answer first, to avoid the check if possible */ if (vcp->flags_valid & _VAC_IN_DIR_GROUP) { *ismember = (vcp->flags & _VAC_IN_DIR_GROUP) ? 1 : 0; error = 0; } else { - error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember); + /* Otherwise, go look for it */ + error = vauth_node_group(vcp->dvap, vcp->ctx->vc_ucred, ismember, idontknow); if (!error) { /* cache our result */ @@ -5131,11 +5228,19 @@ vnode_authorize_posix(vauth_ctx vcp, int action, int on_dir) } /* Check group membership (most expensive) */ - ismember = 0; + ismember = 0; /* Default to allow, if the target has no group owner */ + + /* + * In the case we can't get an answer about the user from the call to + * vauth_dir_ingroup() or vauth_file_ingroup(), we want to fail on + * the side of caution, rather than simply granting access, or we will + * fail to correctly implement exclusion groups, so we set the third + * parameter on the basis of the state of 'group_ok'. + */ if (on_dir) { - error = vauth_dir_ingroup(vcp, &ismember); + error = vauth_dir_ingroup(vcp, &ismember, (!group_ok ? EACCES : 0)); } else { - error = vauth_file_ingroup(vcp, &ismember); + error = vauth_file_ingroup(vcp, &ismember, (!group_ok ? 
EACCES : 0)); } if (error) goto out; @@ -5207,14 +5312,22 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child) /* check the ACL on the directory */ delete_child_denied = 0; if (!cached_delete_child && VATTR_IS_NOT(dvap, va_acl, NULL)) { + errno_t posix_error; + eval.ae_requested = KAUTH_VNODE_DELETE_CHILD; eval.ae_acl = &dvap->va_acl->acl_ace[0]; eval.ae_count = dvap->va_acl->acl_entrycount; eval.ae_options = 0; if (vauth_dir_owner(vcp)) eval.ae_options |= KAUTH_AEVAL_IS_OWNER; - if ((error = vauth_dir_ingroup(vcp, &ismember)) != 0) - return(error); + /* + * We use ENOENT as a marker to indicate we could not get + * information in order to delay evaluation until after we + * have the ACL evaluation answer. Previously, we would + * always deny the operation at this point. + */ + if ((posix_error = vauth_dir_ingroup(vcp, &ismember, ENOENT)) != 0 && posix_error != ENOENT) + return(posix_error); if (ismember) eval.ae_options |= KAUTH_AEVAL_IN_GROUP; eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; @@ -5222,31 +5335,58 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child) eval.ae_exp_gwrite = KAUTH_VNODE_GENERIC_WRITE_BITS; eval.ae_exp_gexec = KAUTH_VNODE_GENERIC_EXECUTE_BITS; + /* + * If there is no entry, we are going to defer to other + * authorization mechanisms. + */ error = kauth_acl_evaluate(cred, &eval); if (error != 0) { KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); return(error); } - if (eval.ae_result == KAUTH_RESULT_DENY) + switch(eval.ae_result) { + case KAUTH_RESULT_DENY: delete_child_denied = 1; - if (eval.ae_result == KAUTH_RESULT_ALLOW) { + break; + case KAUTH_RESULT_ALLOW: KAUTH_DEBUG("%p ALLOWED - granted by directory ACL", vcp->vp); return(0); + case KAUTH_RESULT_DEFER: + /* + * If we don't have a POSIX answer of "yes", and we + * can't get an ACL answer, then we deny it now. + */ + if (posix_error == ENOENT) { + delete_child_denied = 1; + break; + } + default: + /* Effectively the same as !delete_child_denied */ + KAUTH_DEBUG("%p DEFERRED - directory ACL", vcp->vp); + break; } } /* check the ACL on the node */ delete_denied = 0; if (VATTR_IS_NOT(vap, va_acl, NULL)) { + errno_t posix_error; + eval.ae_requested = KAUTH_VNODE_DELETE; eval.ae_acl = &vap->va_acl->acl_ace[0]; eval.ae_count = vap->va_acl->acl_entrycount; eval.ae_options = 0; if (vauth_file_owner(vcp)) eval.ae_options |= KAUTH_AEVAL_IS_OWNER; - if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) - return(error); + /* + * We use ENOENT as a marker to indicate we could not get + * information in order to delay evaluation until after we + * have the ACL evaluation answer. Previously, we would + * always deny the operation at this point. + */ + if ((posix_error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && posix_error != ENOENT) + return(posix_error); if (ismember) eval.ae_options |= KAUTH_AEVAL_IN_GROUP; eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; @@ -5258,17 +5398,32 @@ vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child) KAUTH_DEBUG("%p ERROR during ACL processing - %d", vcp->vp, error); return(error); } - if (eval.ae_result == KAUTH_RESULT_DENY) + + switch(eval.ae_result) { + case KAUTH_RESULT_DENY: delete_denied = 1; - if (eval.ae_result == KAUTH_RESULT_ALLOW) { + break; + case KAUTH_RESULT_ALLOW: KAUTH_DEBUG("%p ALLOWED - granted by file ACL", vcp->vp); return(0); + case KAUTH_RESULT_DEFER: + /* + * If we don't have a POSIX answer of "yes", and we + * can't get an ACL answer, then we deny it now. 
+ */ + if (posix_error == ENOENT) { + delete_denied = 1; + } + default: + /* Effectively the same as !delete_child_denied */ + KAUTH_DEBUG("%p DEFERRED%s - by file ACL", vcp->vp, delete_denied ? "(DENY)" : ""); + break; } } /* if denied by ACL on directory or node, return denial */ if (delete_denied || delete_child_denied) { - KAUTH_DEBUG("%p ALLOWED - denied by ACL", vcp->vp); + KAUTH_DEBUG("%p DENIED - denied by ACL", vcp->vp); return(EACCES); } @@ -5334,14 +5489,22 @@ vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_r /* if we have an ACL, evaluate it */ if (VATTR_IS_NOT(vap, va_acl, NULL)) { + errno_t posix_error; + eval.ae_requested = acl_rights; eval.ae_acl = &vap->va_acl->acl_ace[0]; eval.ae_count = vap->va_acl->acl_entrycount; eval.ae_options = 0; if (vauth_file_owner(vcp)) eval.ae_options |= KAUTH_AEVAL_IS_OWNER; - if ((error = vauth_file_ingroup(vcp, &ismember)) != 0) - return(error); + /* + * We use ENOENT as a marker to indicate we could not get + * information in order to delay evaluation until after we + * have the ACL evaluation answer. Previously, we would + * always deny the operation at this point. + */ + if ((posix_error = vauth_file_ingroup(vcp, &ismember, ENOENT)) != 0 && posix_error != ENOENT) + return(posix_error); if (ismember) eval.ae_options |= KAUTH_AEVAL_IN_GROUP; eval.ae_exp_gall = KAUTH_VNODE_GENERIC_ALL_BITS; @@ -5354,14 +5517,28 @@ vnode_authorize_simple(vauth_ctx vcp, kauth_ace_rights_t acl_rights, kauth_ace_r return(error); } - if (eval.ae_result == KAUTH_RESULT_DENY) { + switch(eval.ae_result) { + case KAUTH_RESULT_DENY: KAUTH_DEBUG("%p DENIED - by ACL", vcp->vp); - return(EACCES); /* deny, deny, counter-allege */ - } - if (eval.ae_result == KAUTH_RESULT_ALLOW) { + return(EACCES); /* deny, deny, counter-allege */ + case KAUTH_RESULT_ALLOW: KAUTH_DEBUG("%p ALLOWED - all rights granted by ACL", vcp->vp); return(0); + case KAUTH_RESULT_DEFER: + /* + * If we don't have a POSIX answer of "yes", and we + * can't get an ACL answer, then we deny it now. + */ + if (posix_error == ENOENT) { + KAUTH_DEBUG("%p DENIED(DEFERRED) - by ACL", vcp->vp); + return(EACCES); /* deny, deny, counter-allege */ + } + default: + /* Effectively the same as !delete_child_denied */ + KAUTH_DEBUG("%p DEFERRED - directory ACL", vcp->vp); + break; } + *found_deny = eval.ae_found_deny; /* fall through and evaluate residual rights */ diff --git a/bsd/vfs/vfs_xattr.c b/bsd/vfs/vfs_xattr.c index 32c2eb625..d15711685 100644 --- a/bsd/vfs/vfs_xattr.c +++ b/bsd/vfs/vfs_xattr.c @@ -946,8 +946,8 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) } } - /* Obtain the vnode for "/tmp" directory. */ - if (vnode_lookup("/tmp", 0, &dvp, context) != 0) { + /* Obtain the vnode for "/var/run" directory. 
*/ + if (vnode_lookup("/var/run", 0, &dvp, context) != 0) { error = ENOTSUP; goto out; } @@ -995,7 +995,7 @@ get_shadow_dir(vnode_t *sdvpp, vfs_context_t context) if (sdvp->v_type != VDIR) { goto baddir; } - /* Obtain the fsid for /tmp directory */ + /* Obtain the fsid for /var/run directory */ VATTR_INIT(&va); VATTR_WANTED(&va, va_fsid); if (VNOP_GETATTR(dvp, &va, context) != 0 || diff --git a/bsd/vm/dp_backing_file.c b/bsd/vm/dp_backing_file.c index 0f72ed2f1..420238db9 100644 --- a/bsd/vm/dp_backing_file.c +++ b/bsd/vm/dp_backing_file.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -205,6 +206,9 @@ macx_triggers( return mach_macx_triggers(args); } + +extern boolean_t dp_isssd; + /* * Routine: macx_swapon * Function: @@ -226,6 +230,8 @@ macx_swapon( off_t file_size; vfs_context_t ctx = vfs_context_current(); struct proc *p = current_proc(); + int dp_cluster_size; + AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); AUDIT_ARG(value32, args->priority); @@ -236,11 +242,6 @@ macx_swapon( if ((error = suser(kauth_cred_get(), 0))) goto swapon_bailout; - if(default_pager_init_flag == 0) { - start_def_pager(NULL); - default_pager_init_flag = 1; - } - /* * Get a vnode for the paging area. */ @@ -273,6 +274,11 @@ macx_swapon( if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0)) goto swapon_bailout; + if (default_pager_init_flag == 0) { + start_def_pager(NULL); + default_pager_init_flag = 1; + } + /* add new backing store to list */ i = 0; while(bs_port_table[i].vp != 0) { @@ -300,9 +306,24 @@ macx_swapon( goto swapon_bailout; } + if (vp->v_mount->mnt_kern_flag & MNTK_SSD) { + /* + * keep the cluster size small since the + * seek cost is effectively 0 which means + * we don't care much about fragmentation + */ + dp_isssd = TRUE; + dp_cluster_size = 2 * PAGE_SIZE; + } else { + /* + * use the default cluster size + */ + dp_isssd = FALSE; + dp_cluster_size = 0; + } kr = default_pager_backing_store_create(default_pager, -1, /* default priority */ - 0, /* default cluster size */ + dp_cluster_size, &backing_store); memory_object_default_deallocate(default_pager); diff --git a/config/IOKit.exports b/config/IOKit.exports index be71c491d..deb1b0fbe 100644 --- a/config/IOKit.exports +++ b/config/IOKit.exports @@ -482,9 +482,13 @@ __ZN14IOPMrootDomain14shutdownSystemEv __ZN14IOPMrootDomain14tellChangeDownEm __ZN14IOPMrootDomain15powerChangeDoneEm __ZN14IOPMrootDomain16tellNoChangeDownEm +__ZN14IOPMrootDomain17createPMAssertionEyjP9IOServicePKc __ZN14IOPMrootDomain17getSleepSupportedEv __ZN14IOPMrootDomain17setAggressivenessEmm __ZN14IOPMrootDomain18changePowerStateToEm +__ZN14IOPMrootDomain18releasePMAssertionEy +__ZN14IOPMrootDomain19getPMAssertionLevelEy +__ZN14IOPMrootDomain19setPMAssertionLevelEyj __ZN14IOPMrootDomain22changePowerStateToPrivEm __ZN14IOPMrootDomain22removePublishedFeatureEj __ZN14IOPMrootDomain23requestPowerDomainStateEmP17IOPowerConnectionm diff --git a/config/MasterVersion b/config/MasterVersion index 469c633be..15b5f9523 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -10.4.0 +10.5.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
diff --git a/iokit/IOKit/IOBufferMemoryDescriptor.h b/iokit/IOKit/IOBufferMemoryDescriptor.h index babd068fd..391a0460e 100644 --- a/iokit/IOKit/IOBufferMemoryDescriptor.h +++ b/iokit/IOKit/IOBufferMemoryDescriptor.h @@ -43,6 +43,7 @@ enum { | kIOMemoryAutoPrepare #endif | kIOMemoryThreadSafe + | kIOMemoryClearEncrypt }; #define _IOBUFFERMEMORYDESCRIPTOR_INTASKWITHOPTIONS_ 1 diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index dd000cb34..30b307816 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -93,8 +93,12 @@ struct IOHibernateImageHeader uint32_t systemTableOffset; uint32_t debugFlags; + uint32_t options; - uint32_t reserved[76]; // make sizeof == 512 + uint32_t reserved[71]; // make sizeof == 512 + + uint64_t encryptEnd __attribute__ ((packed)); + uint64_t deviceBase __attribute__ ((packed)); uint32_t fileExtentMapSize; IOPolledFileExtent fileExtentMap[2]; @@ -106,6 +110,15 @@ enum kIOHibernateDebugRestoreLogs = 0x00000001 }; +// options & IOHibernateOptions property +enum +{ + kIOHibernateOptionSSD = 0x00000001, + kIOHibernateOptionColor = 0x00000002, + kIOHibernateOptionProgress = 0x00000004, + kIOHibernateOptionDarkWake = 0x00000008, +}; + struct hibernate_bitmap_t { uint32_t first_page; @@ -168,8 +181,8 @@ enum struct hibernate_graphics_t { - uint32_t physicalAddress; // Base address of video memory - uint32_t mode; // + uint32_t physicalAddress; // Base address of video memory + int32_t gfxStatus; // EFI config restore status uint32_t rowBytes; // Number of bytes per pixel row uint32_t width; // Width uint32_t height; // Height @@ -220,10 +233,13 @@ typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uin struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, kern_get_file_extents_callback_t callback, - void * callback_ref, - dev_t * device, - uint64_t * partitionbase_result, - uint64_t * maxiocount_result); + void * callback_ref, + dev_t * device_result, + uint64_t * partitionbase_result, + uint64_t * maxiocount_result, + boolean_t * solid_state); + + void kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref); int @@ -236,8 +252,9 @@ hibernate_page_list_allocate(void); kern_return_t hibernate_setup(IOHibernateImageHeader * header, - uint32_t free_page_ratio, - uint32_t free_page_time, + uint32_t free_page_ratio, + uint32_t free_page_time, + boolean_t vmflush, hibernate_page_list_t ** page_list_ret, hibernate_page_list_t ** page_list_wired_ret, boolean_t * encryptedswap); @@ -280,6 +297,9 @@ hibernate_page_list_set_volatile( hibernate_page_list_t * page_list, void hibernate_page_list_discard(hibernate_page_list_t * page_list); +int +hibernate_should_abort(void); + void hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, vm_offset_t ppnum, vm_offset_t count, uint32_t kind); @@ -349,7 +369,8 @@ enum kIOHibernateModeDiscardCleanInactive = 0x00000008, kIOHibernateModeDiscardCleanActive = 0x00000010, kIOHibernateModeSwitch = 0x00000020, - kIOHibernateModeRestart = 0x00000040 + kIOHibernateModeRestart = 0x00000040, + kIOHibernateModeSSDInvert = 0x00000080, }; // IOHibernateImageHeader.signature @@ -383,6 +404,12 @@ enum { kIOHibernatePreviewUpdates = 0x00000002 }; +#define kIOHibernateOptionsKey "IOHibernateOptions" +#define kIOHibernateGfxStatusKey "IOHibernateGfxStatus" +enum { + kIOHibernateGfxStatusUnknown = ((int32_t) 0xFFFFFFFF) +}; + #define kIOHibernateBootImageKey "boot-image" #define 
kIOHibernateBootImageKeyKey "boot-image-key" #define kIOHibernateBootSignatureKey "boot-signature" diff --git a/iokit/IOKit/IOMemoryDescriptor.h b/iokit/IOKit/IOMemoryDescriptor.h index fb6afd0ac..866da4703 100644 --- a/iokit/IOKit/IOMemoryDescriptor.h +++ b/iokit/IOKit/IOMemoryDescriptor.h @@ -93,6 +93,7 @@ enum { kIOMemoryReserved6156215 = 0x00020000, #endif kIOMemoryThreadSafe = 0x00100000, // Shared with Buffer MD + kIOMemoryClearEncrypt = 0x00200000, // Shared with Buffer MD }; #define kIOMapperSystem ((IOMapper *) 0) @@ -108,6 +109,9 @@ enum { kIOMemoryIncoherentIOFlush = 1, kIOMemoryIncoherentIOStore = 2, + + kIOMemoryClearEncrypted = 50, + kIOMemorySetEncrypted = 51, }; #define IOMEMORYDESCRIPTOR_SUPPORTS_DMACOMMAND 1 diff --git a/iokit/IOKit/IOMessage.h b/iokit/IOKit/IOMessage.h index 3ca9e1e79..77a1001aa 100644 --- a/iokit/IOKit/IOMessage.h +++ b/iokit/IOKit/IOMessage.h @@ -60,6 +60,13 @@ typedef UInt32 IOMessage; #define kIOMessageDeviceWillPowerOff iokit_common_msg(0x210) #define kIOMessageDeviceWillNotPowerOff iokit_common_msg(0x220) #define kIOMessageDeviceHasPoweredOn iokit_common_msg(0x230) + +// IOService power mgt does not send +// kIOMessageDeviceWillPowerOn +// kIOMessageDeviceHasPoweredOff +#define kIOMessageDeviceWillPowerOn iokit_common_msg(0x215) +#define kIOMessageDeviceHasPoweredOff iokit_common_msg(0x225) + #define kIOMessageCanSystemPowerOff iokit_common_msg(0x240) #define kIOMessageSystemWillPowerOff iokit_common_msg(0x250) #define kIOMessageSystemWillNotPowerOff iokit_common_msg(0x260) diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index 7d78225af..fc8d7ca8f 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -221,7 +221,99 @@ enum { * * See IOPMrootDomain notification kIOPMMessageSleepWakeUUIDChange */ - #define kIOPMSleepWakeUUIDKey "SleepWakeUUID" +#define kIOPMSleepWakeUUIDKey "SleepWakeUUID" + +/* kIOPMDeepSleepEnabledKey + * Indicates the Deep Sleep enable state. + * It has a boolean value. + * true == Deep Sleep is enabled + * false == Deep Sleep is disabled + * not present == Deep Sleep is not supported on this hardware + */ +#define kIOPMDeepSleepEnabledKey "DeepSleep Enabled" + +/* kIOPMDeepSleepDelayKey + * Key refers to a CFNumberRef that represents the delay in seconds before + * entering Deep Sleep state. The property is not present if Deep Sleep is + * unsupported. + */ +#define kIOPMDeepSleepDelayKey "DeepSleep Delay" + +/* kIOPMLowBatteryWakeThresholdKey + * Key refers to a CFNumberRef that represents the percentage of battery + * remaining charge that will trigger a system wake followed by Deep Sleep. + */ +#define kIOPMLowBatteryWakeThresholdKey "LowBatteryWakeThreshold" + +/******************************************************************************* + * + * Driver PM Assertions + * + ******************************************************************************/ + +/* Driver Assertion bitfield description + * Driver PM assertions are defined by these bits. + */ +enum { + /*! kIOPMDriverAssertionCPUBit + * When set, PM kernel will prefer to leave the CPU and core hardware + * running in "Dark Wake" state, instead of sleeping. + */ + kIOPMDriverAssertionCPUBit = 0x01, + + /*! kIOPMDriverAssertionUSBExternalDeviceBit + * When set, driver is informing PM that an external USB device is attached. + */ + kIOPMDriverAssertionUSBExternalDeviceBit = 0x04, + + /*! kIOPMDriverAssertionBluetoothHIDDevicePairedBit + * When set, driver is informing PM that a Bluetooth HID device is paired. 
+ */
+ kIOPMDriverAssertionBluetoothHIDDevicePairedBit = 0x08,
+
+ /*! kIOPMDriverAssertionExternalMediaMountedBit
+ * When set, driver is informing PM that an external media is mounted.
+ */
+ kIOPMDriverAssertionExternalMediaMountedBit = 0x10,
+
+ kIOPMDriverAssertionReservedBit5 = 0x20,
+ kIOPMDriverAssertionReservedBit6 = 0x40,
+ kIOPMDriverAssertionReservedBit7 = 0x80
+};
+
+ /* kIOPMAssertionsDriverKey
+ * This kIOPMrootDomain key refers to a CFNumberRef property, containing
+ * a bitfield describing the aggregate PM assertion levels.
+ * Example: A value of 0 indicates that no driver has asserted anything.
+ * Or, a value of kIOPMDriverAssertionCPUBit
+ * indicates that a driver (or drivers) have asserted a need for CPU and video.
+ */
+#define kIOPMAssertionsDriverKey "DriverPMAssertions"
+
+ /* kIOPMAssertionsDriverDetailedKey
+ * This kIOPMrootDomain key refers to a CFNumberRef property, containing
+ * a bitfield describing the aggregate PM assertion levels.
+ * Example: A value of 0 indicates that no driver has asserted anything.
+ * Or, a value of kIOPMDriverAssertionCPUBit
+ * indicates that a driver (or drivers) have asserted a need for CPU and video.
+ */
+#define kIOPMAssertionsDriverDetailedKey "DriverPMAssertionsDetailed"
+
+/*******************************************************************************
+ *
+ * Kernel Driver assertion detailed dictionary keys
+ *
+ * Keys decode the Array & dictionary data structure under IOPMrootDomain property
+ * kIOPMAssertionsDriverKey.
+ *
+ */
+#define kIOPMDriverAssertionIDKey "ID"
+#define kIOPMDriverAssertionCreatedTimeKey "CreatedTime"
+#define kIOPMDriverAssertionModifiedTimeKey "ModifiedTime"
+#define kIOPMDriverAssertionOwnerStringKey "Owner"
+#define kIOPMDriverAssertionOwnerServiceKey "ServicePtr"
+#define kIOPMDriverAssertionLevelKey "Level"
+#define kIOPMDriverAssertionAssertedKey "Assertions"
 /*******************************************************************************
 *
 * Power commands issued to root domain
diff --git a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h
index ff25bf381..7e49682ad 100644
--- a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h
+++ b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h
@@ -36,5 +36,6 @@
 #define kPMRestartSystem 6
 #define kPMSleepSystemOptions 7
 #define kPMSetMaintenanceWakeCalendar 8
+#define kPMSetUserAssertionLevels 9
-#define kNumPMMethods 9
+#define kNumPMMethods 10
diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h
index bf50d78e7..88ff6c788 100644
--- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h
+++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h
@@ -265,5 +265,31 @@ enum {
 #define kIOPMSleepWakeFailureUUIDKey "UUID"
 #define kIOPMSleepWakeFailureDateKey "Date"
-#endif /* ! _IOKIT_IOPMPRIVATE_H */
+/******************************************************************************/
+/* System sleep policy
+ * Shared between PM root domain and platform driver.
+ */
+
+// Platform specific property added by the platform driver.
+// An OSData that describes the system sleep policy.
+#define kIOPlatformSystemSleepPolicyKey "IOPlatformSystemSleepPolicy"
+
+// Root domain property updated before platform sleep.
+// An OSData that describes the system sleep parameters. +#define kIOPMSystemSleepParametersKey "IOPMSystemSleepParameters" + +struct IOPMSystemSleepParameters +{ + uint32_t version; + uint32_t sleepFlags; + uint32_t sleepTimer; + uint32_t wakeEvents; +}; +// Sleep flags +enum { + kIOPMSleepFlagHibernate = 0x00000001, + kIOPMSleepFlagSleepTimerEnable = 0x00000002 +}; + +#endif /* ! _IOKIT_IOPMPRIVATE_H */ diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index 0c2629376..2de4d289c 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -37,12 +37,42 @@ #define ROOT_DOMAIN_RUN_STATES 1 #endif struct AggressivesRecord; -#endif +class PMAssertionsTracker; +#endif /* XNU_KERNEL_PRIVATE */ class IOPMPowerStateQueue; class RootDomainUserClient; class PMTraceWorker; +/*! + * Types for PM Assertions + * For creating, releasing, and getting PM assertion levels. + */ + +/*! IOPMDriverAssertionType + * A bitfield describing a set of assertions. May be used to specify which assertions + * to set with IOPMrootDomain::createPMAssertion; or to query which + * assertions are set with IOPMrootDomain::releasePMAssertion. + */ +typedef uint64_t IOPMDriverAssertionType; + +/* IOPMDriverAssertionID + * Drivers may create PM assertions to request system behavior (keep the system awake, + * or keep the display awake). When a driver creates an assertion via + * IOPMrootDomain::createPMAssertion, PM returns a handle to + * the assertion of type IOPMDriverAssertionID. + */ +typedef uint64_t IOPMDriverAssertionID; +#define kIOPMUndefinedDriverAssertionID 0 + +/* IOPMDriverAssertionLevel + * Possible values for IOPMDriverAssertionLevel are kIOPMDriverAssertionLevelOff + * and kIOPMDriverAssertionLevelOn + */ +typedef uint32_t IOPMDriverAssertionLevel; +#define kIOPMDriverAssertionLevelOff 0 +#define kIOPMDriverAssertionLevelOn 255 + /* * Flags for get/setSleepSupported() */ @@ -75,6 +105,19 @@ enum { #define kIOPMThermalEmergencySleepKey "Thermal Emergency Sleep" #define kIOPMMaintenanceSleepKey "Maintenance Sleep" +enum +{ + kIOPMSleepReasonClamshell = 1, + kIOPMSleepReasonPowerButton = 2, + kIOPMSleepReasonSoftware = 3, + kIOPMSleepReasonOSSwitchHibernation = 4, + kIOPMSleepReasonIdle = 5, + kIOPMSleepReasonLowPower = 6, + kIOPMSleepReasonThermalEmergency = 7, + kIOPMSleepReasonMaintenance = 8, + kIOPMSleepReasonMax +}; + /* * String constants for communication with PM CPU */ @@ -124,6 +167,7 @@ public: IOReturn sleepSystemOptions( OSDictionary *options ); virtual IOReturn setProperties( OSObject * ); + virtual bool serializeProperties( OSSerialize * s ) const; /*! @function systemPowerEventOccurred @abstract Other drivers may inform IOPMrootDomain of system PM events @@ -242,6 +286,54 @@ public: void *param1, void *param2, void *param3, void *param4 ); +/*! @function createPMAssertion + @abstract Creates an assertion to influence system power behavior. + @param whichAssertionBits A bitfield specify the assertion that the caller requests. + @param assertionLevel An integer detailing the initial assertion level, kIOPMDriverAssertionLevelOn + or kIOPMDriverAssertionLevelOff. + @param ownerService A pointer to the caller's IOService class, for tracking. + @param ownerDescription A reverse-DNS string describing the caller's identity and reason. 
+ @result On success, returns a new assertion of type IOPMDriverAssertionID
+*/
+ IOPMDriverAssertionID createPMAssertion(
+ IOPMDriverAssertionType whichAssertionsBits,
+ IOPMDriverAssertionLevel assertionLevel,
+ IOService *ownerService,
+ const char *ownerDescription);
+
+/* @function setPMAssertionLevel
+ @abstract Modify the level of a pre-existing assertion.
+ @discussion Change the value of a PM assertion to influence system behavior,
+ without undergoing the work required to create or destroy an assertion. Suggested
+ for clients who will assert and de-assert needs for PM behavior several times over
+ their lifespan.
+ @param assertionID An assertion ID previously returned by createPMAssertion
+ @param assertionLevel The new assertion level.
+ @result kIOReturnSuccess if it worked; kIOReturnNotFound or other IOReturn error on failure.
+*/
+ IOReturn setPMAssertionLevel(IOPMDriverAssertionID assertionID, IOPMDriverAssertionLevel assertionLevel);
+
+/*! @function getPMAssertionLevel
+ @abstract Returns the active level of the specified assertion(s).
+ @discussion Returns kIOPMDriverAssertionLevelOff or
+ kIOPMDriverAssertionLevelOn. If multiple assertions are specified
+ in the bitfield, only returns kIOPMDriverAssertionLevelOn
+ if all assertions are active.
+ @param whichAssertionBits Bits defining the assertion or assertions the caller is interested in
+ the level of. If in doubt, pass kIOPMDriverAssertionCPUBit as the argument.
+ @result Returns kIOPMDriverAssertionLevelOff or
+ kIOPMDriverAssertionLevelOn indicating the specified assertion's levels, if available.
+ If the assertions aren't supported on this machine, or aren't recognized by the OS, the
+ result is undefined.
+*/
+ IOPMDriverAssertionLevel getPMAssertionLevel(IOPMDriverAssertionType whichAssertionBits);
+
+/*! @function releasePMAssertion
+ @abstract Removes an assertion to influence system power behavior.
+ @result On success, returns a new assertion of type IOPMDriverAssertionID * +*/ + IOReturn releasePMAssertion(IOPMDriverAssertionID releaseAssertion); + private: virtual IOReturn changePowerStateTo( unsigned long ordinal ); virtual IOReturn changePowerStateToPriv( unsigned long ordinal ); @@ -255,13 +347,22 @@ private: /* Root Domain internals */ public: +#if HIBERNATION + bool getHibernateSettings( + uint32_t * hibernateMode, + uint32_t * hibernateFreeRatio, + uint32_t * hibernateFreeTime ); +#endif + #if ROOT_DOMAIN_RUN_STATES void tagPowerPlaneService( IOService * service, uint32_t * rdFlags ); - void handleActivityTickleForService( - IOService * service ); + void handleActivityTickleForService( IOService * service, + unsigned long type, + unsigned long currentPowerState, + uint32_t activityTickleCount ); void handlePowerChangeStartForService( IOService * service, @@ -296,6 +397,8 @@ public: void handleSleepTimerExpiration( void ); void handleForcedSleepTimerExpiration( void ); void stopIgnoringClamshellEventsDuringWakeup( void ); + bool activitySinceSleep(void); + bool abortHibernation(void); IOReturn joinAggressiveness( IOService * service ); void handleAggressivesRequests( void ); @@ -304,6 +407,8 @@ public: private: friend class PMSettingObject; + friend class PMAssertionsTracker; + friend class RootDomainUserClient; // Points to our parent IOService * wrangler; @@ -314,6 +419,7 @@ private: OSArray *allowedPMSettings; PMTraceWorker *pmTracer; + PMAssertionsTracker *pmAssertions; // Settings controller info IORecursiveLock *settingsCtrlLock; @@ -376,6 +482,17 @@ private: unsigned int ignoreChangeDown :1; unsigned int wranglerAsleep :1; + unsigned int sleepTimerMaintenance :1; + unsigned int lowBatteryCondition :1; + unsigned int hibernateDisabled :1; + unsigned int hibernateNoDefeat :1; + unsigned int hibernateAborted :1; + + uint32_t hibernateMode; + uint32_t userActivityCount; + uint32_t userActivityAtSleep; + uint32_t lastSleepReason; + // Info for communicating system state changes to PMCPU int32_t idxPMCPUClamshell; int32_t idxPMCPULimitedPower; @@ -392,7 +509,7 @@ private: IOService * pciHostBridgeDevice; // IOPMrootDomain internal sleep call - IOReturn privateSleepSystem( const char *sleepReason ); + IOReturn privateSleepSystem( uint32_t sleepReason ); void announcePowerSourceChange( void ); void reportUserInput( void ); @@ -420,7 +537,7 @@ private: // Inform PMCPU of changes to state like lid, AC vs. 
battery void informCPUStateChange( uint32_t type, uint32_t value ); - void dispatchPowerEvent( uint32_t event, void * arg0, void * arg1 ); + void dispatchPowerEvent( uint32_t event, void * arg0, uint64_t arg1 ); void handlePowerNotification( UInt32 msg ); IOReturn setPMSetting(const OSSymbol *, OSObject *); @@ -446,9 +563,17 @@ private: void aggressivenessChanged( void ); + IOReturn setPMAssertionUserLevels(IOPMDriverAssertionType); - void publishSleepWakeUUID( bool shouldPublish ); - + void publishSleepWakeUUID( bool shouldPublish ); + +#if HIBERNATION + bool getSleepOption( const char * key, uint32_t * option ); + bool evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p ); + void evaluateSystemSleepPolicyEarly( void ); + void evaluateSystemSleepPolicyFinal( void ); +#endif /* HIBERNATION */ + #endif /* XNU_KERNEL_PRIVATE */ }; diff --git a/iokit/Kernel/IOBufferMemoryDescriptor.cpp b/iokit/Kernel/IOBufferMemoryDescriptor.cpp index bca55b535..004d2ec89 100644 --- a/iokit/Kernel/IOBufferMemoryDescriptor.cpp +++ b/iokit/Kernel/IOBufferMemoryDescriptor.cpp @@ -37,7 +37,6 @@ #include #include "IOKitKernelInternal.h" -#include "IOCopyMapper.h" __BEGIN_DECLS void ipc_port_release_send(ipc_port_t port); @@ -49,11 +48,10 @@ __END_DECLS enum { - kInternalFlagRealloc = 0x00000001, + kInternalFlagPhysical = 0x00000001, + kInternalFlagPageSized = 0x00000002 }; -volatile ppnum_t gIOHighestAllocatedPage; - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #define super IOGeneralMemoryDescriptor @@ -84,9 +82,7 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( kern_return_t kr; task_t mapTask = NULL; vm_map_t vmmap = NULL; - addr64_t lastIOAddr; mach_vm_address_t highestMask = 0; - bool usePhys; IOOptionBits iomdOptions = kIOMemoryTypeVirtual64 | kIOMemoryAsReference; if (!capacity) @@ -107,14 +103,26 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( // Grab IOMD bits from the Buffer MD options iomdOptions |= (options & kIOBufferDescriptorMemoryFlags); +#if 0 + // workarounds- + if ((options & kIOMemoryPhysicallyContiguous) || ((capacity == 0x1000) && (inTask == kernel_task)) + && !physicalMask) + { + highestMask = physicalMask = 0xFFFFF000; + } + //- +#endif + if (physicalMask && (alignment <= 1)) { alignment = ((physicalMask ^ (-1ULL)) & (physicalMask - 1)); highestMask = (physicalMask | alignment); alignment++; + if (alignment < page_size) + alignment = page_size; } - if ((options & (kIOMemorySharingTypeMask | kIOMapCacheMask)) && (alignment < page_size)) + if ((options & (kIOMemorySharingTypeMask | kIOMapCacheMask | kIOMemoryClearEncrypt)) && (alignment < page_size)) alignment = page_size; if (alignment >= page_size) @@ -128,9 +136,6 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( if ((inTask != kernel_task) && !(options & kIOMemoryPageable)) return false; - if ((options & kIOMemoryPhysicallyContiguous) && !physicalMask) - physicalMask = 0xFFFFFFFF; - // set flags for entry + object create vm_prot_t memEntryCacheMode = VM_PROT_READ | VM_PROT_WRITE; @@ -174,63 +179,41 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( else { memEntryCacheMode |= MAP_MEM_NAMED_REUSE; + vmmap = kernel_map; - if (IOMapper::gSystem) - // assuming mapped space is 2G - lastIOAddr = (1UL << 31) - PAGE_SIZE; - else - lastIOAddr = ptoa_64(gIOHighestAllocatedPage); + // Buffer shouldn't auto prepare they should be prepared explicitly + // But it never was enforced so what are you going to do? 
+ iomdOptions |= kIOMemoryAutoPrepare; - usePhys = (highestMask && (lastIOAddr != (lastIOAddr & highestMask)) - && (alignment <= page_size)); + /* Allocate a wired-down buffer inside kernel space. */ - if (!usePhys && (options & kIOMemoryPhysicallyContiguous)) + if ((options & kIOMemoryPhysicallyContiguous) || highestMask || (alignment > page_size)) { - _buffer = (void *) IOKernelAllocateContiguous(capacity, highestMask, alignment); - usePhys = (NULL == _buffer); + _internalFlags |= kInternalFlagPhysical; + if (highestMask) + { + _internalFlags |= kInternalFlagPageSized; + capacity = round_page(capacity); + } + _buffer = (void *) IOKernelAllocateWithPhysicalRestrict(capacity, highestMask, alignment, + (0 != (options & kIOMemoryPhysicallyContiguous))); } - if (usePhys) + else if (alignment > 1) { - mach_vm_address_t address; - iomdOptions &= ~kIOMemoryTypeVirtual64; - iomdOptions |= kIOMemoryTypePhysical64; - - address = IOMallocPhysical(capacity, highestMask); - _buffer = (void *) address; - if (!_buffer) - return false; - - mapTask = inTask; - inTask = 0; + _buffer = IOMallocAligned(capacity, alignment); } else { - vmmap = kernel_map; - - // Buffer shouldn't auto prepare they should be prepared explicitly - // But it never was enforced so what are you going to do? - iomdOptions |= kIOMemoryAutoPrepare; + _buffer = IOMalloc(capacity); + } - /* Allocate a wired-down buffer inside kernel space. */ - if (options & kIOMemoryPhysicallyContiguous) - { - // attempted allocate already - } - else if (alignment > 1) - { - _buffer = IOMallocAligned(capacity, alignment); - } - else - { - _buffer = IOMalloc(capacity); - } - if (!_buffer) - return false; + if (!_buffer) + { + return false; } } - if( (kIOMemoryTypePhysical64 != (kIOMemoryTypeMask & iomdOptions)) - && (options & (kIOMemoryPageable | kIOMapCacheMask))) { + if( (options & (kIOMemoryPageable | kIOMapCacheMask))) { ipc_port_t sharedMem; vm_size_t size = round_page(capacity); @@ -277,42 +260,6 @@ bool IOBufferMemoryDescriptor::initWithPhysicalMask( inTask, iomdOptions, /* System mapper */ 0)) return false; - if (highestMask && !IOMapper::gSystem) - { - IOMDDMACharacteristics mdSummary; - - bzero(&mdSummary, sizeof(mdSummary)); - IOReturn rtn = dmaCommandOperation( - kIOMDGetCharacteristics, - &mdSummary, sizeof(mdSummary)); - if (rtn) - return false; - - if (mdSummary.fHighestPage) - { - ppnum_t highest; - while (mdSummary.fHighestPage > (highest = gIOHighestAllocatedPage)) - { - if (OSCompareAndSwap(highest, mdSummary.fHighestPage, - (UInt32 *) &gIOHighestAllocatedPage)) - break; - } - lastIOAddr = ptoa_64(mdSummary.fHighestPage); - } - else - lastIOAddr = ptoa_64(gIOLastPage); - - if (lastIOAddr != (lastIOAddr & highestMask)) - { - if (kIOMemoryTypePhysical64 != (_flags & kIOMemoryTypeMask)) - { - // flag a retry - _internalFlags |= kInternalFlagRealloc; - } - return false; - } - } - if (mapTask) { if (!reserved) { @@ -350,18 +297,8 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithOptions( IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; if (me && !me->initWithPhysicalMask(inTask, options, capacity, alignment, 0)) { - bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; - if (retry) - { - me = new IOBufferMemoryDescriptor; - if (me && !me->initWithPhysicalMask(inTask, options, capacity, alignment, 0)) - { - me->release(); - me = 0; - } - } } return me; } @@ -376,18 +313,8 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::inTaskWithPhysicalMask( if (me && 
!me->initWithPhysicalMask(inTask, options, capacity, 1, physicalMask)) { - bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; - if (retry) - { - me = new IOBufferMemoryDescriptor; - if (me && !me->initWithPhysicalMask(inTask, options, capacity, 1, physicalMask)) - { - me->release(); - me = 0; - } - } } return me; } @@ -410,18 +337,8 @@ IOBufferMemoryDescriptor * IOBufferMemoryDescriptor::withOptions( IOBufferMemoryDescriptor *me = new IOBufferMemoryDescriptor; if (me && !me->initWithPhysicalMask(kernel_task, options, capacity, alignment, 0)) { - bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; - if (retry) - { - me = new IOBufferMemoryDescriptor; - if (me && !me->initWithPhysicalMask(kernel_task, options, capacity, alignment, 0)) - { - me->release(); - me = 0; - } - } } return me; } @@ -490,22 +407,8 @@ IOBufferMemoryDescriptor::withBytes(const void * inBytes, | (inContiguous ? kIOMemoryPhysicallyContiguous : 0), inLength, inLength, 0 )) { - bool retry = (0 != (kInternalFlagRealloc & me->_internalFlags)); me->release(); me = 0; - if (retry) - { - me = new IOBufferMemoryDescriptor; - if (me && !me->initWithPhysicalMask( - kernel_task, inDirection | kIOMemoryUnshared - | (inContiguous ? kIOMemoryPhysicallyContiguous : 0), - inLength, inLength, 0 )) - { - me->release(); - me = 0; - } - } - } if (me) @@ -531,13 +434,13 @@ void IOBufferMemoryDescriptor::free() { // Cache all of the relevant information on the stack for use // after we call super::free()! - IOOptionBits flags = _flags; + IOOptionBits flags = _flags; + IOOptionBits internalFlags = _internalFlags; IOOptionBits options = _options; vm_size_t size = _capacity; void * buffer = _buffer; IOMemoryMap * map = 0; IOAddressRange * range = _ranges.v64; - mach_vm_address_t source = range ? range->address : 0; vm_offset_t alignment = _alignment; if (alignment >= page_size) @@ -562,10 +465,12 @@ void IOBufferMemoryDescriptor::free() } else if (buffer) { - if (kIOMemoryTypePhysical64 == (flags & kIOMemoryTypeMask)) - IOFreePhysical(source, size); - else if (options & kIOMemoryPhysicallyContiguous) - IOKernelFreeContiguous((mach_vm_address_t) buffer, size); + if (internalFlags & kInternalFlagPhysical) + { + if (kInternalFlagPageSized & internalFlags) + size = round_page(size); + IOKernelFreePhysical((mach_vm_address_t) buffer, size); + } else if (alignment > 1) IOFreeAligned(buffer, size); else diff --git a/iokit/Kernel/IOCopyMapper.cpp b/iokit/Kernel/IOCopyMapper.cpp deleted file mode 100644 index da48bdc86..000000000 --- a/iokit/Kernel/IOCopyMapper.cpp +++ /dev/null @@ -1,426 +0,0 @@ -/* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. 
- * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -// 45678901234567890123456789012345678901234567890123456789012345678901234567890 - -#include "IOCopyMapper.h" -#include - -#if 0 -#define DEBG(fmt, args...) { kprintf(fmt, ## args); } -#else -#define DEBG(fmt, args...) {} -#endif - -extern "C" { -extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); -extern void ml_get_bouncepool_info( - vm_offset_t *phys_addr, - vm_size_t *size); -extern unsigned int vm_lopage_max_count; -extern unsigned int vm_himemory_mode; -} - -#define super IOMapper - -OSDefineMetaClassAndStructors(IOCopyMapper, IOMapper); - -// Remember no value can be bigger than 31 bits as the sign bit indicates -// that this entry is valid to the hardware and that would be bad if it wasn't -typedef struct FreeDARTEntry { -#if __BIG_ENDIAN__ - unsigned int - /* bool */ fValid : 1, - /* bool */ fInUse : 1, // Allocated but not inserted yet - /* bool */ : 5, // Align size on nibble boundary for debugging - /* uint */ fSize : 5, - /* uint */ : 2, - /* uint */ fNext :18; // offset of FreeDARTEntry's - -#elif __LITTLE_ENDIAN__ - unsigned int - /* uint */ fNext :18, // offset of FreeDARTEntry's - /* uint */ : 2, - /* uint */ fSize : 5, - /* bool */ : 5, // Align size on nibble boundary for debugging - /* bool */ fInUse : 1, // Allocated but not inserted yet - /* bool */ fValid : 1; -#endif -#if __BIG_ENDIAN__ - unsigned int - /* uint */ :14, - /* uint */ fPrev :18; // offset of FreeDARTEntry's - -#elif __LITTLE_ENDIAN__ - unsigned int - /* uint */ fPrev :18, // offset of FreeDARTEntry's - /* uint */ :14; -#endif -} FreeDARTEntry; - -typedef struct ActiveDARTEntry { -#if __BIG_ENDIAN__ - unsigned int - /* bool */ fValid : 1, // Must be set to one if valid - /* uint */ fPPNum :31; // ppnum_t page of translation -#define ACTIVEDARTENTRY(page) { true, page } - -#elif __LITTLE_ENDIAN__ - unsigned int - /* uint */ fPPNum :31, // ppnum_t page of translation - /* bool */ fValid : 1; // Must be set to one if valid -#define ACTIVEDARTENTRY(page) { page, true } - -#endif -}; - -#define kActivePerFree (sizeof(freeDART[0]) / sizeof(ActiveDARTEntry)) - -static SYSCTL_UINT(_kern, OID_AUTO, copyregionmax, - CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, - (unsigned int *)NULL, 0, ""); - -static SYSCTL_UINT(_kern, OID_AUTO, lowpagemax, - CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, - &vm_lopage_max_count, 0, ""); - -static SYSCTL_UINT(_kern, OID_AUTO, himemorymode, - CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, - &vm_himemory_mode, 0, ""); - -bool IOCopyMapper::initHardware(IOService * provider) -{ - UInt32 dartSizePages = 0; - - vm_offset_t phys_addr; - vm_size_t size; - ml_get_bouncepool_info(&phys_addr, &size); - - if (!size) - return (false); - - fBufferPage = atop_32(phys_addr); - dartSizePages = (atop_32(size) + kTransPerPage - 1) / kTransPerPage; - - fTableLock = IOLockAlloc(); - - if (!fTableLock) - return false; - - if (!allocTable(dartSizePages * kMapperPage)) - return false; - - UInt32 canMapPages = dartSizePages * kTransPerPage; - fMapperRegionSize = canMapPages; - for 
(fNumZones = 0; canMapPages; fNumZones++) - canMapPages >>= 1; - fNumZones -= 3; // correct for overshoot and minumum 16K pages allocation - - invalidateDART(0, fMapperRegionSize); - - breakUp(0, fNumZones, 0); - ((FreeDARTEntry *) fTable)->fInUse = true; - - fMapperRegionUsed = kMinZoneSize; - fMapperRegionMaxUsed = fMapperRegionUsed; - - sysctl__kern_copyregionmax.oid_arg1 = &fMapperRegionMaxUsed; - - sysctl_register_oid(&sysctl__kern_copyregionmax); - sysctl_register_oid(&sysctl__kern_lowpagemax); - sysctl_register_oid(&sysctl__kern_himemorymode); - - fDummyPage = IOMallocAligned(0x1000, 0x1000); - fDummyPageNumber = - pmap_find_phys(kernel_pmap, (addr64_t) (uintptr_t) fDummyPage); - - return true; -} - -void IOCopyMapper::free() -{ - if (fDummyPage) { - IOFreeAligned(fDummyPage, 0x1000); - fDummyPage = 0; - fDummyPageNumber = 0; - } - - if (fTableLock) { - IOLockFree(fTableLock); - fTableLock = 0; - } - - super::free(); -} - -// Must be called while locked -void IOCopyMapper::breakUp(unsigned startIndex, unsigned endIndex, unsigned freeInd) -{ - unsigned int zoneSize; - FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; - - do { - // Need to break up bigger blocks of memory till we get one in our - // desired zone. - endIndex--; - zoneSize = (kMinZoneSize/2 << endIndex); - ppnum_t tail = freeInd + zoneSize; - - DEBG("breakup z %d start %x tail %x\n", endIndex, freeInd, tail); - - // By definition free lists must be empty - fFreeLists[endIndex] = tail; - freeDART[tail].fSize = endIndex; - freeDART[tail].fNext = freeDART[tail].fPrev = 0; - } while (endIndex != startIndex); - freeDART[freeInd].fSize = endIndex; -} - -// Zero is never a valid page to return -ppnum_t IOCopyMapper::iovmAlloc(IOItemCount pages) -{ - unsigned int zone, zoneSize, z, cnt; - ppnum_t next, ret = 0; - FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; - - // Can't alloc anything of less than minumum - if (pages < kMinZoneSize) - pages = kMinZoneSize; - - // Can't alloc anything bigger than 1/2 table - if (pages >= fMapperRegionSize/2) - { - panic("iovmAlloc 0x%lx", (long) pages); - return 0; - } - - // Find the appropriate zone for this allocation - for (zone = 0, zoneSize = kMinZoneSize; pages > zoneSize; zone++) - zoneSize <<= 1; - - { - IOLockLock(fTableLock); - - for (;;) { - for (z = zone; z < fNumZones; z++) { - if ( (ret = fFreeLists[z]) ) - break; - } - if (ret) - break; - - fFreeSleepers++; - IOLockSleep(fTableLock, fFreeLists, THREAD_UNINT); - fFreeSleepers--; - } - - // If we didn't find a entry in our size then break up the free block - // that we did find. 
- if (zone != z) - { - DEBG("breakup %d, %d, 0x%x\n", zone, z, ret); - breakUp(zone, z, ret); - } - - freeDART[ret].fInUse = true; // Mark entry as In Use - next = freeDART[ret].fNext; - DEBG("va: 0x%lx, %ld, ret %x next %x\n", (ret * kActivePerFree) + fBufferPage, pages, ret, next); - - fFreeLists[z] = next; - if (next) - freeDART[next].fPrev = 0; - - // ret is free list offset not page offset; - ret *= kActivePerFree; - - ActiveDARTEntry pageEntry = ACTIVEDARTENTRY(fDummyPageNumber); - for (cnt = 0; cnt < pages; cnt++) { - ActiveDARTEntry *activeDART = &fMappings[ret + cnt]; - *activeDART = pageEntry; - } - - fMapperRegionUsed += pages; - if (fMapperRegionUsed > fMapperRegionMaxUsed) - fMapperRegionMaxUsed = fMapperRegionUsed; - - IOLockUnlock(fTableLock); - } - - if (ret) - ret += fBufferPage; - - return ret; -} - - -void IOCopyMapper::invalidateDART(ppnum_t pnum, IOItemCount size) -{ - bzero((void *) &fMappings[pnum], size * sizeof(fMappings[0])); -} - -void IOCopyMapper::iovmFree(ppnum_t addr, IOItemCount pages) -{ - unsigned int zone, zoneSize, z; - FreeDARTEntry *freeDART = (FreeDARTEntry *) fTable; - - if (addr < fBufferPage) - panic("addr < fBufferPage"); - addr -= fBufferPage; - - // Can't free anything of less than minumum - if (pages < kMinZoneSize) - pages = kMinZoneSize; - - // Can't free anything bigger than 1/2 table - if (pages >= fMapperRegionSize/2) - return; - - // Find the appropriate zone for this allocation - for (zone = 0, zoneSize = kMinZoneSize; pages > zoneSize; zone++) - zoneSize <<= 1; - - // Grab lock that protects the dart - IOLockLock(fTableLock); - - invalidateDART(addr, pages); - - addr /= kActivePerFree; - - // We are freeing a block, check to see if pairs are available for - // coalescing. We will walk up the entire chain if we can. 
- for (z = zone; z < fNumZones; z++) { - ppnum_t pair = addr ^ (kMinZoneSize/2 << z); // Find pair address - if (freeDART[pair].fValid || freeDART[pair].fInUse || (freeDART[pair].fSize != z)) - break; - - // The paired alloc entry is free if we are here - ppnum_t next = freeDART[pair].fNext; - ppnum_t prev = freeDART[pair].fPrev; - - // Remove the pair from its freeList - if (prev) - freeDART[prev].fNext = next; - else - fFreeLists[z] = next; - - if (next) - freeDART[next].fPrev = prev; - - // Sort the addr and the pair - if (addr > pair) - addr = pair; - } - - DEBG("vf: 0x%lx, %ld, z %d, head %lx, new %x\n", addr * kActivePerFree + fBufferPage, pages, z, fFreeLists[z], addr); - - // Add the allocation entry into it's free list and re-init it - freeDART[addr].fSize = z; - freeDART[addr].fNext = fFreeLists[z]; - if (fFreeLists[z]) - freeDART[fFreeLists[z]].fPrev = addr; - freeDART[addr].fPrev = 0; - fFreeLists[z] = addr; - - fMapperRegionUsed -= pages; - - if (fFreeSleepers) - IOLockWakeup(fTableLock, fFreeLists, /* oneThread */ false); - - IOLockUnlock(fTableLock); -} - -addr64_t IOCopyMapper::mapAddr(IOPhysicalAddress addr) -{ - if (addr < ptoa_32(fBufferPage)) - { - return (addr64_t) addr; // Not mapped by us anyway - } - - addr -= ptoa_32(fBufferPage); - if (addr >= ptoa_32(fMapperRegionSize)) - { - return (addr64_t) addr; // Not mapped by us anyway - } - else - { - ActiveDARTEntry *activeDART = (ActiveDARTEntry *) fTable; - UInt offset = addr & PAGE_MASK; - - ActiveDARTEntry mappedPage = activeDART[atop_32(addr)]; - if (mappedPage.fValid) - { - return (ptoa_64(mappedPage.fPPNum) | offset); - } - - panic("%s::mapAddr(0x%08lx) not mapped for I/O\n", getName(), (long) addr); - return 0; - } -} - -void IOCopyMapper::iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page) -{ - addr -= fBufferPage; - addr += offset; // Add the offset page to the base address - - ActiveDARTEntry *activeDART = &fMappings[addr]; - ActiveDARTEntry entry = ACTIVEDARTENTRY(page); - *activeDART = entry; -} - -void IOCopyMapper::iovmInsert(ppnum_t addr, IOItemCount offset, - ppnum_t *pageList, IOItemCount pageCount) -{ - addr -= fBufferPage; - addr += offset; // Add the offset page to the base address - - IOItemCount i; - ActiveDARTEntry *activeDART = &fMappings[addr]; - - for (i = 0; i < pageCount; i++) - { - ActiveDARTEntry entry = ACTIVEDARTENTRY(pageList[i]); - activeDART[i] = entry; - } -} - -void IOCopyMapper::iovmInsert(ppnum_t addr, IOItemCount offset, - upl_page_info_t *pageList, IOItemCount pageCount) -{ - addr -= fBufferPage; - addr += offset; // Add the offset page to the base address - - IOItemCount i; - ActiveDARTEntry *activeDART = &fMappings[addr]; - - for (i = 0; i < pageCount; i++) - { - ActiveDARTEntry entry = ACTIVEDARTENTRY(pageList[i].phys_addr); - activeDART[i] = entry; - } -} - - diff --git a/iokit/Kernel/IOCopyMapper.h b/iokit/Kernel/IOCopyMapper.h deleted file mode 100644 index 5371afa04..000000000 --- a/iokit/Kernel/IOCopyMapper.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. 
The rights granted to you under the License - * may not be used to create, or enable the creation or redistribution of, - * unlawful or unlicensed copies of an Apple operating system, or to - * circumvent, violate, or enable the circumvention or violation of, any - * terms of an Apple operating system software license agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and - * limitations under the License. - * - * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ - */ -// 45678901234567890123456789012345678901234567890123456789012345678901234567890 - -#include - -#include -#include -#include -#include - -// General constants about all VART/DART style Address Re-Mapping Tables -#define kMapperPage (4 * 1024) -#define kTransPerPage (kMapperPage / sizeof(ppnum_t)) - -#define kMinZoneSize 4 // Minimum Zone size in pages -#define kMaxNumZones (31 - 14) // 31 bit mapped in 16K super pages - -class IOCopyMapper : public IOMapper -{ - OSDeclareDefaultStructors(IOCopyMapper); - -// alias the fTable variable into our mappings table -#define fMappings ((ActiveDARTEntry *) super::fTable) - -private: - - UInt32 fFreeLists[kMaxNumZones]; - - IOLock *fTableLock; - - void *fDummyPage; - - UInt32 fNumZones; - UInt32 fMapperRegionSize; - UInt32 fMapperRegionUsed; - UInt32 fMapperRegionMaxUsed; - UInt32 fFreeSleepers; - ppnum_t fDummyPageNumber; - ppnum_t fBufferPage; - - // Internal functions - - void breakUp(unsigned start, unsigned end, unsigned freeInd); - void invalidateDART(ppnum_t pnum, IOItemCount size); - void tlbInvalidate(ppnum_t pnum, IOItemCount size); - - virtual void free(); - - virtual bool initHardware(IOService * provider); -public: - virtual ppnum_t iovmAlloc(IOItemCount pages); - virtual void iovmFree(ppnum_t addr, IOItemCount pages); - - virtual void iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page); - virtual void iovmInsert(ppnum_t addr, IOItemCount offset, - ppnum_t *pageList, IOItemCount pageCount); - virtual void iovmInsert(ppnum_t addr, IOItemCount offset, - upl_page_info_t *pageList, IOItemCount pageCount); - - virtual addr64_t mapAddr(IOPhysicalAddress addr); -}; - -extern IOCopyMapper * gIOCopyMapper; diff --git a/iokit/Kernel/IODMACommand.cpp b/iokit/Kernel/IODMACommand.cpp index 66c178d5f..444abf720 100644 --- a/iokit/Kernel/IODMACommand.cpp +++ b/iokit/Kernel/IODMACommand.cpp @@ -39,7 +39,6 @@ #include #include "IOKitKernelInternal.h" -#include "IOCopyMapper.h" #define MAPTYPE(type) ((UInt) (type) & kTypeMask) #define IS_MAPPED(type) (MAPTYPE(type) == kMapped) @@ -77,12 +76,11 @@ enum #endif #if 0 -#define DEBG(fmt, args...) { kprintf(fmt, ## args); } +#define DEBG(fmt, args...) { IOLog(fmt, ## args); kprintf(fmt, ## args); } #else #define DEBG(fmt, args...) 
{} #endif - /**************************** class IODMACommand ***************************/ #undef super @@ -312,7 +310,6 @@ IODMACommand::segmentOp( { IOOptionBits op = (uintptr_t) reference; addr64_t maxPhys, address; - addr64_t remapAddr = 0; uint64_t length; uint32_t numPages; @@ -357,8 +354,7 @@ IODMACommand::segmentOp( if (!length) return (kIOReturnSuccess); - numPages = atop_64(round_page_64(length)); - remapAddr = state->fCopyNext; + numPages = atop_64(round_page_64((address & PAGE_MASK) + length)); if (kWalkPreflight & op) { @@ -366,35 +362,58 @@ IODMACommand::segmentOp( } else { + vm_page_t lastPage; + lastPage = NULL; if (kWalkPrepare & op) { + lastPage = state->fCopyNext; for (IOItemCount idx = 0; idx < numPages; idx++) - gIOCopyMapper->iovmInsert(atop_64(remapAddr), idx, atop_64(address) + idx); - } - if (state->fDoubleBuffer) - state->fCopyNext += length; - else - { - state->fCopyNext += round_page(length); - remapAddr += (address & PAGE_MASK); + { + vm_page_set_offset(lastPage, atop_64(address) + idx); + lastPage = vm_page_get_next(lastPage); + } } - if (SHOULD_COPY_DIR(op, target->fMDSummary.fDirection)) + if (!lastPage || SHOULD_COPY_DIR(op, target->fMDSummary.fDirection)) { - DEBG("cpv: 0x%qx %s 0x%qx, 0x%qx, 0x%02lx\n", remapAddr, - (kWalkSyncIn & op) ? "->" : "<-", - address, length, op); - if (kWalkSyncIn & op) - { // cppvNoModSnk - copypv(remapAddr, address, length, - cppvPsnk | cppvFsnk | cppvPsrc | cppvNoRefSrc ); - } - else + lastPage = state->fCopyNext; + for (IOItemCount idx = 0; idx < numPages; idx++) { - copypv(address, remapAddr, length, - cppvPsnk | cppvFsnk | cppvPsrc | cppvNoRefSrc ); + if (SHOULD_COPY_DIR(op, target->fMDSummary.fDirection)) + { + addr64_t remapAddr; + uint64_t chunk; + + remapAddr = ptoa_64(vm_page_get_phys_page(lastPage)); + if (!state->fDoubleBuffer) + { + remapAddr += (address & PAGE_MASK); + } + chunk = PAGE_SIZE - (address & PAGE_MASK); + if (chunk > length) + chunk = length; + + DEBG("cpv: 0x%qx %s 0x%qx, 0x%qx, 0x%02lx\n", remapAddr, + (kWalkSyncIn & op) ? 
"->" : "<-", + address, chunk, op); + + if (kWalkSyncIn & op) + { // cppvNoModSnk + copypv(remapAddr, address, chunk, + cppvPsnk | cppvFsnk | cppvPsrc | cppvNoRefSrc ); + } + else + { + copypv(address, remapAddr, chunk, + cppvPsnk | cppvFsnk | cppvPsrc | cppvNoRefSrc ); + } + address += chunk; + length -= chunk; + } + lastPage = vm_page_get_next(lastPage); } } + state->fCopyNext = lastPage; } return kIOReturnSuccess; @@ -415,12 +434,12 @@ IODMACommand::walkAll(UInt8 op) state->fMisaligned = false; state->fDoubleBuffer = false; state->fPrepared = false; - state->fCopyNext = 0; - state->fCopyMapperPageAlloc = 0; + state->fCopyNext = NULL; + state->fCopyPageAlloc = 0; state->fLocalMapperPageAlloc = 0; state->fCopyPageCount = 0; - state->fNextRemapIndex = 0; - state->fCopyMD = 0; + state->fNextRemapPage = NULL; + state->fCopyMD = 0; if (!(kWalkDoubleBuffer & op)) { @@ -437,24 +456,26 @@ IODMACommand::walkAll(UInt8 op) if (state->fCopyPageCount) { - IOMapper * mapper; - ppnum_t mapBase = 0; + vm_page_t mapBase = NULL; DEBG("preflight fCopyPageCount %d\n", state->fCopyPageCount); - mapper = gIOCopyMapper; - if (mapper) - mapBase = mapper->iovmAlloc(state->fCopyPageCount); - if (mapBase) + if (!state->fDoubleBuffer) { - state->fCopyMapperPageAlloc = mapBase; - if (state->fCopyMapperPageAlloc && state->fDoubleBuffer) + kern_return_t kr; + kr = vm_page_alloc_list(state->fCopyPageCount, + KMA_LOMEM | KMA_NOPAGEWAIT, &mapBase); + if (KERN_SUCCESS != kr) { - DEBG("contig copy map\n"); - state->fMapContig = true; + DEBG("vm_page_alloc_list(%d) failed (%d)\n", state->fCopyPageCount, kr); + mapBase = NULL; } + } - state->fCopyNext = ptoa_64(state->fCopyMapperPageAlloc); + if (mapBase) + { + state->fCopyPageAlloc = mapBase; + state->fCopyNext = state->fCopyPageAlloc; offset = 0; numSegments = 0-1; ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); @@ -464,8 +485,9 @@ IODMACommand::walkAll(UInt8 op) else { DEBG("alloc IOBMD\n"); - state->fCopyMD = IOBufferMemoryDescriptor::withOptions( - fMDSummary.fDirection, state->fPreparedLength, state->fSourceAlignMask); + mach_vm_address_t mask = 0xFFFFF000; //state->fSourceAlignMask + state->fCopyMD = IOBufferMemoryDescriptor::inTaskWithPhysicalMask(kernel_task, + fMDSummary.fDirection, state->fPreparedLength, mask); if (state->fCopyMD) { @@ -495,9 +517,9 @@ IODMACommand::walkAll(UInt8 op) { DEBG("sync fCopyPageCount %d\n", state->fCopyPageCount); - if (state->fCopyMapperPageAlloc) + if (state->fCopyPageAlloc) { - state->fCopyNext = ptoa_64(state->fCopyMapperPageAlloc); + state->fCopyNext = state->fCopyPageAlloc; offset = 0; numSegments = 0-1; ret = genIOVMSegments(op, segmentOp, (void *) op, &offset, state, &numSegments); @@ -536,11 +558,11 @@ IODMACommand::walkAll(UInt8 op) fMapper->iovmFreeDMACommand(this, state->fLocalMapperPageAlloc, state->fLocalMapperPageCount); state->fLocalMapperPageAlloc = 0; state->fLocalMapperPageCount = 0; - } - if (state->fCopyMapperPageAlloc) + } + if (state->fCopyPageAlloc) { - gIOCopyMapper->iovmFree(state->fCopyMapperPageAlloc, state->fCopyPageCount); - state->fCopyMapperPageAlloc = 0; + vm_page_free_list(state->fCopyPageAlloc, FALSE); + state->fCopyPageAlloc = 0; state->fCopyPageCount = 0; } if (state->fCopyMD) @@ -677,10 +699,10 @@ IODMACommand::prepare(UInt64 offset, UInt64 length, bool flushCache, bool synchr state->fMisaligned = false; state->fDoubleBuffer = false; state->fPrepared = false; - state->fCopyNext = 0; - state->fCopyMapperPageAlloc = 0; + state->fCopyNext = NULL; + 
state->fCopyPageAlloc = 0; state->fCopyPageCount = 0; - state->fNextRemapIndex = 0; + state->fNextRemapPage = NULL; state->fCopyMD = 0; state->fLocalMapperPageAlloc = 0; state->fLocalMapperPageCount = 0; @@ -925,7 +947,7 @@ IODMACommand::genIOVMSegments(uint32_t op, if ((offset == internalState->fPreparedOffset) || (offset != state->fOffset) || internalState->fNewMD) { state->fOffset = 0; state->fIOVMAddr = 0; - internalState->fNextRemapIndex = 0; + internalState->fNextRemapPage = NULL; internalState->fNewMD = false; state->fMapped = (IS_MAPPED(fMappingOptions) && fMapper); mdOp = kIOMDFirstSegment; @@ -943,9 +965,10 @@ IODMACommand::genIOVMSegments(uint32_t op, maxPhys = 0; maxPhys--; - while ((state->fIOVMAddr) || state->fOffset < memLength) + while (state->fIOVMAddr || (state->fOffset < memLength)) { - if (!state->fIOVMAddr) { + // state = next seg + if (!state->fIOVMAddr) { IOReturn rtn; @@ -955,8 +978,6 @@ IODMACommand::genIOVMSegments(uint32_t op, if (internalState->fMapContig && (kWalkClient & op)) { ppnum_t pageNum = internalState->fLocalMapperPageAlloc; - if (!pageNum) - pageNum = internalState->fCopyMapperPageAlloc; state->fIOVMAddr = ptoa_64(pageNum) + offset - internalState->fPreparedOffset; rtn = kIOReturnSuccess; @@ -969,80 +990,90 @@ IODMACommand::genIOVMSegments(uint32_t op, mdOp = kIOMDWalkSegments; } - if (rtn == kIOReturnSuccess) { + if (rtn == kIOReturnSuccess) + { assert(state->fIOVMAddr); assert(state->fLength); + if ((curSeg.fIOVMAddr + curSeg.fLength) == state->fIOVMAddr) { + UInt64 length = state->fLength; + offset += length; + curSeg.fLength += length; + state->fIOVMAddr = 0; + } } else if (rtn == kIOReturnOverrun) state->fIOVMAddr = state->fLength = 0; // At end else return rtn; - }; - - if (!curSeg.fIOVMAddr) { - UInt64 length = state->fLength; + } - offset += length; - curSeg.fIOVMAddr = state->fIOVMAddr | bypassMask; - curSeg.fLength = length; - state->fIOVMAddr = 0; - } - else if ((curSeg.fIOVMAddr + curSeg.fLength == state->fIOVMAddr)) { + // seg = state, offset = end of seg + if (!curSeg.fIOVMAddr) + { UInt64 length = state->fLength; - offset += length; - curSeg.fLength += length; - state->fIOVMAddr = 0; - }; - + offset += length; + curSeg.fIOVMAddr = state->fIOVMAddr | bypassMask; + curSeg.fLength = length; + state->fIOVMAddr = 0; + } if (!state->fIOVMAddr) { - if (kWalkClient & op) + if ((kWalkClient & op) && (curSeg.fIOVMAddr + curSeg.fLength - 1) > maxPhys) { - if ((curSeg.fIOVMAddr + curSeg.fLength - 1) > maxPhys) + if (internalState->fCursor) + { + curSeg.fIOVMAddr = 0; + ret = kIOReturnMessageTooLarge; + break; + } + else if (curSeg.fIOVMAddr <= maxPhys) + { + UInt64 remain, newLength; + + newLength = (maxPhys + 1 - curSeg.fIOVMAddr); + DEBG("trunc %qx, %qx-> %qx\n", curSeg.fIOVMAddr, curSeg.fLength, newLength); + remain = curSeg.fLength - newLength; + state->fIOVMAddr = newLength + curSeg.fIOVMAddr; + curSeg.fLength = newLength; + state->fLength = remain; + offset -= remain; + } + else { - if (internalState->fCursor) + UInt64 addr = curSeg.fIOVMAddr; + ppnum_t addrPage = atop_64(addr); + vm_page_t remap = NULL; + UInt64 remain, newLength; + + DEBG("sparse switch %qx, %qx ", addr, curSeg.fLength); + + remap = internalState->fNextRemapPage; + if (remap && (addrPage == vm_page_get_offset(remap))) { - curSeg.fIOVMAddr = 0; - ret = kIOReturnMessageTooLarge; - break; } - else if (curSeg.fIOVMAddr <= maxPhys) + else for (remap = internalState->fCopyPageAlloc; + remap && (addrPage != vm_page_get_offset(remap)); + remap = vm_page_get_next(remap)) { - UInt64 
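/*
 * Illustrative sketch, not part of the original patch: with the copy mapper
 * gone, segmentOp() above copies through a list of low-memory pages, splitting
 * a possibly page-unaligned physical range into per-page chunks.  This models
 * the scattered (non double-buffered) case; copy_chunk() and the list node are
 * hypothetical stand-ins for copypv() and vm_page_get_phys_page()/vm_page_get_next().
 */
#include <stdint.h>
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096ull
#define SKETCH_PAGE_MASK (SKETCH_PAGE_SIZE - 1)

struct copy_page { uint64_t phys_page; struct copy_page *next; };

static void copy_chunk(uint64_t dst, uint64_t src, uint64_t len)
{ (void)dst; (void)src; (void)len; /* stands in for copypv() */ }

/* Copy 'length' bytes starting at physical 'address' out to the copy pages;
 * returns where the caller should resume (fCopyNext in the patch). */
static const struct copy_page *
copy_out_range(const struct copy_page *page, uint64_t address, uint64_t length)
{
    while (length && page) {
        /* Only the first chunk can start mid-page; later chunks are page sized. */
        uint64_t remap = (page->phys_page * SKETCH_PAGE_SIZE) + (address & SKETCH_PAGE_MASK);
        uint64_t chunk = SKETCH_PAGE_SIZE - (address & SKETCH_PAGE_MASK);
        if (chunk > length)
            chunk = length;
        copy_chunk(remap, address, chunk);
        address += chunk;
        length  -= chunk;
        page = page->next;
    }
    return page;
}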
remain, newLength; - - newLength = (maxPhys + 1 - curSeg.fIOVMAddr); - DEBG("trunc %qx, %qx-> %qx\n", curSeg.fIOVMAddr, curSeg.fLength, newLength); - remain = curSeg.fLength - newLength; - state->fIOVMAddr = newLength + curSeg.fIOVMAddr; - curSeg.fLength = newLength; - state->fLength = remain; - offset -= remain; } - else if (gIOCopyMapper) + + if (!remap) panic("no remap page found"); + + curSeg.fIOVMAddr = ptoa_64(vm_page_get_phys_page(remap)) + + (addr & PAGE_MASK); + internalState->fNextRemapPage = vm_page_get_next(remap); + + newLength = PAGE_SIZE - (addr & PAGE_MASK); + if (newLength < curSeg.fLength) { - DEBG("sparse switch %qx, %qx ", curSeg.fIOVMAddr, curSeg.fLength); - if (trunc_page_64(curSeg.fIOVMAddr) == gIOCopyMapper->mapAddr( - ptoa_64(internalState->fCopyMapperPageAlloc + internalState->fNextRemapIndex))) - { - - curSeg.fIOVMAddr = ptoa_64(internalState->fCopyMapperPageAlloc + internalState->fNextRemapIndex) - + (curSeg.fIOVMAddr & PAGE_MASK); - internalState->fNextRemapIndex += atop_64(round_page(curSeg.fLength)); - } - else for (UInt checkRemapIndex = 0; checkRemapIndex < internalState->fCopyPageCount; checkRemapIndex++) - { - if (trunc_page_64(curSeg.fIOVMAddr) == gIOCopyMapper->mapAddr( - ptoa_64(internalState->fCopyMapperPageAlloc + checkRemapIndex))) - { - curSeg.fIOVMAddr = ptoa_64(internalState->fCopyMapperPageAlloc + checkRemapIndex) - + (curSeg.fIOVMAddr & PAGE_MASK); - internalState->fNextRemapIndex = checkRemapIndex + atop_64(round_page(curSeg.fLength)); - break; - } - } - DEBG("-> %qx, %qx\n", curSeg.fIOVMAddr, curSeg.fLength); + remain = curSeg.fLength - newLength; + state->fIOVMAddr = addr + newLength; + curSeg.fLength = newLength; + state->fLength = remain; + offset -= remain; } + DEBG("-> %qx, %qx offset %qx\n", curSeg.fIOVMAddr, curSeg.fLength, offset); } } diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index 37f190e55..6b906baa9 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -168,6 +168,7 @@ to restrict I/O ops. /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +extern unsigned int save_kdebug_enable; extern uint32_t gIOHibernateState; uint32_t gIOHibernateMode; static char gIOHibernateBootSignature[256+1]; @@ -468,6 +469,12 @@ IOHibernatePollerIODone(IOPolledFileIOVars * vars, bool abortable) } } + if ((kIOReturnSuccess == err) && abortable && hibernate_should_abort()) + { + err = kIOReturnAborted; + HIBLOG("IOPolledInterface::checkForWork sw abort\n"); + } + if (err) { HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err); @@ -564,15 +571,20 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, &file_extent_callback, &ctx, &hibernate_image_dev, &vars->block0, - &maxiobytes); + &maxiobytes, + &vars->solid_state); if (!vars->fileRef) { err = kIOReturnNoSpace; break; } gIOHibernateFileRef = vars->fileRef; - HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx\n", filename, ctx.size, - vars->block0, maxiobytes); + + if (kIOHibernateModeSSDInvert & gIOHibernateMode) + vars->solid_state = vars->solid_state ? false : true; + + HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx ssd %d\n", filename, ctx.size, + vars->block0, maxiobytes, vars->solid_state); if (ctx.size < 1*1024*1024) // check against image size estimate! 
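/*
 * Illustrative sketch, not part of the original patch: genIOVMSegments() above
 * now locates the low-memory copy page that shadows an out-of-range physical
 * page by first checking the remembered cursor (fNextRemapPage) and then, if
 * needed, walking the whole copy-page list.  The struct and field names are
 * hypothetical stand-ins for vm_page_get_offset()/vm_page_get_next().
 */
#include <stdint.h>
#include <stddef.h>

struct remap_page {
    uint64_t           shadowed_page;   /* original page number (vm_page_get_offset) */
    uint64_t           phys_page;       /* physical page of the low-memory copy */
    struct remap_page *next;
};

static struct remap_page *
find_remap_page(struct remap_page *copy_list, struct remap_page *cursor, uint64_t addr_page)
{
    if (cursor && (cursor->shadowed_page == addr_page))
        return cursor;                        /* common sequential case */
    for (struct remap_page *p = copy_list; p != NULL; p = p->next)
        if (p->shadowed_page == addr_page)
            return p;                         /* slow path: full list walk */
    return NULL;                              /* the patch panics here: "no remap page found" */
}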
{ err = kIOReturnNoSpace; @@ -805,14 +817,18 @@ IOPolledFileWrite(IOPolledFileIOVars * vars, uint32_t length = (vars->bufferOffset); #if CRYPTO - if (cryptvars && vars->encryptStart && (vars->position > vars->encryptStart)) + if (cryptvars && vars->encryptStart + && (vars->position > vars->encryptStart) + && ((vars->position - length) < vars->encryptEnd)) { uint32_t encryptLen, encryptStart; encryptLen = vars->position - vars->encryptStart; if (encryptLen > length) encryptLen = length; encryptStart = length - encryptLen; - + if (vars->position > vars->encryptEnd) + encryptLen -= (vars->position - vars->encryptEnd); + // encrypt the buffer aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart, &cryptvars->aes_iv[0], @@ -975,8 +991,7 @@ IOHibernateSystemSleep(void) OSData * data; OSObject * obj; OSString * str; - OSNumber * num; - OSDictionary *sleepOverrideOptions; + bool dsSSD; IOHibernateVars * vars = &gIOHibernateVars; @@ -990,59 +1005,13 @@ IOHibernateSystemSleep(void) if (kIOLogHibernate & gIOKitDebug) gIOHibernateDebugFlags |= kIOHibernateDebugRestoreLogs; - /* The invocation of IOPMSleepSystemWithOptions() may override - * existing hibernation settings. - */ - sleepOverrideOptions = (OSDictionary *)OSDynamicCast( OSDictionary, - IOService::getPMRootDomain()->copyProperty(kRootDomainSleepOptionsKey)); - - - /* Hibernate mode overriden by sleep otions ? */ - obj = NULL; - - if (sleepOverrideOptions) { - obj = sleepOverrideOptions->getObject(kIOHibernateModeKey); - if (obj) obj->retain(); - } - - if(!obj) { - obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateModeKey); - } - - if (obj && (num = OSDynamicCast(OSNumber, obj)) ) - { - gIOHibernateMode = num->unsigned32BitValue(); + if (IOService::getPMRootDomain()->getHibernateSettings( + &gIOHibernateMode, &gIOHibernateFreeRatio, &gIOHibernateFreeTime)) if (kIOHibernateModeSleep & gIOHibernateMode) // default to discard clean for safe sleep gIOHibernateMode ^= (kIOHibernateModeDiscardCleanInactive | kIOHibernateModeDiscardCleanActive); - } - if (obj) obj->release(); - - /* Hibernate free rotio overriden by sleep options ? 
*/ - obj = NULL; - - if (sleepOverrideOptions) { - obj = sleepOverrideOptions->getObject(kIOHibernateFreeRatioKey); - if (obj) obj->retain(); - } - - if(!obj) { - obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFreeRatioKey); - } - if (obj && (num = OSDynamicCast(OSNumber, obj))) - { - gIOHibernateFreeRatio = num->unsigned32BitValue(); - } - if (obj) obj->release(); - - if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFreeTimeKey))) - { - if ((num = OSDynamicCast(OSNumber, obj))) - gIOHibernateFreeTime = num->unsigned32BitValue(); - obj->release(); - } if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey))) { if ((str = OSDynamicCast(OSString, obj))) @@ -1051,9 +1020,6 @@ IOHibernateSystemSleep(void) obj->release(); } - if (sleepOverrideOptions) - sleepOverrideOptions->release(); - if (!gIOHibernateMode || !gIOHibernateFilename[0]) return (kIOReturnUnsupported); @@ -1093,22 +1059,46 @@ IOHibernateSystemSleep(void) bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); gIOHibernateCurrentHeader->debugFlags = gIOHibernateDebugFlags; + dsSSD = (vars->fileVars->solid_state + && (kOSBooleanTrue == IOService::getPMRootDomain()->getProperty(kIOPMDeepSleepEnabledKey))); + + if (dsSSD) + { + gIOHibernateCurrentHeader->options |= + kIOHibernateOptionSSD + | kIOHibernateOptionColor; + } + else + { + gIOHibernateCurrentHeader->options |= kIOHibernateOptionProgress; + } + boolean_t encryptedswap; + AbsoluteTime startTime, endTime; + uint64_t nsec; + + clock_get_uptime(&startTime); err = hibernate_setup(gIOHibernateCurrentHeader, gIOHibernateFreeRatio, gIOHibernateFreeTime, + dsSSD, &vars->page_list, &vars->page_list_wired, &encryptedswap); + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &startTime); + absolutetime_to_nanoseconds(endTime, &nsec); + HIBLOG("hibernate_setup(%d) took %qd ms\n", err, nsec / 1000000ULL); + if (KERN_SUCCESS != err) - { - HIBLOG("hibernate_setup(%d)\n", err); break; - } if (encryptedswap) gIOHibernateMode ^= kIOHibernateModeEncrypt; - vars->videoAllocSize = kVideoMapSize; - if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize)) - vars->videoMapping = 0; + if (kIOHibernateOptionProgress & gIOHibernateCurrentHeader->options) + { + vars->videoAllocSize = kVideoMapSize; + if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize)) + vars->videoMapping = 0; + } // generate crypt keys for (uint32_t i = 0; i < sizeof(vars->wiredCryptKey); i++) @@ -1449,7 +1439,9 @@ IOHibernateSystemHasSlept(void) vars->previewBuffer = 0; } - if (vars->previewBuffer && (data = OSDynamicCast(OSData, + if ((kIOHibernateOptionProgress & gIOHibernateCurrentHeader->options) + && vars->previewBuffer + && (data = OSDynamicCast(OSData, IOService::getPMRootDomain()->getProperty(kIOHibernatePreviewActiveKey)))) { UInt32 flags = *((UInt32 *)data->getBytesNoCopy()); @@ -1471,8 +1463,8 @@ IOHibernateSystemHasSlept(void) vars->consoleMapping = (uint8_t *) consoleInfo.v_baseAddr; HIBPRINT("video %p %d %d %d\n", - vars->consoleMapping, gIOHibernateGraphicsInfo->depth, - gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height); + vars->consoleMapping, graphicsInfo->depth, + graphicsInfo->width, graphicsInfo->height); if (vars->consoleMapping) ProgressInit(graphicsInfo, vars->consoleMapping, &graphicsInfo->progressSaveUnder[0][0], sizeof(graphicsInfo->progressSaveUnder)); @@ -1510,6 +1502,29 @@ IOHibernateSystemWake(void) vars->previewBuffer = 0; } + if 
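/*
 * Illustrative sketch, not part of the original patch: the image header options
 * chosen above depend on whether the hibernation file sits on solid-state
 * storage and deep sleep is enabled.  The option constants below are
 * hypothetical stand-ins for kIOHibernateOptionSSD/Color/Progress (their real
 * values are not shown in this patch).
 */
#include <stdbool.h>
#include <stdint.h>

#define SKETCH_OPTION_SSD      0x1
#define SKETCH_OPTION_COLOR    0x2
#define SKETCH_OPTION_PROGRESS 0x4

static uint32_t image_header_options(bool solid_state, bool deep_sleep_enabled)
{
    /* SSD plus deep sleep skips the on-screen progress bar. */
    if (solid_state && deep_sleep_enabled)
        return SKETCH_OPTION_SSD | SKETCH_OPTION_COLOR;
    return SKETCH_OPTION_PROGRESS;
}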
(kIOHibernateStateWakingFromHibernate == gIOHibernateState) + { + IOService::getPMRootDomain()->setProperty(kIOHibernateOptionsKey, + gIOHibernateCurrentHeader->options, 32); + } + else + { + IOService::getPMRootDomain()->removeProperty(kIOHibernateOptionsKey); + } + + if ((kIOHibernateStateWakingFromHibernate == gIOHibernateState) + && (kIOHibernateGfxStatusUnknown != gIOHibernateGraphicsInfo->gfxStatus)) + { + IOService::getPMRootDomain()->setProperty(kIOHibernateGfxStatusKey, + &gIOHibernateGraphicsInfo->gfxStatus, + sizeof(gIOHibernateGraphicsInfo->gfxStatus)); + } + else + { + IOService::getPMRootDomain()->removeProperty(kIOHibernateGfxStatusKey); + } + + if (vars->fileVars) { IOPolledFileClose(vars->fileVars); @@ -1673,6 +1688,20 @@ hibernate_setup_for_wake(void) #define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 1 : -1] +static bool +no_encrypt_page(vm_offset_t ppnum) +{ + if (pmap_is_noencrypt((ppnum_t)ppnum) == TRUE) + { + return true; + } + return false; +} + +uint32_t wired_pages_encrypted = 0; +uint32_t dirty_pages_encrypted = 0; +uint32_t wired_pages_clear = 0; + extern "C" uint32_t hibernate_write_image(void) { @@ -1684,15 +1713,15 @@ hibernate_write_image(void) uint32_t pageCount, pagesDone; IOReturn err; - vm_offset_t ppnum; - IOItemCount page, count; + vm_offset_t ppnum, page; + IOItemCount count; uint8_t * src; uint8_t * data; IOByteCount pageCompressedSize; uint64_t compressedSize, uncompressedSize; uint64_t image1Size = 0; uint32_t bitmap_size; - bool iterDone, pollerOpen, needEncryptStart; + bool iterDone, pollerOpen, needEncrypt; uint32_t restore1Sum, sum, sum1, sum2; uint32_t tag; uint32_t pageType; @@ -1700,6 +1729,7 @@ hibernate_write_image(void) AbsoluteTime startTime, endTime; AbsoluteTime allTime, compTime, decoTime; + uint64_t compBytes; uint64_t nsec; uint32_t lastProgressStamp = 0; uint32_t progressStamp; @@ -1708,9 +1738,18 @@ hibernate_write_image(void) hibernate_cryptvars_t _cryptvars; hibernate_cryptvars_t * cryptvars = 0; + wired_pages_encrypted = 0; + dirty_pages_encrypted = 0; + wired_pages_clear = 0; + if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) return (false /* sleep */ ); + if (kIOHibernateModeSleep & gIOHibernateMode) + kdebug_enable = save_kdebug_enable; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 1) | DBG_FUNC_START, 0, 0, 0, 0, 0); + restore1Sum = sum1 = sum2 = 0; #if CRYPTO @@ -1763,7 +1802,7 @@ hibernate_write_image(void) } #endif - needEncryptStart = (0 != (kIOHibernateModeEncrypt & gIOHibernateMode)); + needEncrypt = (0 != (kIOHibernateModeEncrypt & gIOHibernateMode)); AbsoluteTime_to_scalar(&compTime) = 0; AbsoluteTime_to_scalar(&decoTime) = 0; @@ -1776,8 +1815,6 @@ hibernate_write_image(void) { compressedSize = 0; uncompressedSize = 0; - iterDone = false; - pageType = 0; // wired pages first IOPolledFileSeek(vars->fileVars, sizeof(IOHibernateImageHeader)); @@ -1825,9 +1862,9 @@ hibernate_write_image(void) for (page = 0; page < count; page++) { if ((src < &gIOHibernateRestoreStack[0]) || (src >= &gIOHibernateRestoreStackEnd[0])) - restore1Sum += hibernate_sum(src, page_size); + restore1Sum += hibernate_sum_page(src, header->restore1CodePage + page); else - restore1Sum += 0x10000001; + restore1Sum += 0x00000000; src += page_size; } sum1 = restore1Sum; @@ -1890,8 +1927,10 @@ hibernate_write_image(void) header->previewSize = count + ppnum; for (page = 0; page < count; page += page_size) - sum1 += hibernate_sum(src + page, page_size); - + { + phys64 = vars->previewBuffer->getPhysicalSegment(page, NULL, 
kIOMemoryMapperNone); + sum1 += hibernate_sum_page(src + page, atop_64(phys64)); + } err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); if (kIOReturnSuccess != err) break; @@ -1951,138 +1990,187 @@ hibernate_write_image(void) src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); - ppnum = 0; pagesDone = 0; lastBlob = 0; HIBLOG("writing %d pages\n", pageCount); - do + enum + // pageType + { + kWired = 0x02, + kEncrypt = 0x01, + kWiredEncrypt = kWired | kEncrypt, + kWiredClear = kWired, + kUnwiredEncrypt = kEncrypt + }; + + for (pageType = kWiredEncrypt; pageType >= kUnwiredEncrypt; pageType--) { - count = hibernate_page_list_iterate(pageType ? vars->page_list : vars->page_list_wired, - &ppnum); -// kprintf("[%d](%x : %x)\n", pageType, ppnum, count); - - iterDone = !count; - - pageAndCount[0] = ppnum; - pageAndCount[1] = count; - err = IOPolledFileWrite(vars->fileVars, - (const uint8_t *) &pageAndCount, sizeof(pageAndCount), - cryptvars); - if (kIOReturnSuccess != err) - break; - - for (page = 0; page < count; page++) + if (needEncrypt && (kEncrypt & pageType)) { - err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(ppnum), page_size); - if (err) + vars->fileVars->encryptStart = (vars->fileVars->position & ~(AES_BLOCK_SIZE - 1)); + vars->fileVars->encryptEnd = UINT64_MAX; + HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart); + + if (kUnwiredEncrypt == pageType) { - HIBLOG("IOMemoryDescriptorWriteFromPhysical %d [%ld] %x\n", __LINE__, (long)ppnum, err); - break; + // start unwired image + bcopy(&cryptvars->aes_iv[0], + &gIOHibernateCryptWakeContext.aes_iv[0], + sizeof(cryptvars->aes_iv)); + cryptvars = &gIOHibernateCryptWakeContext; } + } + for (iterDone = false, ppnum = 0; !iterDone; ) + { + count = hibernate_page_list_iterate((kWired & pageType) + ? 
vars->page_list_wired : vars->page_list, + &ppnum); +// kprintf("[%d](%x : %x)\n", pageType, ppnum, count); + iterDone = !count; - sum = hibernate_sum(src, page_size); - - clock_get_uptime(&startTime); - - pageCompressedSize = WKdm_compress ((WK_word*) src, (WK_word*) (src + page_size), PAGE_SIZE_IN_WORDS); - - clock_get_uptime(&endTime); - ADD_ABSOLUTETIME(&compTime, &endTime); - SUB_ABSOLUTETIME(&compTime, &startTime); - - if (kIOHibernateModeEncrypt & gIOHibernateMode) - pageCompressedSize = (pageCompressedSize + AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1); - - if (pageCompressedSize > page_size) + if (count && (kWired & pageType) && needEncrypt) { -// HIBLOG("------------lose: %d\n", pageCompressedSize); - pageCompressedSize = page_size; + uint32_t checkIndex; + for (checkIndex = 0; + (checkIndex < count) + && (((kEncrypt & pageType) == 0) == no_encrypt_page(ppnum + checkIndex)); + checkIndex++) + {} + if (!checkIndex) + { + ppnum++; + continue; + } + count = checkIndex; } - if (pageCompressedSize != page_size) - data = (src + page_size); - else - data = src; - - tag = pageCompressedSize | kIOHibernateTagSignature; - - if (pageType) - sum2 += sum; + switch (pageType) + { + case kWiredEncrypt: wired_pages_encrypted += count; break; + case kWiredClear: wired_pages_clear += count; break; + case kUnwiredEncrypt: dirty_pages_encrypted += count; break; + } + + if (iterDone && (kWiredEncrypt == pageType)) {/* not yet end of wired list */} else - sum1 += sum; - - if (needEncryptStart && (ppnum >= atop_32(sectDATAB))) { - // start encrypting partway into the data about to be written - vars->fileVars->encryptStart = (vars->fileVars->position + AES_BLOCK_SIZE - 1) - & ~(AES_BLOCK_SIZE - 1); - needEncryptStart = false; + pageAndCount[0] = ppnum; + pageAndCount[1] = count; + err = IOPolledFileWrite(vars->fileVars, + (const uint8_t *) &pageAndCount, sizeof(pageAndCount), + cryptvars); + if (kIOReturnSuccess != err) + break; } - - err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars); - if (kIOReturnSuccess != err) - break; - - err = IOPolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars); - if (kIOReturnSuccess != err) - break; - - compressedSize += pageCompressedSize; - if (pageCompressedSize) - uncompressedSize += page_size; - ppnum++; - pagesDone++; - - if (vars->consoleMapping && (0 == (1023 & pagesDone))) - { - blob = ((pagesDone * kIOHibernateProgressCount) / pageCount); - if (blob != lastBlob) - { - ProgressUpdate(gIOHibernateGraphicsInfo, vars->consoleMapping, lastBlob, blob); - lastBlob = blob; - } - } - if (0 == (8191 & pagesDone)) + + for (page = ppnum; page < (ppnum + count); page++) { + err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(page), page_size); + if (err) + { + HIBLOG("IOMemoryDescriptorWriteFromPhysical %d [%ld] %x\n", __LINE__, (long)page, err); + break; + } + + sum = hibernate_sum_page(src, page); + if (kWired & pageType) + sum1 += sum; + else + sum2 += sum; + + clock_get_uptime(&startTime); + + pageCompressedSize = WKdm_compress ((WK_word*) src, (WK_word*) (src + page_size), PAGE_SIZE_IN_WORDS); + clock_get_uptime(&endTime); - SUB_ABSOLUTETIME(&endTime, &allTime); - absolutetime_to_nanoseconds(endTime, &nsec); - progressStamp = nsec / 750000000ULL; - if (progressStamp != lastProgressStamp) + ADD_ABSOLUTETIME(&compTime, &endTime); + SUB_ABSOLUTETIME(&compTime, &startTime); + compBytes += page_size; + + if (kIOHibernateModeEncrypt & gIOHibernateMode) + pageCompressedSize = (pageCompressedSize + 
AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1); + + if (pageCompressedSize > page_size) { - lastProgressStamp = progressStamp; - HIBPRINT("pages %d (%d%%)\n", pagesDone, (100 * pagesDone) / pageCount); +// HIBLOG("------------lose: %d\n", pageCompressedSize); + pageCompressedSize = page_size; + } + + if (pageCompressedSize != page_size) + data = (src + page_size); + else + data = src; + + tag = pageCompressedSize | kIOHibernateTagSignature; + err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars); + if (kIOReturnSuccess != err) + break; + + err = IOPolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars); + if (kIOReturnSuccess != err) + break; + + compressedSize += pageCompressedSize; + if (pageCompressedSize) + uncompressedSize += page_size; + pagesDone++; + + if (vars->consoleMapping && (0 == (1023 & pagesDone))) + { + blob = ((pagesDone * kIOHibernateProgressCount) / pageCount); + if (blob != lastBlob) + { + ProgressUpdate(gIOHibernateGraphicsInfo, vars->consoleMapping, lastBlob, blob); + lastBlob = blob; + } + } + if (0 == (8191 & pagesDone)) + { + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); + absolutetime_to_nanoseconds(endTime, &nsec); + progressStamp = nsec / 750000000ULL; + if (progressStamp != lastProgressStamp) + { + lastProgressStamp = progressStamp; + HIBPRINT("pages %d (%d%%)\n", pagesDone, (100 * pagesDone) / pageCount); + } } } + if (kIOReturnSuccess != err) + break; + ppnum = page; } + if (kIOReturnSuccess != err) break; - if (iterDone && !pageType) + + if ((kEncrypt & pageType)) { + vars->fileVars->encryptEnd = (vars->fileVars->position + AES_BLOCK_SIZE - 1) + & ~(AES_BLOCK_SIZE - 1); + HIBLOG("encryptEnd %qx\n", vars->fileVars->encryptEnd); + } + + if (kWiredEncrypt != pageType) + { + // end of image1/2 - fill to next block err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); if (kIOReturnSuccess != err) break; - - iterDone = false; - pageType = 1; - ppnum = 0; + } + if (kWiredClear == pageType) + { + // end wired image + header->encryptStart = vars->fileVars->encryptStart; + header->encryptEnd = vars->fileVars->encryptEnd; image1Size = vars->fileVars->position; - if (cryptvars) - { - bcopy(&cryptvars->aes_iv[0], - &gIOHibernateCryptWakeContext.aes_iv[0], - sizeof(cryptvars->aes_iv)); - cryptvars = &gIOHibernateCryptWakeContext; - } - HIBLOG("image1Size %qd\n", image1Size); + HIBLOG("image1Size %qd, encryptStart1 %qx, End1 %qx\n", + image1Size, header->encryptStart, header->encryptEnd); } } - while (!iterDone); - if (kIOReturnSuccess != err) - break; - err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); if (kIOReturnSuccess != err) break; @@ -2092,7 +2180,6 @@ hibernate_write_image(void) header->image1Size = image1Size; header->bitmapSize = bitmap_size; header->pageCount = pageCount; - header->encryptStart = vars->fileVars->encryptStart; header->restore1Sum = restore1Sum; header->image1Sum = sum1; @@ -2108,6 +2195,8 @@ hibernate_write_image(void) header->fileExtentMapSize = sizeof(header->fileExtentMap); bcopy(&fileExtents[0], &header->fileExtentMap[0], count); + header->deviceBase = vars->fileVars->block0; + IOPolledFileSeek(vars->fileVars, 0); err = IOPolledFileWrite(vars->fileVars, (uint8_t *) header, sizeof(IOHibernateImageHeader), @@ -2147,6 +2236,9 @@ hibernate_write_image(void) uncompressedSize ? 
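/*
 * Illustrative sketch, not part of the original patch: hibernate_write_image()
 * above now writes the image in three passes - wired pages to be encrypted,
 * wired "no-encrypt" pages in the clear, then unwired (dirty) pages - and
 * brackets each encrypted region with encryptStart/encryptEnd.  The constants
 * mirror the pageType enum in the patch; pass_for_page() is a hypothetical
 * helper showing how a page is classified when image encryption is enabled.
 */
#include <stdbool.h>

enum {
    kSketchWired          = 0x02,
    kSketchEncrypt        = 0x01,
    kSketchWiredEncrypt   = kSketchWired | kSketchEncrypt,   /* pass 1 */
    kSketchWiredClear     = kSketchWired,                    /* pass 2 */
    kSketchUnwiredEncrypt = kSketchEncrypt                   /* pass 3 */
};

static int pass_for_page(bool wired, bool pmap_no_encrypt)
{
    if (wired)    /* wired pages are split by the pmap no-encrypt flag */
        return pmap_no_encrypt ? kSketchWiredClear : kSketchWiredEncrypt;
    return kSketchUnwiredEncrypt;   /* dirty pages go to the final, encrypted pass */
}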
((int) ((compressedSize * 100ULL) / uncompressedSize)) : 0, sum1, sum2); + HIBLOG("wired_pages_encrypted %d, wired_pages_clear %d, dirty_pages_encrypted %d\n", + wired_pages_encrypted, wired_pages_clear, dirty_pages_encrypted); + if (vars->fileVars->io) (void) IOHibernatePollerIODone(vars->fileVars, false); @@ -2162,6 +2254,9 @@ hibernate_write_image(void) // should we come back via regular wake, set the state in memory. gIOHibernateState = kIOHibernateStateInactive; + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 1) | DBG_FUNC_END, + wired_pages_encrypted, wired_pages_clear, dirty_pages_encrypted, 0, 0); + if (kIOReturnSuccess == err) { if (kIOHibernateModeSleep & gIOHibernateMode) @@ -2197,6 +2292,7 @@ hibernate_machine_init(void) IOReturn err; uint32_t sum; uint32_t pagesDone; + uint32_t pagesRead = 0; AbsoluteTime allTime, endTime; uint64_t nsec; uint32_t lastProgressStamp = 0; @@ -2227,9 +2323,9 @@ hibernate_machine_init(void) gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]); - HIBPRINT("video %x %d %d %d\n", + HIBPRINT("video %x %d %d %d status %x\n", gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, - gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height); + gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height, gIOHibernateGraphicsInfo->gfxStatus); if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode) hibernate_page_list_discard(vars->page_list); @@ -2359,7 +2455,7 @@ hibernate_machine_init(void) else decoOffset = 0; - sum += hibernate_sum((src + decoOffset), page_size); + sum += hibernate_sum_page((src + decoOffset), ppnum); err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); if (err) @@ -2370,6 +2466,7 @@ hibernate_machine_init(void) ppnum++; pagesDone++; + pagesRead++; if (vars->videoMapSize && (0 == (1023 & pagesDone))) { @@ -2425,6 +2522,8 @@ hibernate_machine_init(void) HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %qd ms\n", pagesDone, sum, nsec / 1000000ULL); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_NONE, pagesRead, pagesDone, 0, 0, 0); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h index 03e422bae..2b2e5802e 100644 --- a/iokit/Kernel/IOHibernateInternal.h +++ b/iokit/Kernel/IOHibernateInternal.h @@ -68,10 +68,12 @@ struct IOPolledFileIOVars IOByteCount bufferHalf; IOByteCount extentRemaining; IOByteCount lastRead; + boolean_t solid_state; uint64_t block0; uint64_t position; uint64_t extentPosition; uint64_t encryptStart; + uint64_t encryptEnd; IOPolledFileExtent * extentMap; IOPolledFileExtent * currentExtent; bool io; @@ -91,7 +93,7 @@ enum extern "C" #endif /* __cplusplus */ uint32_t -hibernate_sum(uint8_t *buf, int32_t len); +hibernate_sum_page(uint8_t *buf, uint32_t ppnum); extern vm_offset_t sectHIBB; extern unsigned long sectSizeHIB; diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index ea2180933..280b8c430 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -213,39 +213,10 @@ static void fatal(void) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ -#define BASE 65521L /* largest prime smaller than 65536 */ -#define NMAX 5000 -// NMAX (was 5521) the largest n 
such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 - -#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} -#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); -#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); -#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); -#define DO16(buf) DO8(buf,0); DO8(buf,8); - uint32_t -hibernate_sum(uint8_t *buf, int32_t len) +hibernate_sum_page(uint8_t *buf, uint32_t ppnum) { - unsigned long s1 = 1; // adler & 0xffff; - unsigned long s2 = 0; // (adler >> 16) & 0xffff; - int k; - - while (len > 0) { - k = len < NMAX ? len : NMAX; - len -= k; - while (k >= 16) { - DO16(buf); - buf += 16; - k -= 16; - } - if (k != 0) do { - s1 += *buf++; - s2 += s1; - } while (--k); - s1 %= BASE; - s2 %= BASE; - } - return (s2 << 16) | s1; + return (((uint32_t *)buf)[((PAGE_SIZE >> 2) - 1) & ppnum]); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -407,7 +378,7 @@ store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, src = buffer; } - sum = hibernate_sum((uint8_t *) src, PAGE_SIZE); + sum = hibernate_sum_page((uint8_t *) src, ppnum); if (((uint64_t) (uintptr_t) src) == dst) src = 0; @@ -491,7 +462,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, if (p3) bcopy_internal(p3, gIOHibernateCryptWakeVars, - sizeof(hibernate_cryptvars_t)); + sizeof(hibernate_cryptwakevars_t)); src = (uint32_t *) (((uintptr_t) &header->fileExtentMap[0]) diff --git a/iokit/Kernel/IOKitKernelInternal.h b/iokit/Kernel/IOKitKernelInternal.h index ea74e3bd9..2deffc415 100644 --- a/iokit/Kernel/IOKitKernelInternal.h +++ b/iokit/Kernel/IOKitKernelInternal.h @@ -34,7 +34,7 @@ __BEGIN_DECLS -#include +#include #include #include @@ -57,10 +57,11 @@ IOMemoryDescriptorMapCopy(vm_map_t map, mach_vm_address_t * address, mach_vm_size_t length); mach_vm_address_t -IOKernelAllocateContiguous(mach_vm_size_t size, - mach_vm_address_t maxPhys, mach_vm_size_t alignment); +IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, + mach_vm_size_t alignment, bool contiguous); void -IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size); +IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size); + extern vm_size_t debug_iomallocpageable_size; @@ -114,7 +115,7 @@ struct IODMACommandInternal UInt64 fPreparedOffset; UInt64 fPreparedLength; - UInt32 fSourceAlignMask; + UInt32 fSourceAlignMask; UInt8 fCursor; UInt8 fCheckAddressing; @@ -126,12 +127,13 @@ struct IODMACommandInternal UInt8 fNewMD; UInt8 fLocalMapper; - ppnum_t fCopyMapperPageAlloc; + vm_page_t fCopyPageAlloc; + vm_page_t fCopyNext; + vm_page_t fNextRemapPage; + ppnum_t fCopyPageCount; - ppnum_t fNextRemapIndex; - addr64_t fCopyNext; - ppnum_t fLocalMapperPageAlloc; + ppnum_t fLocalMapperPageAlloc; ppnum_t fLocalMapperPageCount; class IOBufferMemoryDescriptor * fCopyMD; diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index e40d20afc..a5415e71c 100644 --- a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -294,7 +294,7 @@ void IOFreeAligned(void * address, vm_size_t size) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ void -IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size) +IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size) { mach_vm_address_t allocationAddress; mach_vm_size_t adjustedSize; @@ -324,8 +324,8 @@ IOKernelFreeContiguous(mach_vm_address_t address, mach_vm_size_t size) } mach_vm_address_t -IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_address_t maxPhys, - 
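/*
 * Illustrative sketch, not part of the original patch: the Adler-32 style
 * hibernate_sum() removed above is replaced by hibernate_sum_page(), which
 * simply samples one 32-bit word of the page, with the word index derived from
 * the page number.  Equivalent arithmetic, assuming 4K pages (1024 words):
 */
#include <stdint.h>

#define SKETCH_WORDS_PER_PAGE 1024u   /* PAGE_SIZE >> 2 for a 4K page */

static uint32_t sketch_sum_page(const uint8_t *buf, uint32_t ppnum)
{
    /* ((PAGE_SIZE >> 2) - 1) is an all-ones mask, so this is ppnum mod 1024 */
    uint32_t index = ppnum & (SKETCH_WORDS_PER_PAGE - 1);
    return ((const uint32_t *)buf)[index];
}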
mach_vm_size_t alignment) +IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxPhys, + mach_vm_size_t alignment, bool contiguous) { kern_return_t kr; mach_vm_address_t address; @@ -341,11 +341,25 @@ IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_address_t maxPhys, alignMask = alignment - 1; adjustedSize = (2 * size) + sizeof(mach_vm_size_t) + sizeof(mach_vm_address_t); - if (adjustedSize >= page_size) + contiguous = (contiguous && (adjustedSize > page_size)) + || (alignment > page_size); + + if (contiguous || maxPhys) { + int options = 0; vm_offset_t virt; + adjustedSize = size; - if ((adjustedSize > page_size) || (alignment > page_size) || maxPhys) + contiguous = (contiguous && (adjustedSize > page_size)) + || (alignment > page_size); + + if ((!contiguous) && (maxPhys <= 0xFFFFFFFF)) + { + maxPhys = 0; + options |= KMA_LOMEM; + } + + if (contiguous || maxPhys) { kr = kmem_alloc_contig(kernel_map, &virt, size, alignMask, atop(maxPhys), atop(alignMask), 0); @@ -353,7 +367,7 @@ IOKernelAllocateContiguous(mach_vm_size_t size, mach_vm_address_t maxPhys, else { kr = kernel_memory_allocate(kernel_map, &virt, - size, alignMask, 0); + size, alignMask, options); } if (KERN_SUCCESS == kr) address = virt; @@ -414,7 +428,7 @@ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, /* Do we want a physical address? */ if (!physicalAddress) { - address = IOKernelAllocateContiguous(size, 0 /*maxPhys*/, alignment); + address = IOKernelAllocateWithPhysicalRestrict(size, 0 /*maxPhys*/, alignment, true); } else do { @@ -484,7 +498,7 @@ void IOFreeContiguous(void * _address, vm_size_t size) } else { - IOKernelFreeContiguous((mach_vm_address_t) address, size); + IOKernelFreePhysical((mach_vm_address_t) address, size); } } diff --git a/iokit/Kernel/IOMapper.cpp b/iokit/Kernel/IOMapper.cpp index 171e22ff3..23b0db10e 100644 --- a/iokit/Kernel/IOMapper.cpp +++ b/iokit/Kernel/IOMapper.cpp @@ -30,8 +30,6 @@ #include #include -#include "IOCopyMapper.h" - __BEGIN_DECLS extern ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va); __END_DECLS @@ -390,23 +388,4 @@ void IOMappedWrite64(IOPhysicalAddress address, UInt64 value) ml_phys_write_double((vm_offset_t) address, value); } -mach_vm_address_t IOMallocPhysical(mach_vm_size_t size, mach_vm_address_t mask) -{ - mach_vm_address_t address = 0; - if (gIOCopyMapper) - { - address = ptoa_64(gIOCopyMapper->iovmAlloc(atop_64(round_page(size)))); - } - return (address); -} - -void IOFreePhysical(mach_vm_address_t address, mach_vm_size_t size) -{ - if (gIOCopyMapper) - { - gIOCopyMapper->iovmFree(atop_64(address), atop_64(round_page(size))); - } -} - - __END_DECLS diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index f60f612dc..bd3c67176 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -50,7 +50,6 @@ #include #include "IOKitKernelInternal.h" -#include "IOCopyMapper.h" #include #include @@ -110,8 +109,6 @@ __END_DECLS static IOMapper * gIOSystemMapper = NULL; -IOCopyMapper * gIOCopyMapper = NULL; - static ppnum_t gIOMaximumMappedIOPageCount = atop_32(kIOMaximumMappedIOByteCount); ppnum_t gIOLastPage; @@ -1834,6 +1831,30 @@ IOReturn IOMemoryDescriptor::setPurgeable( IOOptionBits newState, extern "C" void dcache_incoherent_io_flush64(addr64_t pa, unsigned int count); extern "C" void dcache_incoherent_io_store64(addr64_t pa, unsigned int count); +static void SetEncryptOp(addr64_t pa, unsigned int count) +{ + ppnum_t page, end; + + page = 
atop_64(round_page_64(pa)); + end = atop_64(trunc_page_64(pa + count)); + for (; page < end; page++) + { + pmap_clear_noencrypt(page); + } +} + +static void ClearEncryptOp(addr64_t pa, unsigned int count) +{ + ppnum_t page, end; + + page = atop_64(round_page_64(pa)); + end = atop_64(trunc_page_64(pa + count)); + for (; page < end; page++) + { + pmap_set_noencrypt(page); + } +} + IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, IOByteCount offset, IOByteCount length ) { @@ -1848,6 +1869,13 @@ IOReturn IOMemoryDescriptor::performOperation( IOOptionBits options, case kIOMemoryIncoherentIOStore: func = &dcache_incoherent_io_store64; break; + + case kIOMemorySetEncrypted: + func = &SetEncryptOp; + break; + case kIOMemoryClearEncrypted: + func = &ClearEncryptOp; + break; } if (!func) @@ -2181,6 +2209,14 @@ IOReturn IOGeneralMemoryDescriptor::prepare(IODirection forDirection) if (kIOReturnSuccess == error) _wireCount++; + if (1 == _wireCount) + { + if (kIOMemoryClearEncrypt & _flags) + { + performOperation(kIOMemoryClearEncrypted, 0, _length); + } + } + if (_prepareLock) IOLockUnlock(_prepareLock); @@ -2210,6 +2246,11 @@ IOReturn IOGeneralMemoryDescriptor::complete(IODirection /* forDirection */) if (_wireCount) { + if ((kIOMemoryClearEncrypt & _flags) && (1 == _wireCount)) + { + performOperation(kIOMemorySetEncrypted, 0, _length); + } + _wireCount--; if (!_wireCount) { @@ -3279,19 +3320,6 @@ void IOMemoryDescriptor::initialize( void ) IORegistryEntry::getRegistryRoot()->setProperty(kIOMaximumMappedIOByteCountKey, ptoa_64(gIOMaximumMappedIOPageCount), 64); - if (!gIOCopyMapper) - { - IOMapper * - mapper = new IOCopyMapper; - if (mapper) - { - if (mapper->init() && mapper->start(NULL)) - gIOCopyMapper = (IOCopyMapper *) mapper; - else - mapper->release(); - } - } - gIOLastPage = IOGetLastPageNumber(); } diff --git a/iokit/Kernel/IOPMPowerStateQueue.cpp b/iokit/Kernel/IOPMPowerStateQueue.cpp index 7081a7fae..cd24b8c53 100644 --- a/iokit/Kernel/IOPMPowerStateQueue.cpp +++ b/iokit/Kernel/IOPMPowerStateQueue.cpp @@ -62,7 +62,7 @@ bool IOPMPowerStateQueue::init( OSObject * inOwner, Action inAction ) bool IOPMPowerStateQueue::submitPowerEvent( uint32_t eventType, void * arg0, - void * arg1 ) + uint64_t arg1 ) { PowerEventEntry * entry; @@ -71,8 +71,8 @@ bool IOPMPowerStateQueue::submitPowerEvent( return false; entry->eventType = eventType; - entry->args[0] = arg0; - entry->args[1] = arg1; + entry->arg0 = arg0; + entry->arg1 = arg1; IOLockLock(queueLock); queue_enter(&queueHead, entry, PowerEventEntry *, chain); @@ -93,7 +93,7 @@ bool IOPMPowerStateQueue::checkForWork( void ) queue_remove_first(&queueHead, entry, PowerEventEntry *, chain); IOLockUnlock(queueLock); - (*queueAction)(owner, entry->eventType, entry->args[0], entry->args[1]); + (*queueAction)(owner, entry->eventType, entry->arg0, entry->arg1); IODelete(entry, PowerEventEntry, 1); IOLockLock(queueLock); diff --git a/iokit/Kernel/IOPMPowerStateQueue.h b/iokit/Kernel/IOPMPowerStateQueue.h index 713cb1afe..2b34e768b 100644 --- a/iokit/Kernel/IOPMPowerStateQueue.h +++ b/iokit/Kernel/IOPMPowerStateQueue.h @@ -33,7 +33,7 @@ #include #include -typedef void (*IOPMPowerStateQueueAction)(OSObject *, uint32_t event, void *, void *); +typedef void (*IOPMPowerStateQueueAction)(OSObject *, uint32_t event, void *, uint64_t); class IOPMPowerStateQueue : public IOEventSource { @@ -43,7 +43,8 @@ private: struct PowerEventEntry { queue_chain_t chain; uint32_t eventType; - void * args[2]; + void * arg0; + uint64_t arg1; }; queue_head_t 
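/*
 * Illustrative sketch, not part of the original patch: SetEncryptOp() and
 * ClearEncryptOp() above round the start of the range up and the end down, so
 * only pages wholly contained in [pa, pa + count) have their pmap no-encrypt
 * flag changed.  Stand-alone model of that page-range computation, assuming
 * 4K pages:
 */
#include <stdint.h>

#define SKETCH_PAGE_SIZE 4096ull

/* Computes the half-open page-number range [*first, *last) fully covered by
 * the byte range [pa, pa + count). */
static void fully_covered_pages(uint64_t pa, uint64_t count,
                                uint64_t *first, uint64_t *last)
{
    *first = (pa + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE;   /* atop(round_page(pa)) */
    *last  = (pa + count) / SKETCH_PAGE_SIZE;                  /* atop(trunc_page(pa + count)) */
    if (*last < *first)
        *last = *first;   /* a sub-page range covers no whole page */
}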
queueHead; @@ -56,7 +57,7 @@ protected: public: static IOPMPowerStateQueue * PMPowerStateQueue( OSObject * owner, Action action ); - bool submitPowerEvent( uint32_t eventType, void * arg0 = 0, void * arg1 = 0 ); + bool submitPowerEvent( uint32_t eventType, void * arg0 = 0, uint64_t arg1 = 0 ); }; #endif /* _IOPMPOWERSTATEQUEUE_H_ */ diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 848f67200..2144447ae 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -27,6 +27,7 @@ */ #include #include +#include #include #include #include @@ -97,7 +98,10 @@ enum { kPowerEventSystemShutdown, kPowerEventUserDisabledSleep, kPowerEventConfigdRegisteredInterest, - kPowerEventAggressivenessChanged + kPowerEventAggressivenessChanged, + kPowerEventAssertionCreate, // 8 + kPowerEventAssertionRelease, // 9 + kPowerEventAssertionSetLevel // 10 }; extern "C" { @@ -111,9 +115,11 @@ static void wakeupClamshellTimerExpired( thread_call_param_t us, thread_call_par static void notifySystemShutdown( IOService * root, unsigned long event ); static bool clientMessageFilter( OSObject * object, void * context ); static void handleAggressivesFunction( thread_call_param_t param1, thread_call_param_t param2 ); +static void pmEventTimeStamp(uint64_t *recordTS); // "IOPMSetSleepSupported" callPlatformFunction name static const OSSymbol *sleepSupportedPEFunction = NULL; +static const OSSymbol *sleepMessagePEFunction = NULL; #define kIOSleepSupportedKey "IOSleepSupported" @@ -205,6 +211,8 @@ static IONotifier * gConfigdNotifier = 0; #define kIOPMRootDomainRunStateKey "Run State" #define kIOPMRootDomainWakeTypeMaintenance "Maintenance" +#define kIOPMRootDomainWakeTypeSleepTimer "SleepTimer" +#define kIOPMrootDomainWakeTypeLowBattery "LowBattery" #endif /* ROOT_DOMAIN_RUN_STATES */ @@ -310,6 +318,58 @@ public: void free(void); }; +/* + * PMAssertionsTracker + * Tracks kernel and user space PM assertions + */ +class PMAssertionsTracker : public OSObject +{ + OSDeclareFinalStructors(PMAssertionsTracker) +public: + static PMAssertionsTracker *pmAssertionsTracker( IOPMrootDomain * ); + + IOReturn createAssertion(IOPMDriverAssertionType, IOPMDriverAssertionLevel, IOService *, const char *, IOPMDriverAssertionID *); + IOReturn releaseAssertion(IOPMDriverAssertionID); + IOReturn setAssertionLevel(IOPMDriverAssertionID, IOPMDriverAssertionLevel); + IOReturn setUserAssertionLevels(IOPMDriverAssertionType); + + OSArray *copyAssertionsArray(void); + IOPMDriverAssertionType getActivatedAssertions(void); + IOPMDriverAssertionLevel getAssertionLevel(IOPMDriverAssertionType); + + IOReturn handleCreateAssertion(OSData *); + IOReturn handleReleaseAssertion(IOPMDriverAssertionID); + IOReturn handleSetAssertionLevel(IOPMDriverAssertionID, IOPMDriverAssertionLevel); + IOReturn handleSetUserAssertionLevels(void * arg0); + void publishProperties(void); + +private: + typedef struct { + IOPMDriverAssertionID id; + IOPMDriverAssertionType assertionBits; + uint64_t createdTime; + uint64_t modifiedTime; + const OSSymbol *ownerString; + IOService *ownerService; + IOPMDriverAssertionLevel level; + } PMAssertStruct; + + uint32_t tabulateProducerCount; + uint32_t tabulateConsumerCount; + + PMAssertStruct *detailsForID(IOPMDriverAssertionID, int *); + void tabulate(void); + + IOPMrootDomain *owner; + OSArray *assertionsArray; + IOLock *assertionsArrayLock; + IOPMDriverAssertionID issuingUniqueID; + IOPMDriverAssertionType assertionsKernel; + IOPMDriverAssertionType assertionsUser; + 
IOPMDriverAssertionType assertionsCombined; +}; + +OSDefineMetaClassAndFinalStructors(PMAssertionsTracker, OSObject); /* * PMTraceWorker @@ -637,6 +697,7 @@ bool IOPMrootDomain::start( IOService * nub ) gIOPMStatsApplicationResponseSlow = OSSymbol::withCString(kIOPMStatsResponseSlow); sleepSupportedPEFunction = OSSymbol::withCString("IOPMSetSleepSupported"); + sleepMessagePEFunction = OSSymbol::withCString("IOPMSystemSleepMessage"); const OSSymbol *settingsArr[kRootDomainSettingsCount] = { @@ -686,6 +747,8 @@ bool IOPMrootDomain::start( IOService * nub ) pmTracer = PMTraceWorker::tracer(this); + pmAssertions = PMAssertionsTracker::pmAssertionsTracker(this); + updateRunState(kRStateNormal); userDisabledAllSleep = false; allowSleep = true; @@ -953,6 +1016,15 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) pmTracer->traceLoginWindowPhase( n->unsigned8BitValue() ); } + if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMDeepSleepEnabledKey)))) + { + setProperty(kIOPMDeepSleepEnabledKey, b); + } + if ((n = OSDynamicCast(OSNumber, dict->getObject(kIOPMDeepSleepDelayKey)))) + { + setProperty(kIOPMDeepSleepDelayKey, n); + } + // Relay our allowed PM settings onto our registered PM clients for(i = 0; i < allowedPMSettings->getCount(); i++) { @@ -994,6 +1066,7 @@ exit: if(idle_seconds_string) idle_seconds_string->release(); if(sleepdisabled_string) sleepdisabled_string->release(); if(ondeck_sleepwake_uuid_string) ondeck_sleepwake_uuid_string->release(); + if(loginwindow_tracepoint_string) loginwindow_tracepoint_string->release(); #if HIBERNATION if(hibernatemode_string) hibernatemode_string->release(); if(hibernatefile_string) hibernatefile_string->release(); @@ -1735,18 +1808,29 @@ IOReturn IOPMrootDomain::sleepSystemOptions( OSDictionary *options ) { // Log specific sleep cause for OS Switch hibernation - return privateSleepSystem( kIOPMOSSwitchHibernationKey) ; + return privateSleepSystem( kIOPMSleepReasonOSSwitchHibernation); } else { - return privateSleepSystem( kIOPMSoftwareSleepKey); + return privateSleepSystem( kIOPMSleepReasonSoftware); } } /* private */ -IOReturn IOPMrootDomain::privateSleepSystem( const char *sleepReason ) +IOReturn IOPMrootDomain::privateSleepSystem( uint32_t sleepReason ) { + static const char * IOPMSleepReasons[kIOPMSleepReasonMax] = { + "", + kIOPMClamshellSleepKey, + kIOPMPowerButtonSleepKey, + kIOPMSoftwareSleepKey, + kIOPMOSSwitchHibernationKey, + kIOPMIdleSleepKey, + kIOPMLowPowerSleepKey, + kIOPMClamshellSleepKey, + kIOPMThermalEmergencySleepKey + }; if ( userDisabledAllSleep ) { LOG("Sleep prevented by user disable\n"); @@ -1766,8 +1850,9 @@ IOReturn IOPMrootDomain::privateSleepSystem( const char *sleepReason ) } // Record sleep cause in IORegistry - if (sleepReason) { - setProperty(kRootDomainSleepReasonKey, sleepReason); + lastSleepReason = sleepReason; + if (sleepReason && (sleepReason < kIOPMSleepReasonMax)) { + setProperty(kRootDomainSleepReasonKey, IOPMSleepReasons[sleepReason]); } patriarch->sleepSystem(); @@ -1843,6 +1928,8 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousState ) tracePoint(kIOPMTracePointSystemHibernatePhase); IOHibernateSystemHasSlept(); + + evaluateSystemSleepPolicyFinal(); #else LOG("System Sleep\n"); #endif @@ -1884,6 +1971,7 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousState ) // log system wake getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0); + lowBatteryCondition = false; #ifndef __LP64__ // tell the tree we're waking @@ -1892,10 +1980,23 @@ void 
IOPMrootDomain::powerChangeDone( unsigned long previousState ) #if defined(__i386__) || defined(__x86_64__) + sleepTimerMaintenance = false; #if ROOT_DOMAIN_RUN_STATES OSString * wakeType = OSDynamicCast( OSString, getProperty(kIOPMRootDomainWakeTypeKey)); - if (wakeType && wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) + if (wakeType && wakeType->isEqualTo(kIOPMrootDomainWakeTypeLowBattery)) + { + lowBatteryCondition = true; + updateRunState(kRStateMaintenance); + wranglerTickled = false; + } + else if (wakeType && !hibernateAborted && wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer)) + { + sleepTimerMaintenance = true; + updateRunState(kRStateMaintenance); + wranglerTickled = false; + } + else if (wakeType && !hibernateAborted && wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) { updateRunState(kRStateMaintenance); wranglerTickled = false; @@ -1946,7 +2047,16 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousState ) (runStateIndex == kRStateMaintenance) && !wranglerTickled) { - setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey); + if (lowBatteryCondition) + { + lastSleepReason = kIOPMSleepReasonLowPower; + setProperty(kRootDomainSleepReasonKey, kIOPMLowPowerSleepKey); + } + else + { + lastSleepReason = kIOPMSleepReasonMaintenance; + setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey); + } changePowerStateWithOverrideTo(SLEEP_STATE); } @@ -2530,6 +2640,273 @@ void IOPMrootDomain::informCPUStateChange( } +#if HIBERNATION + +//****************************************************************************** +// evaluateSystemSleepPolicy +//****************************************************************************** + +struct IOPMSystemSleepPolicyEntry +{ + uint32_t factorMask; + uint32_t factorBits; + uint32_t sleepFlags; + uint32_t wakeEvents; +}; + +struct IOPMSystemSleepPolicyTable +{ + uint8_t signature[4]; + uint16_t version; + uint16_t entryCount; + IOPMSystemSleepPolicyEntry entries[]; +}; + +enum { + kIOPMSleepFactorSleepTimerWake = 0x00000001, + kIOPMSleepFactorLidOpen = 0x00000002, + kIOPMSleepFactorACPower = 0x00000004, + kIOPMSleepFactorLowBattery = 0x00000008, + kIOPMSleepFactorDeepSleepNoDelay = 0x00000010, + kIOPMSleepFactorDeepSleepDemand = 0x00000020, + kIOPMSleepFactorDeepSleepDisable = 0x00000040, + kIOPMSleepFactorUSBExternalDevice = 0x00000080, + kIOPMSleepFactorBluetoothHIDDevice = 0x00000100, + kIOPMSleepFactorExternalMediaMounted = 0x00000200, + kIOPMSleepFactorDriverAssertBit5 = 0x00000400, + kIOPMSleepFactorDriverAssertBit6 = 0x00000800, + kIOPMSleepFactorDriverAssertBit7 = 0x00001000 +}; + +bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p ) +{ + const IOPMSystemSleepPolicyTable * pt; + OSObject * prop = 0; + OSData * policyData; + uint32_t currentFactors; + uint32_t deepSleepDelay = 0; + bool success = false; + + if (getProperty(kIOPMDeepSleepEnabledKey) != kOSBooleanTrue) + return false; + + getSleepOption(kIOPMDeepSleepDelayKey, &deepSleepDelay); + + prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey); + if (!prop) + return false; + + policyData = OSDynamicCast(OSData, prop); + if (!policyData || + (policyData->getLength() < sizeof(IOPMSystemSleepPolicyTable))) + { + goto done; + } + + pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy(); + if ((pt->signature[0] != 'S') || + (pt->signature[1] != 'L') || + (pt->signature[2] != 'P') || + (pt->signature[3] != 'T') || + (pt->version != 1) || + (pt->entryCount == 0)) + { + goto done; + } + 
+ if ((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) != + (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount)) + { + goto done; + } + + currentFactors = 0; + if (getPMAssertionLevel(kIOPMDriverAssertionUSBExternalDeviceBit) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorUSBExternalDevice; + if (getPMAssertionLevel(kIOPMDriverAssertionBluetoothHIDDevicePairedBit) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorBluetoothHIDDevice; + if (getPMAssertionLevel(kIOPMDriverAssertionExternalMediaMountedBit) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorExternalMediaMounted; + if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorDriverAssertBit5; + if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit6) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorDriverAssertBit6; + if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit7) != + kIOPMDriverAssertionLevelOff) + currentFactors |= kIOPMSleepFactorDriverAssertBit7; + if (0 == deepSleepDelay) + currentFactors |= kIOPMSleepFactorDeepSleepNoDelay; + if (!clamshellIsClosed) + currentFactors |= kIOPMSleepFactorLidOpen; + if (acAdaptorConnected) + currentFactors |= kIOPMSleepFactorACPower; + if (lowBatteryCondition) + currentFactors |= kIOPMSleepFactorLowBattery; + if (sleepTimerMaintenance) + currentFactors |= kIOPMSleepFactorSleepTimerWake; + + // pmset overrides + if ((hibernateMode & kIOHibernateModeOn) == 0) + currentFactors |= kIOPMSleepFactorDeepSleepDisable; + else if ((hibernateMode & kIOHibernateModeSleep) == 0) + currentFactors |= kIOPMSleepFactorDeepSleepDemand; + + DLOG("Sleep policy %u entries, current factors 0x%x\n", + pt->entryCount, currentFactors); + + for (uint32_t i = 0; i < pt->entryCount; i++) + { + const IOPMSystemSleepPolicyEntry * policyEntry = &pt->entries[i]; + + DLOG("factor mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x\n", + policyEntry->factorMask, policyEntry->factorBits, + policyEntry->sleepFlags, policyEntry->wakeEvents); + + if ((currentFactors ^ policyEntry->factorBits) & policyEntry->factorMask) + continue; // mismatch, try next + + if (p) + { + p->version = 1; + p->sleepFlags = policyEntry->sleepFlags; + p->sleepTimer = 0; + p->wakeEvents = policyEntry->wakeEvents; + if (p->sleepFlags & kIOPMSleepFlagSleepTimerEnable) + { + p->sleepTimer = deepSleepDelay; + } + } + + DLOG("matched policy entry %u\n", i); + success = true; + break; + } + +done: + if (prop) + prop->release(); + + return success; +} + +void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void ) +{ + IOPMSystemSleepParameters params; + + // Evaluate sleep policy before driver sleep phase. + + DLOG("%s\n", __FUNCTION__); + removeProperty(kIOPMSystemSleepParametersKey); + + hibernateDisabled = false; + hibernateMode = 0; + getSleepOption(kIOHibernateModeKey, &hibernateMode); + + if (!hibernateNoDefeat && + evaluateSystemSleepPolicy(¶ms) && + ((params.sleepFlags & kIOPMSleepFlagHibernate) == 0)) + { + hibernateDisabled = true; + } +} + +void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void ) +{ + IOPMSystemSleepParameters params; + OSData * paramsData; + + // Evaluate sleep policy after drivers but before platform sleep. + + DLOG("%s\n", __FUNCTION__); + + if (evaluateSystemSleepPolicy(¶ms)) + { + if ((hibernateDisabled || hibernateAborted) && + (params.sleepFlags & kIOPMSleepFlagHibernate)) + { + // Should hibernate but unable to or aborted. 
+ // Arm timer for a short sleep and retry or wake fully. + + params.sleepFlags &= ~kIOPMSleepFlagHibernate; + params.sleepFlags |= kIOPMSleepFlagSleepTimerEnable; + params.sleepTimer = 1; + hibernateNoDefeat = true; + DLOG("wake in %u secs for hibernateDisabled %d, hibernateAborted %d\n", + params.sleepTimer, hibernateDisabled, hibernateAborted); + } + else + hibernateNoDefeat = false; + + paramsData = OSData::withBytes(¶ms, sizeof(params)); + if (paramsData) + { + setProperty(kIOPMSystemSleepParametersKey, paramsData); + paramsData->release(); + } + + if (params.sleepFlags & kIOPMSleepFlagHibernate) + { + // Force hibernate + gIOHibernateMode &= ~kIOHibernateModeSleep; + } + } +} + +bool IOPMrootDomain::getHibernateSettings( + uint32_t * hibernateMode, + uint32_t * hibernateFreeRatio, + uint32_t * hibernateFreeTime ) +{ + bool ok = getSleepOption(kIOHibernateModeKey, hibernateMode); + getSleepOption(kIOHibernateFreeRatioKey, hibernateFreeRatio); + getSleepOption(kIOHibernateFreeTimeKey, hibernateFreeTime); + if (hibernateDisabled) + *hibernateMode = 0; + DLOG("hibernateMode 0x%x\n", *hibernateMode); + return ok; +} + +bool IOPMrootDomain::getSleepOption( const char * key, uint32_t * option ) +{ + OSObject * optionsProp; + OSDictionary * optionsDict; + OSObject * obj = 0; + OSNumber * num; + bool ok = false; + + optionsProp = copyProperty(kRootDomainSleepOptionsKey); + optionsDict = OSDynamicCast(OSDictionary, optionsProp); + + if (optionsDict) + { + obj = optionsDict->getObject(key); + if (obj) obj->retain(); + } + if (!obj) + { + obj = copyProperty(key); + } + if (obj && (num = OSDynamicCast(OSNumber, obj))) + { + *option = num->unsigned32BitValue(); + ok = true; + } + + if (obj) + obj->release(); + if (optionsProp) + optionsProp->release(); + + return true; +} +#endif /* HIBERNATION */ + + //****************************************************************************** // dispatchPowerEvent // @@ -2537,9 +2914,9 @@ void IOPMrootDomain::informCPUStateChange( //****************************************************************************** void IOPMrootDomain::dispatchPowerEvent( - uint32_t event, void * arg0, void * arg1 ) + uint32_t event, void * arg0, uint64_t arg1 ) { - DLOG("power event %x args %p %p\n", event, arg0, arg1); + DLOG("power event %u args %p 0x%llx\n", event, arg0, arg1); ASSERT_GATED(); switch (event) @@ -2612,6 +2989,24 @@ void IOPMrootDomain::dispatchPowerEvent( case kPowerEventAggressivenessChanged: aggressivenessChanged(); break; + + case kPowerEventAssertionCreate: + if (pmAssertions) { + pmAssertions->handleCreateAssertion((OSData *)arg0); + } + break; + + case kPowerEventAssertionRelease: + if (pmAssertions) { + pmAssertions->handleReleaseAssertion(arg1); + } + break; + + case kPowerEventAssertionSetLevel: + if (pmAssertions) { + pmAssertions->handleSetAssertionLevel(arg1, (IOPMDriverAssertionLevel)(uintptr_t)arg0); + } + break; } } @@ -2726,10 +3121,10 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) /* * Overtemp */ - if (msg & kIOPMOverTemp) + if (msg & kIOPMOverTemp) { LOG("PowerManagement emergency overtemp signal. 
Going to sleep!"); - privateSleepSystem (kIOPMThermalEmergencySleepKey); + privateSleepSystem (kIOPMSleepReasonThermalEmergency); } #ifdef __ppc__ @@ -2750,7 +3145,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) */ if (msg & kIOPMSleepNow) { - privateSleepSystem (kIOPMSoftwareSleepKey); + privateSleepSystem (kIOPMSleepReasonSoftware); } /* @@ -2758,7 +3153,8 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) */ if (msg & kIOPMPowerEmergency) { - privateSleepSystem (kIOPMLowPowerSleepKey); + lowBatteryCondition = true; + privateSleepSystem (kIOPMSleepReasonLowPower); } /* @@ -2770,13 +3166,19 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) // Update our internal state and tell general interest clients clamshellIsClosed = false; clamshellExists = true; - + // Tell PMCPU informCPUStateChange(kInformLid, 0); // Tell general interest clients sendClientClamshellNotification(); - } + + bool aborting = ((lastSleepReason == kIOPMSleepReasonClamshell) + || (lastSleepReason == kIOPMSleepReasonIdle) + || (lastSleepReason == kIOPMSleepReasonMaintenance)); + if (aborting) userActivityCount++; + DLOG("clamshell tickled %d lastSleepReason %d\n", userActivityCount, lastSleepReason); + } /* * Clamshell CLOSED @@ -2884,7 +3286,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) // SLEEP! - privateSleepSystem (kIOPMClamshellSleepKey); + privateSleepSystem (kIOPMSleepReasonClamshell); } /* @@ -2908,7 +3310,7 @@ void IOPMrootDomain::handlePowerNotification( UInt32 msg ) // Check that power button sleep is enabled if( pbs ) { if( kOSBooleanTrue != getProperty(pbs)) - privateSleepSystem (kIOPMPowerButtonSleepKey); + privateSleepSystem (kIOPMSleepReasonPowerButton); } } } @@ -3298,6 +3700,10 @@ bool IOPMrootDomain::tellChangeDown( unsigned long stateNum ) if (!ignoreChangeDown) { + userActivityAtSleep = userActivityCount; + hibernateAborted = false; + DLOG("tellChangeDown::userActivityAtSleep %d\n", userActivityAtSleep); + // Direct callout into OSKext so it can disable kext unloads // during sleep/wake to prevent deadlocks. OSKextSystemSleepOrWake( kIOMessageSystemWillSleep ); @@ -3321,6 +3727,12 @@ bool IOPMrootDomain::tellChangeDown( unsigned long stateNum ) // otherwise nothing. } + // Notify platform that sleep has begun + getPlatform()->callPlatformFunction( + sleepMessagePEFunction, false, + (void *)(uintptr_t) kIOMessageSystemWillSleep, + NULL, NULL, NULL); + // Update canSleep and kIOSleepSupportedKey property so drivers // can tell if platform is going to sleep versus doze. @@ -3434,6 +3846,12 @@ void IOPMrootDomain::tellChangeUp( unsigned long stateNum ) // during sleep/wake to prevent deadlocks. OSKextSystemSleepOrWake( kIOMessageSystemHasPoweredOn ); + // Notify platform that sleep was cancelled or resumed. 
+ getPlatform()->callPlatformFunction( + sleepMessagePEFunction, false, + (void *)(uintptr_t) kIOMessageSystemHasPoweredOn, + NULL, NULL, NULL); + if (getPowerState() == ON_STATE) { // this is a quick wake from aborted sleep @@ -3558,6 +3976,36 @@ IOReturn IOPMrootDomain::changePowerStateToPriv( unsigned long ordinal ) return super::changePowerStateToPriv(ordinal); } +//****************************************************************************** +// activity detect +// +//****************************************************************************** + +bool IOPMrootDomain::activitySinceSleep(void) +{ + return (userActivityCount != userActivityAtSleep); +} + +bool IOPMrootDomain::abortHibernation(void) +{ + bool ret = activitySinceSleep(); + + if (ret && !hibernateAborted) + { + DLOG("activitySinceSleep ABORT [%d, %d]\n", userActivityCount, userActivityAtSleep); + hibernateAborted = true; + } + return (ret); +} + +extern "C" int +hibernate_should_abort(void) +{ + if (gRootDomain) + return (gRootDomain->abortHibernation()); + else + return (0); +} //****************************************************************************** // updateRunState @@ -3643,22 +4091,37 @@ void IOPMrootDomain::tagPowerPlaneService( // service to raise power state. Called from driver thread. //****************************************************************************** -void IOPMrootDomain::handleActivityTickleForService( IOService * service ) +void IOPMrootDomain::handleActivityTickleForService( IOService * service, + unsigned long type, + unsigned long currentPowerState, + uint32_t activityTickleCount ) { + if ((service == wrangler) +) + { + bool aborting = ((lastSleepReason == kIOPMSleepReasonIdle) + || (lastSleepReason == kIOPMSleepReasonMaintenance)); + if (aborting) userActivityCount++; + DLOG("display wrangler tickled1 %d lastSleepReason %d\n", userActivityCount, lastSleepReason); + } + // Tickle directed to IODisplayWrangler while graphics is disabled. // Bring graphics online. - if ((service == wrangler) && + if ((!currentPowerState) && + (service == wrangler) && (runStateIndex > kRStateNormal) && - (false == wranglerTickled)) + (false == wranglerTickled) && + (false == lowBatteryCondition)) { DLOG("display wrangler tickled\n"); + if (kIOLogPMRootDomain & gIOKitDebug) + OSReportWithBacktrace("Display Tickle"); wranglerTickled = true; synchronizePowerTree(); } } - //****************************************************************************** // handlePowerChangeStartForService // @@ -3771,6 +4234,8 @@ void IOPMrootDomain::overridePowerStateForService( if (runStateIndex != kRStateNormal) { + sleepTimerMaintenance = false; + hibernateNoDefeat = false; nextRunStateIndex = kRStateNormal; setProperty( kIOPMRootDomainRunStateKey, @@ -3892,14 +4357,18 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, // down. 
// We call sync_internal defined in xnu/bsd/vfs/vfs_syscalls.c, // via callout - - // We will ack within 20 seconds - params->returnValue = 20 * 1000 * 1000; #if HIBERNATION - if (gIOHibernateState) - params->returnValue += gIOHibernateFreeTime * 1000; //add in time we could spend freeing pages + rootDomain->evaluateSystemSleepPolicyEarly(); + if (rootDomain->hibernateMode && !rootDomain->hibernateDisabled) + { + // We will ack within 240 seconds + params->returnValue = 240 * 1000 * 1000; + } + else #endif - + // We will ack within 20 seconds + params->returnValue = 20 * 1000 * 1000; + DLOG("sysPowerDownHandler timeout %d s\n", (int) (params->returnValue / 1000 / 1000)); if ( ! OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) ) { // Purposely delay the ack and hope that shutdown occurs quickly. @@ -4176,6 +4645,7 @@ void IOPMrootDomain::adjustPowerState( void ) * * Set last sleep cause accordingly. */ + lastSleepReason = kIOPMSleepReasonIdle; setProperty(kRootDomainSleepReasonKey, kIOPMIdleSleepKey); sleepASAP = false; @@ -4945,6 +5415,76 @@ done: return; } +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOPMDriverAssertionID IOPMrootDomain::createPMAssertion( + IOPMDriverAssertionType whichAssertionBits, + IOPMDriverAssertionLevel assertionLevel, + IOService *ownerService, + const char *ownerDescription) +{ + IOReturn ret; + IOPMDriverAssertionID newAssertion; + + if (!pmAssertions) + return 0; + + ret = pmAssertions->createAssertion(whichAssertionBits, assertionLevel, ownerService, ownerDescription, &newAssertion); + + if (kIOReturnSuccess == ret) + return newAssertion; + else + return 0; +} + +IOReturn IOPMrootDomain::releasePMAssertion(IOPMDriverAssertionID releaseAssertion) +{ + if (!pmAssertions) + return kIOReturnInternalError; + + return pmAssertions->releaseAssertion(releaseAssertion); +} + +IOReturn IOPMrootDomain::setPMAssertionLevel( + IOPMDriverAssertionID assertionID, + IOPMDriverAssertionLevel assertionLevel) +{ + return pmAssertions->setAssertionLevel(assertionID, assertionLevel); +} + +IOPMDriverAssertionLevel IOPMrootDomain::getPMAssertionLevel(IOPMDriverAssertionType whichAssertion) +{ + IOPMDriverAssertionType sysLevels; + + if (!pmAssertions || whichAssertion == 0) + return kIOPMDriverAssertionLevelOff; + + sysLevels = pmAssertions->getActivatedAssertions(); + + // Check that every bit set in argument 'whichAssertion' is asserted + // in the aggregate bits. 
+ if ((sysLevels & whichAssertion) == whichAssertion) + return kIOPMDriverAssertionLevelOn; + else + return kIOPMDriverAssertionLevelOff; +} + +IOReturn IOPMrootDomain::setPMAssertionUserLevels(IOPMDriverAssertionType inLevels) +{ + if (!pmAssertions) + return kIOReturnNotFound; + + return pmAssertions->setUserAssertionLevels(inLevels); +} + +bool IOPMrootDomain::serializeProperties( OSSerialize * s ) const +{ + if (pmAssertions) + { + pmAssertions->publishProperties(); + } + return( IOService::serializeProperties(s) ); +} /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -5055,7 +5595,436 @@ void PMSettingObject::taggedRelease(const void *tag, const int when) const super::taggedRelease(tag, releaseAtCount); } +// MARK: - +// MARK: PMAssertionsTracker + +//********************************************************************************* +//********************************************************************************* +//********************************************************************************* +// class PMAssertionsTracker Implementation + +#define kAssertUniqueIDStart 500 + +PMAssertionsTracker *PMAssertionsTracker::pmAssertionsTracker( IOPMrootDomain *rootDomain ) +{ + PMAssertionsTracker *myself; + + myself = new PMAssertionsTracker; + + if (myself) { + myself->init(); + myself->owner = rootDomain; + myself->issuingUniqueID = kAssertUniqueIDStart; + myself->assertionsArray = OSArray::withCapacity(5); + myself->assertionsKernel = 0; + myself->assertionsUser = 0; + myself->assertionsCombined = 0; + myself->assertionsArrayLock = IOLockAlloc(); + myself->tabulateProducerCount = myself->tabulateConsumerCount = 0; + + if (!myself->assertionsArray || !myself->assertionsArrayLock) + myself = NULL; + } + + return myself; +} + +/* tabulate + * - Update assertionsKernel to reflect the state of all + * assertions in the kernel. + * - Update assertionsCombined to reflect both kernel & user space. 
+ */ +void PMAssertionsTracker::tabulate(void) +{ + int i; + int count; + PMAssertStruct *_a = NULL; + OSData *_d = NULL; + + IOPMDriverAssertionType oldKernel = assertionsKernel; + IOPMDriverAssertionType oldCombined = assertionsCombined; + + ASSERT_GATED(); + + assertionsKernel = 0; + assertionsCombined = 0; + + if (!assertionsArray) + return; + + if ((count = assertionsArray->getCount())) + { + for (i=0; igetObject(i)); + if (_d) + { + _a = (PMAssertStruct *)_d->getBytesNoCopy(); + if (_a && (kIOPMDriverAssertionLevelOn == _a->level)) + assertionsKernel |= _a->assertionBits; + } + } + } + + tabulateProducerCount++; + assertionsCombined = assertionsKernel | assertionsUser; + + if ((assertionsKernel != oldKernel) || + (assertionsCombined != oldCombined)) + { + owner->messageClients(kIOPMMessageDriverAssertionsChanged); + } +} + +void PMAssertionsTracker::publishProperties( void ) +{ + OSArray *assertionsSummary = NULL; + + if (tabulateConsumerCount != tabulateProducerCount) + { + IOLockLock(assertionsArrayLock); + + tabulateConsumerCount = tabulateProducerCount; + + /* Publish the IOPMrootDomain property "DriverPMAssertionsDetailed" + */ + assertionsSummary = copyAssertionsArray(); + if (assertionsSummary) + { + owner->setProperty(kIOPMAssertionsDriverDetailedKey, assertionsSummary); + assertionsSummary->release(); + } + else + { + owner->removeProperty(kIOPMAssertionsDriverDetailedKey); + } + + /* Publish the IOPMrootDomain property "DriverPMAssertions" + */ + owner->setProperty(kIOPMAssertionsDriverKey, assertionsKernel, 64); + + IOLockUnlock(assertionsArrayLock); + } +} + +PMAssertionsTracker::PMAssertStruct *PMAssertionsTracker::detailsForID(IOPMDriverAssertionID _id, int *index) +{ + PMAssertStruct *_a = NULL; + OSData *_d = NULL; + int found = -1; + int count = 0; + int i = 0; + + if (assertionsArray + && (count = assertionsArray->getCount())) + { + for (i=0; igetObject(i)); + if (_d) + { + _a = (PMAssertStruct *)_d->getBytesNoCopy(); + if (_a && (_id == _a->id)) { + found = i; + break; + } + } + } + } + + if (-1 == found) { + return NULL; + } else { + if (index) + *index = found; + return _a; + } +} + +/* PMAssertionsTracker::handleCreateAssertion + * Perform assertion work on the PM workloop. Do not call directly. + */ +IOReturn PMAssertionsTracker::handleCreateAssertion(OSData *newAssertion) +{ + ASSERT_GATED(); + + if (newAssertion) + { + IOLockLock(assertionsArrayLock); + assertionsArray->setObject(newAssertion); + IOLockUnlock(assertionsArrayLock); + newAssertion->release(); + + tabulate(); + } + return kIOReturnSuccess; +} + +/* PMAssertionsTracker::createAssertion + * createAssertion allocates memory for a new PM assertion, and affects system behavior, if + * appropiate. + */ +IOReturn PMAssertionsTracker::createAssertion( + IOPMDriverAssertionType which, + IOPMDriverAssertionLevel level, + IOService *serviceID, + const char *whoItIs, + IOPMDriverAssertionID *outID) +{ + OSData *dataStore = NULL; + PMAssertStruct track; + + // Warning: trillions and trillions of created assertions may overflow the unique ID. +#ifdef __ppc__ + track.id = issuingUniqueID++; // FIXME: need OSIncrementAtomic64() for ppc +#else + track.id = OSIncrementAtomic64((SInt64*) &issuingUniqueID); +#endif + track.level = level; + track.assertionBits = which; + track.ownerString = whoItIs ? 
OSSymbol::withCString(whoItIs) : 0; + track.ownerService = serviceID; + track.modifiedTime = 0; + pmEventTimeStamp(&track.createdTime); + + dataStore = OSData::withBytes(&track, sizeof(PMAssertStruct)); + if (!dataStore) + { + if (track.ownerString) + track.ownerString->release(); + return kIOReturnNoMemory; + } + + *outID = track.id; + + if (owner && owner->pmPowerStateQueue) { + owner->pmPowerStateQueue->submitPowerEvent(kPowerEventAssertionCreate, (void *)dataStore); + } + + return kIOReturnSuccess; +} +/* PMAssertionsTracker::handleReleaseAssertion + * Runs in PM workloop. Do not call directly. + */ +IOReturn PMAssertionsTracker::handleReleaseAssertion( + IOPMDriverAssertionID _id) +{ + ASSERT_GATED(); + + int index; + PMAssertStruct *assertStruct = detailsForID(_id, &index); + + if (!assertStruct) + return kIOReturnNotFound; + + IOLockLock(assertionsArrayLock); + if (assertStruct->ownerString) + assertStruct->ownerString->release(); + + assertionsArray->removeObject(index); + IOLockUnlock(assertionsArrayLock); + + tabulate(); + return kIOReturnSuccess; +} + +/* PMAssertionsTracker::releaseAssertion + * Releases an assertion and affects system behavior if appropiate. + * Actual work happens on PM workloop. + */ +IOReturn PMAssertionsTracker::releaseAssertion( + IOPMDriverAssertionID _id) +{ + if (owner && owner->pmPowerStateQueue) { + owner->pmPowerStateQueue->submitPowerEvent(kPowerEventAssertionRelease, 0, _id); + } + return kIOReturnSuccess; +} + +/* PMAssertionsTracker::handleSetAssertionLevel + * Runs in PM workloop. Do not call directly. + */ +IOReturn PMAssertionsTracker::handleSetAssertionLevel( + IOPMDriverAssertionID _id, + IOPMDriverAssertionLevel _level) +{ + PMAssertStruct *assertStruct = detailsForID(_id, NULL); + + ASSERT_GATED(); + + if (!assertStruct) { + return kIOReturnNotFound; + } + + IOLockLock(assertionsArrayLock); + pmEventTimeStamp(&assertStruct->modifiedTime); + assertStruct->level = _level; + IOLockUnlock(assertionsArrayLock); + + tabulate(); + return kIOReturnSuccess; +} + +/* PMAssertionsTracker::setAssertionLevel + */ +IOReturn PMAssertionsTracker::setAssertionLevel( + IOPMDriverAssertionID _id, + IOPMDriverAssertionLevel _level) +{ + if (owner && owner->pmPowerStateQueue) { + owner->pmPowerStateQueue->submitPowerEvent(kPowerEventAssertionSetLevel, + (void *)_level, _id); + } + + return kIOReturnSuccess; +} + +IOReturn PMAssertionsTracker::handleSetUserAssertionLevels(void * arg0) +{ + IOPMDriverAssertionType new_user_levels = *(IOPMDriverAssertionType *) arg0; + + ASSERT_GATED(); + + if (new_user_levels != assertionsUser) + { + assertionsUser = new_user_levels; + DLOG("assertionsUser 0x%llx\n", assertionsUser); + } + + tabulate(); + return kIOReturnSuccess; +} + +IOReturn PMAssertionsTracker::setUserAssertionLevels( + IOPMDriverAssertionType new_user_levels) +{ + if (gIOPMWorkLoop) { + gIOPMWorkLoop->runAction( + OSMemberFunctionCast( + IOWorkLoop::Action, + this, + &PMAssertionsTracker::handleSetUserAssertionLevels), + this, + (void *) &new_user_levels, 0, 0, 0); + } + + return kIOReturnSuccess; +} + + +OSArray *PMAssertionsTracker::copyAssertionsArray(void) +{ + int count; + int i; + OSArray *outArray = NULL; + + if (!assertionsArray || + (0 == (count = assertionsArray->getCount())) || + (NULL == (outArray = OSArray::withCapacity(count)))) + { + goto exit; + } + + for (i=0; igetObject(i)); + if (_d && (_a = (PMAssertStruct *)_d->getBytesNoCopy())) + { + OSNumber *_n = NULL; + + details = OSDictionary::withCapacity(7); + if (!details) + continue; + + 
outArray->setObject(details); + details->release(); + + _n = OSNumber::withNumber(_a->id, 64); + if (_n) { + details->setObject(kIOPMDriverAssertionIDKey, _n); + _n->release(); + } + _n = OSNumber::withNumber(_a->createdTime, 64); + if (_n) { + details->setObject(kIOPMDriverAssertionCreatedTimeKey, _n); + _n->release(); + } + _n = OSNumber::withNumber(_a->modifiedTime, 64); + if (_n) { + details->setObject(kIOPMDriverAssertionModifiedTimeKey, _n); + _n->release(); + } + _n = OSNumber::withNumber((uintptr_t)_a->ownerService, 64); + if (_n) { + details->setObject(kIOPMDriverAssertionOwnerServiceKey, _n); + _n->release(); + } + _n = OSNumber::withNumber(_a->level, 64); + if (_n) { + details->setObject(kIOPMDriverAssertionLevelKey, _n); + _n->release(); + } + _n = OSNumber::withNumber(_a->assertionBits, 64); + if (_n) { + details->setObject(kIOPMDriverAssertionAssertedKey, _n); + _n->release(); + } + + if (_a->ownerString) { + details->setObject(kIOPMDriverAssertionOwnerStringKey, _a->ownerString); + } + } + } + +exit: + return outArray; +} + +IOPMDriverAssertionType PMAssertionsTracker::getActivatedAssertions(void) +{ + return assertionsCombined; +} + +IOPMDriverAssertionLevel PMAssertionsTracker::getAssertionLevel( + IOPMDriverAssertionType type) +{ + if (type && ((type & assertionsKernel) == assertionsKernel)) + { + return kIOPMDriverAssertionLevelOn; + } else { + return kIOPMDriverAssertionLevelOff; + } +} + +//********************************************************************************* +//********************************************************************************* +//********************************************************************************* + +static void pmEventTimeStamp(uint64_t *recordTS) +{ + clock_sec_t tsec; + clock_usec_t tusec; + + if (!recordTS) + return; + + // We assume tsec fits into 32 bits; 32 bits holds enough + // seconds for 136 years since the epoch in 1970. + clock_get_calendar_microtime(&tsec, &tusec); + + + // Pack the sec & microsec calendar time into a uint64_t, for fun. + *recordTS = 0; + *recordTS |= (uint32_t)tusec; + *recordTS |= ((uint64_t)tsec << 32); + + return; +} /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index c7b4319ed..57d40396f 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -2492,8 +2492,8 @@ bool IOService::activityTickle ( unsigned long type, unsigned long stateNumber ) clock_get_uptime(&fDeviceActiveTimestamp); #if ROOT_DOMAIN_RUN_STATES - if (fCurrentPowerState == 0) - getPMRootDomain()->handleActivityTickleForService(this); + getPMRootDomain()->handleActivityTickleForService(this, type, + fCurrentPowerState, fActivityTickleCount); #endif // Record the last tickle power state. 
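The hunks above and below add a driver-level PM assertion interface to IOPMrootDomain (createPMAssertion, releasePMAssertion, setPMAssertionLevel) along with a user-client path for user-space assertion levels. A minimal caller-side sketch follows; it assumes a hypothetical driver that is an IOService subclass, and the owner description string and call site are illustrative only, while the method names, constants, and the 0-on-failure return convention come from the patch itself.

    // Hypothetical driver code, not part of this patch.
    // Assumes <IOKit/pwr_mgt/RootDomain.h> is included for IOPMrootDomain and the assertion types.
    IOPMDriverAssertionID assertionID;

    assertionID = getPMRootDomain()->createPMAssertion(
                      kIOPMDriverAssertionUSBExternalDeviceBit,  // assertion type bit
                      kIOPMDriverAssertionLevelOn,               // raise the assertion immediately
                      this,                                      // owning IOService (hypothetical driver)
                      "com.example.SampleUSBDriver");            // hypothetical owner description

    if (assertionID != 0) {
        // ... activity during which the assertion should be visible to sleep policy ...
        getPMRootDomain()->releasePMAssertion(assertionID);      // drop it when the device goes idle
    }
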
diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp index 8dbb20b0e..69c0dfa1a 100644 --- a/iokit/Kernel/RootDomainUserClient.cpp +++ b/iokit/Kernel/RootDomainUserClient.cpp @@ -194,6 +194,23 @@ IOReturn RootDomainUserClient::secureSetMaintenanceWakeCalendar( #endif } +IOReturn RootDomainUserClient::secureSetUserAssertionLevels( + uint32_t assertBits ) +{ + int admin_priv = 0; + IOReturn ret = kIOReturnNotPrivileged; + + ret = clientHasPrivilege(fOwningTask, kIOClientPrivilegeAdministrator); + admin_priv = (kIOReturnSuccess == ret); + + if (admin_priv && fOwner) { + ret = fOwner->setPMAssertionUserLevels(assertBits); + } else { + ret = kIOReturnNotPrivileged; + } + return kIOReturnSuccess; +} + IOReturn RootDomainUserClient::clientClose( void ) { detach(fOwner); @@ -238,6 +255,10 @@ RootDomainUserClient::getTargetAndMethodForIndex( IOService ** targetP, UInt32 i { // kPMSetMaintenanceWakeCalendar, 8 (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetMaintenanceWakeCalendar, kIOUCStructIStructO, sizeof(IOPMCalendarStruct), sizeof(uint32_t) + }, + { // kPMSetUserAssertionLevels, 9 + (IOService *)1, (IOMethod)&RootDomainUserClient::secureSetUserAssertionLevels, + kIOUCScalarIScalarO, 1, 0 } }; diff --git a/iokit/Kernel/RootDomainUserClient.h b/iokit/Kernel/RootDomainUserClient.h index c66daabcc..4a277749c 100644 --- a/iokit/Kernel/RootDomainUserClient.h +++ b/iokit/Kernel/RootDomainUserClient.h @@ -63,6 +63,9 @@ private: void * p1, void * p2, void * p3, void * p4, void * p5, void * p6 ); + IOReturn secureSetUserAssertionLevels( + uint32_t assertBits ); + public: virtual IOReturn clientClose( void ); diff --git a/iokit/conf/files b/iokit/conf/files index a2732ee64..18d44275a 100644 --- a/iokit/conf/files +++ b/iokit/conf/files @@ -53,7 +53,6 @@ iokit/Kernel/IODMACommand.cpp optional iokitcpp iokit/Kernel/IODeviceMemory.cpp optional iokitcpp iokit/Kernel/IOInterleavedMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IOMapper.cpp optional iokitcpp -iokit/Kernel/IOCopyMapper.cpp optional iokitcpp iokit/Kernel/IOMemoryCursor.cpp optional iokitcpp iokit/Kernel/IOMemoryDescriptor.cpp optional iokitcpp iokit/Kernel/IOMultiMemoryDescriptor.cpp optional iokitcpp diff --git a/kgmacros b/kgmacros index 5a4897abd..53e0a769a 100644 --- a/kgmacros +++ b/kgmacros @@ -781,7 +781,7 @@ define showwaitqwaitercount set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_linksp->next set $kgm_wc_count = 0 while ( (queue_entry_t)$kgm_wc_wqe != (queue_entry_t)$kgm_wc_linksp) - if ($kgm_wc_wqe->wqe_type != &_wait_queue_link) + if ($kgm_wc_wqe->wqe_type != &_wait_queue_link) && ($kgm_wc_wqe->wqe_type != &_wait_queue_link_noalloc) set $kgm_wc_count = $kgm_wc_count + 1 end set $kgm_wc_wqe = (WaitQueueElement *)$kgm_wc_wqe->wqe_links.next @@ -790,7 +790,7 @@ define showwaitqwaitercount end define showwaitqmembercount - set $kgm_mc_waitqsetp = (WaitQueueSet*)$arg0 + set $kgm_mc_waitqsetp = (struct wait_queue_set *)$arg0 set $kgm_mc_setlinksp = &($kgm_mc_waitqsetp->wqs_setlinks) set $kgm_mc_wql = (WaitQueueLink *)$kgm_mc_setlinksp->next set $kgm_mc_count = 0 @@ -852,7 +852,7 @@ define showwaitqmemberof end define showwaitqmembers - set $kgm_ms_waitqsetp = (WaitQueueSet*)$arg0 + set $kgm_ms_waitqsetp = (struct wait_queue_set *)$arg0 set $kgm_ms_setlinksp = &($kgm_ms_waitqsetp->wqs_setlinks) set $kgm_ms_wql = (WaitQueueLink *)$kgm_ms_setlinksp->next set $kgm_ms_found = 0 @@ -868,15 +868,15 @@ define showwaitqmembers end define showwaitqheader - printf "wait_queue ref_count interlock " + 
printf "wait_queue prepostq interlock " printf "pol type member_cnt waiter_cnt\n" end define showwaitqint - set $kgm_waitqp = (WaitQueue*)$arg0 + set $kgm_waitqp = (WaitQueue *)$arg0 printf "0x%08x ", $kgm_waitqp if ($kgm_waitqp->wq_type == 0xf1d1) - printf "0x%08x ", ((WaitQueueSet*)$kgm_waitqp)->wqs_refcount + printf "0x%08x ", &((struct wait_queue_set *)$kgm_waitqp)->wqs_preposts else printf "0x00000000 " end @@ -988,6 +988,93 @@ define showvmint end +define showmapwiredp + set $kgm_mapp = (vm_map_t)$arg0 + set $kgm_map = *$kgm_mapp + set $kgm_head_vmep = &($kgm_mapp->hdr.links) + set $kgm_vmep = $kgm_map.hdr.links.next + set $kgm_objp_prev = (struct vm_object *)0 + if $arg1 == 0 + set $kgm_saw_kernel_obj = 0 + set $kgm_wired_count = 0 + set $kgm_objp_print_space = 1 + else + set $kgm_objp_print_space = 0 + end + while (($kgm_vmep != 0) && ($kgm_vmep != $kgm_head_vmep)) + set $kgm_vme = *$kgm_vmep + set $kgm_objp = $kgm_vme.object.vm_object + if $kgm_vme.is_sub_map + if $arg1 == 0 + set $kgm_mapp_orig = $kgm_mapp + set $kgm_vmep_orig = $kgm_vmep + set $kgm_vme_orig = $kgm_vme + set $kgm_head_vmep_orig = $kgm_head_vmep + printf "\n****" + showptr $kgm_objp + showmapwiredp $kgm_objp 1 + set $kgm_vme = $kgm_vme_orig + set $kgm_vmep = $kgm_vmep_orig + set $kgm_mapp = $kgm_mapp_orig + set $kgm_head_vmep = $kgm_head_vmep_orig + set $kgm_objp = (struct vm_object *)0 + else + printf "\n????" + showptr $kgm_mapp + printf " " + showptr $kgm_vmep + set $kgm_objp = (struct vm_object *)0 + printf "\n" + end + end + if ($kgm_objp == $kgm_objp_prev) + set $kgm_objp = (struct vm_object *)0 + end + if $kgm_objp == kernel_object + if $kgm_saw_kernel_obj + set $kgm_objp = (struct vm_object *)0 + end + set $kgm_saw_kernel_obj = 1 + end + if $kgm_objp && $kgm_objp->wired_page_count + if $kgm_objp_print_space == 1 + printf " " + showptr $kgm_mapp + end + set $kgm_objp_print_space = 1 + printf " " + showptr $kgm_vmep + printf " 0x%016llx ", $kgm_vme.links.start + printf "%5d", $kgm_vme.alias + printf "%6d ",($kgm_vme.links.end - $kgm_vme.links.start) >> 12 + showptr $kgm_objp + printf "[%3d]", $kgm_objp->ref_count + printf "%7d\n", $kgm_objp->wired_page_count + set $kgm_wired_count = $kgm_wired_count + $kgm_objp->wired_page_count + set $kgm_objp_prev = $kgm_objp + end + set $kgm_vmep = $kgm_vme.links.next + end + if $arg1 == 0 + printf "total wired count = %d\n", $kgm_wired_count + end +end + +define showmapwired + printf " map " + showptrhdrpad + printf " entry " + showptrhdrpad + printf " start alias #page object " + showptrhdrpad + printf " wired\n" + showmapwiredp $arg0 0 +end +document showmapwired +Syntax: (gdb) showmapwired +| Routine to print out a summary listing of all the entries with wired pages in a vm_map +end + define showmapvme showmapheader showvmint $arg0 1 @@ -1138,9 +1225,11 @@ define showipcint if $kgm_ie.ie_bits & 0x001f0000 set $kgm_name = (($kgm_iindex << 8)|($kgm_ie.ie_bits >> 24)) showipceint $kgm_iep $kgm_name - if $arg2 != 0 && $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0 - printf " user bt: " - showportbt $kgm_ie.ie_object $kgm_is.is_task + if $arg2 != 0 && ipc_portbt != 0 + if $kgm_ie.ie_object != 0 && ($kgm_ie.ie_bits & 0x00070000) && ((ipc_port_t) $kgm_ie.ie_object)->ip_callstack[0] != 0 + printf " user bt: " + showportbt $kgm_ie.ie_object $kgm_is.is_task + end end end set $kgm_iindex = $kgm_iindex + 1 @@ -1711,11 +1800,11 @@ define showpsetint printf "Set " printf "%5d ", $kgm_psetp->ips_object.io_references 
printf "0x%08x ", $kgm_psetp->ips_messages.data.pset.local_name - set $kgm_setlinksp = &($kgm_psetp->ips_messages.data.set_queue.wqs_setlinks) + set $kgm_setlinksp = &($kgm_psetp->ips_messages.data.pset.set_queue.wqs_setlinks) set $kgm_wql = (WaitQueueLink *)$kgm_setlinksp->next set $kgm_found = 0 while ( (queue_entry_t)$kgm_wql != (queue_entry_t)$kgm_setlinksp) - set $kgm_portp = (struct ipc_port *)((int)($kgm_wql->wql_element->wqe_queue) - ((int)$kgm_portoff)) + set $kgm_portp = (struct ipc_port *)((uintptr_t)$kgm_wql->wql_element.wqe_queue - $kgm_portoff) if !$kgm_found set $kgm_destspacep = (struct ipc_space *)0 showportdestproc $kgm_portp @@ -1751,17 +1840,17 @@ end define showmqueue set $kgm_mqueue = *(struct ipc_mqueue *)$arg0 + set $kgm_psetoff = (uintptr_t)&(((struct ipc_pset *)0)->ips_messages) + set $kgm_portoff = (uintptr_t)&(((struct ipc_port *)0)->ip_messages) if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d1) - set $kgm_psetoff = &(((struct ipc_pset *)0)->ips_messages) - set $kgm_pset = (((long)$arg0) - ((long)$kgm_psetoff)) + set $kgm_psetp = (struct ipc_pset *)(((uintptr_t)$arg0) - $kgm_psetoff) showpsetheader - showpsetint $kgm_pset 1 + showpsetint $kgm_psetp 1 end if ($kgm_mqueue.data.pset.set_queue.wqs_wait_queue.wq_type == 0xf1d0) - set $kgm_portoff = &(((struct ipc_port *)0)->ip_messages) - set $kgm_port = (((long)$arg0) - ((long)$kgm_portoff)) + set $kgm_portp = (struct ipc_port *)(((uintptr_t)$arg0) - $kgm_portoff) showportheader - showportint $kgm_port 1 + showportint $kgm_portp 1 end end @@ -1785,6 +1874,9 @@ define zprint_one if ($kgm_zone->expandable) printf "X" end + if ($kgm_zone->noencrypt) + printf "$" + end printf "\n" end diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index 8c0dc3bf2..ebd35c82b 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -1240,7 +1240,7 @@ gc_update_color(int color, boolean_t fore) void vcputc(__unused int l, __unused int u, int c) { - if ( gc_enabled || debug_mode ) + if ( gc_initialized && ( gc_enabled || debug_mode ) ) { spl_t s; diff --git a/osfmk/default_pager/default_pager.c b/osfmk/default_pager/default_pager.c index ab1c491a0..20f3e361a 100644 --- a/osfmk/default_pager/default_pager.c +++ b/osfmk/default_pager/default_pager.c @@ -396,7 +396,8 @@ default_pager_initialize(void) vstruct_zone = zinit(sizeof(struct vstruct), 10000 * sizeof(struct vstruct), 8192, "vstruct zone"); - + zone_change(vstruct_zone, Z_NOENCRYPT, TRUE); + VSL_LOCK_INIT(); queue_init(&vstruct_list.vsl_queue); vstruct_list.vsl_count = 0; diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c index 11a27e146..ceda0a902 100644 --- a/osfmk/default_pager/dp_backing_store.c +++ b/osfmk/default_pager/dp_backing_store.c @@ -101,16 +101,19 @@ int physical_transfer_cluster_count = 0; #define VM_SUPER_CLUSTER 0x40000 -#define VM_SUPER_PAGES 64 +#define VM_SUPER_PAGES (VM_SUPER_CLUSTER / PAGE_SIZE) /* * 0 means no shift to pages, so == 1 page/cluster. 1 would mean * 2 pages/cluster, 2 means 4 pages/cluster, and so on. */ +#define VSTRUCT_MIN_CLSHIFT 0 + #define VSTRUCT_DEF_CLSHIFT 2 -int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT; int default_pager_clsize = 0; +int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT; + /* statistics */ unsigned int clustered_writes[VM_SUPER_PAGES+1]; unsigned int clustered_reads[VM_SUPER_PAGES+1]; @@ -171,6 +174,8 @@ boolean_t dp_encryption_inited = FALSE; /* Should we encrypt swap ? 
*/ boolean_t dp_encryption = FALSE; +boolean_t dp_isssd = FALSE; + /* * Object sizes are rounded up to the next power of 2, @@ -2694,8 +2699,8 @@ pvs_cluster_read( int cl_index; unsigned int xfer_size; dp_offset_t orig_vs_offset; - dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; - paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT]; + dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; + paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; struct clmap clmap; upl_t upl; unsigned int page_list_count; @@ -3076,16 +3081,25 @@ vs_cluster_write( upl_t upl; upl_page_info_t *pl; int page_index; + unsigned int page_max_index; int list_size; int pages_in_cl; unsigned int cl_size; int base_index; unsigned int seg_size; unsigned int upl_offset_in_object; + boolean_t minimal_clustering = FALSE; + boolean_t found_dirty; pages_in_cl = 1 << vs->vs_clshift; cl_size = pages_in_cl * vm_page_size; +#if CONFIG_FREEZE + minimal_clustering = TRUE; +#endif + if (dp_isssd == TRUE) + minimal_clustering = TRUE; + if (!dp_internal) { unsigned int page_list_count; int request_flags; @@ -3095,24 +3109,20 @@ vs_cluster_write( int num_of_pages; int seg_index; upl_offset_t upl_offset; + upl_offset_t upl_offset_aligned; dp_offset_t seg_offset; - dp_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; - paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1]; + dp_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1]; + paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1]; - if (bs_low) { + if (bs_low) super_size = cl_size; - - request_flags = UPL_NOBLOCK | - UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | - UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE; - } else { + else super_size = VM_SUPER_CLUSTER; - request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE | - UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | + request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE | + UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE; - } if (!dp_encryption_inited) { /* @@ -3133,7 +3143,6 @@ vs_cluster_write( request_flags |= UPL_ENCRYPT; flags |= UPL_PAGING_ENCRYPTED; } - page_list_count = 0; memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)offset, @@ -3153,28 +3162,52 @@ vs_cluster_write( pl = UPL_GET_INTERNAL_PAGE_LIST(upl); seg_size = cl_size - (upl_offset_in_object % cl_size); - upl_offset = upl_offset_in_object & ~(cl_size - 1); + upl_offset_aligned = upl_offset_in_object & ~(cl_size - 1); + page_index = 0; + page_max_index = upl->size / PAGE_SIZE; + found_dirty = TRUE; - for (seg_index = 0, transfer_size = upl->size; - transfer_size > 0; ) { - ps_offset[seg_index] = - ps_clmap(vs, - upl_offset, - &clmap, CL_ALLOC, - cl_size, 0); + for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) { + unsigned int seg_pgcnt; - if (ps_offset[seg_index] == (dp_offset_t) -1) { - upl_abort(upl, 0); - upl_deallocate(upl); - - return KERN_FAILURE; + seg_pgcnt = seg_size / PAGE_SIZE; - } - psp[seg_index] = CLMAP_PS(clmap); + if (minimal_clustering == TRUE) { + unsigned int non_dirty; + non_dirty = 0; + found_dirty = FALSE; + + for (; non_dirty < seg_pgcnt; non_dirty++) { + if ((page_index + non_dirty) >= page_max_index) + break; + + if (UPL_DIRTY_PAGE(pl, page_index + non_dirty) || + UPL_PRECIOUS_PAGE(pl, page_index + non_dirty)) { + found_dirty = TRUE; + break; + } + } + } + if (found_dirty == 
TRUE) { + ps_offset[seg_index] = + ps_clmap(vs, + upl_offset_aligned, + &clmap, CL_ALLOC, + cl_size, 0); + + if (ps_offset[seg_index] == (dp_offset_t) -1) { + upl_abort(upl, 0); + upl_deallocate(upl); + + return KERN_FAILURE; + } + psp[seg_index] = CLMAP_PS(clmap); + } if (transfer_size > seg_size) { + page_index += seg_pgcnt; transfer_size -= seg_size; - upl_offset += cl_size; + upl_offset_aligned += cl_size; seg_size = cl_size; seg_index++; } else diff --git a/osfmk/device/device.defs b/osfmk/device/device.defs index 31da78748..5410b050e 100644 --- a/osfmk/device/device.defs +++ b/osfmk/device/device.defs @@ -665,6 +665,12 @@ routine FUNC_NAME(io_service_add_notification_ool)( out notification : io_object_t ); +#else + + skip; + skip; + skip; + skip; #endif /* KERNEL_SERVER || __LP64__ */ diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index b27d050b2..f90433cfb 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -47,14 +47,19 @@ #endif #include +#include + #include #include +#include #if HIBERNATION #include #endif #include +#include + #if CONFIG_SLEEP extern void acpi_sleep_cpu(acpi_sleep_callback, void * refcon); extern void acpi_wake_prot(void); @@ -80,6 +85,10 @@ struct acpi_hibernate_callback_data { }; typedef struct acpi_hibernate_callback_data acpi_hibernate_callback_data_t; +unsigned int save_kdebug_enable = 0; +static uint64_t acpi_sleep_abstime; + + #if CONFIG_SLEEP static void acpi_hibernate(void *refcon) @@ -125,6 +134,9 @@ acpi_hibernate(void *refcon) cpu_IA32e_disable(current_cpu_datap()); #endif } + kdebug_enable = 0; + + acpi_sleep_abstime = mach_absolute_time(); (data->func)(data->refcon); @@ -133,8 +145,7 @@ acpi_hibernate(void *refcon) #endif /* CONFIG_SLEEP */ #endif /* HIBERNATION */ -static uint64_t acpi_sleep_abstime; -extern void slave_pstart(void); +extern void slave_pstart(void); void acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) @@ -146,6 +157,9 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) unsigned int cpu; kern_return_t rc; unsigned int my_cpu; + uint64_t now; + uint64_t my_tsc; + uint64_t my_abs; kprintf("acpi_sleep_kernel hib=%d\n", current_cpu_datap()->cpu_hibernate); @@ -185,6 +199,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) */ cpu_IA32e_disable(current_cpu_datap()); #endif + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_START, 0, 0, 0, 0, 0); + + save_kdebug_enable = kdebug_enable; + kdebug_enable = 0; acpi_sleep_abstime = mach_absolute_time(); @@ -258,13 +276,29 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) */ pmMarkAllCPUsOff(); + ml_get_timebase(&now); + /* let the realtime clock reset */ rtc_sleep_wakeup(acpi_sleep_abstime); - if (did_hibernate){ + kdebug_enable = save_kdebug_enable; + + if (did_hibernate) { + + my_tsc = (now >> 32) | (now << 32); + my_abs = tmrCvt(my_tsc, tscFCvtt2n); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_START, + (uint32_t)(my_abs >> 32), (uint32_t)my_abs, 0, 0, 0); hibernate_machine_init(); + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 2) | DBG_FUNC_END, 0, 0, 0, 0, 0); + current_cpu_datap()->cpu_hibernate = 0; - } + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0); + } else + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 0) | DBG_FUNC_END, 0, 0, 0, 0, 0); + /* re-enable and re-init local apic */ if (lapic_probe()) lapic_configure(); diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index 66939fac9..4b933d763 100644 --- a/osfmk/i386/bsd_i386.c +++ 
b/osfmk/i386/bsd_i386.c @@ -498,6 +498,7 @@ kern_return_t thread_fast_set_cthread_self64(uint64_t self) { pcb_t pcb = current_thread()->machine.pcb; + cpu_data_t *cdp; /* check for canonical address, set 0 otherwise */ if (!IS_USERADDR64_CANONICAL(self)) @@ -505,11 +506,13 @@ thread_fast_set_cthread_self64(uint64_t self) pcb->cthread_self = self; mp_disable_preemption(); + cdp = current_cpu_datap(); #if defined(__x86_64__) - if (current_cpu_datap()->cpu_uber.cu_user_gs_base != self) + if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || + (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) wrmsr64(MSR_IA32_KERNEL_GS_BASE, self); #endif - current_cpu_datap()->cpu_uber.cu_user_gs_base = self; + cdp->cpu_uber.cu_user_gs_base = self; mp_enable_preemption(); return (USER_CTHREAD); } diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 64add963d..9578cc80b 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -354,7 +354,6 @@ current_cpu_datap(void) static inline cpu_data_t * cpu_datap(int cpu) { - assert(cpu_data_ptr[cpu]); return cpu_data_ptr[cpu]; } diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index 9d48b1e85..ce8b2a378 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -134,7 +134,6 @@ #define CPUID_MODEL_MEROM 15 #define CPUID_MODEL_PENRYN 23 #define CPUID_MODEL_NEHALEM 26 -#define CPUID_MODEL_ATOM 28 #define CPUID_MODEL_FIELDS 30 /* Lynnfield, Clarksfield, Jasper */ #define CPUID_MODEL_DALES 31 /* Havendale, Auburndale */ #define CPUID_MODEL_NEHALEM_EX 46 diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c index f164b8ec9..b333db549 100644 --- a/osfmk/i386/hibernate_i386.c +++ b/osfmk/i386/hibernate_i386.c @@ -44,8 +44,12 @@ #include #include +extern ppnum_t max_ppnum; + #define MAX_BANKS 32 +int hibernate_page_list_allocate_avoided; + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ hibernate_page_list_t * @@ -69,11 +73,18 @@ hibernate_page_list_allocate(void) msize = args->MemoryMapDescriptorSize; mcount = args->MemoryMapSize / msize; + hibernate_page_list_allocate_avoided = 0; + num_banks = 0; for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); num = (ppnum_t) mptr->NumberOfPages; + + if (base > max_ppnum) + continue; + if ((base + num - 1) > max_ppnum) + num = max_ppnum - base + 1; if (!num) continue; @@ -120,6 +131,9 @@ hibernate_page_list_allocate(void) case kEfiRuntimeServicesData: // contents are volatile once the platform expert starts case kEfiACPIReclaimMemory: + hibernate_page_list_allocate_avoided += num; + break; + // non dram case kEfiReservedMemoryType: case kEfiUnusableMemory: diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 445c6afed..135f7a942 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -504,14 +504,14 @@ i386_init(vm_offset_t boot_args_start) if ( ! PE_parse_boot_argn("novmx", &noVMX, sizeof (noVMX))) noVMX = 0; /* OK to support Altivec in rosetta? */ - tsc_init(); - power_management_init(); - - PE_init_platform(TRUE, kernelBootArgs); - /* create the console for verbose or pretty mode */ + /* Note: doing this prior to tsc_init() allows for graceful panic! 
*/ + PE_init_platform(TRUE, kernelBootArgs); PE_create_console(); + tsc_init(); + power_management_init(); + processor_bootstrap(); thread_bootstrap(); diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index f2815aae4..bddcb54c7 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -99,17 +99,14 @@ uint64_t max_mem; /* Size of physical memory (bytes), adjusted by maxmem uint64_t mem_actual; uint64_t sane_size = 0; /* Memory size to use for defaults calculations */ -#define MAXBOUNCEPOOL (128 * 1024 * 1024) #define MAXLORESERVE ( 32 * 1024 * 1024) -extern unsigned int bsd_mbuf_cluster_reserve(void); - - -uint32_t bounce_pool_base = 0; -uint32_t bounce_pool_size = 0; - -static void reserve_bouncepool(uint32_t); +ppnum_t max_ppnum = 0; +ppnum_t lowest_lo = 0; +ppnum_t lowest_hi = 0; +ppnum_t highest_hi = 0; +extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *); pmap_paddr_t avail_start, avail_end; vm_offset_t virtual_avail, virtual_end; @@ -133,7 +130,6 @@ void *sectLINKB; unsigned long sectSizeLINK; void *sectPRELINKB; unsigned long sectSizePRELINK; void *sectHIBB; unsigned long sectSizeHIB; void *sectINITPTB; unsigned long sectSizeINITPT; -extern int srv; extern uint64_t firmware_Conventional_bytes; extern uint64_t firmware_RuntimeServices_bytes; @@ -163,8 +159,6 @@ i386_vm_init(uint64_t maxmem, unsigned int safeboot; ppnum_t maxpg = 0; uint32_t pmap_type; - uint32_t maxbouncepoolsize; - uint32_t maxloreserve; uint32_t maxdmaaddr; /* @@ -233,6 +227,7 @@ i386_vm_init(uint64_t maxmem, mcount = args->MemoryMapSize / msize; #define FOURGIG 0x0000000100000000ULL +#define ONEGIG 0x0000000040000000ULL for (i = 0; i < mcount; i++, mptr = (EfiMemoryRange *)(((vm_offset_t)mptr) + msize)) { ppnum_t base, top; @@ -502,41 +497,47 @@ i386_vm_init(uint64_t maxmem, kprintf("Physical memory %llu MB\n", sane_size/MB); - if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) - max_valid_dma_address = 4 * GB; - else - max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB; - - if (!PE_parse_boot_argn("maxbouncepool", &maxbouncepoolsize, sizeof (maxbouncepoolsize))) - maxbouncepoolsize = MAXBOUNCEPOOL; - else - maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024); - - /* since bsd_mbuf_cluster_reserve() is going to be called, we need to check for server */ - if (PE_parse_boot_argn("srv", &srv, sizeof (srv))) { - srv = 1; - } - - - /* - * bsd_mbuf_cluster_reserve depends on sane_size being set - * in order to correctly determine the size of the mbuf pool - * that will be reserved - */ - if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) - maxloreserve = MAXLORESERVE + bsd_mbuf_cluster_reserve(); - else - maxloreserve = maxloreserve * (1024 * 1024); + max_valid_low_ppnum = (2 * GB) / PAGE_SIZE; + if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr))) { + max_valid_dma_address = (uint64_t)4 * (uint64_t)GB; + } else { + max_valid_dma_address = ((uint64_t) maxdmaaddr) * MB; + if ((max_valid_dma_address / PAGE_SIZE) < max_valid_low_ppnum) + max_valid_low_ppnum = (ppnum_t)(max_valid_dma_address / PAGE_SIZE); + } if (avail_end >= max_valid_dma_address) { - if (maxbouncepoolsize) - reserve_bouncepool(maxbouncepoolsize); - - if (maxloreserve) - vm_lopage_poolsize = maxloreserve / PAGE_SIZE; + uint32_t maxloreserve; + uint32_t mbuf_reserve = 0; + boolean_t mbuf_override = FALSE; + + if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve))) { + + if (sane_size >= (ONEGIG * 15)) + 
maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 4; + else if (sane_size >= (ONEGIG * 7)) + maxloreserve = (MAXLORESERVE / PAGE_SIZE) * 2; + else + maxloreserve = MAXLORESERVE / PAGE_SIZE; + + mbuf_reserve = bsd_mbuf_cluster_reserve(&mbuf_override) / PAGE_SIZE; + } else + maxloreserve = (maxloreserve * (1024 * 1024)) / PAGE_SIZE; + + if (maxloreserve) { + vm_lopage_free_limit = maxloreserve; + + if (mbuf_override == TRUE) { + vm_lopage_free_limit += mbuf_reserve; + vm_lopage_lowater = 0; + } else + vm_lopage_lowater = vm_lopage_free_limit / 16; + + vm_lopage_refill = TRUE; + vm_lopage_needed = TRUE; + } } - /* * Initialize kernel physical map. * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS. @@ -551,24 +552,40 @@ pmap_free_pages(void) return (unsigned int)avail_remaining; } -#if defined(__LP64__) -/* On large memory systems, early allocations should prefer memory from the - * last region, which is typically all physical memory >4GB. This is used - * by pmap_steal_memory and pmap_pre_expand during init only. */ boolean_t -pmap_next_page_k64( ppnum_t *pn) +pmap_next_page_hi( + ppnum_t *pn) { - if(max_mem >= (32*GB)) { - pmap_memory_region_t *last_region = &pmap_memory_regions[pmap_memory_region_count-1]; - if (last_region->alloc != last_region->end) { - *pn = last_region->alloc++; - avail_remaining--; - return TRUE; + pmap_memory_region_t *region; + int n; + + if (avail_remaining) { + for (n = pmap_memory_region_count - 1; n >= 0; n--) { + region = &pmap_memory_regions[n]; + + if (region->alloc != region->end) { + *pn = region->alloc++; + avail_remaining--; + + if (*pn > max_ppnum) + max_ppnum = *pn; + + if (lowest_lo == 0 || *pn < lowest_lo) + lowest_lo = *pn; + + if (lowest_hi == 0 || *pn < lowest_hi) + lowest_hi = *pn; + + if (*pn > highest_hi) + highest_hi = *pn; + + return TRUE; + } } } - return pmap_next_page(pn); + return FALSE; } -#endif + boolean_t pmap_next_page( @@ -583,6 +600,12 @@ pmap_next_page( *pn = pmap_memory_regions[pmap_memory_region_current].alloc++; avail_remaining--; + if (*pn > max_ppnum) + max_ppnum = *pn; + + if (lowest_lo == 0 || *pn < lowest_lo) + lowest_lo = *pn; + return TRUE; } return FALSE; @@ -603,32 +626,6 @@ pmap_valid_page( return FALSE; } - -static void -reserve_bouncepool(uint32_t bounce_pool_wanted) -{ - pmap_memory_region_t *pmptr = pmap_memory_regions; - pmap_memory_region_t *lowest = NULL; - unsigned int i; - unsigned int pages_needed; - - pages_needed = bounce_pool_wanted / PAGE_SIZE; - - for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { - if ( (pmptr->end - pmptr->alloc) >= pages_needed ) { - if ( (lowest == NULL) || (pmptr->alloc < lowest->alloc) ) - lowest = pmptr; - } - } - if ( (lowest != NULL) ) { - bounce_pool_base = lowest->alloc * PAGE_SIZE; - bounce_pool_size = bounce_pool_wanted; - - lowest->alloc += pages_needed; - avail_remaining -= pages_needed; - } -} - /* * Called once VM is fully initialized so that we can release unused * sections of low memory to the general pool. 
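Illustrative sketch (not part of the applied diff): the i386_vm_init() hunk above replaces the fixed bounce pool with a low-memory free list whose default size scales with installed RAM and can be padded by the mbuf cluster reserve. A minimal restatement of that sizing rule, where the helper name lopage_default_limit is hypothetical and MAXLORESERVE, ONEGIG and PAGE_SIZE are as used in the diff:

	/* Default vm_lopage_free_limit, in pages, for a machine with
	 * 'sane_size' bytes of physical memory (no "maxloreserve" boot-arg). */
	static uint32_t
	lopage_default_limit(uint64_t sane_size)
	{
		if (sane_size >= (ONEGIG * 15))
			return (MAXLORESERVE / PAGE_SIZE) * 4;
		else if (sane_size >= (ONEGIG * 7))
			return (MAXLORESERVE / PAGE_SIZE) * 2;
		else
			return (MAXLORESERVE / PAGE_SIZE);
	}

When the mbuf pool was explicitly overridden, the diff adds bsd_mbuf_cluster_reserve()/PAGE_SIZE on top of this limit and sets vm_lopage_lowater to 0; otherwise the low-water mark is vm_lopage_free_limit / 16.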
diff --git a/osfmk/i386/machine_routines.c b/osfmk/i386/machine_routines.c index 24dc7afa8..dc50a1ed1 100644 --- a/osfmk/i386/machine_routines.c +++ b/osfmk/i386/machine_routines.c @@ -96,8 +96,8 @@ vm_offset_t ml_static_malloc( void ml_get_bouncepool_info(vm_offset_t *phys_addr, vm_size_t *size) { - *phys_addr = bounce_pool_base; - *size = bounce_pool_size; + *phys_addr = 0; + *size = 0; } diff --git a/osfmk/i386/machine_routines.h b/osfmk/i386/machine_routines.h index 0112edf0c..24c9aeca3 100644 --- a/osfmk/i386/machine_routines.h +++ b/osfmk/i386/machine_routines.h @@ -124,8 +124,6 @@ vm_offset_t ml_io_map( vm_offset_t phys_addr, vm_size_t size); -extern uint32_t bounce_pool_base; -extern uint32_t bounce_pool_size; void ml_get_bouncepool_info( vm_offset_t *phys_addr, diff --git a/osfmk/i386/machine_routines_asm.s b/osfmk/i386/machine_routines_asm.s index 249c0ebf7..ae2e8aaf1 100644 --- a/osfmk/i386/machine_routines_asm.s +++ b/osfmk/i386/machine_routines_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -181,6 +181,31 @@ LEXT(_rtc_nanotime_store) ret +/* void _rtc_nanotime_adjust( + uint64_t tsc_base_delta, + rtc_nanotime_t *dst); +*/ + .globl EXT(_rtc_nanotime_adjust) + .align FALIGN + +LEXT(_rtc_nanotime_adjust) + mov 12(%esp),%edx /* ptr to rtc_nanotime_info */ + + movl RNT_GENERATION(%edx),%ecx /* get current generation */ + movl $0,RNT_GENERATION(%edx) /* flag data as being updated */ + + movl 4(%esp),%eax /* get lower 32-bits of delta */ + addl %eax,RNT_TSC_BASE(%edx) + adcl $0,RNT_TSC_BASE+4(%edx) /* propagate carry */ + + incl %ecx /* next generation */ + jnz 1f + incl %ecx /* skip 0, which is a flag */ +1: movl %ecx,RNT_GENERATION(%edx) /* update generation and make usable */ + + ret + + /* unint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow ); * * This is the same as the commpage nanotime routine, except that it uses the diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 025396b31..3e54df7b5 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -137,6 +137,7 @@ extern void rtc_clock_stepped( uint32_t new_frequency, uint32_t old_frequency); extern void rtc_clock_napped(uint64_t, uint64_t); +extern void rtc_clock_adjust(uint64_t); extern void pmap_lowmem_finalize(void); diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c index 36d4fec6e..fcf202e6b 100644 --- a/osfmk/i386/pcb.c +++ b/osfmk/i386/pcb.c @@ -528,13 +528,18 @@ act_machine_switch_pcb( thread_t new ) /* * Switch user's GS base if necessary - * by setting the Kernel's GS base MSR + * by setting the Kernel GS base MSR * - this will become the user's on the swapgs when - * returning to user-space. + * returning to user-space. Avoid this for + * kernel threads (no user TLS support required) + * and verify the memory shadow of the segment base + * in the event it was altered in user space. 
*/ - if (cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) { - cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; - wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); + if ((pcb->cthread_self != 0) || (new->task != kernel_task)) { + if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) { + cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; + wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); + } } } else { x86_saved_state_compat32_t *iss32compat; @@ -2088,6 +2093,15 @@ machine_thread_create( pcb->cthread_self = 0; pcb->uldt_selector = 0; + /* Ensure that the "cthread" descriptor describes a valid + * segment. + */ + if ((pcb->cthread_desc.access & ACC_P) == 0) { + struct real_descriptor *ldtp; + ldtp = (struct real_descriptor *)current_ldt(); + pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; + } + return(KERN_SUCCESS); } diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index 44ea7eaf7..91e3799bb 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -681,6 +681,7 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, callbacks->GetSavedRunCount = pmGetSavedRunCount; callbacks->pmSendIPI = pmSendIPI; callbacks->GetNanotimeInfo = pmGetNanotimeInfo; + callbacks->RTCClockAdjust = rtc_clock_adjust; callbacks->topoParms = &topoParms; } else { panic("Version mis-match between Kernel and CPU PM"); diff --git a/osfmk/i386/pmCPU.h b/osfmk/i386/pmCPU.h index 1e553690b..c6e36a616 100644 --- a/osfmk/i386/pmCPU.h +++ b/osfmk/i386/pmCPU.h @@ -38,7 +38,7 @@ * This value should be changed each time that pmDsipatch_t or pmCallBacks_t * changes. */ -#define PM_DISPATCH_VERSION 20 +#define PM_DISPATCH_VERSION 21 /* * Dispatch table for functions that get installed when the power @@ -100,6 +100,7 @@ typedef struct { uint32_t (*GetSavedRunCount)(void); void (*pmSendIPI)(int cpu); rtc_nanotime_t *(*GetNanotimeInfo)(void); + void (*RTCClockAdjust)(uint64_t adjustment); x86_topology_parameters_t *topoParms; } pmCallBacks_t; diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index e7135803a..fc2147d4b 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -273,12 +273,9 @@ static vm_object_t kptobj; char *pmap_phys_attributes; unsigned int last_managed_page = 0; -/* - * Physical page attributes. Copy bits from PTE definition. - */ -#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */ -#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */ -#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */ +extern ppnum_t lowest_lo; +extern ppnum_t lowest_hi; +extern ppnum_t highest_hi; /* * Amount of virtual memory mapped by one @@ -677,13 +674,7 @@ pmap_cpu_init(void) { /* * Here early in the life of a processor (from cpu_mode_init()). - * If we're not in 64-bit mode, enable the global TLB feature. - * Note: regardless of mode we continue to set the global attribute - * bit in ptes for all (32-bit) global pages such as the commpage. */ - if (!cpu_64bit) { - set_cr4(get_cr4() | CR4_PGE); - } /* * Initialize the per-cpu, TLB-related fields. 
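Illustrative sketch (not part of the applied diff): both places that program the user GS base (thread_fast_set_cthread_self64 earlier and act_machine_switch_pcb above) now compare the wanted base against the per-cpu shadow and against the live MSR before writing, since the shadow alone can become stale. A condensed form of that check, with sync_user_gs_base as a hypothetical helper name:

	/* Rewrite MSR_IA32_KERNEL_GS_BASE only if either the cached shadow
	 * or the hardware MSR disagrees with the requested base. */
	static void
	sync_user_gs_base(cpu_data_t *cdp, uint64_t base)
	{
		if ((cdp->cpu_uber.cu_user_gs_base != base) ||
		    (base != rdmsr64(MSR_IA32_KERNEL_GS_BASE)))
			wrmsr64(MSR_IA32_KERNEL_GS_BASE, base);
		cdp->cpu_uber.cu_user_gs_base = base;
	}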
@@ -1037,11 +1028,11 @@ pmap_virtual_space( void pmap_init(void) { - register long npages; - vm_offset_t addr; - register vm_size_t s; - vm_map_offset_t vaddr; - ppnum_t ppn; + long npages; + vm_map_offset_t vaddr; + vm_offset_t addr; + vm_size_t s, vsize; + ppnum_t ppn; /* * Allocate memory for the pv_head_table and its lock bits, @@ -1068,6 +1059,9 @@ pmap_init(void) memset((char *)addr, 0, s); + vaddr = addr; + vsize = s; + #if PV_DEBUG if (0 == npvhash) panic("npvhash not initialized"); #endif @@ -1105,11 +1099,24 @@ pmap_init(void) if (pn > last_managed_page) last_managed_page = pn; + + if (pn < lowest_lo) + pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; + else if (pn >= lowest_hi && pn <= highest_hi) + pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; } } } } } + while (vsize) { + ppn = pmap_find_phys(kernel_pmap, vaddr); + + pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT; + + vaddr += PAGE_SIZE; + vsize -= PAGE_SIZE; + } /* * Create the zone of physical maps, @@ -1117,10 +1124,15 @@ pmap_init(void) */ s = (vm_size_t) sizeof(struct pmap); pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ + zone_change(pmap_zone, Z_NOENCRYPT, TRUE); + s = (vm_size_t) sizeof(struct pv_hashed_entry); pv_hashed_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */ + zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE); + s = 63; pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */ + zone_change(pdpt_zone, Z_NOENCRYPT, TRUE); kptobj = &kptobj_object_store; _vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj); @@ -1845,6 +1857,7 @@ pmap_expand_pml4( OSAddAtomic(-1, &inuse_ptepages_count); return; } + pmap_set_noencrypt(pn); #if 0 /* DEBUG */ if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) { @@ -1934,6 +1947,7 @@ pmap_expand_pdpt( OSAddAtomic(-1, &inuse_ptepages_count); return; } + pmap_set_noencrypt(pn); #if 0 /* DEBUG */ if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) { @@ -2046,6 +2060,7 @@ pmap_expand( OSAddAtomic(-1, &inuse_ptepages_count); return; } + pmap_set_noencrypt(pn); #if 0 /* DEBUG */ if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) { diff --git a/osfmk/i386/pmap_internal.h b/osfmk/i386/pmap_internal.h index 04f4aa008..eef4f7c4d 100644 --- a/osfmk/i386/pmap_internal.h +++ b/osfmk/i386/pmap_internal.h @@ -278,6 +278,7 @@ typedef struct pv_hashed_entry { /* first three entries must match pv_rooted #define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */ #define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */ #define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */ +#define PHYS_NOENCRYPT INTEL_PTE_USER /* no need to encrypt this page in the hibernation image */ /* * Amount of virtual memory mapped by one diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c index 53c1996e1..63ec071f4 100644 --- a/osfmk/i386/pmap_x86_common.c +++ b/osfmk/i386/pmap_x86_common.c @@ -1112,6 +1112,8 @@ pmap_page_protect( * Fix up head later. */ pv_h->pmap = PMAP_NULL; + + pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT; } else { /* * Delete this entry. 
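Illustrative sketch (not part of the applied diff): pmap_init() above tags low pages and the pv bookkeeping pages with PHYS_NOENCRYPT, and the following pmap_x86_common.c hunk adds pmap_is_noencrypt()/pmap_set_noencrypt()/pmap_clear_noencrypt() to query and maintain that bit. A minimal sketch of how hibernation image code could consult it, where write_page_plain and write_page_encrypted are hypothetical helpers:

	/* Skip encryption work for pages marked as never needing protection
	 * in the hibernation image (e.g. no-encrypt zone and pv metadata). */
	static void
	hibernate_emit_page(ppnum_t pn, const void *data)
	{
		if (pmap_is_noencrypt(pn))
			write_page_plain(pn, data);
		else
			write_page_encrypted(pn, data);
	}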
@@ -1276,3 +1278,54 @@ mapping_adjust(void) mappingrecurse = 0; } + +boolean_t +pmap_is_noencrypt(ppnum_t pn) +{ + int pai; + + pai = ppn_to_pai(pn); + + if (!IS_MANAGED_PAGE(pai)) + return (TRUE); + + if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) + return (TRUE); + + return (FALSE); +} + + +void +pmap_set_noencrypt(ppnum_t pn) +{ + int pai; + + pai = ppn_to_pai(pn); + + if (IS_MANAGED_PAGE(pai)) { + LOCK_PVH(pai); + + pmap_phys_attributes[pai] |= PHYS_NOENCRYPT; + + UNLOCK_PVH(pai); + } +} + + +void +pmap_clear_noencrypt(ppnum_t pn) +{ + int pai; + + pai = ppn_to_pai(pn); + + if (IS_MANAGED_PAGE(pai)) { + LOCK_PVH(pai); + + pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT; + + UNLOCK_PVH(pai); + } +} + diff --git a/osfmk/i386/rtclock.c b/osfmk/i386/rtclock.c index 13410afc8..244e787e0 100644 --- a/osfmk/i386/rtclock.c +++ b/osfmk/i386/rtclock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2009 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -322,6 +322,25 @@ rtc_clock_napped(uint64_t base, uint64_t tsc_base) } } + +/* + * Invoked from power management to correct the SFLM TSC entry drift problem: + * a small delta is added to the tsc_base. This is equivalent to nudging time + * backwards. We require this of the order of a TSC quantum which won't cause + * callers of mach_absolute_time() to see time going backwards! + */ +void +rtc_clock_adjust(uint64_t tsc_base_delta) +{ + rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; + + assert(!ml_get_interrupts_enabled()); + assert(tsc_base_delta < 100ULL); /* i.e. it's small */ + _rtc_nanotime_adjust(tsc_base_delta, rntp); + rtc_nanotime_set_commpage(rntp); +} + + void rtc_clock_stepping(__unused uint32_t new_frequency, __unused uint32_t old_frequency) diff --git a/osfmk/i386/rtclock.h b/osfmk/i386/rtclock.h index 35280788a..82441c209 100644 --- a/osfmk/i386/rtclock.h +++ b/osfmk/i386/rtclock.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2007 Apple Inc. All rights reserved. + * Copyright (c) 2004-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -68,6 +68,10 @@ extern void _rtc_nanotime_store( uint32_t shift, rtc_nanotime_t *dst); +extern void _rtc_nanotime_adjust( + uint64_t tsc_base_delta, + rtc_nanotime_t *dst); + extern uint64_t _rtc_nanotime_read( rtc_nanotime_t *rntp, int slow); diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c index 7417911d1..8e4773748 100644 --- a/osfmk/ipc/ipc_init.c +++ b/osfmk/ipc/ipc_init.c @@ -155,6 +155,7 @@ ipc_bootstrap(void) /* make it exhaustible */ zone_change(ipc_space_zone, Z_EXHAUST, TRUE); #endif + zone_change(ipc_space_zone, Z_NOENCRYPT, TRUE); ipc_tree_entry_zone = zinit(sizeof(struct ipc_tree_entry), @@ -165,6 +166,7 @@ ipc_bootstrap(void) /* make it exhaustible */ zone_change(ipc_tree_entry_zone, Z_EXHAUST, TRUE); #endif + zone_change(ipc_tree_entry_zone, Z_NOENCRYPT, TRUE); /* * populate all port(set) zones @@ -179,6 +181,7 @@ ipc_bootstrap(void) * XXX panics when port allocation for an internal object fails. 
*zone_change(ipc_object_zones[IOT_PORT], Z_EXHAUST, TRUE); */ + zone_change(ipc_object_zones[IOT_PORT], Z_NOENCRYPT, TRUE); ipc_object_zones[IOT_PORT_SET] = zinit(sizeof(struct ipc_pset), @@ -187,6 +190,7 @@ ipc_bootstrap(void) "ipc port sets"); /* make it exhaustible */ zone_change(ipc_object_zones[IOT_PORT_SET], Z_EXHAUST, TRUE); + zone_change(ipc_object_zones[IOT_PORT_SET], Z_NOENCRYPT, TRUE); /* * Create the basic ipc_kmsg_t zone (the one we also cache) @@ -197,6 +201,7 @@ ipc_bootstrap(void) IKM_SAVED_KMSG_SIZE, IKM_SAVED_KMSG_SIZE, "ipc kmsgs"); + zone_change(ipc_kmsg_zone, Z_NOENCRYPT, TRUE); #if CONFIG_MACF_MACH ipc_labelh_zone = diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c index 3bf72594e..b56317bf9 100644 --- a/osfmk/kern/hibernate.c +++ b/osfmk/kern/hibernate.c @@ -42,13 +42,13 @@ #include #include - /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ kern_return_t hibernate_setup(IOHibernateImageHeader * header, - uint32_t free_page_ratio, - uint32_t free_page_time, + uint32_t free_page_ratio, + uint32_t free_page_time, + boolean_t vmflush, hibernate_page_list_t ** page_list_ret, hibernate_page_list_t ** page_list_wired_ret, boolean_t * encryptedswap) @@ -59,7 +59,9 @@ hibernate_setup(IOHibernateImageHeader * header, *page_list_ret = NULL; *page_list_wired_ret = NULL; - + + if (vmflush) + hibernate_flush_memory(); page_list = hibernate_page_list_allocate(); if (!page_list) diff --git a/osfmk/kern/host.c b/osfmk/kern/host.c index 7a3fbf595..f77cccb71 100644 --- a/osfmk/kern/host.c +++ b/osfmk/kern/host.c @@ -340,7 +340,7 @@ host_statistics( #if CONFIG_EMBEDDED stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count); #else - stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count + vm_page_throttled_count); + stat32->wire_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count); #endif stat32->zero_fill_count = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.zero_fill_count); stat32->reactivations = VM_STATISTICS_TRUNCATE_TO_32_BIT(host_vm_stat.reactivations); @@ -489,7 +489,7 @@ host_statistics64( #if CONFIG_EMBEDDED stat->wire_count = vm_page_wire_count; #else - stat->wire_count = vm_page_wire_count + vm_page_throttled_count; + stat->wire_count = vm_page_wire_count + vm_page_throttled_count + vm_lopage_free_count; #endif stat->zero_fill_count = host_vm_stat.zero_fill_count; stat->reactivations = host_vm_stat.reactivations; diff --git a/osfmk/kern/mk_timer.c b/osfmk/kern/mk_timer.c index 2469de4f5..6bbb1b194 100644 --- a/osfmk/kern/mk_timer.c +++ b/osfmk/kern/mk_timer.c @@ -137,6 +137,8 @@ mk_timer_init(void) assert(!(mk_timer_zone != NULL)); mk_timer_zone = zinit(s, (4096 * s), (16 * s), "mk_timer"); + + zone_change(mk_timer_zone, Z_NOENCRYPT, TRUE); } static void diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 78b43fa83..2ed0f12e7 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -466,7 +466,6 @@ thread_unblock( if (processor != current_processor()) machine_signal_idle(processor); } - result = TRUE; } @@ -1258,12 +1257,8 @@ thread_select( } else { enqueue_head(&processor->processor_meta->idle_queue, (queue_entry_t)processor); - - if (thread->sched_pri < BASEPRI_RTQUEUES) { - pset_unlock(pset); - - return (processor->idle_thread); - } + pset_unlock(pset); + return (processor->idle_thread); } } @@ -2263,38 +2258,32 @@ choose_processor( { processor_set_t nset, cset = pset; processor_meta_t pmeta = 
PROCESSOR_META_NULL; - + processor_t mprocessor; + /* * Prefer the hinted processor, when appropriate. */ - if (processor != PROCESSOR_NULL) { - processor_t mprocessor; + if (processor != PROCESSOR_NULL) { if (processor->processor_meta != PROCESSOR_META_NULL) processor = processor->processor_meta->primary; + } - mprocessor = machine_choose_processor(pset, processor); - if (mprocessor != PROCESSOR_NULL) - processor = mprocessor; + mprocessor = machine_choose_processor(pset, processor); + if (mprocessor != PROCESSOR_NULL) + processor = mprocessor; - if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE || - processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE) + if (processor != PROCESSOR_NULL) { + if (processor->processor_set != pset || + processor->state == PROCESSOR_INACTIVE || + processor->state == PROCESSOR_SHUTDOWN || + processor->state == PROCESSOR_OFF_LINE) processor = PROCESSOR_NULL; else - if (processor->state == PROCESSOR_IDLE) - return (processor); - } - else { - processor = machine_choose_processor(pset, processor); - - if (processor != PROCESSOR_NULL) { - if (processor->processor_set != pset || processor->state == PROCESSOR_INACTIVE || - processor->state == PROCESSOR_SHUTDOWN || processor->state == PROCESSOR_OFF_LINE) - processor = PROCESSOR_NULL; - else - if (processor->state == PROCESSOR_IDLE) - return (processor); - } + if (processor->state == PROCESSOR_IDLE || + ((thread->sched_pri >= BASEPRI_RTQUEUES) && + (processor->current_pri < BASEPRI_RTQUEUES))) + return (processor); } /* @@ -2309,27 +2298,67 @@ choose_processor( return ((processor_t)queue_first(&cset->idle_queue)); if (thread->sched_pri >= BASEPRI_RTQUEUES) { - /* - * For an RT thread, iterate through active processors, first fit. 
- */ + integer_t lowest_priority = MAXPRI + 1; + integer_t lowest_unpaired = MAXPRI + 1; + uint64_t furthest_deadline = 1; + processor_t lp_processor = PROCESSOR_NULL; + processor_t lp_unpaired = PROCESSOR_NULL; + processor_t fd_processor = PROCESSOR_NULL; + + lp_processor = cset->low_pri; + /* Consider hinted processor */ + if (lp_processor != PROCESSOR_NULL && + ((lp_processor->processor_meta == PROCESSOR_META_NULL) || + ((lp_processor == lp_processor->processor_meta->primary) && + !queue_empty(&lp_processor->processor_meta->idle_queue))) && + lp_processor->state != PROCESSOR_INACTIVE && + lp_processor->state != PROCESSOR_SHUTDOWN && + lp_processor->state != PROCESSOR_OFF_LINE && + (lp_processor->current_pri < thread->sched_pri)) + return lp_processor; + processor = (processor_t)queue_first(&cset->active_queue); while (!queue_end(&cset->active_queue, (queue_entry_t)processor)) { - if (thread->sched_pri > processor->current_pri || - thread->realtime.deadline < processor->deadline) - return (processor); + /* Discover the processor executing the + * thread with the lowest priority within + * this pset, or the one with the furthest + * deadline + */ + integer_t cpri = processor->current_pri; + if (cpri < lowest_priority) { + lowest_priority = cpri; + lp_processor = processor; + } - if (pmeta == PROCESSOR_META_NULL) { - if (processor->processor_meta != PROCESSOR_META_NULL && - !queue_empty(&processor->processor_meta->idle_queue)) - pmeta = processor->processor_meta; + if ((cpri >= BASEPRI_RTQUEUES) && (processor->deadline > furthest_deadline)) { + furthest_deadline = processor->deadline; + fd_processor = processor; } + + if (processor->processor_meta != PROCESSOR_META_NULL && + !queue_empty(&processor->processor_meta->idle_queue)) { + if (cpri < lowest_unpaired) { + lowest_unpaired = cpri; + lp_unpaired = processor; + pmeta = processor->processor_meta; + } + else + if (pmeta == PROCESSOR_META_NULL) + pmeta = processor->processor_meta; + } processor = (processor_t)queue_next((queue_entry_t)processor); } + if (thread->sched_pri > lowest_unpaired) + return lp_unpaired; + if (pmeta != PROCESSOR_META_NULL) return ((processor_t)queue_first(&pmeta->idle_queue)); - + if (thread->sched_pri > lowest_priority) + return lp_processor; + if (thread->realtime.deadline < furthest_deadline) + return fd_processor; processor = PROCESSOR_NULL; } else { @@ -2358,10 +2387,9 @@ choose_processor( if (processor != PROCESSOR_NULL) enqueue_tail(&cset->active_queue, (queue_entry_t)processor); } - if (processor != PROCESSOR_NULL && pmeta == PROCESSOR_META_NULL) { if (processor->processor_meta != PROCESSOR_META_NULL && - !queue_empty(&processor->processor_meta->idle_queue)) + !queue_empty(&processor->processor_meta->idle_queue)) pmeta = processor->processor_meta; } } @@ -2491,21 +2519,7 @@ thread_setrun( processor = thread->last_processor; pset = processor->processor_set; pset_lock(pset); - - /* - * Choose a different processor in certain cases. - */ - if (thread->sched_pri >= BASEPRI_RTQUEUES) { - /* - * If the processor is executing an RT thread with - * an earlier deadline, choose another. 
- */ - if (thread->sched_pri <= processor->current_pri || - thread->realtime.deadline >= processor->deadline) - processor = choose_processor(pset, PROCESSOR_NULL, thread); - } - else - processor = choose_processor(pset, processor, thread); + processor = choose_processor(pset, processor, thread); } else { /* diff --git a/osfmk/kern/sync_sema.c b/osfmk/kern/sync_sema.c index eef5c13ad..a072684ad 100644 --- a/osfmk/kern/sync_sema.c +++ b/osfmk/kern/sync_sema.c @@ -143,6 +143,7 @@ semaphore_init(void) semaphore_max * sizeof(struct semaphore), sizeof(struct semaphore), "semaphores"); + zone_change(semaphore_zone, Z_NOENCRYPT, TRUE); } /* diff --git a/osfmk/kern/task.c b/osfmk/kern/task.c index aedd993a1..c5efca2a7 100644 --- a/osfmk/kern/task.c +++ b/osfmk/kern/task.c @@ -285,6 +285,7 @@ task_init(void) task_max * sizeof(struct task), TASK_CHUNK * sizeof(struct task), "tasks"); + zone_change(task_zone, Z_NOENCRYPT, TRUE); /* * Create the kernel task as the first task. diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 581a37c7f..adff14820 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -259,6 +259,7 @@ thread_init(void) thread_max * sizeof(struct thread), THREAD_CHUNK * sizeof(struct thread), "threads"); + zone_change(thread_zone, Z_NOENCRYPT, TRUE); lck_grp_attr_setdefault(&thread_lck_grp_attr); lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr); diff --git a/osfmk/kern/thread_call.c b/osfmk/kern/thread_call.c index 92f0b642b..a50c6d7d3 100644 --- a/osfmk/kern/thread_call.c +++ b/osfmk/kern/thread_call.c @@ -138,6 +138,7 @@ thread_call_initialize(void) i = sizeof (thread_call_data_t); thread_call_zone = zinit(i, 4096 * i, 16 * i, "thread_call"); + zone_change(thread_call_zone, Z_NOENCRYPT, TRUE); simple_lock_init(&thread_call_lock, 0); diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index a7a19a024..6763ac65c 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -169,14 +169,19 @@ wait_queue_bootstrap(void) WAIT_QUEUE_MAX * sizeof(struct wait_queue), sizeof(struct wait_queue), "wait queues"); + zone_change(_wait_queue_zone, Z_NOENCRYPT, TRUE); + _wait_queue_set_zone = zinit(sizeof(struct wait_queue_set), WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set), sizeof(struct wait_queue_set), "wait queue sets"); + zone_change(_wait_queue_set_zone, Z_NOENCRYPT, TRUE); + _wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link), WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link), sizeof(struct _wait_queue_link), "wait queue links"); + zone_change(_wait_queue_link_zone, Z_NOENCRYPT, TRUE); } /* diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index d1d08bf96..c0e567eaf 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -266,6 +266,7 @@ MACRO_END #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock) kern_return_t zget_space( + zone_t zone, vm_offset_t size, vm_offset_t *result); @@ -473,7 +474,7 @@ zinit( zone_t z; if (zone_zone == ZONE_NULL) { - if (zget_space(sizeof(struct zone), (vm_offset_t *)&z) + if (zget_space(NULL, sizeof(struct zone), (vm_offset_t *)&z) != KERN_SUCCESS) return(ZONE_NULL); } else @@ -544,6 +545,7 @@ use_this_allocation: z->expandable = TRUE; z->waiting = FALSE; z->async_pending = FALSE; + z->noencrypt = FALSE; #if ZONE_DEBUG z->active_zones.next = z->active_zones.prev = NULL; @@ -639,6 +641,7 @@ zcram( kern_return_t zget_space( + zone_t zone, vm_offset_t size, vm_offset_t *result) { @@ -655,6 +658,8 @@ zget_space( if (new_space == 0) { kern_return_t retval; + int zflags = 
KMA_KOBJECT|KMA_NOPAGEWAIT; + /* * Memory cannot be wired down while holding * any locks that the pageout daemon might @@ -670,8 +675,10 @@ zget_space( simple_unlock(&zget_space_lock); - retval = kernel_memory_allocate(zone_map, &new_space, - space_to_add, 0, KMA_KOBJECT|KMA_NOPAGEWAIT); + if (zone == NULL || zone->noencrypt) + zflags |= KMA_NOENCRYPT; + + retval = kernel_memory_allocate(zone_map, &new_space, space_to_add, 0, zflags); if (retval != KERN_SUCCESS) return(retval); #if ZONE_ALIAS_ADDR @@ -827,8 +834,10 @@ zone_bootstrap(void) zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone), sizeof(struct zone), "zones"); zone_change(zone_zone, Z_COLLECT, FALSE); + zone_change(zone_zone, Z_NOENCRYPT, TRUE); + zone_zone_size = zalloc_end_of_space - zalloc_next_space; - zget_space(zone_zone_size, &zone_zone_space); + zget_space(NULL, zone_zone_size, &zone_zone_space); zcram(zone_zone, (void *)zone_zone_space, zone_zone_size); } @@ -948,6 +957,7 @@ zalloc_canblock( int retry = 0; for (;;) { + int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; if (vm_pool_low() || retry >= 1) alloc_size = @@ -955,9 +965,10 @@ zalloc_canblock( else alloc_size = zone->alloc_size; - retval = kernel_memory_allocate(zone_map, - &space, alloc_size, 0, - KMA_KOBJECT|KMA_NOPAGEWAIT); + if (zone->noencrypt) + zflags |= KMA_NOENCRYPT; + + retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); if (retval == KERN_SUCCESS) { #if ZONE_ALIAS_ADDR if (alloc_size == PAGE_SIZE) @@ -1000,7 +1011,7 @@ zalloc_canblock( } } else { vm_offset_t space; - retval = zget_space(zone->elem_size, &space); + retval = zget_space(zone, zone->elem_size, &space); lock_zone(zone); zone->doing_alloc = FALSE; @@ -1356,6 +1367,9 @@ zone_change( assert( value == TRUE || value == FALSE ); switch(item){ + case Z_NOENCRYPT: + zone->noencrypt = value; + break; case Z_EXHAUST: zone->exhaustible = value; break; diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h index b21f71253..22f9d78b4 100644 --- a/osfmk/kern/zalloc.h +++ b/osfmk/kern/zalloc.h @@ -108,7 +108,8 @@ struct zone { /* boolean_t */ doing_alloc :1, /* is zone expanding now? */ /* boolean_t */ waiting :1, /* is thread waiting for expansion? */ /* boolean_t */ async_pending :1, /* asynchronous allocation pending? */ - /* boolean_t */ doing_gc :1; /* garbage collect in progress? */ + /* boolean_t */ doing_gc :1, /* garbage collect in progress? 
*/ + /* boolean_t */ noencrypt :1; struct zone * next_zone; /* Link for all-zones list */ call_entry_data_t call_async_alloc; /* callout for asynchronous alloc */ const char *zone_name; /* a name for the zone */ @@ -219,6 +220,7 @@ extern void zone_change( #define Z_COLLECT 2 /* Make zone collectable */ #define Z_EXPAND 3 /* Make zone expandable */ #define Z_FOREIGN 4 /* Allow collectable zone to contain foreign elements */ +#define Z_NOENCRYPT 6 /* Don't encrypt zone during hibernation */ /* Preallocate space for zone from zone map */ extern void zprealloc( diff --git a/osfmk/mach/memory_object_types.h b/osfmk/mach/memory_object_types.h index 8f3044430..c4a5df888 100644 --- a/osfmk/mach/memory_object_types.h +++ b/osfmk/mach/memory_object_types.h @@ -561,8 +561,9 @@ typedef uint32_t upl_size_t; /* page-aligned byte size */ #define UPL_COMMIT_CS_VALIDATED 0x40 #define UPL_COMMIT_CLEAR_PRECIOUS 0x80 #define UPL_COMMIT_SPECULATE 0x100 +#define UPL_COMMIT_FREE_ABSENT 0x200 -#define UPL_COMMIT_KERNEL_ONLY_FLAGS (UPL_COMMIT_CS_VALIDATED) +#define UPL_COMMIT_KERNEL_ONLY_FLAGS (UPL_COMMIT_CS_VALIDATED | UPL_COMMIT_FREE_ABSENT) /* flags for return of state from vm_map_get_upl, vm_upl address space */ /* based call */ diff --git a/osfmk/ppc/hibernate_ppc.c b/osfmk/ppc/hibernate_ppc.c index c807d26a9..2bd051994 100644 --- a/osfmk/ppc/hibernate_ppc.c +++ b/osfmk/ppc/hibernate_ppc.c @@ -54,6 +54,9 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +unsigned int save_kdebug_enable = 0; + + hibernate_page_list_t * hibernate_page_list_allocate(void) { diff --git a/osfmk/ppc/pmap.c b/osfmk/ppc/pmap.c index 495124287..90940a9ab 100644 --- a/osfmk/ppc/pmap.c +++ b/osfmk/ppc/pmap.c @@ -609,6 +609,13 @@ unsigned int pmap_free_pages(void) * This function allocates physical pages. */ +boolean_t +pmap_next_page_hi(ppnum_t * pnum) +{ + return pmap_next_page(pnum); +} + + /* Non-optimal, but only used for virtual memory startup. * Allocate memory from a table of free physical addresses * If there are no more free entries, too bad. @@ -1046,6 +1053,24 @@ unsigned int pmap_disconnect( return (mapping_tst_refmod(pa)); /* Return page ref and chg in generic format */ } + +boolean_t +pmap_is_noencrypt(__unused ppnum_t pn) +{ + return (FALSE); +} + +void +pmap_set_noencrypt(__unused ppnum_t pn) +{ +} + +void +pmap_clear_noencrypt(__unused ppnum_t pn) +{ +} + + /* * pmap_protect(pmap, s, e, prot) * changes the protection on all virtual addresses v in the diff --git a/osfmk/vm/bsd_vm.c b/osfmk/vm/bsd_vm.c index 347a76883..9b583cd0c 100644 --- a/osfmk/vm/bsd_vm.c +++ b/osfmk/vm/bsd_vm.c @@ -452,11 +452,14 @@ memory_object_control_uiomove( */ break; } - if (dst_page->pageout) { + if (dst_page->pageout || dst_page->cleaning) { /* * this is the list_req_pending | pageout | busy case - * which can originate from both the pageout_scan and - * msync worlds... we need to reset the state of this page to indicate + * or the list_req_pending | cleaning case... + * which originate from the pageout_scan and + * msync worlds for the pageout case and the hibernate + * pre-cleaning world for the cleaning case... + * we need to reset the state of this page to indicate * it should stay in the cache marked dirty... nothing else we * can do at this point... we can't block on it, we can't busy * it and we can't clean it from this routine. 
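Illustrative sketch (not part of the applied diff): the memory_object_control_uiomove() hunk above, and the memory_object_lock_page() hunk that follows, both widen the "earmarked by the pageout machinery" test so that pages pre-cleaned for hibernation (list_req_pending | cleaning) are handled like pages queued by vm_pageout_scan (list_req_pending | pageout). The shared condition, expressed as a hypothetical helper:

	/* TRUE if a list request is pending and the page has been earmarked
	 * either by vm_pageout_scan (pageout) or by hibernate pre-cleaning
	 * (cleaning); such a page cannot be busied or cleaned from here. */
	static inline boolean_t
	page_earmarked_for_clean(vm_page_t m)
	{
		return (m->list_req_pending && (m->pageout || m->cleaning));
	}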
@@ -599,6 +602,8 @@ vnode_pager_bootstrap(void) size = (vm_size_t) sizeof(struct vnode_pager); vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size, PAGE_SIZE, "vnode pager structures"); + zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE); + #if CONFIG_CODE_DECRYPTION apple_protect_pager_bootstrap(); #endif /* CONFIG_CODE_DECRYPTION */ diff --git a/osfmk/vm/memory_object.c b/osfmk/vm/memory_object.c index d54ddb42b..0fece7fc0 100644 --- a/osfmk/vm/memory_object.c +++ b/osfmk/vm/memory_object.c @@ -182,12 +182,14 @@ memory_object_lock_page( */ if (m->busy || m->cleaning) { - if (m->list_req_pending && m->pageout && + if (m->list_req_pending && (m->pageout || m->cleaning) && should_return == MEMORY_OBJECT_RETURN_NONE && should_flush == TRUE) { /* - * page was earmarked by vm_pageout_scan - * to be cleaned and stolen... we're going + * if pageout is set, page was earmarked by vm_pageout_scan + * to be cleaned and stolen... if cleaning is set, we're + * pre-cleaning pages for a hibernate... + * in either case, we're going * to take it back since we are being asked to * flush the page w/o cleaning it (i.e. we don't * care that it's dirty, we want it gone from @@ -839,6 +841,7 @@ vm_object_update( fault_info.hi_offset = copy_size; fault_info.no_cache = FALSE; fault_info.stealth = TRUE; + fault_info.mark_zf_absent = FALSE; vm_object_paging_begin(copy_object); @@ -1967,6 +1970,7 @@ memory_object_control_bootstrap(void) i = (vm_size_t) sizeof (struct memory_object_control); mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control"); + zone_change(mem_obj_control_zone, Z_NOENCRYPT, TRUE); return; } diff --git a/osfmk/vm/pmap.h b/osfmk/vm/pmap.h index bf7cbed6c..5d6253718 100644 --- a/osfmk/vm/pmap.h +++ b/osfmk/vm/pmap.h @@ -169,9 +169,7 @@ extern void mapping_free_prime(void); /* Primes the mapping block release list */ extern boolean_t pmap_next_page(ppnum_t *pnum); -#if defined(__LP64__) -extern boolean_t pmap_next_page_k64(ppnum_t *pnum); -#endif +extern boolean_t pmap_next_page_hi(ppnum_t *pnum); /* During VM initialization, * return the next unused * physical page. 
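Illustrative sketch (not part of the applied diff): pmap_next_page_hi() replaces the LP64-only pmap_next_page_k64(); the i386 implementation earlier in this patch walks pmap_memory_regions from the highest region down and also updates max_ppnum, lowest_lo, lowest_hi and highest_hi as pages are handed out. A minimal caller, with steal_phys_page as a hypothetical boot-time helper:

	/* Grab one physical page for early (pre-VM) use, preferring the
	 * highest available region. */
	static ppnum_t
	steal_phys_page(void)
	{
		ppnum_t pn;

		if (!pmap_next_page_hi(&pn))
			panic("steal_phys_page: out of physical memory");
		return (pn);
	}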
@@ -466,6 +464,10 @@ extern kern_return_t pmap_unnest(pmap_t grand, extern boolean_t pmap_adjust_unnest_parameters(pmap_t, vm_map_offset_t *, vm_map_offset_t *); #endif /* MACH_KERNEL_PRIVATE */ +extern boolean_t pmap_is_noencrypt(ppnum_t); +extern void pmap_set_noencrypt(ppnum_t pn); +extern void pmap_clear_noencrypt(ppnum_t pn); + /* * JMM - This portion is exported to other kernel components right now, * but will be pulled back in the future when the needed functionality diff --git a/osfmk/vm/vm_apple_protect.c b/osfmk/vm/vm_apple_protect.c index d57cfc71a..953a4e139 100644 --- a/osfmk/vm/vm_apple_protect.c +++ b/osfmk/vm/vm_apple_protect.c @@ -354,6 +354,7 @@ apple_protect_pager_data_request( upl_pl = NULL; fault_info = *((struct vm_object_fault_info *) mo_fault_info); fault_info.stealth = TRUE; + fault_info.mark_zf_absent = FALSE; interruptible = fault_info.interruptible; pager = apple_protect_pager_lookup(mem_obj); diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c index de5708199..a36714b57 100644 --- a/osfmk/vm/vm_fault.c +++ b/osfmk/vm/vm_fault.c @@ -1150,6 +1150,8 @@ vm_fault_page( */ my_fault = vm_fault_zero_page(m, no_zero_fill); + if (fault_info->mark_zf_absent && no_zero_fill == TRUE) + m->absent = TRUE; break; } else { if (must_be_resident) @@ -1623,6 +1625,8 @@ vm_fault_page( } my_fault = vm_fault_zero_page(m, no_zero_fill); + if (fault_info->mark_zf_absent && no_zero_fill == TRUE) + m->absent = TRUE; break; } else { @@ -2444,7 +2448,7 @@ vm_fault_enter(vm_page_t m, vm_page_wire(m); } } else { - vm_page_unwire(m); + vm_page_unwire(m, TRUE); } vm_page_unlock_queues(); @@ -2654,6 +2658,7 @@ RetryFault: pmap = real_map->pmap; fault_info.interruptible = interruptible; fault_info.stealth = FALSE; + fault_info.mark_zf_absent = FALSE; /* * If the page is wired, we must fault for the current protection @@ -3883,6 +3888,7 @@ vm_fault_unwire( fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; fault_info.no_cache = entry->no_cache; fault_info.stealth = TRUE; + fault_info.mark_zf_absent = FALSE; /* * Since the pages are wired down, we must be able to @@ -3961,7 +3967,7 @@ vm_fault_unwire( } else { if (VM_PAGE_WIRED(result_page)) { vm_page_lockspin_queues(); - vm_page_unwire(result_page); + vm_page_unwire(result_page, TRUE); vm_page_unlock_queues(); } if(entry->zero_wired_pages) { @@ -4035,7 +4041,7 @@ vm_fault_wire_fast( #define RELEASE_PAGE(m) { \ PAGE_WAKEUP_DONE(m); \ vm_page_lockspin_queues(); \ - vm_page_unwire(m); \ + vm_page_unwire(m, TRUE); \ vm_page_unlock_queues(); \ } @@ -4205,7 +4211,7 @@ vm_fault_copy_dst_cleanup( object = page->object; vm_object_lock(object); vm_page_lockspin_queues(); - vm_page_unwire(page); + vm_page_unwire(page, TRUE); vm_page_unlock_queues(); vm_object_paging_end(object); vm_object_unlock(object); @@ -4289,6 +4295,7 @@ vm_fault_copy( fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left; fault_info_src.no_cache = FALSE; fault_info_src.stealth = TRUE; + fault_info_src.mark_zf_absent = FALSE; fault_info_dst.interruptible = interruptible; fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL; @@ -4297,6 +4304,7 @@ vm_fault_copy( fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left; fault_info_dst.no_cache = FALSE; fault_info_dst.stealth = TRUE; + fault_info_dst.mark_zf_absent = FALSE; do { /* while (amount_left > 0) */ /* diff --git a/osfmk/vm/vm_kern.c b/osfmk/vm/vm_kern.c index ee3c02b65..aa0dbafe2 100644 --- a/osfmk/vm/vm_kern.c +++ b/osfmk/vm/vm_kern.c @@ -253,13 +253,6 @@ kernel_memory_allocate( 
*addrp = 0; return KERN_INVALID_ARGUMENT; } - if (flags & KMA_LOMEM) { - if ( !(flags & KMA_NOPAGEWAIT) ) { - *addrp = 0; - return KERN_INVALID_ARGUMENT; - } - } - map_size = vm_map_round_page(size); map_mask = (vm_map_offset_t) mask; vm_alloc_flags = 0; @@ -348,6 +341,10 @@ kernel_memory_allocate( kr = KERN_RESOURCE_SHORTAGE; goto out; } + if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) { + kr = KERN_RESOURCE_SHORTAGE; + goto out; + } unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE; if (unavailable > max_mem || map_size > (max_mem - unavailable)) { @@ -426,6 +423,12 @@ kernel_memory_allocate( PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem, VM_PROT_READ | VM_PROT_WRITE, object->wimg_bits & VM_WIMG_MASK, TRUE); + + if (flags & KMA_NOENCRYPT) { + bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE); + + pmap_set_noencrypt(mem->phys_page); + } } if ((fill_start + fill_size) < map_size) { if (guard_page_list == NULL) @@ -889,6 +892,7 @@ kmem_suballoc( return (KERN_SUCCESS); } + /* * kmem_init: * @@ -925,19 +929,6 @@ kmem_init( VM_PROT_NONE, VM_PROT_NONE, VM_INHERIT_DEFAULT); } - - - /* - * Account for kernel memory (text, data, bss, vm shenanigans). - * This may include inaccessible "holes" as determined by what - * the machine-dependent init code includes in max_mem. - */ - assert(atop_64(max_mem) == (unsigned int) atop_64(max_mem)); - vm_page_wire_count = ((unsigned int) atop_64(max_mem) - - (vm_page_free_count + - vm_page_active_count + - vm_page_inactive_count)); - /* * Set the default global user wire limit which limits the amount of * memory that can be locked via mlock(). We set this to the total diff --git a/osfmk/vm/vm_kern.h b/osfmk/vm/vm_kern.h index b1b21a8f4..1c03bac0c 100644 --- a/osfmk/vm/vm_kern.h +++ b/osfmk/vm/vm_kern.h @@ -87,6 +87,7 @@ extern kern_return_t kernel_memory_allocate( #define KMA_GUARD_FIRST 0x10 #define KMA_GUARD_LAST 0x20 #define KMA_PERMANENT 0x40 +#define KMA_NOENCRYPT 0x80 extern kern_return_t kmem_alloc_contig( vm_map_t map, diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index a0f5e8c9b..64ef466b6 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -563,17 +563,22 @@ vm_map_init( { vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, PAGE_SIZE, "maps"); + zone_change(vm_map_zone, Z_NOENCRYPT, TRUE); + vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), 1024*1024, PAGE_SIZE*5, "non-kernel map entries"); + zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE); vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), kentry_data_size, kentry_data_size, "kernel map entries"); + zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE); vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), 16*1024, PAGE_SIZE, "map copies"); + zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE); /* * Cram the map and kentry zones with initial data. 
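Illustrative sketch (not part of the applied diff): the vm_map_init() hunk above shows the pattern this patch applies throughout ipc, kern and vm: create a zone with zinit() and then mark it Z_NOENCRYPT so its backing pages are left unencrypted in the hibernation image. The same pattern for a hypothetical subsystem zone (struct widget, widget_zone and widget_init are placeholders):

	static zone_t widget_zone;

	/* Create the zone and exempt its pages from hibernation encryption. */
	void
	widget_init(void)
	{
		widget_zone = zinit((vm_size_t) sizeof(struct widget),
				    1024 * sizeof(struct widget),	/* max size   */
				    PAGE_SIZE,				/* alloc size */
				    "widgets");
		zone_change(widget_zone, Z_NOENCRYPT, TRUE);
	}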
@@ -8711,6 +8716,7 @@ submap_recurse: fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; fault_info->no_cache = entry->no_cache; fault_info->stealth = FALSE; + fault_info->mark_zf_absent = FALSE; } /* @@ -10053,6 +10059,7 @@ vm_map_willneed( fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; fault_info.no_cache = FALSE; /* ignored value */ fault_info.stealth = TRUE; + fault_info.mark_zf_absent = FALSE; /* * The MADV_WILLNEED operation doesn't require any changes to the diff --git a/osfmk/vm/vm_object.c b/osfmk/vm/vm_object.c index 7f48b127e..c71627a4f 100644 --- a/osfmk/vm/vm_object.c +++ b/osfmk/vm/vm_object.c @@ -485,6 +485,7 @@ vm_object_bootstrap(void) round_page(512*1024), round_page(12*1024), "vm objects"); + zone_change(vm_object_zone, Z_NOENCRYPT, TRUE); vm_object_init_lck_grp(); @@ -514,6 +515,7 @@ vm_object_bootstrap(void) round_page(512*1024), round_page(12*1024), "vm object hash entries"); + zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE); for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) queue_init(&vm_object_hashtable[i]); @@ -1491,10 +1493,10 @@ restart_after_sleep: } if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) { - if (reap_type == REAP_DATA_FLUSH && (p->pageout == TRUE && p->list_req_pending == TRUE)) { + if (reap_type == REAP_DATA_FLUSH && + ((p->pageout == TRUE || p->cleaning == TRUE) && p->list_req_pending == TRUE)) { p->list_req_pending = FALSE; p->cleaning = FALSE; - p->pageout = FALSE; /* * need to drop the laundry count... * we may also need to remove it @@ -1509,13 +1511,15 @@ restart_after_sleep: #else vm_pageout_throttle_up(p); #endif - - /* - * toss the wire count we picked up - * when we intially set this page up - * to be cleaned... - */ - vm_page_unwire(p); + if (p->pageout == TRUE) { + /* + * toss the wire count we picked up + * when we initially set this page up + * to be cleaned and stolen... 
+ */ + vm_page_unwire(p, TRUE); + p->pageout = FALSE; + } PAGE_WAKEUP(p); } else if (p->busy || p->cleaning) { @@ -2854,6 +2858,7 @@ vm_object_copy_slowly( fault_info.hi_offset = src_offset + size; fault_info.no_cache = FALSE; fault_info.stealth = TRUE; + fault_info.mark_zf_absent = FALSE; for ( ; size != 0 ; @@ -5202,7 +5207,7 @@ vm_object_page_map( } assert((ppnum_t) addr == addr); - vm_page_init(m, (ppnum_t) addr); + vm_page_init(m, (ppnum_t) addr, FALSE); /* * private normally requires lock_queues but since we * are initializing the page, its not necessary here @@ -7408,7 +7413,8 @@ _vm_object_lock_try(vm_object_t object) boolean_t vm_object_lock_try(vm_object_t object) { - if (vm_object_lock_avoid(object)) { + // called from hibernate path so check before blocking + if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled()) { mutex_pause(2); } return _vm_object_lock_try(object); diff --git a/osfmk/vm/vm_object.h b/osfmk/vm/vm_object.h index 8ad7db64e..eacd7d65e 100644 --- a/osfmk/vm/vm_object.h +++ b/osfmk/vm/vm_object.h @@ -109,6 +109,7 @@ struct vm_object_fault_info { vm_map_offset_t hi_offset; boolean_t no_cache; boolean_t stealth; + boolean_t mark_zf_absent; }; diff --git a/osfmk/vm/vm_page.h b/osfmk/vm/vm_page.h index 15eac5b2d..ecae81c15 100644 --- a/osfmk/vm/vm_page.h +++ b/osfmk/vm/vm_page.h @@ -249,7 +249,8 @@ struct vm_page { /* other pages */ zero_fill:1, reusable:1, - __unused_object_bits:7; /* 7 bits available here */ + lopage:1, + __unused_object_bits:6; /* 6 bits available here */ #if __LP64__ unsigned int __unused_padding; /* Pad structure explicitly @@ -474,13 +475,20 @@ extern ppnum_t vm_page_guard_addr; extern boolean_t vm_page_deactivate_hint; -// 0 = all pages avail, 1 = disable high mem, 2 = prefer himem +/* + 0 = all pages avail ( default. ) + 1 = disable high mem ( cap max pages to 4G) + 2 = prefer himem +*/ extern int vm_himemory_mode; -extern ppnum_t vm_lopage_poolend; -extern int vm_lopage_poolsize; +extern boolean_t vm_lopage_needed; +extern uint32_t vm_lopage_free_count; +extern uint32_t vm_lopage_free_limit; +extern uint32_t vm_lopage_lowater; +extern boolean_t vm_lopage_refill; extern uint64_t max_valid_dma_address; - +extern ppnum_t max_valid_low_ppnum; /* * Prototypes for functions exported by this module. 
@@ -536,7 +544,8 @@ extern vm_page_t vm_page_alloc_guard( extern void vm_page_init( vm_page_t page, - ppnum_t phys_page); + ppnum_t phys_page, + boolean_t lopage); extern void vm_page_free( vm_page_t page); @@ -619,7 +628,8 @@ extern void vm_page_wire( vm_page_t page); extern void vm_page_unwire( - vm_page_t page); + vm_page_t page, + boolean_t queueit); extern void vm_set_page_size(void); diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 6dae066d4..96a171162 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -246,25 +246,6 @@ extern u_int32_t random(void); /* from */ #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100 -/* - * must hold the page queues lock to - * manipulate this structure - */ -struct vm_pageout_queue { - queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */ - unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */ - unsigned int pgo_maxlaundry; - - unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */ - pgo_busy:1, /* iothread is currently processing request from pgo_pending */ - pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */ - :0; -}; - -#define VM_PAGE_Q_THROTTLED(q) \ - ((q)->pgo_laundry >= (q)->pgo_maxlaundry) - - /* * Exported variable used to broadcast the activation of the pageout scan * Working Set uses this to throttle its use of pmap removes. In this @@ -385,7 +366,7 @@ unsigned int vm_page_speculative_target = 0; vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL; -static boolean_t (* volatile consider_buffer_cache_collect)(void) = NULL; +boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL; #if DEVELOPMENT || DEBUG unsigned long vm_cs_validated_resets = 0; @@ -526,7 +507,7 @@ vm_pageout_object_terminate( if (m->dirty) { CLUSTER_STAT(vm_pageout_target_page_dirtied++;) - vm_page_unwire(m);/* reactivates */ + vm_page_unwire(m, TRUE); /* reactivates */ VM_STAT_INCR(reactivations); PAGE_WAKEUP_DONE(m); } else { @@ -569,7 +550,7 @@ vm_pageout_object_terminate( /* case. 
Occurs when the original page was wired */ /* at the time of the list request */ assert(VM_PAGE_WIRED(m)); - vm_page_unwire(m);/* reactivates */ + vm_page_unwire(m, TRUE); /* reactivates */ m->overwriting = FALSE; } else { /* @@ -836,7 +817,7 @@ vm_pageout_cluster(vm_page_t m) /* * pgo_laundry count is tied to the laundry bit */ - m->laundry = TRUE; + m->laundry = TRUE; q->pgo_laundry++; m->pageout_queue = TRUE; @@ -870,11 +851,11 @@ vm_pageout_throttle_up( assert(m->object != kernel_object); vm_pageout_throttle_up_count++; - + if (m->object->internal == TRUE) - q = &vm_pageout_queue_internal; + q = &vm_pageout_queue_internal; else - q = &vm_pageout_queue_external; + q = &vm_pageout_queue_external; if (m->pageout_queue == TRUE) { @@ -886,15 +867,18 @@ vm_pageout_throttle_up( vm_object_paging_end(m->object); } - - if ( m->laundry == TRUE ) { - + if (m->laundry == TRUE) { m->laundry = FALSE; q->pgo_laundry--; + if (q->pgo_throttled == TRUE) { q->pgo_throttled = FALSE; thread_wakeup((event_t) &q->pgo_laundry); } + if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { + q->pgo_draining = FALSE; + thread_wakeup((event_t) (&q->pgo_laundry+1)); + } } } @@ -2359,7 +2343,6 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) { vm_page_t m = NULL; vm_object_t object; - boolean_t need_wakeup; memory_object_t pager; thread_t self = current_thread(); @@ -2450,13 +2433,27 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) * This pager has been destroyed by either * memory_object_destroy or vm_object_destroy, and * so there is nowhere for the page to go. - * Just free the page... VM_PAGE_FREE takes - * care of cleaning up all the state... - * including doing the vm_pageout_throttle_up */ + if (m->pageout) { + /* + * Just free the page... VM_PAGE_FREE takes + * care of cleaning up all the state... + * including doing the vm_pageout_throttle_up + */ + VM_PAGE_FREE(m); + } else { + vm_page_lockspin_queues(); - VM_PAGE_FREE(m); + vm_pageout_queue_steal(m, TRUE); + vm_page_activate(m); + + vm_page_unlock_queues(); + /* + * And we are done with it. 
+ */ + PAGE_WAKEUP_DONE(m); + } vm_object_paging_end(object); vm_object_unlock(object); @@ -2494,20 +2491,18 @@ vm_pageout_iothread_continue(struct vm_pageout_queue *q) } assert_wait((event_t) q, THREAD_UNINT); - if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) { q->pgo_throttled = FALSE; - need_wakeup = TRUE; - } else - need_wakeup = FALSE; - + thread_wakeup((event_t) &q->pgo_laundry); + } + if (q->pgo_draining == TRUE && q->pgo_laundry == 0) { + q->pgo_draining = FALSE; + thread_wakeup((event_t) (&q->pgo_laundry+1)); + } q->pgo_busy = FALSE; q->pgo_idle = TRUE; vm_page_unlock_queues(); - if (need_wakeup == TRUE) - thread_wakeup((event_t) &q->pgo_laundry); - thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending); /*NOTREACHED*/ } @@ -2537,7 +2532,7 @@ vm_pageout_iothread_internal(void) } kern_return_t -vm_set_buffer_cleanup_callout(boolean_t (*func)(void)) +vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) { if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { return KERN_SUCCESS; @@ -2559,7 +2554,7 @@ vm_pageout_garbage_collect(int collect) */ consider_machine_collect(); if (consider_buffer_cache_collect != NULL) { - buf_large_zfree = (*consider_buffer_cache_collect)(); + buf_large_zfree = (*consider_buffer_cache_collect)(0); } consider_zone_gc(buf_large_zfree); @@ -2653,6 +2648,7 @@ vm_pageout(void) vm_pageout_queue_external.pgo_idle = FALSE; vm_pageout_queue_external.pgo_busy = FALSE; vm_pageout_queue_external.pgo_throttled = FALSE; + vm_pageout_queue_external.pgo_draining = FALSE; queue_init(&vm_pageout_queue_internal.pgo_pending); vm_pageout_queue_internal.pgo_maxlaundry = 0; @@ -2660,6 +2656,7 @@ vm_pageout(void) vm_pageout_queue_internal.pgo_idle = FALSE; vm_pageout_queue_internal.pgo_busy = FALSE; vm_pageout_queue_internal.pgo_throttled = FALSE; + vm_pageout_queue_internal.pgo_draining = FALSE; /* internal pageout thread started when default pager registered first time */ @@ -3177,7 +3174,7 @@ vm_object_upl_request( * currently on the inactive queue or it meets the page * ticket (generation count) check */ - if ( !(refmod_state & VM_MEM_REFERENCED) && + if ( (cntrl_flags & UPL_CLEAN_IN_PLACE || !(refmod_state & VM_MEM_REFERENCED)) && ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { goto check_busy; } @@ -3192,7 +3189,7 @@ dont_return: goto try_next_page; } check_busy: - if (dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) { + if (dst_page->busy && (!(dst_page->list_req_pending && (dst_page->pageout || dst_page->cleaning)))) { if (cntrl_flags & UPL_NOBLOCK) { if (user_page_list) user_page_list[entry].phys_addr = 0; @@ -3457,7 +3454,7 @@ check_busy: dst_page->list_req_pending = FALSE; dst_page->busy = FALSE; - } else if (dst_page->pageout) { + } else if (dst_page->pageout || dst_page->cleaning) { /* * page was earmarked by vm_pageout_scan * to be cleaned and stolen... we're going @@ -4453,9 +4450,13 @@ dw_do_work( if (dwp->dw_mask & DW_vm_page_wire) vm_page_wire(dwp->dw_m); - else if (dwp->dw_mask & DW_vm_page_unwire) - vm_page_unwire(dwp->dw_m); + else if (dwp->dw_mask & DW_vm_page_unwire) { + boolean_t queueit; + queueit = (dwp->dw_mask & DW_vm_page_free) ? 
FALSE : TRUE; + + vm_page_unwire(dwp->dw_m, queueit); + } if (dwp->dw_mask & DW_vm_page_free) { if (held_as_spin == TRUE) { vm_page_lockconvert_queues(); @@ -4706,6 +4707,12 @@ process_upl_to_commit: */ dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); } + if (m->absent) { + if (flags & UPL_COMMIT_FREE_ABSENT) + dwp->dw_mask |= DW_vm_page_free; + else + m->absent = FALSE; + } goto commit_next_page; } /* @@ -5069,7 +5076,7 @@ upl_abort_range( return KERN_INVALID_ARGUMENT; if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) - return upl_commit_range(upl, offset, size, 0, NULL, 0, empty); + return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty); if((isVectorUPL = vector_upl_is_valid(upl))) { vector_upl = upl; @@ -5452,7 +5459,7 @@ vm_object_iopl_request( */ return KERN_INVALID_VALUE; } - if (vm_lopage_poolsize == 0) + if (vm_lopage_needed == FALSE) cntrl_flags &= ~UPL_NEED_32BIT_ADDR; if (cntrl_flags & UPL_NEED_32BIT_ADDR) { @@ -5646,6 +5653,7 @@ vm_object_iopl_request( fault_info.hi_offset = offset + xfer_size; fault_info.no_cache = FALSE; fault_info.stealth = FALSE; + fault_info.mark_zf_absent = TRUE; dwp = &dw_array[0]; dw_count = 0; @@ -5939,23 +5947,55 @@ return_err: dw_index = 0; for (; offset < dst_offset; offset += PAGE_SIZE) { + boolean_t need_unwire; + dst_page = vm_page_lookup(object, offset); if (dst_page == VM_PAGE_NULL) panic("vm_object_iopl_request: Wired pages missing. \n"); + /* + * if we've already processed this page in an earlier + * dw_do_work, we need to undo the wiring... we will + * leave the dirty and reference bits on if they + * were set, since we don't have a good way of knowing + * what the previous state was and we won't get here + * under any normal circumstances... we will always + * clear BUSY and wakeup any waiters via vm_page_free + * or PAGE_WAKEUP_DONE + */ + need_unwire = TRUE; + if (dw_count) { if (dw_array[dw_index].dw_m == dst_page) { - dw_index++; - dw_count--; - continue; + /* + * still in the deferred work list + * which means we haven't yet called + * vm_page_wire on this page + */ + need_unwire = FALSE; } + dw_index++; + dw_count--; } - vm_page_lockspin_queues(); - vm_page_unwire(dst_page); + vm_page_lock_queues(); + + if (need_unwire == TRUE) { + boolean_t queueit; + + queueit = (dst_page->absent) ? FALSE : TRUE; + + vm_page_unwire(dst_page, queueit); + } + if (dst_page->absent) + vm_page_free(dst_page); + else + PAGE_WAKEUP_DONE(dst_page); + vm_page_unlock_queues(); - VM_STAT_INCR(reactivations); + if (need_unwire == TRUE) + VM_STAT_INCR(reactivations); } #if UPL_DEBUG upl->upl_state = 2; @@ -6939,6 +6979,10 @@ vm_page_decrypt( void vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked) { + boolean_t pageout; + + pageout = page->pageout; + page->list_req_pending = FALSE; page->cleaning = FALSE; page->pageout = FALSE; @@ -6955,20 +6999,16 @@ vm_pageout_queue_steal(vm_page_t page, boolean_t queues_locked) * * the laundry and pageout_queue flags are cleared... */ -#if CONFIG_EMBEDDED - if (page->laundry) - vm_pageout_throttle_up(page); -#else vm_pageout_throttle_up(page); -#endif - - /* - * toss the wire count we picked up - * when we intially set this page up - * to be cleaned... - */ - vm_page_unwire(page); + if (pageout == TRUE) { + /* + * toss the wire count we picked up + * when we intially set this page up + * to be cleaned... 
+ */ + vm_page_unwire(page, TRUE); + } vm_page_steal_pageout_page++; if (!queues_locked) { diff --git a/osfmk/vm/vm_pageout.h b/osfmk/vm/vm_pageout.h index b76023182..c296ea10d 100644 --- a/osfmk/vm/vm_pageout.h +++ b/osfmk/vm/vm_pageout.h @@ -81,6 +81,11 @@ #include +#ifdef MACH_KERNEL_PRIVATE +#include +#endif + + extern kern_return_t vm_map_create_upl( vm_map_t map, vm_map_address_t offset, @@ -96,6 +101,26 @@ extern ppnum_t upl_get_highest_page( extern upl_size_t upl_get_size( upl_t upl); + +#ifndef MACH_KERNEL_PRIVATE +typedef struct vm_page *vm_page_t; +#endif + + +extern void vm_page_free_list( + vm_page_t mem, + boolean_t prepare_object); + +extern kern_return_t vm_page_alloc_list( + int page_count, + int flags, + vm_page_t * list); + +extern void vm_page_set_offset(vm_page_t page, vm_object_offset_t offset); +extern vm_object_offset_t vm_page_get_offset(vm_page_t page); +extern ppnum_t vm_page_get_phys_page(vm_page_t page); +extern vm_page_t vm_page_get_next(vm_page_t page); + #ifdef MACH_KERNEL_PRIVATE #include @@ -134,6 +159,28 @@ extern uint64_t vm_zf_count; #endif /* !(defined(__ppc__)) */ +/* + * must hold the page queues lock to + * manipulate this structure + */ +struct vm_pageout_queue { + queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */ + unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */ + unsigned int pgo_maxlaundry; + + unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */ + pgo_busy:1, /* iothread is currently processing request from pgo_pending */ + pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */ + pgo_draining:1, + :0; +}; + +#define VM_PAGE_Q_THROTTLED(q) \ + ((q)->pgo_laundry >= (q)->pgo_maxlaundry) + +extern struct vm_pageout_queue vm_pageout_queue_internal; +extern struct vm_pageout_queue vm_pageout_queue_external; + /* * Routines exported to Mach. 
*/ @@ -295,10 +342,6 @@ extern kern_return_t vm_map_remove_upl( /* wired page list structure */ typedef uint32_t *wpl_array_t; -extern void vm_page_free_list( - vm_page_t mem, - boolean_t prepare_object); - extern void vm_page_free_reserve(int pages); extern void vm_pageout_throttle_down(vm_page_t page); @@ -371,7 +414,7 @@ extern kern_return_t mach_vm_pressure_monitor( extern kern_return_t vm_set_buffer_cleanup_callout( - boolean_t (*func)(void)); + boolean_t (*func)(int)); struct vm_page_stats_reusable { SInt32 reusable_count; @@ -393,6 +436,8 @@ struct vm_page_stats_reusable { }; extern struct vm_page_stats_reusable vm_page_stats_reusable; +extern int hibernate_flush_memory(void); + #endif /* KERNEL_PRIVATE */ #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 9552295d8..dae49ac1a 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -103,12 +103,18 @@ boolean_t vm_page_free_verify = TRUE; -int speculative_age_index = 0; -int speculative_steal_index = 0; +uint_t vm_lopage_free_count = 0; +uint_t vm_lopage_free_limit = 0; +uint_t vm_lopage_lowater = 0; +boolean_t vm_lopage_refill = FALSE; +boolean_t vm_lopage_needed = FALSE; + lck_mtx_ext_t vm_page_queue_lock_ext; lck_mtx_ext_t vm_page_queue_free_lock_ext; lck_mtx_ext_t vm_purgeable_queue_lock_ext; +int speculative_age_index = 0; +int speculative_steal_index = 0; struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1]; @@ -224,6 +230,7 @@ struct vm_page vm_page_template; vm_page_t vm_pages = VM_PAGE_NULL; unsigned int vm_pages_count = 0; +ppnum_t vm_page_lowest = 0; /* * Resident pages that represent real memory @@ -299,6 +306,7 @@ unsigned int vm_page_inactive_count; unsigned int vm_page_throttled_count; unsigned int vm_page_speculative_count; unsigned int vm_page_wire_count; +unsigned int vm_page_wire_count_initial; unsigned int vm_page_gobble_count = 0; unsigned int vm_page_wire_count_warning = 0; unsigned int vm_page_gobble_count_warning = 0; @@ -313,10 +321,8 @@ unsigned int vm_page_speculative_created = 0; unsigned int vm_page_speculative_used = 0; #endif -ppnum_t vm_lopage_poolstart = 0; -ppnum_t vm_lopage_poolend = 0; -int vm_lopage_poolsize = 0; uint64_t max_valid_dma_address = 0xffffffffffffffffULL; +ppnum_t max_valid_low_ppnum = 0xffffffff; /* @@ -452,6 +458,11 @@ vm_page_init_local_q() } +uint64_t initial_max_mem; +int initial_wire_count; +int initial_free_count; +int initial_lopage_count; + /* * vm_page_bootstrap: * @@ -674,9 +685,15 @@ vm_page_bootstrap( * all VM managed pages are "free", courtesy of pmap_startup. 
*/ assert((unsigned int) atop_64(max_mem) == atop_64(max_mem)); - vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count; /* initial value */ + vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */ + vm_page_wire_count_initial = vm_page_wire_count; vm_page_free_count_minimum = vm_page_free_count; + initial_max_mem = max_mem; + initial_wire_count = vm_page_wire_count; + initial_free_count = vm_page_free_count; + initial_lopage_count = vm_lopage_free_count; + printf("vm_page_bootstrap: %d free pages and %d wired pages\n", vm_page_free_count, vm_page_wire_count); @@ -735,12 +752,8 @@ pmap_steal_memory( for (vaddr = round_page(addr); vaddr < addr + size; vaddr += PAGE_SIZE) { -#if defined(__LP64__) - if (!pmap_next_page_k64(&phys_page)) -#else - if (!pmap_next_page(&phys_page)) -#endif + if (!pmap_next_page_hi(&phys_page)) panic("pmap_steal_memory"); /* @@ -772,8 +785,6 @@ pmap_startup( unsigned int i, npages, pages_initialized, fill, fillval; ppnum_t phys_page; addr64_t tmpaddr; - unsigned int num_of_lopages = 0; - unsigned int last_index; /* * We calculate how many page frames we will have @@ -792,8 +803,10 @@ pmap_startup( for (i = 0, pages_initialized = 0; i < npages; i++) { if (!pmap_next_page(&phys_page)) break; + if (pages_initialized == 0 || phys_page < vm_page_lowest) + vm_page_lowest = phys_page; - vm_page_init(&vm_pages[i], phys_page); + vm_page_init(&vm_pages[i], phys_page, FALSE); vm_page_pages++; pages_initialized++; } @@ -805,46 +818,10 @@ pmap_startup( fill = 0; /* Assume no fill */ if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */ - - /* - * if vm_lopage_poolsize is non-zero, than we need to reserve - * a pool of pages whose addresess are less than 4G... this pool - * is used by drivers whose hardware can't DMA beyond 32 bits... - * - * note that I'm assuming that the page list is ascending and - * ordered w/r to the physical address - */ - for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) { - vm_page_t m; - - m = &vm_pages[i]; - - if (m->phys_page >= (1 << (32 - PAGE_SHIFT))) - panic("couldn't reserve the lopage pool: not enough lo pages\n"); - - if (m->phys_page < vm_lopage_poolend) - panic("couldn't reserve the lopage pool: page list out of order\n"); - - vm_lopage_poolend = m->phys_page; - - if (vm_lopage_poolstart == 0) - vm_lopage_poolstart = m->phys_page; - else { - if (m->phys_page < vm_lopage_poolstart) - panic("couldn't reserve the lopage pool: page list out of order\n"); - } - - if (fill) - fillPage(m->phys_page, fillval); /* Fill the page with a know value if requested at boot */ - - vm_page_release(m); - } - last_index = i; - // -debug code remove if (2 == vm_himemory_mode) { // free low -> high so high is preferred - for (i = last_index + 1; i <= pages_initialized; i++) { + for (i = 1; i <= pages_initialized; i++) { if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */ vm_page_release(&vm_pages[i - 1]); } @@ -858,7 +835,7 @@ pmap_startup( * the devices (which must address physical memory) happy if * they require several consecutive pages. 
*/ - for (i = pages_initialized; i > last_index; i--) { + for (i = pages_initialized; i > 0; i--) { if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */ vm_page_release(&vm_pages[i - 1]); } @@ -971,7 +948,8 @@ vm_page_create( == VM_PAGE_NULL) vm_page_more_fictitious(); - vm_page_init(m, phys_page); + vm_page_init(m, phys_page, FALSE); + pmap_clear_noencrypt(phys_page); vm_page_pages++; vm_page_release(m); } @@ -1480,11 +1458,14 @@ vm_page_rename( void vm_page_init( vm_page_t mem, - ppnum_t phys_page) + ppnum_t phys_page, + boolean_t lopage) { assert(phys_page); + *mem = vm_page_template; mem->phys_page = phys_page; + mem->lopage = lopage; } /* @@ -1507,7 +1488,7 @@ vm_page_grab_fictitious_common( m = (vm_page_t)zget(vm_page_zone); if (m) { - vm_page_init(m, phys_addr); + vm_page_init(m, phys_addr, FALSE); m->fictitious = TRUE; } @@ -1639,7 +1620,7 @@ void vm_page_more_fictitious(void) */ m = (vm_page_t)addr; for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) { - vm_page_init(m, vm_page_fictitious_addr); + vm_page_init(m, vm_page_fictitious_addr, FALSE); m->fictitious = TRUE; m++; } @@ -1676,48 +1657,69 @@ int vm_himemory_mode = 0; * incapable of generating DMAs with more than 32 bits * of address on platforms with physical memory > 4G... */ -unsigned int vm_lopage_free_count = 0; -unsigned int vm_lopage_max_count = 0; +unsigned int vm_lopages_allocated_q = 0; +unsigned int vm_lopages_allocated_cpm_success = 0; +unsigned int vm_lopages_allocated_cpm_failed = 0; queue_head_t vm_lopage_queue_free; vm_page_t vm_page_grablo(void) { - register vm_page_t mem; - unsigned int vm_lopage_alloc_count; + vm_page_t mem; - if (vm_lopage_poolsize == 0) + if (vm_lopage_needed == FALSE) return (vm_page_grab()); lck_mtx_lock_spin(&vm_page_queue_free_lock); - if (! 
queue_empty(&vm_lopage_queue_free)) { - queue_remove_first(&vm_lopage_queue_free, - mem, - vm_page_t, - pageq); - assert(mem->free); - assert(mem->busy); - assert(!mem->pmapped); - assert(!mem->wpmapped); + if ( !queue_empty(&vm_lopage_queue_free)) { + queue_remove_first(&vm_lopage_queue_free, + mem, + vm_page_t, + pageq); + assert(vm_lopage_free_count); - mem->pageq.next = NULL; - mem->pageq.prev = NULL; - mem->free = FALSE; + vm_lopage_free_count--; + vm_lopages_allocated_q++; + + if (vm_lopage_free_count < vm_lopage_lowater) + vm_lopage_refill = TRUE; - vm_lopage_free_count--; - vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count; - if (vm_lopage_alloc_count > vm_lopage_max_count) - vm_lopage_max_count = vm_lopage_alloc_count; + lck_mtx_unlock(&vm_page_queue_free_lock); } else { - mem = VM_PAGE_NULL; + lck_mtx_unlock(&vm_page_queue_free_lock); + + if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) { + + lck_mtx_lock_spin(&vm_page_queue_free_lock); + vm_lopages_allocated_cpm_failed++; + lck_mtx_unlock(&vm_page_queue_free_lock); + + return (VM_PAGE_NULL); + } + mem->busy = TRUE; + + vm_page_lockspin_queues(); + + mem->gobbled = FALSE; + vm_page_gobble_count--; + vm_page_wire_count--; + + vm_lopages_allocated_cpm_success++; + vm_page_unlock_queues(); } - lck_mtx_unlock(&vm_page_queue_free_lock); + assert(mem->gobbled); + assert(mem->busy); + assert(!mem->free); + assert(!mem->pmapped); + assert(!mem->wpmapped); + + mem->pageq.next = NULL; + mem->pageq.prev = NULL; return (mem); } - /* * vm_page_grab: * @@ -1968,8 +1970,6 @@ vm_page_release( if (mem->free) panic("vm_page_release"); #endif - mem->free = TRUE; - assert(mem->busy); assert(!mem->laundry); assert(mem->object == VM_OBJECT_NULL); @@ -1978,7 +1978,9 @@ vm_page_release( assert(mem->listq.next == NULL && mem->listq.prev == NULL); - if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) { + if ((mem->lopage || vm_lopage_refill == TRUE) && + vm_lopage_free_count < vm_lopage_free_limit && + mem->phys_page < max_valid_low_ppnum) { /* * this exists to support hardware controllers * incapable of generating DMAs with more than 32 bits @@ -1989,7 +1991,15 @@ vm_page_release( vm_page_t, pageq); vm_lopage_free_count++; + + if (vm_lopage_free_count >= vm_lopage_free_limit) + vm_lopage_refill = FALSE; + + mem->lopage = TRUE; } else { + mem->lopage = FALSE; + mem->free = TRUE; + color = mem->phys_page & vm_color_mask; queue_enter_first(&vm_page_queue_free[color], mem, @@ -2287,7 +2297,7 @@ vm_page_free_prepare_object( } else { if (mem->zero_fill == TRUE) VM_ZF_COUNT_DECR(); - vm_page_init(mem, mem->phys_page); + vm_page_init(mem, mem->phys_page, mem->lopage); } } @@ -2353,6 +2363,7 @@ vm_page_free_list( assert(!mem->throttled); assert(!mem->free); assert(!mem->speculative); + assert(!VM_PAGE_WIRED(mem)); assert(mem->pageq.prev == NULL); nxt = (vm_page_t)(mem->pageq.next); @@ -2366,7 +2377,9 @@ vm_page_free_list( assert(mem->busy); if (!mem->fictitious) { - if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) { + if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) && + vm_lopage_free_count < vm_lopage_free_limit && + mem->phys_page < max_valid_low_ppnum) { mem->pageq.next = NULL; vm_page_release(mem); } else { @@ -2671,7 +2684,8 @@ vm_page_gobble( */ void vm_page_unwire( - register vm_page_t mem) + vm_page_t mem, + boolean_t queueit) { // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */ @@ 
-2698,10 +2712,13 @@ vm_page_unwire( assert(!mem->laundry); assert(mem->object != kernel_object); assert(mem->pageq.next == NULL && mem->pageq.prev == NULL); - if (mem->object->purgable == VM_PURGABLE_EMPTY) { - vm_page_deactivate(mem); - } else { - vm_page_activate(mem); + + if (queueit == TRUE) { + if (mem->object->purgable == VM_PURGABLE_EMPTY) { + vm_page_deactivate(mem); + } else { + vm_page_activate(mem); + } } #if CONFIG_EMBEDDED { @@ -2763,6 +2780,10 @@ vm_page_deactivate_internal( * inactive queue. Note wired pages should not have * their reference bit cleared. */ + + if (m->absent && !m->unusual) + panic("vm_page_deactivate: %p absent", m); + if (m->gobbled) { /* can this happen? */ assert( !VM_PAGE_WIRED(m)); @@ -2838,6 +2859,10 @@ vm_page_activate( #if DEBUG lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif + + if (m->absent && !m->unusual) + panic("vm_page_activate: %p absent", m); + if (m->gobbled) { assert( !VM_PAGE_WIRED(m)); if (!m->private && !m->fictitious) @@ -2905,6 +2930,9 @@ vm_page_speculate( lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); #endif + if (m->absent && !m->unusual) + panic("vm_page_speculate: %p absent", m); + VM_PAGE_QUEUES_REMOVE(m); if ( !VM_PAGE_WIRED(m)) { @@ -3690,17 +3718,7 @@ retry: /* no more low pages... */ break; } - if ( !(flags & KMA_LOMEM) && m->phys_page <= vm_lopage_poolend && - m->phys_page >= vm_lopage_poolstart) { - /* - * don't want to take pages from our - * reserved pool of low memory - * so don't consider it which - * means starting a new run - */ - RESET_STATE_OF_RUN(); - - } else if (!npages && ((m->phys_page & pnum_mask) != 0)) { + if (!npages && ((m->phys_page & pnum_mask) != 0)) { /* * not aligned */ @@ -3880,46 +3898,23 @@ did_consider: #endif if (m1->free) { - if ( m1->phys_page <= vm_lopage_poolend && - m1->phys_page >= vm_lopage_poolstart) { + unsigned int color; - assert( flags & KMA_LOMEM ); + color = m1->phys_page & vm_color_mask; #if MACH_ASSERT - vm_page_verify_free_list(&vm_lopage_queue_free, - (unsigned int) -1, m1, TRUE); + vm_page_verify_free_list(&vm_page_queue_free[color], + color, m1, TRUE); #endif - queue_remove(&vm_lopage_queue_free, - m1, - vm_page_t, - pageq); - vm_lopage_free_count--; - -#if MACH_ASSERT - vm_page_verify_free_list(&vm_lopage_queue_free, - (unsigned int) -1, VM_PAGE_NULL, FALSE); -#endif - } else { - - unsigned int color; - - color = m1->phys_page & vm_color_mask; -#if MACH_ASSERT - vm_page_verify_free_list(&vm_page_queue_free[color], - color, m1, TRUE); -#endif - queue_remove(&vm_page_queue_free[color], - m1, - vm_page_t, - pageq); - vm_page_free_count--; -#if MACH_ASSERT - vm_page_verify_free_list(&vm_page_queue_free[color], - color, VM_PAGE_NULL, FALSE); -#endif - } - + queue_remove(&vm_page_queue_free[color], + m1, + vm_page_t, + pageq); m1->pageq.next = NULL; m1->pageq.prev = NULL; +#if MACH_ASSERT + vm_page_verify_free_list(&vm_page_queue_free[color], + color, VM_PAGE_NULL, FALSE); +#endif /* * Clear the "free" bit so that this page * does not get considered for another @@ -3927,6 +3922,8 @@ did_consider: */ m1->free = FALSE; assert(m1->busy); + + vm_page_free_count--; } } /* @@ -4242,12 +4239,484 @@ cpm_allocate( return KERN_SUCCESS; } + +kern_return_t +vm_page_alloc_list( + int page_count, + int flags, + vm_page_t *list) +{ + vm_page_t lo_page_list = VM_PAGE_NULL; + vm_page_t mem; + int i; + + if ( !(flags & KMA_LOMEM)) + panic("vm_page_alloc_list: called w/o KMA_LOMEM"); + + for (i = 0; i < page_count; i++) { + + mem = vm_page_grablo(); + + if (mem == 
VM_PAGE_NULL) { + if (lo_page_list) + vm_page_free_list(lo_page_list, FALSE); + + *list = VM_PAGE_NULL; + + return (KERN_RESOURCE_SHORTAGE); + } + mem->pageq.next = (queue_entry_t) lo_page_list; + lo_page_list = mem; + } + *list = lo_page_list; + + return (KERN_SUCCESS); +} + +void +vm_page_set_offset(vm_page_t page, vm_object_offset_t offset) +{ + page->offset = offset; +} + +vm_page_t +vm_page_get_next(vm_page_t page) +{ + return ((vm_page_t) page->pageq.next); +} + +vm_object_offset_t +vm_page_get_offset(vm_page_t page) +{ + return (page->offset); +} + +ppnum_t +vm_page_get_phys_page(vm_page_t page) +{ + return (page->phys_page); +} + + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #if HIBERNATION static vm_page_t hibernate_gobble_queue; +extern boolean_t (* volatile consider_buffer_cache_collect)(int); + +static int hibernate_drain_pageout_queue(struct vm_pageout_queue *); +static int hibernate_flush_dirty_pages(void); +static int hibernate_flush_queue(queue_head_t *, int); +static void hibernate_dirty_page(vm_page_t); + +void hibernate_flush_wait(void); +void hibernate_mark_in_progress(void); +void hibernate_clear_in_progress(void); + + +struct hibernate_statistics { + int hibernate_considered; + int hibernate_reentered_on_q; + int hibernate_found_dirty; + int hibernate_skipped_cleaning; + int hibernate_skipped_transient; + int hibernate_skipped_precious; + int hibernate_queue_nolock; + int hibernate_queue_paused; + int hibernate_throttled; + int hibernate_throttle_timeout; + int hibernate_drained; + int hibernate_drain_timeout; + int cd_lock_failed; + int cd_found_precious; + int cd_found_wired; + int cd_found_busy; + int cd_found_unusual; + int cd_found_cleaning; + int cd_found_laundry; + int cd_found_dirty; + int cd_local_free; + int cd_total_free; + int cd_vm_page_wire_count; + int cd_pages; + int cd_discarded; + int cd_count_wire; +} hibernate_stats; + + + +static int +hibernate_drain_pageout_queue(struct vm_pageout_queue *q) +{ + wait_result_t wait_result; + + vm_page_lock_queues(); + + while (q->pgo_laundry) { + + q->pgo_draining = TRUE; + + assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC); + + vm_page_unlock_queues(); + + wait_result = thread_block(THREAD_CONTINUE_NULL); + + if (wait_result == THREAD_TIMED_OUT) { + hibernate_stats.hibernate_drain_timeout++; + return (1); + } + vm_page_lock_queues(); + + hibernate_stats.hibernate_drained++; + } + vm_page_unlock_queues(); + + return (0); +} + +static void +hibernate_dirty_page(vm_page_t m) +{ + vm_object_t object = m->object; + struct vm_pageout_queue *q; + +#if DEBUG + lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); +#endif + vm_object_lock_assert_exclusive(object); + + /* + * protect the object from collapse - + * locking in the object's paging_offset. 
+ */ + vm_object_paging_begin(object); + + m->list_req_pending = TRUE; + m->cleaning = TRUE; + m->busy = TRUE; + + if (object->internal == TRUE) + q = &vm_pageout_queue_internal; + else + q = &vm_pageout_queue_external; + + /* + * pgo_laundry count is tied to the laundry bit + */ + m->laundry = TRUE; + q->pgo_laundry++; + + m->pageout_queue = TRUE; + queue_enter(&q->pgo_pending, m, vm_page_t, pageq); + + if (q->pgo_idle == TRUE) { + q->pgo_idle = FALSE; + thread_wakeup((event_t) &q->pgo_pending); + } +} + +static int +hibernate_flush_queue(queue_head_t *q, int qcount) +{ + vm_page_t m; + vm_object_t l_object = NULL; + vm_object_t m_object = NULL; + int refmod_state = 0; + int try_failed_count = 0; + int retval = 0; + int current_run = 0; + struct vm_pageout_queue *iq; + struct vm_pageout_queue *eq; + struct vm_pageout_queue *tq; + + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0); + + iq = &vm_pageout_queue_internal; + eq = &vm_pageout_queue_external; + + vm_page_lock_queues(); + + while (qcount && !queue_empty(q)) { + + if (current_run++ == 1000) { + if (hibernate_should_abort()) { + retval = 1; + break; + } + current_run = 0; + } + + m = (vm_page_t) queue_first(q); + m_object = m->object; + + /* + * check to see if we currently are working + * with the same object... if so, we've + * already got the lock + */ + if (m_object != l_object) { + /* + * the object associated with candidate page is + * different from the one we were just working + * with... dump the lock if we still own it + */ + if (l_object != NULL) { + vm_object_unlock(l_object); + l_object = NULL; + } + /* + * Try to lock object; since we've alread got the + * page queues lock, we can only 'try' for this one. + * if the 'try' fails, we need to do a mutex_pause + * to allow the owner of the object lock a chance to + * run... 
+ */ + if ( !vm_object_lock_try_scan(m_object)) { + + if (try_failed_count > 20) { + hibernate_stats.hibernate_queue_nolock++; + + goto reenter_pg_on_q; + } + vm_pageout_scan_wants_object = m_object; + + vm_page_unlock_queues(); + mutex_pause(try_failed_count++); + vm_page_lock_queues(); + + hibernate_stats.hibernate_queue_paused++; + continue; + } else { + l_object = m_object; + vm_pageout_scan_wants_object = VM_OBJECT_NULL; + } + } + if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) { + /* + * page is not to be cleaned + * put it back on the head of its queue + */ + if (m->cleaning) + hibernate_stats.hibernate_skipped_cleaning++; + else + hibernate_stats.hibernate_skipped_transient++; + + goto reenter_pg_on_q; + } + if ( !m_object->pager_initialized && m_object->pager_created) + goto reenter_pg_on_q; + + if (m_object->copy == VM_OBJECT_NULL) { + if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) { + /* + * let the normal hibernate image path + * deal with these + */ + goto reenter_pg_on_q; + } + } + if ( !m->dirty && m->pmapped) { + refmod_state = pmap_get_refmod(m->phys_page); + + if ((refmod_state & VM_MEM_MODIFIED)) + m->dirty = TRUE; + } else + refmod_state = 0; + + if ( !m->dirty) { + /* + * page is not to be cleaned + * put it back on the head of its queue + */ + if (m->precious) + hibernate_stats.hibernate_skipped_precious++; + + goto reenter_pg_on_q; + } + tq = NULL; + + if (m_object->internal) { + if (VM_PAGE_Q_THROTTLED(iq)) + tq = iq; + } else if (VM_PAGE_Q_THROTTLED(eq)) + tq = eq; + + if (tq != NULL) { + wait_result_t wait_result; + int wait_count = 5; + + if (l_object != NULL) { + vm_object_unlock(l_object); + l_object = NULL; + } + vm_pageout_scan_wants_object = VM_OBJECT_NULL; + + tq->pgo_throttled = TRUE; + + while (retval == 0) { + + assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC); + + vm_page_unlock_queues(); + + wait_result = thread_block(THREAD_CONTINUE_NULL); + + vm_page_lock_queues(); + + if (hibernate_should_abort()) + retval = 1; + + if (wait_result != THREAD_TIMED_OUT) + break; + + if (--wait_count == 0) { + hibernate_stats.hibernate_throttle_timeout++; + retval = 1; + } + } + if (retval) + break; + + hibernate_stats.hibernate_throttled++; + + continue; + } + VM_PAGE_QUEUES_REMOVE(m); + + hibernate_dirty_page(m); + + hibernate_stats.hibernate_found_dirty++; + + goto next_pg; + +reenter_pg_on_q: + queue_remove(q, m, vm_page_t, pageq); + queue_enter(q, m, vm_page_t, pageq); + + hibernate_stats.hibernate_reentered_on_q++; +next_pg: + hibernate_stats.hibernate_considered++; + + qcount--; + try_failed_count = 0; + } + if (l_object != NULL) { + vm_object_unlock(l_object); + l_object = NULL; + } + vm_pageout_scan_wants_object = VM_OBJECT_NULL; + + vm_page_unlock_queues(); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0); + + return (retval); +} + + +static int +hibernate_flush_dirty_pages() +{ + struct vm_speculative_age_q *aq; + uint32_t i; + + bzero(&hibernate_stats, sizeof(struct hibernate_statistics)); + + if (vm_page_local_q) { + for (i = 0; i < vm_page_local_q_count; i++) + vm_page_reactivate_local(i, TRUE, FALSE); + } + + for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) { + int qcount; + vm_page_t m; + + aq = &vm_page_queue_speculative[i]; + + if (queue_empty(&aq->age_q)) + continue; + qcount = 0; + + vm_page_lockspin_queues(); + + 
queue_iterate(&aq->age_q, + m, + vm_page_t, + pageq) + { + qcount++; + } + vm_page_unlock_queues(); + + if (qcount) { + if (hibernate_flush_queue(&aq->age_q, qcount)) + return (1); + } + } + if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) + return (1); + if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count)) + return (1); + if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count)) + return (1); + + if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) + return (1); + return (hibernate_drain_pageout_queue(&vm_pageout_queue_external)); +} + + +extern void IOSleep(unsigned int); +extern int sync_internal(void); + +int +hibernate_flush_memory() +{ + int retval; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0); + + IOSleep(2 * 1000); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0); + + if ((retval = hibernate_flush_dirty_pages()) == 0) { + if (consider_buffer_cache_collect != NULL) { + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0); + + sync_internal(); + (void)(*consider_buffer_cache_collect)(1); + consider_zone_gc(1); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0); + } + } + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0); + + HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n", + hibernate_stats.hibernate_considered, + hibernate_stats.hibernate_reentered_on_q, + hibernate_stats.hibernate_found_dirty); + HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n", + hibernate_stats.hibernate_skipped_cleaning, + hibernate_stats.hibernate_skipped_transient, + hibernate_stats.hibernate_skipped_precious, + hibernate_stats.hibernate_queue_nolock); + HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n", + hibernate_stats.hibernate_queue_paused, + hibernate_stats.hibernate_throttled, + hibernate_stats.hibernate_throttle_timeout, + hibernate_stats.hibernate_drained, + hibernate_stats.hibernate_drain_timeout); + + return (retval); +} + static void hibernate_page_list_zero(hibernate_page_list_t *list) { @@ -4330,37 +4799,45 @@ hibernate_consider_discard(vm_page_t m) do { - if(m->private) + if (m->private) panic("hibernate_consider_discard: private"); - if (!vm_object_lock_try(m->object)) + if (!vm_object_lock_try(m->object)) { + hibernate_stats.cd_lock_failed++; break; - + } object = m->object; - if (VM_PAGE_WIRED(m)) + if (VM_PAGE_WIRED(m)) { + hibernate_stats.cd_found_wired++; break; - if (m->precious) + } + if (m->precious) { + hibernate_stats.cd_found_precious++; break; - - if (m->busy || !object->alive) + } + if (m->busy || !object->alive) { /* * Somebody is playing with this page. 
*/ - break; - - if (m->absent || m->unusual || m->error) + hibernate_stats.cd_found_busy++; + break; + } + if (m->absent || m->unusual || m->error) { /* * If it's unusual in anyway, ignore it */ + hibernate_stats.cd_found_unusual++; break; - - if (m->cleaning) + } + if (m->cleaning) { + hibernate_stats.cd_found_cleaning++; break; - - if (m->laundry || m->list_req_pending) + } + if (m->laundry || m->list_req_pending) { + hibernate_stats.cd_found_laundry++; break; - + } if (!m->dirty) { refmod_state = pmap_get_refmod(m->phys_page); @@ -4376,7 +4853,10 @@ hibernate_consider_discard(vm_page_t m) */ discard = (!m->dirty) || (VM_PURGABLE_VOLATILE == object->purgable) - || (VM_PURGABLE_EMPTY == m->object->purgable); + || (VM_PURGABLE_EMPTY == object->purgable); + + if (discard == FALSE) + hibernate_stats.cd_found_dirty++; } while (FALSE); @@ -4450,13 +4930,18 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, hibernate_bitmap_t * bitmap_wired; - HIBLOG("hibernate_page_list_setall start\n"); + HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired); + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0); clock_get_uptime(&start); hibernate_page_list_zero(page_list); hibernate_page_list_zero(page_list_wired); + hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count; + hibernate_stats.cd_pages = pages; + if (vm_page_local_q) { for (i = 0; i < vm_page_local_q_count; i++) vm_page_reactivate_local(i, TRUE, TRUE); @@ -4471,7 +4956,24 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); m = (vm_page_t) m->pageq.next; } - +#ifndef PPC + for( i = 0; i < real_ncpus; i++ ) + { + if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor) + { + for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + + hibernate_stats.cd_local_free++; + hibernate_stats.cd_total_free++; + } + } + } +#endif for( i = 0; i < vm_colors; i++ ) { queue_iterate(&vm_page_queue_free[i], @@ -4483,6 +4985,8 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_wire--; hibernate_page_bitset(page_list, TRUE, m->phys_page); hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + + hibernate_stats.cd_total_free++; } } @@ -4495,6 +4999,8 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_wire--; hibernate_page_bitset(page_list, TRUE, m->phys_page); hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + + hibernate_stats.cd_total_free++; } queue_iterate( &vm_page_queue_throttled, @@ -4609,6 +5115,9 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, // machine dependent adjustments hibernate_page_list_setall_machine(page_list, page_list_wired, &pages); + hibernate_stats.cd_count_wire = count_wire; + hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative; + clock_get_uptime(&end); absolutetime_to_nanoseconds(end - start, &nsec); HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL); @@ -4618,6 +5127,8 @@ hibernate_page_list_setall(hibernate_page_list_t * page_list, count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative); *pagesOut = pages - count_discard_active - count_discard_inactive - 
count_discard_purgeable - count_discard_speculative; + + KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0); } void diff --git a/osfmk/vm/vm_user.c b/osfmk/vm/vm_user.c index 7e68d60ca..59c26ff70 100644 --- a/osfmk/vm/vm_user.c +++ b/osfmk/vm/vm_user.c @@ -2417,10 +2417,12 @@ redo_lookup: make_mem_done: if (user_handle != IP_NULL) { - ipc_port_dealloc_kernel(user_handle); - } - if (user_entry != NULL) { - kfree(user_entry, sizeof *user_entry); + /* + * Releasing "user_handle" causes the kernel object + * associated with it ("user_entry" here) to also be + * released and freed. + */ + mach_memory_entry_port_release(user_handle); } return kr; } diff --git a/osfmk/x86_64/machine_routines_asm.s b/osfmk/x86_64/machine_routines_asm.s index 641cd9cdc..f8fecaccf 100644 --- a/osfmk/x86_64/machine_routines_asm.s +++ b/osfmk/x86_64/machine_routines_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2008 Apple Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -109,6 +109,23 @@ ENTRY(_rtc_nanotime_store) ret +/* + * void _rtc_nanotime_adjust( + * uint64_t tsc_base_delta, // %rdi + * rtc_nanotime_t *dst); // %rsi + */ +ENTRY(_rtc_nanotime_adjust) + movl RNT_GENERATION(%rsi),%eax /* get current generation */ + movl $0,RNT_GENERATION(%rsi) /* flag data as being updated */ + addq %rdi,RNT_TSC_BASE(%rsi) + + incl %eax /* next generation */ + jnz 1f + incl %eax /* skip 0, which is a flag */ +1: movl %eax,RNT_GENERATION(%rsi) /* update generation */ + + ret + /* * unint64_t _rtc_nanotime_read(rtc_nanotime_t *rntp, int slow); * diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c index e53843224..a7ad2bb9d 100644 --- a/osfmk/x86_64/pmap.c +++ b/osfmk/x86_64/pmap.c @@ -1470,7 +1470,7 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr) { PMAP_LOCK(pmap); if(pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) { - if (!pmap_next_page_k64(&pn)) + if (!pmap_next_page_hi(&pn)) panic("pmap_pre_expand"); pmap_zero_page(pn); @@ -1484,7 +1484,7 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr) { } if(pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) { - if (!pmap_next_page_k64(&pn)) + if (!pmap_next_page_hi(&pn)) panic("pmap_pre_expand"); pmap_zero_page(pn); @@ -1498,7 +1498,7 @@ pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr) { } if(pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) { - if (!pmap_next_page_k64(&pn)) + if (!pmap_next_page_hi(&pn)) panic("pmap_pre_expand"); pmap_zero_page(pn); diff --git a/osfmk/x86_64/start.s b/osfmk/x86_64/start.s index ccba4f64b..fd0b8491a 100644 --- a/osfmk/x86_64/start.s +++ b/osfmk/x86_64/start.s @@ -529,11 +529,13 @@ Lwake_64: /* restore segment registers */ movw saved_es(%rip), %es - movw saved_fs(%rip), %fs - movw saved_gs(%rip), %gs movw saved_ss(%rip), %ss - /* save the 64bit kernel gs base */ + /* Program FS/GS with a NULL selector, precautionary */ + xor %rax, %rax + movw %ax, %fs + movw %ax, %gs + /* restore the 64bit kernel gs base */ mov $MSR_IA32_KERNEL_GS_BASE, %rcx movl saved_kgs_base(%rip), %eax movl saved_kgs_base+4(%rip), %edx -- 2.45.2
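The vm_pageout.h hunk above exports an opaque page-list API to KERNEL_PRIVATE clients: vm_page_alloc_list(), vm_page_free_list(), and the accessors vm_page_get_next(), vm_page_get_offset(), vm_page_get_phys_page(), and vm_page_set_offset(), so callers outside osfmk/vm can hold a list of pages without seeing struct vm_page. Below is a minimal sketch of how such a client might use it; the function name collect_lopage_ppnums and the output array are hypothetical, and it assumes a KERNEL_PRIVATE build where KMA_LOMEM (from vm/vm_kern.h) is in scope, since vm_page_alloc_list() refuses any caller that does not pass that flag.

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

/*
 * Hypothetical client of the new opaque API: allocate a list of
 * low-memory (below 4GB) pages and record their physical page numbers.
 */
static kern_return_t
collect_lopage_ppnums(int npages, ppnum_t *ppnum_out)
{
	vm_page_t	list = VM_PAGE_NULL;
	vm_page_t	p;
	kern_return_t	kr;
	int		i = 0;

	/* KMA_LOMEM is mandatory for vm_page_alloc_list() */
	kr = vm_page_alloc_list(npages, KMA_LOMEM, &list);
	if (kr != KERN_SUCCESS)
		return kr;	/* KERN_RESOURCE_SHORTAGE when low pages run out */

	/* walk the singly linked list through the opaque accessor */
	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
		ppnum_out[i++] = vm_page_get_phys_page(p);

	/* return the pages the same way the allocator's own error path does */
	vm_page_free_list(list, FALSE);

	return KERN_SUCCESS;
}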
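The _rtc_nanotime_adjust routine added to machine_routines_asm.s follows a seqlock-style protocol on the writer side: it zeroes the generation word to flag an update in flight, adjusts the TSC base, then publishes a new non-zero generation. A simplified C sketch of the matching reader loop is shown below as an illustration only; the struct layout and field names stand in for the RNT_* offsets used by the assembly and are not the real rtc_nanotime_t definition, and real readers run in assembly with the ordering guarantees this sketch glosses over.

#include <stdint.h>

/* Illustrative stand-in for the rtc_nanotime data the assembly touches. */
struct rtc_nanotime_sketch {
	volatile uint64_t tsc_base;	/* stands in for RNT_TSC_BASE */
	volatile uint32_t generation;	/* stands in for RNT_GENERATION; 0 = update in flight */
};

/* Reader side of the generation protocol: retry while an update is in
 * progress (generation == 0) or the generation changed under us. */
static uint64_t
read_tsc_base(const struct rtc_nanotime_sketch *rnt)
{
	uint32_t gen;
	uint64_t base;

	do {
		gen  = rnt->generation;		/* 0 means the writer is mid-update */
		base = rnt->tsc_base;
	} while (gen == 0 || gen != rnt->generation);

	return base;
}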