From 7ddcb079202367355dddccdfa4318e57d50318be Mon Sep 17 00:00:00 2001 From: Apple Date: Thu, 10 May 2012 15:24:53 +0000 Subject: [PATCH] xnu-1699.26.8.tar.gz --- bsd/dev/i386/sysctl.c | 1 - bsd/hfs/hfs_readwrite.c | 157 ++--- bsd/hfs/hfscommon/Misc/FileExtentMapping.c | 7 + bsd/kern/kern_fork.c | 2 - bsd/kern/kern_panicinfo.c | 8 +- bsd/kern/kern_proc.c | 16 +- bsd/kern/kern_symfile.c | 88 ++- bsd/kern/netboot.c | 4 +- bsd/kern/pthread_support.c | 218 +++++-- bsd/kern/trace.codes | 1 + bsd/miscfs/specfs/spec_vnops.c | 76 ++- bsd/miscfs/specfs/specdev.h | 6 + bsd/net/dlil.c | 59 +- bsd/net/if.c | 6 + bsd/net/if_var.h | 6 +- bsd/nfs/nfs_vfsops.c | 8 +- bsd/sys/buf.h | 16 + bsd/sys/buf_internal.h | 14 + bsd/sys/disk.h | 4 + bsd/sys/kernel_types.h | 3 + bsd/sys/sysctl.h | 2 + bsd/vfs/vfs_bio.c | 15 + bsd/vfs/vfs_fsevents.c | 9 +- bsd/vfs/vfs_subr.c | 2 + config/MasterVersion | 2 +- config/Private.exports | 2 + config/Private.i386.exports | 1 + config/Private.x86_64.exports | 1 + config/Unsupported.i386.exports | 2 +- config/Unsupported.x86_64.exports | 3 +- iokit/IOKit/IOHibernatePrivate.h | 12 +- iokit/IOKit/IOService.h | 1 + iokit/IOKit/pwr_mgt/IOPM.h | 16 +- iokit/IOKit/pwr_mgt/IOPMLibDefs.h | 3 +- iokit/IOKit/pwr_mgt/IOPMPrivate.h | 234 ++++++- iokit/IOKit/pwr_mgt/RootDomain.h | 18 +- iokit/Kernel/IOHibernateIO.cpp | 26 +- iokit/Kernel/IOHibernateRestoreKernel.c | 175 ++--- iokit/Kernel/IOLib.cpp | 30 +- iokit/Kernel/IOMemoryDescriptor.cpp | 2 +- iokit/Kernel/IOPMrootDomain.cpp | 513 ++++++++++----- iokit/Kernel/IOServicePM.cpp | 28 +- iokit/Kernel/IOServicePMPrivate.h | 3 + iokit/Kernel/RootDomainUserClient.cpp | 5 + libkern/c++/OSKext.cpp | 10 +- libkern/libkern/OSAtomic.h | 8 +- osfmk/console/video_console.c | 154 ++++- osfmk/i386/AT386/model_dep.c | 9 +- osfmk/i386/commpage/commpage.c | 3 +- osfmk/i386/cpu_capabilities.h | 3 + osfmk/i386/cpu_threads.c | 228 +++---- osfmk/i386/cpu_threads.h | 9 + osfmk/i386/cpu_topology.c | 57 +- osfmk/i386/cpuid.c | 175 +++-- osfmk/i386/cpuid.h | 22 +- osfmk/i386/fpu.c | 42 +- osfmk/i386/hibernate_restore.c | 81 +-- osfmk/i386/i386_init.c | 29 +- osfmk/i386/i386_vm_init.c | 73 ++- osfmk/i386/locks_i386.c | 5 +- osfmk/i386/misc_protos.h | 3 +- osfmk/i386/mp.c | 14 +- osfmk/i386/pal_hibernate.h | 12 +- osfmk/i386/phys.c | 41 +- osfmk/i386/pmCPU.c | 64 +- osfmk/i386/pmap.c | 54 +- osfmk/i386/pmap.h | 14 +- osfmk/i386/pmap_common.c | 14 +- osfmk/i386/pmap_x86_common.c | 49 ++ osfmk/i386/proc_reg.h | 32 +- osfmk/i386/trap.c | 42 +- osfmk/ipc/ipc_init.c | 1 - osfmk/kdp/kdp.h | 2 + osfmk/kdp/kdp_udp.c | 9 +- osfmk/kdp/ml/i386/kdp_x86_common.c | 35 + osfmk/kern/debug.c | 13 +- osfmk/kern/startup.c | 13 +- osfmk/kern/thread.c | 2 - osfmk/kern/zalloc.c | 724 ++++++++++++--------- osfmk/kern/zalloc.h | 14 +- osfmk/mach/i386/thread_state.h | 2 +- osfmk/vm/vm_init.h | 2 +- osfmk/vm/vm_map.c | 161 +++-- osfmk/vm/vm_map.h | 5 +- osfmk/vm/vm_pageout.c | 3 + osfmk/vm/vm_resident.c | 21 +- osfmk/x86_64/idt64.s | 5 +- osfmk/x86_64/loose_ends.c | 55 +- osfmk/x86_64/pmap.c | 54 +- pexpert/i386/pe_init.c | 88 ++- pexpert/i386/pe_kprintf.c | 8 +- pexpert/pexpert/i386/boot.h | 16 +- 92 files changed, 2812 insertions(+), 1473 deletions(-) diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c index ba3bfc1ee..cb15eb632 100644 --- a/bsd/dev/i386/sysctl.c +++ b/bsd/dev/i386/sysctl.c @@ -440,7 +440,6 @@ SYSCTL_PROC(_machdep_cpu_thermal, OID_AUTO, energy_policy, sizeof(boolean_t), cpu_thermal, "I", "Energy Efficient Policy Support"); - SYSCTL_NODE(_machdep_cpu, OID_AUTO, 
xsave, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "xsave"); diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index 7bf65093c..63acbac05 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -259,6 +259,7 @@ hfs_vnop_write(struct vnop_write_args *ap) int do_snapshot = 1; time_t orig_ctime=VTOC(vp)->c_ctime; int took_truncate_lock = 0; + struct rl_entry *invalid_range; #if HFS_COMPRESSION if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */ @@ -328,7 +329,14 @@ hfs_vnop_write(struct vnop_write_args *ap) again: /* Protect against a size change. */ - if (ioflag & IO_APPEND) { + /* + * Protect against a size change. + * + * Note: If took_truncate_lock is true, then we previously got the lock shared + * but needed to upgrade to exclusive. So try getting it exclusive from the + * start. + */ + if (ioflag & IO_APPEND || took_truncate_lock) { hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK); } else { @@ -350,17 +358,42 @@ again: writelimit = offset + resid; filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize; - /* If the truncate lock is shared, and if we either have virtual - * blocks or will need to extend the file, upgrade the truncate - * to exclusive lock. If upgrade fails, we lose the lock and - * have to get exclusive lock again. Note that we want to - * grab the truncate lock exclusive even if we're not allocating new blocks - * because we could still be growing past the LEOF. + /* + * We may need an exclusive truncate lock for several reasons, all + * of which are because we may be writing to a (portion of a) block + * for the first time, and we need to make sure no readers see the + * prior, uninitialized contents of the block. The cases are: + * + * 1. We have unallocated (delayed allocation) blocks. We may be + * allocating new blocks to the file and writing to them. + * (A more precise check would be whether the range we're writing + * to contains delayed allocation blocks.) + * 2. We need to extend the file. The bytes between the old EOF + * and the new EOF are not yet initialized. This is important + * even if we're not allocating new blocks to the file. If the + * old EOF and new EOF are in the same block, we still need to + * protect that range of bytes until they are written for the + * first time. + * 3. The write overlaps some invalid ranges (delayed zero fill; that + * part of the file has been allocated, but not yet written). + * + * If we had a shared lock with the above cases, we need to try to upgrade + * to an exclusive lock. If the upgrade fails, we will lose the shared + * lock, and will need to take the truncate lock again; the took_truncate_lock + * flag will still be set, causing us to try for an exclusive lock next time. + * + * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode + * lock is held, since it protects the range lists. */ if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && - ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) { - /* Lock upgrade failed and we lost our shared lock, try again */ + ((fp->ff_unallocblocks != 0) || + (writelimit > origFileSize))) { if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) { + /* + * Lock upgrade failed and we lost our shared lock, try again. + * Note: we do not set took_truncate_lock=0 here. Leaving it + * set to 1 will cause us to try to get the lock exclusive. 
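A minimal sketch of the shared-to-exclusive upgrade pattern described in the comment above, assuming the standard lck_rw API; the needs_exclusive test is a placeholder for the unallocated-block / EOF / invalid-range checks:

	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && needs_exclusive) {
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/*
			 * Upgrade failed and the shared lock was dropped.
			 * took_truncate_lock stays set to 1, so the retry
			 * path takes the truncate lock exclusive up front.
			 */
			goto again;
		}
		/* Upgrade succeeded; we now hold the truncate lock exclusive. */
	}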
+ */ goto again; } else { @@ -374,11 +407,28 @@ again: } cnode_locked = 1; - if (cp->c_truncatelockowner == HFS_SHARED_OWNER) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, - (int)offset, uio_resid(uio), (int)fp->ff_size, - (int)filebytes, 0); + /* + * Now that we have the cnode lock, see if there are delayed zero fill ranges + * overlapping our write. If so, we need the truncate lock exclusive (see above). + */ + if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) && + (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) { + /* + * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes + * a deadlock, rather than simply returning failure. (That is, it apparently does + * not behave like a "try_lock"). Since this condition is rare, just drop the + * cnode lock and try again. Since took_truncate_lock is set, we will + * automatically take the truncate lock exclusive. + */ + hfs_unlock(cp); + cnode_locked = 0; + hfs_unlock_truncate(cp, 0); + goto again; } + + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START, + (int)offset, uio_resid(uio), (int)fp->ff_size, + (int)filebytes, 0); /* Check if we do not need to extend the file */ if (writelimit <= filebytes) { @@ -452,7 +502,6 @@ sizeok: off_t inval_end; off_t io_start; int lflag; - struct rl_entry *invalid_range; if (writelimit > fp->ff_size) filesize = writelimit; @@ -1966,85 +2015,7 @@ fail_change_next_allocation: case F_READBOOTSTRAP: case F_WRITEBOOTSTRAP: - { - struct vnode *devvp = NULL; - user_fbootstraptransfer_t *user_bootstrapp; - int devBlockSize; - int error; - uio_t auio; - daddr64_t blockNumber; - u_int32_t blockOffset; - u_int32_t xfersize; - struct buf *bp; - user_fbootstraptransfer_t user_bootstrap; - - if (!vnode_isvroot(vp)) - return (EINVAL); - /* LP64 - when caller is a 64 bit process then we are passed a pointer - * to a user_fbootstraptransfer_t else we get a pointer to a - * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t - */ - if ((hfsmp->hfs_flags & HFS_READ_ONLY) - && (ap->a_command == F_WRITEBOOTSTRAP)) { - return (EROFS); - } - if (is64bit) { - user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data; - } - else { - user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data; - user_bootstrapp = &user_bootstrap; - user_bootstrap.fbt_offset = bootstrapp->fbt_offset; - user_bootstrap.fbt_length = bootstrapp->fbt_length; - user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer); - } - - if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) || - (user_bootstrapp->fbt_length > 1024)) { - return EINVAL; - } - - if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024) - return EINVAL; - - devvp = VTOHFS(vp)->hfs_devvp; - auio = uio_create(1, user_bootstrapp->fbt_offset, - is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32, - (ap->a_command == F_WRITEBOOTSTRAP) ? 
UIO_WRITE : UIO_READ); - uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length); - - devBlockSize = vfs_devblocksize(vnode_mount(vp)); - - while (uio_resid(auio) > 0) { - blockNumber = uio_offset(auio) / devBlockSize; - error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp); - if (error) { - if (bp) buf_brelse(bp); - uio_free(auio); - return error; - }; - - blockOffset = uio_offset(auio) % devBlockSize; - xfersize = devBlockSize - blockOffset; - error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio); - if (error) { - buf_brelse(bp); - uio_free(auio); - return error; - }; - if (uio_rw(auio) == UIO_WRITE) { - error = VNOP_BWRITE(bp); - if (error) { - uio_free(auio); - return error; - } - } else { - buf_brelse(bp); - }; - }; - uio_free(auio); - }; - return 0; + return 0; case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */ { diff --git a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c index ec9881da8..998f97fa9 100644 --- a/bsd/hfs/hfscommon/Misc/FileExtentMapping.c +++ b/bsd/hfs/hfscommon/Misc/FileExtentMapping.c @@ -497,6 +497,7 @@ OSErr MapFileBlockC ( // // Determine the end of the available space. It will either be the end of the extent, // or the file's PEOF, whichever is smaller. + // dataEnd = (off_t)((off_t)(nextFABN) * (off_t)(allocBlockSize)); // Assume valid data through end of this extent if (((off_t)fcb->ff_blocks * (off_t)allocBlockSize) < dataEnd) // Is PEOF shorter? @@ -529,6 +530,12 @@ OSErr MapFileBlockC ( if (availableBytes) { tmpOff = dataEnd - offset; + /* + * Disallow negative runs. + */ + if (tmpOff <= 0) { + return EINVAL; + } if (tmpOff > (off_t)(numberOfBytes)) *availableBytes = numberOfBytes; // more there than they asked for, so pin the output else diff --git a/bsd/kern/kern_fork.c b/bsd/kern/kern_fork.c index 7746398bf..76c1fbae6 100644 --- a/bsd/kern/kern_fork.c +++ b/bsd/kern/kern_fork.c @@ -1373,8 +1373,6 @@ uthread_zone_init(void) THREAD_CHUNK * sizeof(struct uthread), "uthreads"); uthread_zone_inited = 1; - - zone_change(uthread_zone, Z_NOENCRYPT, TRUE); } } diff --git a/bsd/kern/kern_panicinfo.c b/bsd/kern/kern_panicinfo.c index 1a949de7b..eb5c5bfbd 100644 --- a/bsd/kern/kern_panicinfo.c +++ b/bsd/kern/kern_panicinfo.c @@ -43,6 +43,7 @@ /* prototypes not exported by osfmk/console. 
*/ extern void panic_dialog_test( void ); +extern void noroot_icon_test(void); extern int panic_dialog_set_image( const unsigned char * ptr, unsigned int size ); extern void panic_dialog_get_image( unsigned char ** ptr, unsigned int * size ); @@ -51,7 +52,6 @@ static int sysctl_dopanicinfo SYSCTL_HANDLER_ARGS; #define PANIC_IMAGE_SIZE_LIMIT (32 * 4096) /* 128K - Maximum amount of memory consumed for the panic UI */ -#define KERN_PANICINFO_TEST (KERN_PANICINFO_IMAGE+2) /* Allow the panic UI to be tested by root without causing a panic */ /* Local data */ static int image_size_limit = PANIC_IMAGE_SIZE_LIMIT; @@ -92,6 +92,12 @@ sysctl_dopanicinfo SYSCTL_HANDLER_ARGS panic_dialog_test(); break; + case KERN_PANICINFO_NOROOT_TEST: + printf("Testing noroot icon \n"); + + noroot_icon_test(); + break; + case KERN_PANICINFO_MAXSIZE: /* return the image size limits */ diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index 042a3a864..f352a55bf 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -2679,9 +2679,11 @@ cs_invalid_page( if (p->p_csflags & CS_KILL) { p->p_csflags |= CS_KILLED; proc_unlock(p); - printf("CODE SIGNING: cs_invalid_page(0x%llx): " - "p=%d[%s] honoring CS_KILL, final status 0x%x\n", - vaddr, p->p_pid, p->p_comm, p->p_csflags); + if (cs_debug) { + printf("CODE SIGNING: cs_invalid_page(0x%llx): " + "p=%d[%s] honoring CS_KILL, final status 0x%x\n", + vaddr, p->p_pid, p->p_comm, p->p_csflags); + } cs_procs_killed++; psignal(p, SIGKILL); proc_lock(p); @@ -2690,9 +2692,11 @@ cs_invalid_page( /* CS_HARD means fail the mapping operation so the process stays valid. */ if (p->p_csflags & CS_HARD) { proc_unlock(p); - printf("CODE SIGNING: cs_invalid_page(0x%llx): " - "p=%d[%s] honoring CS_HARD\n", - vaddr, p->p_pid, p->p_comm); + if (cs_debug) { + printf("CODE SIGNING: cs_invalid_page(0x%llx): " + "p=%d[%s] honoring CS_HARD\n", + vaddr, p->p_pid, p->p_comm); + } retval = 1; } else { if (p->p_csflags & CS_VALID) { diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index dc6531b42..b1db73f0c 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -65,6 +65,8 @@ * export, as there are no internal consumers. 
*/ int +get_kernel_symfile(__unused proc_t p, __unused char const **symfile); +int get_kernel_symfile(__unused proc_t p, __unused char const **symfile) { return KERN_FAILURE; @@ -75,6 +77,8 @@ struct kern_direct_file_io_ref_t vfs_context_t ctx; struct vnode * vp; dev_t device; + uint32_t blksize; + off_t filelength; }; @@ -91,6 +95,11 @@ static int device_ioctl(void * p1, __unused void * p2, u_long theIoctl, caddr_t return (VNOP_IOCTL(p1, theIoctl, result, 0, p2)); } +void +kern_unmap_file(struct kern_direct_file_io_ref_t * ref, off_t f_offset, off_t end); +int +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len); + struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, kern_get_file_extents_callback_t callback, @@ -110,7 +119,6 @@ kern_open_file_for_direct_io(const char * name, struct vnode_attr va; int error; off_t f_offset; - off_t filelength; uint64_t fileblk; size_t filechunk; uint64_t physoffset; @@ -198,22 +206,22 @@ kern_open_file_for_direct_io(const char * name, // get block size - error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize); + error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); if (error) goto out; if (ref->vp->v_type == VREG) - filelength = va.va_data_size; + ref->filelength = va.va_data_size; else { error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); if (error) goto out; - filelength = fileblk * blksize; + ref->filelength = fileblk * ref->blksize; } f_offset = 0; - while (f_offset < filelength) + while (f_offset < ref->filelength) { if (ref->vp->v_type == VREG) { @@ -224,12 +232,12 @@ kern_open_file_for_direct_io(const char * name, if (error) goto out; - fileblk = blkno * blksize; + fileblk = blkno * ref->blksize; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { fileblk = f_offset; - filechunk = f_offset ? 0 : filelength; + filechunk = f_offset ? 0 : ref->filelength; } physoffset = 0; @@ -362,9 +370,65 @@ kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t ad vfs_context_proc(ref->ctx))); } +void +kern_unmap_file(struct kern_direct_file_io_ref_t * ref, off_t offset, off_t end) +{ + int error; + int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); + void * p1; + void * p2; + dk_extent_t extent; + dk_unmap_t unmap; + uint64_t fileblk; + size_t filechunk; + + bzero(&extent, sizeof(dk_extent_t)); + bzero(&unmap, sizeof(dk_unmap_t)); + if (ref->vp->v_type == VREG) + { + p1 = &ref->device; + p2 = kernproc; + do_ioctl = &file_ioctl; + } + else + { + /* Partition. 
*/ + p1 = ref->vp; + p2 = ref->ctx; + do_ioctl = &device_ioctl; + } + while (offset < end) + { + if (ref->vp->v_type == VREG) + { + daddr64_t blkno; + filechunk = 1*1024*1024*1024; + if (filechunk > (size_t)(end - offset)) + filechunk = (size_t)(end - offset); + error = VNOP_BLOCKMAP(ref->vp, offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); + if (error) break; + fileblk = blkno * ref->blksize; + } + else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) + { + fileblk = offset; + filechunk = ref->filelength; + } + extent.offset = fileblk; + extent.length = filechunk; + unmap.extents = &extent; + unmap.extentsCount = 1; + error = do_ioctl(p1, p2, DKIOCUNMAP, (caddr_t)&unmap); +// kprintf("DKIOCUNMAP(%d) 0x%qx, 0x%qx\n", error, extent.offset, extent.length); + if (error) break; + offset += filechunk; + } +} + void kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, - off_t offset, caddr_t addr, vm_size_t len) + off_t write_offset, caddr_t addr, vm_size_t write_length, + off_t discard_offset, off_t discard_end) { int error; kprintf("kern_close_file_for_direct_io\n"); @@ -392,9 +456,13 @@ kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, } (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); - if (addr && len) + if (addr && write_length) + { + (void) kern_write_file(ref, write_offset, addr, write_length); + } + if (discard_offset && discard_end) { - (void) kern_write_file(ref, offset, addr, len); + (void) kern_unmap_file(ref, discard_offset, discard_end); } error = vnode_close(ref->vp, FWRITE, ref->ctx); diff --git a/bsd/kern/netboot.c b/bsd/kern/netboot.c index 664f03ef7..1eb975ed2 100644 --- a/bsd/kern/netboot.c +++ b/bsd/kern/netboot.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001-2010 Apple Inc. All rights reserved. + * Copyright (c) 2001-2012 Apple Inc. All rights reserved. 
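The DKIOCUNMAP sequence used by kern_unmap_file() above, reduced to a minimal sketch; fileblk and filechunk are assumed to hold the device-relative byte offset and length of one run, as produced by the VNOP_BLOCKMAP loop:

	dk_extent_t extent;
	dk_unmap_t  unmap;

	bzero(&extent, sizeof(extent));
	bzero(&unmap, sizeof(unmap));

	extent.offset      = fileblk;    /* byte offset of the run on the device */
	extent.length      = filechunk;  /* length of the run in bytes */
	unmap.extents      = &extent;
	unmap.extentsCount = 1;

	/* tell the driver these bytes no longer hold useful data (TRIM-style) */
	error = do_ioctl(p1, p2, DKIOCUNMAP, (caddr_t)&unmap);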
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -591,6 +591,7 @@ find_interface(void) { struct ifnet * ifp = NULL; + dlil_if_lock(); if (rootdevice[0]) { ifp = ifunit((char *)rootdevice); } @@ -601,6 +602,7 @@ find_interface(void) break; ifnet_head_done(); } + dlil_if_unlock(); return (ifp); } diff --git a/bsd/kern/pthread_support.c b/bsd/kern/pthread_support.c index e5626dfa2..bcb0b0997 100644 --- a/bsd/kern/pthread_support.c +++ b/bsd/kern/pthread_support.c @@ -478,7 +478,7 @@ extern int ksyn_findobj(uint64_t mutex, uint64_t * object, uint64_t * offset); static void UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, uint64_t tid, int wqtype); extern thread_t port_name_to_thread(mach_port_name_t port_name); -int ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log); +kern_return_t ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int log, thread_continue_t, void * parameter); kern_return_t ksyn_wakeup_thread(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe); void ksyn_freeallkwe(ksyn_queue_t kq); @@ -503,6 +503,8 @@ void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *update void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep, ksyn_queue_t kfreeq, int release); ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq); ksyn_waitq_element_t ksyn_queue_find_threadseq(ksyn_wait_queue_t ckwq, ksyn_queue_t kq, thread_t th, uint32_t toseq); +void psynch_cvcontinue(void *, wait_result_t); +void psynch_mtxcontinue(void *, wait_result_t); int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int longreadset, int allreaders, uint32_t updatebits, int * wokenp); int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int * type, uint32_t lowest[]); @@ -762,6 +764,7 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t int firstfit = flags & _PTHREAD_MUTEX_POLICY_FIRSTFIT; uint32_t lockseq, updatebits=0; ksyn_waitq_element_t kwe; + kern_return_t kret; #if _PSYNCH_TRACE_ __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_START, (uint32_t)mutex, mgen, ugen, flags, 0); @@ -862,14 +865,50 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t goto out; } - error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0); - /* drops the wq lock */ + kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, psynch_mtxcontinue, (void *)kwq); + + psynch_mtxcontinue((void *)kwq, kret); + + /* not expected to return from unix_syscall_return */ + panic("psynch_mtxcontinue returned from unix_syscall_return"); + +out: + ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); +#if _PSYNCH_TRACE_ + __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0); +#endif /* _PSYNCH_TRACE_ */ + + return(error); +} + +void +psynch_mtxcontinue(void * parameter, wait_result_t result) +{ + int error = 0; + uint32_t updatebits = 0; + uthread_t uth = current_uthread(); + ksyn_wait_queue_t kwq = (ksyn_wait_queue_t)parameter; + ksyn_waitq_element_t kwe; + + kwe = &uth->uu_kwe; + + switch (result) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } if (error != 0) { ksyn_wqlock(kwq); #if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, 
(uint32_t)mutex, 3, 0xdeadbeef, error, 0); + __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_NONE, (uint32_t)kwq->kw_addr, 3, 0xdeadbeef, error, 0); #endif /* _PSYNCH_TRACE_ */ if (kwe->kwe_kwqqueue != NULL) ksyn_queue_removeitem(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe); @@ -877,18 +916,17 @@ psynch_mutexwait(__unused proc_t p, struct psynch_mutexwait_args * uap, uint32_t } else { updatebits = kwe->kwe_psynchretval; updatebits &= ~PTH_RWL_MTX_WAIT; - *retval = updatebits; + uth->uu_rval[0] = updatebits; if (updatebits == 0) __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n"); } -out: ksyn_wqrelease(kwq, NULL, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX)); #if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)mutex, 0xeeeeeeed, updatebits, error, 0); + __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_MLWAIT | DBG_FUNC_END, (uint32_t)kwq->kw_addr, 0xeeeeeeed, updatebits, error, 0); #endif /* _PSYNCH_TRACE_ */ - return(error); + unix_syscall_return(error); } /* @@ -1205,10 +1243,7 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret uthread_t uth; ksyn_waitq_element_t kwe, nkwe = NULL; struct ksyn_queue *kq, kfreeq; -#if __TESTPANICS__ - //int timeoutval = 3; /* 3 secs */ - //u_int64_t ntime = 0; -#endif /* __TESTPANICS__ */ + kern_return_t kret; /* for conformance reasons */ __pthread_testcancel(0); @@ -1243,9 +1278,6 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret return(error); } -#if __TESTPANICS__ - //clock_interval_to_deadline(timeoutval, NSEC_PER_SEC, &ntime); -#endif /* __TESTPANICS__ */ if (mutex != (user_addr_t)0) { error = ksyn_wqfind(mutex, mgen, ugen, 0, 0, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX), &kwq); @@ -1367,20 +1399,53 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret goto out; } -#if 0 /* __TESTPANICS__ */ - /* if no timeout is passed, set 5 secs timeout to catch hangs */ - error = ksyn_block_thread_locked(ckwq, (abstime == 0) ? 
ntime : abstime, kwe, 1); -#else - error = ksyn_block_thread_locked(ckwq, abstime, kwe, 1); -#endif /* __TESTPANICS__ */ + kret = ksyn_block_thread_locked(ckwq, abstime, kwe, 1, psynch_cvcontinue, (void *)ckwq); /* lock dropped */ - + psynch_cvcontinue(ckwq, kret); + /* not expected to return from unix_syscall_return */ + panic("psynch_cvcontinue returned from unix_syscall_return"); + +out: +#if _PSYNCH_TRACE_ + __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0); +#endif /* _PSYNCH_TRACE_ */ + ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); + return(local_error); +} + + +void +psynch_cvcontinue(void * parameter, wait_result_t result) +{ + int error = 0, local_error = 0; + uthread_t uth = current_uthread(); + ksyn_wait_queue_t ckwq = (ksyn_wait_queue_t)parameter; + ksyn_waitq_element_t kwe; + struct ksyn_queue kfreeq; + + switch (result) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } +#if _PSYNCH_TRACE_ + __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_THWAKEUP | DBG_FUNC_NONE, 0xf4f3f2f1, (uintptr_t)uth, result, 0, 0); +#endif /* _PSYNCH_TRACE_ */ + local_error = error; + kwe = &uth->uu_kwe; + if (error != 0) { ksyn_wqlock(ckwq); /* just in case it got woken up as we were granting */ - *retval = kwe->kwe_psynchretval; + uth->uu_rval[0] = kwe->kwe_psynchretval; #if __TESTPANICS__ if ((kwe->kwe_kwqqueue != NULL) && (kwe->kwe_kwqqueue != ckwq)) @@ -1394,31 +1459,28 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret kwe->kwe_kwqqueue = NULL; } if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) { - /* the condition var granted. + /* the condition var granted. * reset the error so that the thread returns back. */ local_error = 0; /* no need to set any bits just return as cvsig/broad covers this */ ksyn_wqunlock(ckwq); - *retval = 0; goto out; } ckwq->kw_sword += PTHRW_INC; - /* set C and P bits, in the local error as well as updatebits */ + /* set C and P bits, in the local error */ if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) { - updatebits |= PTH_RWS_CV_CBIT; local_error |= ECVCERORR; if (ckwq->kw_inqueue != 0) { - (void)ksyn_queue_move_tofree(ckwq, kq, (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1); + (void)ksyn_queue_move_tofree(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], (ckwq->kw_lword & PTHRW_COUNT_MASK), &kfreeq, 1, 1); } ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0; ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT; } else { /* everythig in the queue is a fake entry ? 
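The continuation-based blocking used by psynch_mtxcontinue()/psynch_cvcontinue() above, as a minimal sketch; my_continuation is a hypothetical name, and the syscall return value would be filled in from the woken element as in the real code:

	static void
	my_continuation(void *parameter, wait_result_t result)
	{
		uthread_t uth = current_uthread();
		int error;

		switch (result) {
		case THREAD_TIMED_OUT:
			error = ETIMEDOUT;
			break;
		case THREAD_INTERRUPTED:
			error = EINTR;
			break;
		default:
			error = 0;
			break;
		}
		uth->uu_rval[0] = 0;		/* syscall return value, e.g. updatebits */
		unix_syscall_return(error);	/* finishes the syscall; never returns */
	}

	/* waiter, entered with the wait queue locked: */
	assert_wait_deadline(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, abstime);
	ksyn_wqunlock(kwq);
	(void) thread_block_parameter(my_continuation, (void *)kwq);
	/* not reached once the continuation runs */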
*/ if ((ckwq->kw_inqueue != 0) && (ckwq->kw_fakecount == ckwq->kw_inqueue)) { - updatebits |= PTH_RWS_CV_PBIT; local_error |= ECVPERORR; } } @@ -1427,17 +1489,19 @@ psynch_cvwait(__unused proc_t p, struct psynch_cvwait_args * uap, uint32_t * ret } else { /* PTH_RWL_MTX_WAIT is removed */ if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) - *retval = PTHRW_INC | PTH_RWS_CV_CBIT; + uth->uu_rval[0] = PTHRW_INC | PTH_RWS_CV_CBIT; else - *retval = 0; + uth->uu_rval[0] = 0; local_error = 0; } out: #if _PSYNCH_TRACE_ - __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)cond, 0xeeeeeeed, (uint32_t)*retval, local_error, 0); + __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_CVWAIT | DBG_FUNC_END, (uint32_t)ckwq->kw_addr, 0xeeeeeeed, uth->uu_rval[0], local_error, 0); #endif /* _PSYNCH_TRACE_ */ ksyn_wqrelease(ckwq, NULL, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR)); - return(local_error); + + unix_syscall_return(local_error); + } /* @@ -1524,6 +1588,7 @@ psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t int isinit = lgen & PTHRW_RWL_INIT; uint32_t returnbits = 0; ksyn_waitq_element_t kwe; + kern_return_t kret; #if _PSYNCH_TRACE_ __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWRDLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); @@ -1635,8 +1700,19 @@ psynch_rw_rdlock(__unused proc_t p, struct psynch_rw_rdlock_args * uap, uint32_t if (error != 0) panic("psynch_rw_rdlock: failed to enqueue\n"); #endif /* __TESTPANICS__ */ - error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0); + kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); /* drops the kwq lock */ + switch (kret) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } out: if (error != 0) { @@ -1674,6 +1750,7 @@ psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_arg int isinit = lgen & PTHRW_RWL_INIT; uint32_t returnbits=0; ksyn_waitq_element_t kwe; + kern_return_t kret; ksyn_wait_queue_t kwq; int error=0, block = 0 ; @@ -1764,8 +1841,19 @@ psynch_rw_longrdlock(__unused proc_t p, __unused struct psynch_rw_longrdlock_arg panic("psynch_rw_longrdlock: failed to enqueue\n"); #endif /* __TESTPANICS__ */ - error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0); + kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); /* drops the kwq lock */ + switch (kret) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } out: if (error != 0) { #if _PSYNCH_TRACE_ @@ -1809,6 +1897,7 @@ psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t int isinit = lgen & PTHRW_RWL_INIT; uint32_t returnbits = 0; ksyn_waitq_element_t kwe; + kern_return_t kret; #if _PSYNCH_TRACE_ __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); @@ -1899,8 +1988,19 @@ psynch_rw_wrlock(__unused proc_t p, struct psynch_rw_wrlock_args * uap, uint32_t panic("psynch_rw_wrlock: failed to enqueue\n"); #endif /* __TESTPANICS__ */ - error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0); + kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); /* drops the wq lock */ + switch (kret) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } out: if (error != 0) { @@ -1944,6 +2044,7 
@@ psynch_rw_yieldwrlock(__unused proc_t p, __unused struct psynch_rw_yieldwrlock_ uthread_t uth; uint32_t returnbits=0; ksyn_waitq_element_t kwe; + kern_return_t kret; #if _PSYNCH_TRACE_ __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWYWRLOCK | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); @@ -2031,7 +2132,18 @@ psynch_rw_yieldwrlock(__unused proc_t p, __unused struct psynch_rw_yieldwrlock_ panic("psynch_rw_yieldwrlock: failed to enqueue\n"); #endif /* __TESTPANICS__ */ - error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0); + kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); + switch (kret) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } out: if (error != 0) { @@ -2190,6 +2302,7 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32 uint32_t lockseq = 0, updatebits = 0, preseq = 0; int isinit = lgen & PTHRW_RWL_INIT; ksyn_waitq_element_t kwe; + kern_return_t kret; #if _PSYNCH_TRACE_ __PTHREAD_TRACE_DEBUG(_PSYNCH_TRACE_RWUPGRADE | DBG_FUNC_START, (uint32_t)rwlock, lgen, ugen, rw_wc, 0); @@ -2276,8 +2389,19 @@ psynch_rw_upgrade(__unused proc_t p, struct psynch_rw_upgrade_args * uap, uint32 #endif /* __TESTPANICS__ */ - error = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0); + kret = ksyn_block_thread_locked(kwq, (uint64_t)0, kwe, 0, THREAD_CONTINUE_NULL, NULL); /* drops the lock */ + switch (kret) { + case THREAD_TIMED_OUT: + error = ETIMEDOUT; + break; + case THREAD_INTERRUPTED: + error = EINTR; + break; + default: + error = 0; + break; + } out: if (error != 0) { @@ -2934,8 +3058,12 @@ psynch_wq_cleanup(__unused void * param, __unused void * param1) } -int -ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog) +kern_return_t +#if _PSYNCH_TRACE_ +ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, int mylog, thread_continue_t continuation, void * parameter) +#else +ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_element_t kwe, __unused int mylog, thread_continue_t continuation, void * parameter) +#endif { kern_return_t kret; int error = 0; @@ -2947,7 +3075,12 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele assert_wait_deadline(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, abstime); ksyn_wqunlock(kwq); - kret = thread_block(NULL); + if (continuation == THREAD_CONTINUE_NULL) + kret = thread_block(NULL); + else + kret = thread_block_parameter(continuation, parameter); + +#if _PSYNCH_TRACE_ switch (kret) { case THREAD_TIMED_OUT: error = ETIMEDOUT; @@ -2956,7 +3089,6 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele error = EINTR; break; } -#if _PSYNCH_TRACE_ uth = current_uthread(); #if defined(__i386__) if (mylog != 0) @@ -2967,7 +3099,7 @@ ksyn_block_thread_locked(ksyn_wait_queue_t kwq, uint64_t abstime, ksyn_waitq_ele #endif #endif /* _PSYNCH_TRACE_ */ - return(error); + return(kret); } kern_return_t diff --git a/bsd/kern/trace.codes b/bsd/kern/trace.codes index 3792f3d37..fbc026fb2 100644 --- a/bsd/kern/trace.codes +++ b/bsd/kern/trace.codes @@ -1207,6 +1207,7 @@ 0x53101a0 CPUPM_TEST_INFO 0x53101a4 CPUPM_TEST_RUN_INFO 0x53101a8 CPUPM_TEST_SLAVE_INFO +0x53101ac CPUPM_FORCED_IDLE 0x5330000 HIBERNATE 0x5330004 HIBERNATE_WRITE_IMAGE 0x5330008 HIBERNATE_MACHINE_INIT diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c 
index 8050679f8..52cf1c806 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -154,6 +154,20 @@ struct vnodeopv_desc spec_vnodeop_opv_desc = static void set_blocksize(vnode_t, dev_t); +struct _throttle_io_info_t { + struct timeval last_normal_IO_timestamp; + struct timeval last_IO_timestamp; + SInt32 numthreads_throttling; + SInt32 refcnt; + SInt32 alloc; +}; + +struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV]; + +static void throttle_info_update_internal(void *throttle_info, int flags, boolean_t isssd); + + + /* * Trivial lookup routine that always fails. */ @@ -259,6 +273,38 @@ spec_open(struct vnop_open_args *ap) } devsw_unlock(dev, S_IFCHR); + + if (error == 0 && cdevsw[maj].d_type == D_DISK && !vp->v_un.vu_specinfo->si_initted) { + int isssd = 0; + uint64_t throttle_mask = 0; + uint32_t devbsdunit = 0; + + if (VNOP_IOCTL(vp, DKIOCGETTHROTTLEMASK, (caddr_t)&throttle_mask, 0, NULL) == 0) { + + if (VNOP_IOCTL(vp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, ap->a_context) == 0) { + /* + * as a reasonable approximation, only use the lowest bit of the mask + * to generate a disk unit number + */ + devbsdunit = num_trailing_0(throttle_mask); + + vnode_lock(vp); + + vp->v_un.vu_specinfo->si_isssd = isssd; + vp->v_un.vu_specinfo->si_devbsdunit = devbsdunit; + vp->v_un.vu_specinfo->si_throttle_mask = throttle_mask; + vp->v_un.vu_specinfo->si_throttleable = 1; + vp->v_un.vu_specinfo->si_initted = 1; + + vnode_unlock(vp); + } + } + if (vp->v_un.vu_specinfo->si_initted == 0) { + vnode_lock(vp); + vp->v_un.vu_specinfo->si_initted = 1; + vnode_unlock(vp); + } + } return (error); case VBLK: @@ -357,8 +403,17 @@ spec_read(struct vnop_read_args *ap) switch (vp->v_type) { case VCHR: + if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) { + struct _throttle_io_info_t *throttle_info; + + throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit]; + + throttle_info_update_internal(throttle_info, 0, vp->v_un.vu_specinfo->si_isssd); + } + error = (*cdevsw[major(vp->v_rdev)].d_read) (vp->v_rdev, uio, ap->a_ioflag); + return (error); case VBLK: @@ -442,8 +497,19 @@ spec_write(struct vnop_write_args *ap) switch (vp->v_type) { case VCHR: + if (cdevsw[major(vp->v_rdev)].d_type == D_DISK && vp->v_un.vu_specinfo->si_throttleable) { + struct _throttle_io_info_t *throttle_info; + + throttle_info = &_throttle_io_info[vp->v_un.vu_specinfo->si_devbsdunit]; + + throttle_info_update_internal(throttle_info, 0, vp->v_un.vu_specinfo->si_isssd); + + microuptime(&throttle_info->last_IO_timestamp); + } + error = (*cdevsw[major(vp->v_rdev)].d_write) (vp->v_rdev, uio, ap->a_ioflag); + return (error); case VBLK: @@ -645,15 +711,6 @@ void IOSleep(int); #define LOWPRI_SLEEP_INTERVAL 2 #endif -struct _throttle_io_info_t { - struct timeval last_normal_IO_timestamp; - struct timeval last_IO_timestamp; - SInt32 numthreads_throttling; - SInt32 refcnt; - SInt32 alloc; -}; - -struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV]; int lowpri_IO_initial_window_msecs = LOWPRI_INITIAL_WINDOW_MSECS; int lowpri_IO_window_msecs_inc = LOWPRI_WINDOW_MSECS_INC; int lowpri_max_window_msecs = LOWPRI_MAX_WINDOW_MSECS; @@ -1210,6 +1267,7 @@ spec_strategy(struct vnop_strategy_args *ap) if (policy == IOPOL_THROTTLE) { bp->b_flags |= B_THROTTLED_IO; + bp->b_attr.ba_flags |= BA_THROTTLED_IO; bp->b_flags &= ~B_PASSIVE; } else if (policy == IOPOL_PASSIVE) bp->b_flags |= B_PASSIVE; diff --git a/bsd/miscfs/specfs/specdev.h b/bsd/miscfs/specfs/specdev.h index 
3394fedbf..7b44d40e3 100644 --- a/bsd/miscfs/specfs/specdev.h +++ b/bsd/miscfs/specfs/specdev.h @@ -83,6 +83,12 @@ struct specinfo { daddr_t si_size; /* device block size in bytes */ daddr64_t si_lastr; /* last read blkno (read-ahead) */ u_int64_t si_devsize; /* actual device size in bytes */ + + u_int8_t si_initted; + u_int8_t si_throttleable; + u_int16_t si_isssd; + u_int32_t si_devbsdunit; + u_int64_t si_throttle_mask; }; /* * Exported shorthand diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index 272388f02..9cca104a1 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999-2011 Apple Inc. All rights reserved. + * Copyright (c) 1999-2012 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -235,8 +235,8 @@ static lck_grp_t *dlil_lock_group; lck_grp_t *ifnet_lock_group; static lck_grp_t *ifnet_head_lock_group; lck_attr_t *ifnet_lock_attr; -decl_lck_rw_data(, ifnet_head_lock); -decl_lck_mtx_data(, dlil_ifnet_lock); +decl_lck_rw_data(static, ifnet_head_lock); +decl_lck_mtx_data(static, dlil_ifnet_lock); u_int32_t dlil_filter_count = 0; extern u_int32_t ipv4_ll_arp_aware; @@ -2771,11 +2771,19 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) if (ifp == NULL) return (EINVAL); + /* + * Serialize ifnet attach using dlil_ifnet_lock, in order to + * prevent the interface from being configured while it is + * embryonic, as ifnet_head_lock is dropped and reacquired + * below prior to marking the ifnet with IFRF_ATTACHED. + */ + dlil_if_lock(); ifnet_head_lock_exclusive(); /* Verify we aren't already on the list */ TAILQ_FOREACH(tmp_if, &ifnet_head, if_link) { if (tmp_if == ifp) { ifnet_head_done(); + dlil_if_unlock(); return (EEXIST); } } @@ -2800,6 +2808,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) } else if (ll_addr->sdl_alen != ifp->if_addrlen) { ifnet_lock_done(ifp); ifnet_head_done(); + dlil_if_unlock(); return (EINVAL); } } @@ -2813,6 +2822,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) "family module - %d\n", __func__, ifp->if_family); ifnet_lock_done(ifp); ifnet_head_done(); + dlil_if_unlock(); return (ENODEV); } @@ -2822,6 +2832,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) if (ifp->if_proto_hash == NULL) { ifnet_lock_done(ifp); ifnet_head_done(); + dlil_if_unlock(); return (ENOBUFS); } bzero(ifp->if_proto_hash, dlif_phash_size); @@ -2855,6 +2866,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) ifp->if_index = 0; ifnet_lock_done(ifp); ifnet_head_done(); + dlil_if_unlock(); return (ENOBUFS); } ifp->if_index = idx; @@ -2868,6 +2880,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) if (ifa == NULL) { ifnet_lock_done(ifp); ifnet_head_done(); + dlil_if_unlock(); return (ENOBUFS); } @@ -3026,6 +3039,7 @@ ifnet_attach(ifnet_t ifp, const struct sockaddr_dl *ll_addr) } ifnet_lock_done(ifp); lck_mtx_unlock(rnh_lock); + dlil_if_unlock(); #if PF /* @@ -3252,9 +3266,9 @@ ifnet_detach(ifnet_t ifp) dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_IF_DETACHING, NULL, 0); /* Let worker thread take care of the rest, to avoid reentrancy */ - lck_mtx_lock(&dlil_ifnet_lock); + dlil_if_lock(); ifnet_detaching_enqueue(ifp); - lck_mtx_unlock(&dlil_ifnet_lock); + dlil_if_unlock(); return (0); } @@ -3262,7 +3276,7 @@ ifnet_detach(ifnet_t ifp) static void ifnet_detaching_enqueue(struct ifnet *ifp) { - lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED); + dlil_if_lock_assert(); ++ifnet_detaching_cnt; VERIFY(ifnet_detaching_cnt != 0); @@ -3275,7 +3289,7 
@@ ifnet_detaching_dequeue(void) { struct ifnet *ifp; - lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED); + dlil_if_lock_assert(); ifp = TAILQ_FIRST(&ifnet_detaching_head); VERIFY(ifnet_detaching_cnt != 0 || ifp == NULL); @@ -3295,7 +3309,7 @@ ifnet_delayed_thread_func(void) struct ifnet *ifp; for (;;) { - lck_mtx_lock(&dlil_ifnet_lock); + dlil_if_lock(); while (ifnet_detaching_cnt == 0) { (void) msleep(&ifnet_delayed_run, &dlil_ifnet_lock, (PZERO - 1), "ifnet_delayed_thread", NULL); @@ -3305,12 +3319,9 @@ ifnet_delayed_thread_func(void) /* Take care of detaching ifnet */ ifp = ifnet_detaching_dequeue(); - if (ifp != NULL) { - lck_mtx_unlock(&dlil_ifnet_lock); + dlil_if_unlock(); + if (ifp != NULL) ifnet_detach_final(ifp); - } else { - lck_mtx_unlock(&dlil_ifnet_lock); - } } } @@ -3618,7 +3629,7 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid, void *buf, *base, **pbuf; int ret = 0; - lck_mtx_lock(&dlil_ifnet_lock); + dlil_if_lock(); TAILQ_FOREACH(dlifp1, &dlil_ifnet_head, dl_if_link) { ifp1 = (struct ifnet *)dlifp1; @@ -3705,7 +3716,7 @@ int dlil_if_acquire(u_int32_t family, const void *uniqueid, *ifp = ifp1; end: - lck_mtx_unlock(&dlil_ifnet_lock); + dlil_if_unlock(); VERIFY(dlifp1 == NULL || (IS_P2ALIGNED(dlifp1, sizeof (u_int64_t)) && IS_P2ALIGNED(&ifp1->if_data, sizeof (u_int64_t)))); @@ -3736,6 +3747,24 @@ dlil_if_release(ifnet_t ifp) ifnet_lock_done(ifp); } +__private_extern__ void +dlil_if_lock(void) +{ + lck_mtx_lock(&dlil_ifnet_lock); +} + +__private_extern__ void +dlil_if_unlock(void) +{ + lck_mtx_unlock(&dlil_ifnet_lock); +} + +__private_extern__ void +dlil_if_lock_assert(void) +{ + lck_mtx_assert(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED); +} + __private_extern__ void dlil_proto_unplumb_all(struct ifnet *ifp) { diff --git a/bsd/net/if.c b/bsd/net/if.c index 26314b948..595fcaea9 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -1452,7 +1452,13 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct proc *p) #endif /* IF_CLONE_LIST */ } + /* + * ioctls which require ifp. Note that we acquire dlil_ifnet_lock + * here to ensure that the ifnet, if found, has been fully attached. + */ + dlil_if_lock(); ifp = ifunit(ifr->ifr_name); + dlil_if_unlock(); if (ifp == NULL) return (ENXIO); diff --git a/bsd/net/if_var.h b/bsd/net/if_var.h index a76aa7dbb..f3e64b0e3 100644 --- a/bsd/net/if_var.h +++ b/bsd/net/if_var.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2011 Apple Inc. All rights reserved. + * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 
* * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -807,6 +807,10 @@ __private_extern__ void if_attach_link_ifa(struct ifnet *, struct ifaddr *); __private_extern__ void if_detach_ifa(struct ifnet *, struct ifaddr *); __private_extern__ void if_detach_link_ifa(struct ifnet *, struct ifaddr *); +__private_extern__ void dlil_if_lock(void); +__private_extern__ void dlil_if_unlock(void); +__private_extern__ void dlil_if_lock_assert(void); + extern struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); extern struct ifaddr *ifa_ifwithaddr_scoped(const struct sockaddr *, unsigned int); extern struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *); diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 484e47c2b..7a0323fde 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1575,12 +1575,8 @@ nfs_convert_old_nfs_args(mount_t mp, user_addr_t data, vfs_context_t ctx, int ar /* copy socket address */ if (inkernel) bcopy(CAST_DOWN(void *, args.addr), &ss, args.addrlen); - else { - if ((size_t)args.addrlen > sizeof (struct sockaddr_storage)) - error = EINVAL; - else - error = copyin(args.addr, &ss, args.addrlen); - } + else + error = copyin(args.addr, &ss, args.addrlen); nfsmout_if(error); ss.ss_len = args.addrlen; diff --git a/bsd/sys/buf.h b/bsd/sys/buf.h index f1d7f924b..08216cac0 100644 --- a/bsd/sys/buf.h +++ b/bsd/sys/buf.h @@ -1025,6 +1025,14 @@ uint32_t buf_redundancy_flags(buf_t); */ void buf_set_redundancy_flags(buf_t, uint32_t); +/*! + @function buf_attr + @abstract Gets the attributes for this buf. + @param bp Buffer whose attributes to get. + @return bufattr_t. + */ +bufattr_t buf_attr(buf_t); + #ifdef KERNEL_PRIVATE void buf_setfilter(buf_t, void (*)(buf_t, void *), void *, void (**)(buf_t, void *), void **); @@ -1044,6 +1052,14 @@ void buf_setcpaddr(buf_t, void *); @return int. */ void *buf_getcpaddr(buf_t); + +/*! + @function buf_throttled + @abstract Check if a buffer is throttled. + @param bap Buffer attribute to test. + @return Nonzero if the buffer is throttled, 0 otherwise. + */ +int bufattr_throttled(bufattr_t bap); #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/buf_internal.h b/bsd/sys/buf_internal.h index d80eb21c8..2d259ac2a 100644 --- a/bsd/sys/buf_internal.h +++ b/bsd/sys/buf_internal.h @@ -82,6 +82,13 @@ #define NOLIST ((struct buf *)0x87654321) +/* + * Attributes of an I/O to be used by lower layers + */ +struct bufattr { + uint64_t ba_flags; /* flags. Some are only in-use on embedded devices */ +}; + /* * The buffer header describes an I/O operation in the kernel. */ @@ -128,6 +135,7 @@ struct buf { #if CONFIG_PROTECT struct cprotect *b_cpentry; /* address of cp_entry, to be passed further down */ #endif /* CONFIG_PROTECT */ + struct bufattr b_attr; #ifdef JOE_DEBUG void * b_owner; int b_tag; @@ -218,6 +226,12 @@ struct buf { #define B_SYNC 0x02 /* Do all allocations synchronously. */ #define B_NOBUFF 0x04 /* Do not allocate struct buf */ +/* + * ba_flags (Buffer Attribute flags) + * Some of these may be in-use only on embedded devices. 
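A sketch of how a lower layer might consult the new buffer attributes via buf_attr() and bufattr_throttled() declared above; my_disk_strategy is a hypothetical driver routine:

	void
	my_disk_strategy(buf_t bp)
	{
		bufattr_t bap = buf_attr(bp);

		if (bufattr_throttled(bap)) {
			/*
			 * spec_strategy() tagged this I/O with BA_THROTTLED_IO;
			 * a driver could queue it at a lower priority here.
			 */
		}
		/* ... issue the I/O ... */
	}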
+ */ +#define BA_THROTTLED_IO 0x000000002 + extern int niobuf_headers; /* The number of IO buffer headers for cluster IO */ extern int nbuf_headers; /* The number of buffer headers */ diff --git a/bsd/sys/disk.h b/bsd/sys/disk.h index 745aa6710..2bdd79a55 100644 --- a/bsd/sys/disk.h +++ b/bsd/sys/disk.h @@ -182,6 +182,10 @@ typedef struct #define DKIOCLOCKPHYSICALEXTENTS _IO('d', 81) #define DKIOCGETPHYSICALEXTENT _IOWR('d', 82, dk_physical_extent_t) #define DKIOCUNLOCKPHYSICALEXTENTS _IO('d', 83) + +#ifdef PRIVATE +#define _DKIOCGETMIGRATIONUNITBYTESIZE _IOR('d', 85, uint32_t) +#endif /* PRIVATE */ #endif /* KERNEL */ #endif /* _SYS_DISK_H_ */ diff --git a/bsd/sys/kernel_types.h b/bsd/sys/kernel_types.h index 155b57524..20a0bafb3 100644 --- a/bsd/sys/kernel_types.h +++ b/bsd/sys/kernel_types.h @@ -84,6 +84,7 @@ struct __ifmultiaddr; struct __ifnet_filter; struct __rtentry; struct __if_clone; +struct __bufattr; typedef struct __ifnet* ifnet_t; typedef struct __mbuf* mbuf_t; @@ -95,6 +96,7 @@ typedef struct __ifmultiaddr* ifmultiaddr_t; typedef struct __ifnet_filter* interface_filter_t; typedef struct __rtentry* route_t; typedef struct __if_clone* if_clone_t; +typedef struct __bufattr* bufattr_t; #else /* BSD_BUILD */ @@ -123,6 +125,7 @@ typedef struct ifmultiaddr* ifmultiaddr_t; typedef struct ifnet_filter* interface_filter_t; typedef struct rtentry* route_t; typedef struct if_clone* if_clone_t; +typedef struct bufattr* bufattr_t; #endif /* KERNEL_PRIVATE */ #endif /* !BSD_BUILD */ diff --git a/bsd/sys/sysctl.h b/bsd/sys/sysctl.h index 1da032f48..a1f06467a 100644 --- a/bsd/sys/sysctl.h +++ b/bsd/sys/sysctl.h @@ -549,6 +549,8 @@ SYSCTL_DECL(_user); /* KERN_PANICINFO types */ #define KERN_PANICINFO_MAXSIZE 1 /* quad: panic UI image size limit */ #define KERN_PANICINFO_IMAGE 2 /* panic UI in 8-bit kraw format */ +#define KERN_PANICINFO_TEST 4 /* Allow the panic UI to be tested by root without causing a panic */ +#define KERN_PANICINFO_NOROOT_TEST 5 /* Allow the noroot UI to be tested by root */ #define CTL_KERN_NAMES { \ { 0, 0 }, \ diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 0d474ed28..53f4f5576 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -405,6 +405,18 @@ buf_setcpaddr(buf_t bp __unused, void *cp_entry_addr __unused) { } #endif /* CONFIG_PROTECT */ +int +bufattr_throttled(bufattr_t bap) { + if ( (bap->ba_flags & BA_THROTTLED_IO) ) + return 1; + return 0; +} + +bufattr_t +buf_attr(buf_t bp) { + return &bp->b_attr; +} + errno_t buf_error(buf_t bp) { @@ -3400,6 +3412,7 @@ bcleanbuf(buf_t bp, boolean_t discard) #ifdef CONFIG_PROTECT bp->b_cpentry = 0; #endif + bzero(&bp->b_attr, sizeof(struct bufattr)); lck_mtx_lock_spin(buf_mtxp); } @@ -3659,6 +3672,7 @@ buf_biodone(buf_t bp) * indicators */ CLR(bp->b_flags, (B_WASDIRTY | B_THROTTLED_IO | B_PASSIVE)); + CLR(bp->b_attr.ba_flags, (BA_THROTTLED_IO)); DTRACE_IO1(done, buf_t, bp); if (!ISSET(bp->b_flags, B_READ) && !ISSET(bp->b_flags, B_RAW)) @@ -3853,6 +3867,7 @@ alloc_io_buf(vnode_t vp, int priv) #ifdef CONFIG_PROTECT bp->b_cpentry = 0; #endif + bzero(&bp->b_attr, sizeof(struct bufattr)); if (vp && (vp->v_type == VBLK || vp->v_type == VCHR)) bp->b_dev = vp->v_rdev; diff --git a/bsd/vfs/vfs_fsevents.c b/bsd/vfs/vfs_fsevents.c index 0132a60dd..b92b69a28 100644 --- a/bsd/vfs/vfs_fsevents.c +++ b/bsd/vfs/vfs_fsevents.c @@ -198,15 +198,16 @@ fsevents_internal_init(void) printf("fsevents: failed to initialize the event zone.\n"); } - if (zfill(event_zone, MAX_KFS_EVENTS) != MAX_KFS_EVENTS) { - printf("fsevents: failed to 
pre-fill the event zone.\n"); - } - // mark the zone as exhaustible so that it will not // ever grow beyond what we initially filled it with zone_change(event_zone, Z_EXHAUST, TRUE); zone_change(event_zone, Z_COLLECT, FALSE); zone_change(event_zone, Z_CALLERACCT, FALSE); + + if (zfill(event_zone, MAX_KFS_EVENTS) < MAX_KFS_EVENTS) { + printf("fsevents: failed to pre-fill the event zone.\n"); + } + } static void diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index 4280f3bfd..43352545d 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1394,6 +1394,8 @@ found_alias: nvp->v_specflags = 0; nvp->v_speclastr = -1; nvp->v_specinfo->si_opencount = 0; + nvp->v_specinfo->si_initted = 0; + nvp->v_specinfo->si_throttleable = 0; SPECHASH_LOCK(); diff --git a/config/MasterVersion b/config/MasterVersion index b5a6d2aac..23ba4083b 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -11.3.0 +11.4.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. diff --git a/config/Private.exports b/config/Private.exports index 299cabf8e..364d84069 100644 --- a/config/Private.exports +++ b/config/Private.exports @@ -15,11 +15,13 @@ _bdevsw _boot _bsd_hostname _bsd_set_dependency_capable +_buf_attr _buf_create_shadow _buf_getcpaddr _buf_setcpaddr _buf_setfilter _buf_shadow +_bufattr_throttled _cdevsw _cdevsw_setkqueueok _clalloc diff --git a/config/Private.i386.exports b/config/Private.i386.exports index b6b05d103..acb5515de 100644 --- a/config/Private.i386.exports +++ b/config/Private.i386.exports @@ -33,3 +33,4 @@ _xts_decrypt _xts_done _xts_encrypt _xts_start +_aes_decrypt diff --git a/config/Private.x86_64.exports b/config/Private.x86_64.exports index a19ab484b..9432c31a1 100644 --- a/config/Private.x86_64.exports +++ b/config/Private.x86_64.exports @@ -35,3 +35,4 @@ _xts_decrypt _xts_done _xts_encrypt _xts_start +_aes_decrypt diff --git a/config/Unsupported.i386.exports b/config/Unsupported.i386.exports index 38b70f0ff..99112a161 100644 --- a/config/Unsupported.i386.exports +++ b/config/Unsupported.i386.exports @@ -57,7 +57,6 @@ _pffinddomain _pffindproto _pmCPUControl _pmKextRegister -_pm_init_lock _pru_abort_notsupp _pru_accept_notsupp _pru_bind_notsupp @@ -124,3 +123,4 @@ _udbinfo _hibernate_vm_lock _hibernate_vm_unlock _clock_get_system_value +_PE_state diff --git a/config/Unsupported.x86_64.exports b/config/Unsupported.x86_64.exports index 9413c7dec..2e7f007d1 100644 --- a/config/Unsupported.x86_64.exports +++ b/config/Unsupported.x86_64.exports @@ -19,7 +19,6 @@ _mp_rendezvous _mp_rendezvous_no_intrs _pmCPUControl _pmKextRegister -_pm_init_lock _rdmsr_carefully _real_ncpus _rtc_clock_napped @@ -32,4 +31,4 @@ _tmrCvt _tsc_get_info _hibernate_vm_lock _hibernate_vm_unlock - +_PE_state diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h index 0fb3c53f3..0cc86a55c 100644 --- a/iokit/IOKit/IOHibernatePrivate.h +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -263,14 +263,10 @@ kern_open_file_for_direct_io(const char * name, off_t offset, caddr_t addr, vm_size_t len); - - void kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref, - off_t offset, caddr_t addr, vm_size_t len); -int -kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len); -int get_kernel_symfile(struct proc *p, char const **symfile); + off_t write_offset, caddr_t addr, vm_size_t write_length, + off_t discard_offset, off_t 
discard_end); #endif /* _SYS_CONF_H_ */ hibernate_page_list_t * @@ -354,9 +350,9 @@ uint32_t hibernate_write_image(void); long -hibernate_machine_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4); +hibernate_machine_entrypoint(uint32_t p1, uint32_t p2, uint32_t p3, uint32_t p4); long -hibernate_kernel_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4); +hibernate_kernel_entrypoint(uint32_t p1, uint32_t p2, uint32_t p3, uint32_t p4); void hibernate_newruntime_map(void * map, vm_size_t map_size, uint32_t system_table_offset); diff --git a/iokit/IOKit/IOService.h b/iokit/IOKit/IOService.h index c3282f8ea..99c30699b 100644 --- a/iokit/IOKit/IOService.h +++ b/iokit/IOKit/IOService.h @@ -1721,6 +1721,7 @@ public: void deassertPMDriverCall( IOPMDriverCallEntry * callEntry ); IOReturn changePowerStateWithOverrideTo( unsigned long ordinal ); static const char * getIOMessageString( uint32_t msg ); + IOReturn setIgnoreIdleTimer( bool ignore ); #ifdef __LP64__ static IOWorkLoop * getPMworkloop( void ); diff --git a/iokit/IOKit/pwr_mgt/IOPM.h b/iokit/IOKit/pwr_mgt/IOPM.h index f0002d5d6..4bdddb751 100644 --- a/iokit/IOKit/pwr_mgt/IOPM.h +++ b/iokit/IOKit/pwr_mgt/IOPM.h @@ -245,7 +245,7 @@ enum { * false == Retain FV key when going to standby mode * not present == Retain FV key when going to standby mode */ -#define kIOPMDestroyFVKeyOnStandbyKey "DestroyFVKeyOnStandby" +#define kIOPMDestroyFVKeyOnStandbyKey "DestroyFVKeyOnStandby" /******************************************************************************* * @@ -288,7 +288,15 @@ enum { */ kIOPMDriverAssertionPreventDisplaySleepBit = 0x40, - kIOPMDriverAssertionReservedBit7 = 0x80 + /*! kIOPMDriverAssertionReservedBit7 + * Reserved for storage family. + */ + kIOPMDriverAssertionReservedBit7 = 0x80, + + /*! kIOPMDriverAssertionReservedBit8 + * Reserved for networking family. + */ + kIOPMDriverAssertionReservedBit8 = 0x100 }; /* kIOPMAssertionsDriverKey @@ -665,6 +673,7 @@ enum { // Maintenance wake calendar. #define kIOPMSettingMaintenanceWakeCalendarKey "MaintenanceWakeCalendarDate" + struct IOPMCalendarStruct { UInt32 year; UInt8 month; @@ -672,6 +681,7 @@ struct IOPMCalendarStruct { UInt8 hour; UInt8 minute; UInt8 second; + UInt8 selector; }; typedef struct IOPMCalendarStruct IOPMCalendarStruct; @@ -760,6 +770,7 @@ struct stateChangeNote { }; typedef struct stateChangeNote stateChangeNote; +#endif /* KERNEL && __cplusplus */ struct IOPowerStateChangeNotification { void * powerRef; unsigned long returnValue; @@ -768,7 +779,6 @@ struct IOPowerStateChangeNotification { }; typedef struct IOPowerStateChangeNotification IOPowerStateChangeNotification; typedef IOPowerStateChangeNotification sleepWakeNote; -#endif /* KERNEL && __cplusplus */ /*! @struct IOPMSystemCapabilityChangeParameters @abstract A structure describing a system capability change. 
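A sketch of filling the extended calendar structure for a scheduled wake, using the selector field added to IOPMCalendarStruct above; the kPMCalendarType values come from IOPMPrivate.h later in this patch, and the date is illustrative:

	IOPMCalendarStruct wake;

	bzero(&wake, sizeof(wake));
	wake.year     = 2012;
	wake.month    = 5;
	wake.day      = 11;
	wake.hour     = 3;
	wake.minute   = 0;
	wake.second   = 0;
	wake.selector = kPMCalendarTypeSleepService;	/* vs. kPMCalendarTypeMaintenance */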
diff --git a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h index 7e49682ad..117732a71 100644 --- a/iokit/IOKit/pwr_mgt/IOPMLibDefs.h +++ b/iokit/IOKit/pwr_mgt/IOPMLibDefs.h @@ -37,5 +37,6 @@ #define kPMSleepSystemOptions 7 #define kPMSetMaintenanceWakeCalendar 8 #define kPMSetUserAssertionLevels 9 +#define kPMActivityTickle 10 -#define kNumPMMethods 10 +#define kNumPMMethods 11 diff --git a/iokit/IOKit/pwr_mgt/IOPMPrivate.h b/iokit/IOKit/pwr_mgt/IOPMPrivate.h index 3e61d81e0..09fdb19e8 100644 --- a/iokit/IOKit/pwr_mgt/IOPMPrivate.h +++ b/iokit/IOKit/pwr_mgt/IOPMPrivate.h @@ -43,28 +43,28 @@ /* @constant kIOPMTimelineDictionaryKey * @abstract RootDomain key for dictionary describing Timeline's info */ -#define kIOPMTimelineDictionaryKey "PMTimelineLogging" +#define kIOPMTimelineDictionaryKey "PMTimelineLogging" /* @constant kIOPMTimelineEnabledKey * @abstract Boolean value indicating whether the system is recording PM events. * @discussion Key may be found in the dictionary at IOPMrootDomain's property * kIOPMTimelineDictionaryKey. uint32_t value; may be 0. */ -#define kIOPMTimelineEnabledKey "TimelineEnabled" +#define kIOPMTimelineEnabledKey "TimelineEnabled" /* @constant kIOMPTimelineSystemNumberTrackedKey * @abstract The maximum number of system power events the system may record. * @discussion Key may be found in the dictionary at IOPMrootDomain's property * kIOPMTimelineDictionaryKey. uint32_t value; may be 0. */ -#define kIOPMTimelineSystemNumberTrackedKey "TimelineSystemEventsTracked" +#define kIOPMTimelineSystemNumberTrackedKey "TimelineSystemEventsTracked" /* @constant kIOPMTimelineSystemBufferSizeKey * @abstract Size in bytes of buffer recording system PM events * @discussion Key may be found in the dictionary at IOPMrootDomain's property * kIOPMTimelineDictionaryKey. uint32_t value; may be 0. */ -#define kIOPMTimelineSystemBufferSizeKey "TimelineSystemBufferSize" +#define kIOPMTimelineSystemBufferSizeKey "TimelineSystemBufferSize" @@ -130,7 +130,8 @@ enum { kIOPMSleepReasonIdle = 105, kIOPMSleepReasonLowPower = 106, kIOPMSleepReasonThermalEmergency = 107, - kIOPMSleepReasonMaintenance = 108 + kIOPMSleepReasonMaintenance = 108, + kIOPMSleepReasonSleepServiceExit = 109 }; /* @@ -143,6 +144,7 @@ enum { #define kIOPMIdleSleepKey "Idle Sleep" #define kIOPMLowPowerSleepKey "Low Power Sleep" #define kIOPMThermalEmergencySleepKey "Thermal Emergency Sleep" +#define kIOPMSleepServiceExitKey "Sleep Service Back to Sleep" enum { @@ -239,7 +241,32 @@ enum { * PM notification types */ -/* @constant kIOPMStateConsoleUserShutdown +/*! @constant kIOPMSleepServiceScheduleImmediate + * + * Setting type used in calls to IOPMrootDomain::registerPMSettingController + * Use this type between powerd and IOKit.framework + * + */ +#define kIOPMSleepServiceScheduleImmediate "SleepServiceImmediate" + +/*! @constant kIOPMSettingSleepServiceScheduleImmediate + * + * Setting type used in calls to IOPMrootDomain::registerPMSettingController + * Use this type between xnu and AppleRTC + */ +#define kIOPMSettingSleepServiceWakeCalendarKey "SleepServiceWakeCalendarKey" + +/*! @constant kIOPMCalendarWakeTypes + * + * These are valid values for IOPM.h:IOPMCalendarStruct->selector + */ +enum { + kPMCalendarTypeMaintenance = 1, + kPMCalendarTypeSleepService = 2 +}; + + +/* @constant kIOPMStateConsoleShutdown * @abstract Notification of GUI shutdown state available to kexts. * @discussion This type can be passed as arguments to registerPMSettingController() * to receive callbacks. 
@@ -247,7 +274,7 @@ enum { #define kIOPMStateConsoleShutdown "ConsoleShutdown" /* @enum ShutdownValues - * @abstract Potential values shared with key kIOPMStateConsoleUserShutdown + * @abstract Potential values shared with key kIOPMStateConsoleShutdown */ enum { /* @constant kIOPMStateConsoleShutdownNone @@ -272,6 +299,22 @@ enum { kIOPMStateConsoleShutdownCertain = 4 }; +/* @constant kIOPMSettingSilentRunningKey + * @abstract Notification of silent running mode changes to kexts. + * @discussion This key can be passed as an argument to registerPMSettingController() + * and also identifies the type of PMSetting notification callback. + */ +#define kIOPMSettingSilentRunningKey "SilentRunning" +#define kIOPMFeatureSilentRunningKey kIOPMSettingSilentRunningKey + +/* @enum SilentRunningFlags + * @abstract The kIOPMSettingSilentRunningKey notification provides an OSNumber + * object with a value described by the following flags. + */ +enum { + kIOPMSilentRunningModeOn = 0x00000001 +}; + /*****************************************************************************/ /*****************************************************************************/ @@ -311,14 +354,14 @@ enum { #define kIOPMStatsResponseCancel "ResponseCancel" #define kIOPMStatsResponseSlow "ResponseSlow" +struct PMStatsBounds{ + uint64_t start; + uint64_t stop; +}; typedef struct { - struct bounds{ - uint64_t start; - uint64_t stop; - }; - struct bounds hibWrite; - struct bounds hibRead; + struct PMStatsBounds hibWrite; + struct PMStatsBounds hibRead; // bounds driverNotifySleep; // bounds driverNotifyWake; // bounds appNotifySleep; @@ -575,31 +618,162 @@ enum { #define kIOPMSleepWakeFailureUUIDKey "UUID" #define kIOPMSleepWakeFailureDateKey "Date" -/******************************************************************************/ -/* System sleep policy - * Shared between PM root domain and platform driver. +/***************************************************************************** + * + * Root Domain private property keys + * + *****************************************************************************/ + +/* kIOPMAutoPowerOffEnabledKey + * Indicates if Auto Power Off is enabled. + * It has a boolean value. + * true == Auto Power Off is enabled + * false == Auto Power Off is disabled + * not present == Auto Power Off is not supported on this hardware */ +#define kIOPMAutoPowerOffEnabledKey "AutoPowerOff Enabled" -// Platform specific property added by the platform driver. -// An OSData that describes the system sleep policy. -#define kIOPlatformSystemSleepPolicyKey "IOPlatformSystemSleepPolicy" +/* kIOPMAutoPowerOffDelayKey + * Key refers to a CFNumberRef that represents the delay in seconds before + * entering the Auto Power Off state. The property is not present if Auto + * Power Off is unsupported. + */ +#define kIOPMAutoPowerOffDelayKey "AutoPowerOff Delay" -// Root domain property updated before platform sleep. -// An OSData that describes the system sleep parameters. -#define kIOPMSystemSleepParametersKey "IOPMSystemSleepParameters" +/***************************************************************************** + * + * System Sleep Policy + * + *****************************************************************************/ -struct IOPMSystemSleepParameters +#define kIOPMSystemSleepPolicySignature 0x54504c53 +#define kIOPMSystemSleepPolicyVersion 2 + +/*! + * @defined kIOPMSystemSleepTypeKey + * @abstract Indicates the type of system sleep. 
+ * @discussion An OSNumber property of root domain that describes the type + * of system sleep. This property is set after notifying priority sleep/wake + * clients, but before informing interested drivers and shutting down power + * plane drivers. + */ +#define kIOPMSystemSleepTypeKey "IOPMSystemSleepType" + +struct IOPMSystemSleepPolicyVariables { - uint32_t version; - uint32_t sleepFlags; - uint32_t sleepTimer; - uint32_t wakeEvents; + uint32_t signature; // kIOPMSystemSleepPolicySignature + uint32_t version; // kIOPMSystemSleepPolicyVersion + + uint64_t currentCapability; // current system capability bits + uint64_t highestCapability; // highest system capability bits + + uint64_t sleepFactors; // sleep factor bits + uint32_t sleepReason; // kIOPMSleepReason* + uint32_t sleepPhase; // identify the sleep phase + uint32_t hibernateMode; // current hibernate mode + + uint32_t standbyDelay; // standby delay in seconds + uint32_t poweroffDelay; // auto-poweroff delay in seconds + + uint32_t reserved[51]; // pad sizeof 256 bytes +}; + +enum { + kIOPMSleepPhase1 = 1, + kIOPMSleepPhase2 +}; + +// Sleep Factor Mask / Bits +enum { + kIOPMSleepFactorSleepTimerWake = 0x00000001ULL, + kIOPMSleepFactorLidOpen = 0x00000002ULL, + kIOPMSleepFactorACPower = 0x00000004ULL, + kIOPMSleepFactorBatteryLow = 0x00000008ULL, + kIOPMSleepFactorStandbyNoDelay = 0x00000010ULL, + kIOPMSleepFactorStandbyForced = 0x00000020ULL, + kIOPMSleepFactorStandbyDisabled = 0x00000040ULL, + kIOPMSleepFactorUSBExternalDevice = 0x00000080ULL, + kIOPMSleepFactorBluetoothHIDDevice = 0x00000100ULL, + kIOPMSleepFactorExternalMediaMounted = 0x00000200ULL, + kIOPMSleepFactorThunderboltDevice = 0x00000400ULL, + kIOPMSleepFactorRTCAlarmScheduled = 0x00000800ULL, + kIOPMSleepFactorMagicPacketWakeEnabled = 0x00001000ULL, + kIOPMSleepFactorHibernateForced = 0x00010000ULL, + kIOPMSleepFactorAutoPowerOffDisabled = 0x00020000ULL, + kIOPMSleepFactorAutoPowerOffForced = 0x00040000ULL +}; + +// System Sleep Types +enum { + kIOPMSleepTypeInvalid = 0, + kIOPMSleepTypeAbortedSleep = 1, + kIOPMSleepTypeNormalSleep = 2, + kIOPMSleepTypeSafeSleep = 3, + kIOPMSleepTypeHibernate = 4, + kIOPMSleepTypeStandby = 5, + kIOPMSleepTypePowerOff = 6, + kIOPMSleepTypeLast = 7 +}; + +// System Sleep Flags +enum { + kIOPMSleepFlagDisableHibernateAbort = 0x00000001, + kIOPMSleepFlagDisableUSBWakeEvents = 0x00000002, + kIOPMSleepFlagDisableBatlowAssertion = 0x00000004 }; -// Sleep flags +// System Wake Events enum { - kIOPMSleepFlagHibernate = 0x00000001, - kIOPMSleepFlagSleepTimerEnable = 0x00000002 + kIOPMWakeEventLidOpen = 0x00000001, + kIOPMWakeEventLidClose = 0x00000002, + kIOPMWakeEventACAttach = 0x00000004, + kIOPMWakeEventACDetach = 0x00000008, + kIOPMWakeEventPowerButton = 0x00000100, + kIOPMWakeEventUserPME = 0x00000400, + kIOPMWakeEventSleepTimer = 0x00000800, + kIOPMWakeEventBatteryLow = 0x00001000, + kIOPMWakeEventDarkPME = 0x00002000 }; +/*! + * @defined kIOPMSystemSleepParametersKey + * @abstract Sleep parameters describing the upcoming sleep + * @discussion Root domain updates this OSData property before system sleep + * to pass sleep parameters to the platform driver. Some of the parameters + * are based on the chosen entry in the system sleep policy table. 
+ */ +#define kIOPMSystemSleepParametersKey "IOPMSystemSleepParameters" +#define kIOPMSystemSleepParametersVersion 2 + +struct IOPMSystemSleepParameters +{ + uint16_t version; + uint16_t reserved1; + uint32_t sleepType; + uint32_t sleepFlags; + uint32_t ecWakeEvents; + uint32_t ecWakeTimer; + uint32_t ecPoweroffTimer; + uint32_t reserved2[10]; +} __attribute__((packed)); + +#ifdef KERNEL + +/*! + * @defined kIOPMInstallSystemSleepPolicyHandlerKey + * @abstract Name of the platform function to install a sleep policy handler. + * @discussion Pass to IOPMrootDomain::callPlatformFunction(), with a pointer + * to the C-function handler at param1, and an optional target at param2, to + * register a sleep policy handler. Only a single sleep policy handler can + * be installed. + */ +#define kIOPMInstallSystemSleepPolicyHandlerKey \ + "IOPMInstallSystemSleepPolicyHandler" + +typedef IOReturn (*IOPMSystemSleepPolicyHandler)( + void * target, const IOPMSystemSleepPolicyVariables * vars, + IOPMSystemSleepParameters * params ); + +#endif /* KERNEL */ + #endif /* ! _IOKIT_IOPMPRIVATE_H */ diff --git a/iokit/IOKit/pwr_mgt/RootDomain.h b/iokit/IOKit/pwr_mgt/RootDomain.h index 760e7d674..55f4ebe94 100644 --- a/iokit/IOKit/pwr_mgt/RootDomain.h +++ b/iokit/IOKit/pwr_mgt/RootDomain.h @@ -381,8 +381,7 @@ public: void handleQueueSleepWakeUUID( OSObject *obj); - IOReturn setMaintenanceWakeCalendar( - const IOPMCalendarStruct * calendar ); + IOReturn setMaintenanceWakeCalendar(const IOPMCalendarStruct * calendar ); // Handle callbacks from IOService::systemWillShutdown() void acknowledgeSystemWillShutdown( IOService * from ); @@ -407,6 +406,9 @@ public: bool systemMessageFilter( void * object, void * arg1, void * arg2, void * arg3 ); + void publishPMSetting( + const OSSymbol * feature, uint32_t where, uint32_t * featureID ); + /*! @function recordPMEvent @abstract Logs IOService PM event timing. @discussion Should only be called from IOServicePM. Should not be exported. 
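The handler typedef and install key above make the registration contract concrete: the platform driver passes its function pointer as param1 and an optional target as param2 of callPlatformFunction(), and the root domain later invokes the handler with the policy variables and an output parameter block. A hedged sketch of both sides from the installing driver's point of view; the handler body is illustrative only, a real one would derive the sleep type and EC timers from vars->sleepFactors and its own policy table:

    #include <IOKit/pwr_mgt/RootDomain.h>
    #include <IOKit/pwr_mgt/IOPMPrivate.h>

    // Illustrative policy handler: honor a forced-standby factor, otherwise
    // pick plain sleep. Root domain rejects an invalid sleepType or version.
    static IOReturn
    mySleepPolicyHandler(void * target,
                         const IOPMSystemSleepPolicyVariables * vars,
                         IOPMSystemSleepParameters * params)
    {
        (void) target;
        params->version   = kIOPMSystemSleepParametersVersion;
        params->sleepType = (vars->sleepFactors & kIOPMSleepFactorStandbyForced)
                          ? kIOPMSleepTypeStandby
                          : kIOPMSleepTypeNormalSleep;
        // ecWakeEvents / ecWakeTimer / ecPoweroffTimer stay zero in this sketch.
        return kIOReturnSuccess;
    }

    static IOReturn
    installSleepPolicyHandler(IOPMrootDomain * root, void * target)
    {
        // param1 = handler, param2 = target; only one handler may be installed,
        // a second attempt returns kIOReturnExclusiveAccess.
        return root->callPlatformFunction(
                   kIOPMInstallSystemSleepPolicyHandlerKey, false,
                   (void *) mySleepPolicyHandler, target, NULL, NULL);
    }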
@@ -467,6 +469,7 @@ private: IOPMPowerStateQueue *pmPowerStateQueue; OSArray *allowedPMSettings; + OSArray *noPublishPMSettings; PMTraceWorker *pmTracer; PMAssertionsTracker *pmAssertions; @@ -565,6 +568,7 @@ private: unsigned int logGraphicsClamp :1; unsigned int darkWakeToSleepASAP :1; unsigned int darkWakeMaintenance :1; + unsigned int darkWakeSleepService :1; unsigned int darkWakePostTickle :1; unsigned int sleepTimerMaintenance :1; @@ -585,6 +589,7 @@ private: IOOptionBits platformSleepSupport; uint32_t _debugWakeSeconds; + uint32_t _lastDebugWakeSeconds; queue_head_t aggressivesQueue; thread_call_t aggressivesThreadCall; @@ -601,6 +606,10 @@ private: IOPMTimeline *timeline; + IOPMSystemSleepPolicyHandler _sleepPolicyHandler; + void * _sleepPolicyTarget; + IOPMSystemSleepPolicyVariables *_sleepPolicyVars; + // IOPMrootDomain internal sleep call IOReturn privateSleepSystem( uint32_t sleepReason ); void reportUserInput( void ); @@ -647,11 +656,14 @@ private: void evaluatePolicy( int stimulus, uint32_t arg = 0 ); + void evaluateAssertions(IOPMDriverAssertionType newAssertions, + IOPMDriverAssertionType oldAssertions); + void deregisterPMSettingObject( PMSettingObject * pmso ); #if HIBERNATION bool getSleepOption( const char * key, uint32_t * option ); - bool evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p ); + bool evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p, int phase ); void evaluateSystemSleepPolicyEarly( void ); void evaluateSystemSleepPolicyFinal( void ); #endif /* HIBERNATION */ diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp index a4d7dbb4d..002055ff1 100644 --- a/iokit/Kernel/IOHibernateIO.cpp +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -792,7 +792,7 @@ IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, HIBLOG("error 0x%x opening hibernation file\n", err); if (vars->fileRef) { - kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0); + kern_close_file_for_direct_io(vars->fileRef, 0, 0, 0, 0, 0); gIOHibernateFileRef = vars->fileRef = NULL; } } @@ -897,7 +897,7 @@ IOPolledFileWrite(IOPolledFileIOVars * vars, { AbsoluteTime startTime, endTime; - uint32_t encryptLen, encryptStart; + uint64_t encryptLen, encryptStart; encryptLen = vars->position - vars->encryptStart; if (encryptLen > length) encryptLen = length; @@ -1713,7 +1713,7 @@ IOHibernateSystemWake(void) if (vars->ioBuffer) vars->ioBuffer->release(); bzero(&gIOHibernateHandoffPages[0], gIOHibernateHandoffPageCount * sizeof(gIOHibernateHandoffPages[0])); - if (vars->handoffBuffer) + if (vars->handoffBuffer && (kIOHibernateStateWakingFromHibernate == gIOHibernateState)) { IOHibernateHandoff * handoff; bool done = false; @@ -1721,7 +1721,7 @@ IOHibernateSystemWake(void) !done; handoff = (IOHibernateHandoff *) &handoff->data[handoff->bytecount]) { -// HIBPRINT("handoff %p, %x, %x\n", handoff, handoff->type, handoff->bytecount); + HIBPRINT("handoff %p, %x, %x\n", handoff, handoff->type, handoff->bytecount); uint8_t * data = &handoff->data[0]; switch (handoff->type) { @@ -1772,7 +1772,9 @@ IOHibernateSystemPostWake(void) gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; kern_close_file_for_direct_io(gIOHibernateFileRef, 0, (caddr_t) gIOHibernateCurrentHeader, - sizeof(IOHibernateImageHeader)); + sizeof(IOHibernateImageHeader), + sizeof(IOHibernateImageHeader), + gIOHibernateCurrentHeader->imageSize); gIOHibernateFileRef = 0; } return (kIOReturnSuccess); @@ -2198,7 +2200,7 @@ hibernate_write_image(void) { if (needEncrypt && (kEncrypt & 
pageType)) { - vars->fileVars->encryptStart = (vars->fileVars->position & ~(AES_BLOCK_SIZE - 1)); + vars->fileVars->encryptStart = (vars->fileVars->position & ~(((uint64_t)AES_BLOCK_SIZE) - 1)); vars->fileVars->encryptEnd = UINT64_MAX; HIBLOG("encryptStart %qx\n", vars->fileVars->encryptStart); @@ -2338,8 +2340,7 @@ hibernate_write_image(void) if ((kEncrypt & pageType)) { - vars->fileVars->encryptEnd = (vars->fileVars->position + AES_BLOCK_SIZE - 1) - & ~(AES_BLOCK_SIZE - 1); + vars->fileVars->encryptEnd = ((vars->fileVars->position + 511) & ~511ULL); HIBLOG("encryptEnd %qx\n", vars->fileVars->encryptEnd); } @@ -2352,11 +2353,14 @@ hibernate_write_image(void) } if (kWiredClear == pageType) { + // enlarge wired image for test +// err = IOPolledFileWrite(vars->fileVars, 0, 0x60000000, cryptvars); + // end wired image header->encryptStart = vars->fileVars->encryptStart; header->encryptEnd = vars->fileVars->encryptEnd; image1Size = vars->fileVars->position; - HIBLOG("image1Size %qd, encryptStart1 %qx, End1 %qx\n", + HIBLOG("image1Size 0x%qx, encryptStart1 0x%qx, End1 0x%qx\n", image1Size, header->encryptStart, header->encryptEnd); } } @@ -2736,8 +2740,8 @@ hibernate_machine_init(void) } } } - if (pagesDone == gIOHibernateCurrentHeader->actualUncompressedPages) - err = kIOReturnLockedRead; + if ((kIOReturnSuccess == err) && (pagesDone == gIOHibernateCurrentHeader->actualUncompressedPages)) + err = kIOReturnLockedRead; if (kIOReturnSuccess != err) panic("Hibernate restore error %x", err); diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c index 7259ab3ec..79410326b 100644 --- a/iokit/Kernel/IOHibernateRestoreKernel.c +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -201,7 +201,12 @@ enum kIOHibernateRestoreCodeWakeMapSize = 'wkms', kIOHibernateRestoreCodeConflictPage = 'cfpg', kIOHibernateRestoreCodeConflictSource = 'cfsr', - kIOHibernateRestoreCodeNoMemory = 'nomm' + kIOHibernateRestoreCodeNoMemory = 'nomm', + kIOHibernateRestoreCodeTag = 'tag ', + kIOHibernateRestoreCodeSignature = 'sign', + kIOHibernateRestoreCodeMapVirt = 'mapV', + kIOHibernateRestoreCodeHandoffPages = 'hand', + kIOHibernateRestoreCodeHandoffCount = 'hndc', }; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -335,6 +340,8 @@ hibernate_page_bitmap_count(hibernate_bitmap_t * bitmap, uint32_t set, uint32_t } } + if ((page + count) > (bitmap->last_page + 1)) count = (bitmap->last_page + 1) - page; + return (count); } @@ -403,12 +410,15 @@ bcopy_internal(const void *src, void *dst, uint32_t len) #define C_ASSERT(e) typedef char __C_ASSERT__[(e) ? 
1 : -1] long -hibernate_kernel_entrypoint(IOHibernateImageHeader * header, - void * p2, void * p3, void * p4) +hibernate_kernel_entrypoint(uint32_t p1, + uint32_t p2, uint32_t p3, uint32_t p4) { + uint64_t headerPhys; + uint64_t mapPhys; + uint64_t srcPhys; + uint64_t imageReadPhys; + uint64_t pageIndexPhys; uint32_t idx; - uint32_t * src; - uint32_t * imageReadPos; uint32_t * pageIndexSource; hibernate_page_list_t * map; uint32_t stage; @@ -418,8 +428,10 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, uint32_t conflictCount; uint32_t compressedSize; uint32_t uncompressedPages; - uint32_t copyPageListHead; + uint32_t copyPageListHeadPage; + uint32_t pageListPage; uint32_t * copyPageList; + uint32_t * src; uint32_t copyPageIndex; uint32_t sum; uint32_t pageSum; @@ -432,37 +444,43 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, C_ASSERT(sizeof(IOHibernateImageHeader) == 512); + headerPhys = ptoa_64(p1); + if ((kIOHibernateDebugRestoreLogs & gIOHibernateDebugFlags) && !debug_probe()) gIOHibernateDebugFlags &= ~kIOHibernateDebugRestoreLogs; - debug_code(kIOHibernateRestoreCodeImageStart, (uintptr_t) header); + debug_code(kIOHibernateRestoreCodeImageStart, headerPhys); + + bcopy_internal((void *) pal_hib_map(IMAGE_AREA, headerPhys), + gIOHibernateCurrentHeader, + sizeof(IOHibernateImageHeader)); - bcopy_internal(header, - gIOHibernateCurrentHeader, - sizeof(IOHibernateImageHeader)); + debug_code(kIOHibernateRestoreCodeSignature, gIOHibernateCurrentHeader->signature); - map = (hibernate_page_list_t *) - (((uintptr_t) &header->fileExtentMap[0]) - + header->fileExtentMapSize - + ptoa_32(header->restore1PageCount) - + header->previewSize); + mapPhys = headerPhys + + (offsetof(IOHibernateImageHeader, fileExtentMap) + + gIOHibernateCurrentHeader->fileExtentMapSize + + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount) + + gIOHibernateCurrentHeader->previewSize); - lastImagePage = atop_32(((uintptr_t) header) + header->image1Size); + map = (hibernate_page_list_t *) pal_hib_map(BITMAP_AREA, mapPhys); - lastMapPage = atop_32(((uintptr_t) map) + header->bitmapSize); + lastImagePage = atop_64(headerPhys + gIOHibernateCurrentHeader->image1Size); + lastMapPage = atop_64(mapPhys + gIOHibernateCurrentHeader->bitmapSize); - handoffPages = header->handoffPages; - handoffPageCount = header->handoffPageCount; + handoffPages = gIOHibernateCurrentHeader->handoffPages; + handoffPageCount = gIOHibernateCurrentHeader->handoffPageCount; debug_code(kIOHibernateRestoreCodeImageEnd, ptoa_64(lastImagePage)); - debug_code(kIOHibernateRestoreCodeMapStart, (uintptr_t) map); + debug_code(kIOHibernateRestoreCodeMapStart, mapPhys); debug_code(kIOHibernateRestoreCodeMapEnd, ptoa_64(lastMapPage)); - debug_code('hand', ptoa_64(handoffPages)); - debug_code('hnde', ptoa_64(handoffPageCount)); + debug_code(kIOHibernateRestoreCodeMapVirt, (uintptr_t) map); + debug_code(kIOHibernateRestoreCodeHandoffPages, ptoa_64(handoffPages)); + debug_code(kIOHibernateRestoreCodeHandoffCount, handoffPageCount); // knock all the image pages to be used out of free map - for (ppnum = atop_32((uintptr_t) header); ppnum <= lastImagePage; ppnum++) + for (ppnum = atop_64(headerPhys); ppnum <= lastImagePage; ppnum++) { hibernate_page_bitset(map, FALSE, ppnum); } @@ -475,40 +493,39 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, nextFree = 0; hibernate_page_list_grab(map, &nextFree); - pal_hib_window_setup(hibernate_page_list_grab(map, &nextFree)); - - sum = header->actualRestore1Sum; - 
gIOHibernateCurrentHeader->diag[0] = (uint32_t)(uintptr_t) header; + sum = gIOHibernateCurrentHeader->actualRestore1Sum; + gIOHibernateCurrentHeader->diag[0] = atop_64(headerPhys); gIOHibernateCurrentHeader->diag[1] = sum; - uncompressedPages = 0; - conflictCount = 0; - copyPageListHead = 0; - copyPageList = 0; - copyPageIndex = PAGE_SIZE >> 2; + uncompressedPages = 0; + conflictCount = 0; + copyPageListHeadPage = 0; + copyPageList = 0; + copyPageIndex = PAGE_SIZE >> 2; - compressedSize = PAGE_SIZE; - stage = 2; - count = 0; - src = NULL; + compressedSize = PAGE_SIZE; + stage = 2; + count = 0; + srcPhys = 0; if (gIOHibernateCurrentHeader->previewSize) { - pageIndexSource = (uint32_t *) - (((uintptr_t) &header->fileExtentMap[0]) - + gIOHibernateCurrentHeader->fileExtentMapSize - + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount)); - imageReadPos = (uint32_t *) (((uintptr_t) pageIndexSource) + gIOHibernateCurrentHeader->previewPageListSize); - lastPageIndexPage = atop_32((uintptr_t) imageReadPos); + pageIndexPhys = headerPhys + + (offsetof(IOHibernateImageHeader, fileExtentMap) + + gIOHibernateCurrentHeader->fileExtentMapSize + + ptoa_32(gIOHibernateCurrentHeader->restore1PageCount)); + imageReadPhys = (pageIndexPhys + gIOHibernateCurrentHeader->previewPageListSize); + lastPageIndexPage = atop_64(imageReadPhys); + pageIndexSource = (uint32_t *) pal_hib_map(IMAGE2_AREA, pageIndexPhys); } else { - pageIndexSource = NULL; + pageIndexPhys = 0; lastPageIndexPage = 0; - imageReadPos = (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize); + imageReadPhys = (mapPhys + gIOHibernateCurrentHeader->bitmapSize); } - debug_code(kIOHibernateRestoreCodePageIndexStart, (uintptr_t) pageIndexSource); + debug_code(kIOHibernateRestoreCodePageIndexStart, pageIndexPhys); debug_code(kIOHibernateRestoreCodePageIndexEnd, ptoa_64(lastPageIndexPage)); while (1) @@ -517,38 +534,35 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, { case 2: // copy handoff data - count = src ? 0 : handoffPageCount; + count = srcPhys ? 
0 : handoffPageCount; if (!count) break; - if (count > gIOHibernateHandoffPageCount) - count = gIOHibernateHandoffPageCount; - src = (uint32_t *) (uintptr_t) ptoa_64(handoffPages); + if (count > gIOHibernateHandoffPageCount) count = gIOHibernateHandoffPageCount; + srcPhys = ptoa_64(handoffPages); break; case 1: // copy pageIndexSource pages == preview image data - if (!src) + if (!srcPhys) { - if (!pageIndexSource) - break; - src = imageReadPos; + if (!pageIndexPhys) break; + srcPhys = imageReadPhys; } ppnum = pageIndexSource[0]; count = pageIndexSource[1]; pageIndexSource += 2; - imageReadPos = src; + pageIndexPhys += 2 * sizeof(pageIndexSource[0]); + imageReadPhys = srcPhys; break; case 0: // copy pages - if (!src) - { - src = (uint32_t *) (((uintptr_t) map) + gIOHibernateCurrentHeader->bitmapSize); - } + if (!srcPhys) srcPhys = (mapPhys + gIOHibernateCurrentHeader->bitmapSize); + src = (uint32_t *) pal_hib_map(IMAGE_AREA, srcPhys); ppnum = src[0]; count = src[1]; - src += 2; - imageReadPos = src; + srcPhys += 2 * sizeof(*src); + imageReadPhys = srcPhys; break; } @@ -558,7 +572,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, if (!stage) break; stage--; - src = NULL; + srcPhys = 0; continue; } @@ -567,23 +581,26 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, uint32_t tag; int conflicts; - if (2 == stage) - ppnum = gIOHibernateHandoffPages[page]; + src = (uint32_t *) pal_hib_map(IMAGE_AREA, srcPhys); + + if (2 == stage) ppnum = gIOHibernateHandoffPages[page]; else if (!stage) { tag = *src++; +// debug_code(kIOHibernateRestoreCodeTag, (uintptr_t) tag); + srcPhys += sizeof(*src); compressedSize = kIOHibernateTagLength & tag; } - conflicts = (ppnum >= atop_32((uintptr_t) map)) && (ppnum <= lastMapPage); + conflicts = (ppnum >= atop_64(mapPhys)) && (ppnum <= lastMapPage); - conflicts |= ((ppnum >= atop_32((uintptr_t) imageReadPos)) && (ppnum <= lastImagePage)); + conflicts |= ((ppnum >= atop_64(imageReadPhys)) && (ppnum <= lastImagePage)); if (stage >= 2) - conflicts |= ((ppnum >= atop_32((uintptr_t) src)) && (ppnum <= (handoffPages + handoffPageCount - 1))); + conflicts |= ((ppnum >= atop_64(srcPhys)) && (ppnum <= (handoffPages + handoffPageCount - 1))); if (stage >= 1) - conflicts |= ((ppnum >= atop_32((uintptr_t) pageIndexSource)) && (ppnum <= lastPageIndexPage)); + conflicts |= ((ppnum >= atop_64(pageIndexPhys)) && (ppnum <= lastPageIndexPage)); if (!conflicts) { @@ -610,15 +627,15 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, if (copyPageIndex > ((PAGE_SIZE >> 2) - 3)) { // alloc new copy list page - uint32_t pageListPage = hibernate_page_list_grab(map, &nextFree); + pageListPage = hibernate_page_list_grab(map, &nextFree); // link to current if (copyPageList) { copyPageList[1] = pageListPage; } else { - copyPageListHead = pageListPage; + copyPageListHeadPage = pageListPage; } copyPageList = (uint32_t *)pal_hib_map(SRC_COPY_AREA, - ptoa_32(pageListPage)); + ptoa_64(pageListPage)); copyPageList[1] = 0; copyPageIndex = 2; } @@ -628,11 +645,12 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, copyPageList[copyPageIndex++] = (compressedSize | (stage << 24)); copyPageList[0] = copyPageIndex; - dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_32(bufferPage)); + dst = (uint32_t *)pal_hib_map(DEST_COPY_AREA, ptoa_64(bufferPage)); for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++) dst[idx] = src[idx]; } - src += ((compressedSize + 3) >> 2); + srcPhys += ((compressedSize + 3) & ~3); + src += ((compressedSize + 3) >> 2); } } @@ 
-641,16 +659,15 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, // -- copy back conflicts - copyPageList = (uint32_t *)(uintptr_t) ptoa_32(copyPageListHead); - - while (copyPageList) + pageListPage = copyPageListHeadPage; + while (pageListPage) { - copyPageList = (uint32_t *)pal_hib_map(COPY_PAGE_AREA, (uintptr_t)copyPageList); + copyPageList = (uint32_t *)pal_hib_map(COPY_PAGE_AREA, ptoa_64(pageListPage)); for (copyPageIndex = 2; copyPageIndex < copyPageList[0]; copyPageIndex += 3) { ppnum = copyPageList[copyPageIndex + 0]; - src = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[copyPageIndex + 1]); - src = (uint32_t *)pal_hib_map(SRC_COPY_AREA, (uintptr_t)src); + srcPhys = ptoa_64(copyPageList[copyPageIndex + 1]); + src = (uint32_t *) pal_hib_map(SRC_COPY_AREA, srcPhys); compressedSize = copyPageList[copyPageIndex + 2]; stage = compressedSize >> 24; compressedSize &= 0x1FFF; @@ -660,7 +677,7 @@ hibernate_kernel_entrypoint(IOHibernateImageHeader * header, sum += pageSum; uncompressedPages++; } - copyPageList = (uint32_t *) (uintptr_t) ptoa_32(copyPageList[1]); + pageListPage = copyPageList[1]; } pal_hib_patchup(); diff --git a/iokit/Kernel/IOLib.cpp b/iokit/Kernel/IOLib.cpp index 50000299d..b2714fc9b 100644 --- a/iokit/Kernel/IOLib.cpp +++ b/iokit/Kernel/IOLib.cpp @@ -344,6 +344,7 @@ IOKernelFreePhysical(mach_vm_address_t address, mach_vm_size_t size) kfree((void *)allocationAddress, adjustedSize); } + IOStatisticsAlloc(kIOStatisticsFreeContiguous, size); #if IOALLOCDEBUG debug_iomalloc_size -= size; #endif @@ -379,12 +380,18 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP contiguous = (contiguous && (adjustedSize > page_size)) || (alignment > page_size); - if ((!contiguous) && (maxPhys <= 0xFFFFFFFF)) - { - maxPhys = 0; - options |= KMA_LOMEM; - } - + if (!contiguous) + { + if (maxPhys <= 0xFFFFFFFF) + { + maxPhys = 0; + options |= KMA_LOMEM; + } + else if (gIOLastPage && (atop_64(maxPhys) > gIOLastPage)) + { + maxPhys = 0; + } + } if (contiguous || maxPhys) { kr = kmem_alloc_contig(kernel_map, &virt, size, @@ -422,11 +429,12 @@ IOKernelAllocateWithPhysicalRestrict(mach_vm_size_t size, mach_vm_address_t maxP address = 0; } -#if IOALLOCDEBUG if (address) { + IOStatisticsAlloc(kIOStatisticsMallocContiguous, size); +#if IOALLOCDEBUG debug_iomalloc_size += size; - } #endif + } return (address); } @@ -490,10 +498,6 @@ void * IOMallocContiguous(vm_size_t size, vm_size_t alignment, } while (false); - if (address) { - IOStatisticsAlloc(kIOStatisticsMallocContiguous, size); - } - return (void *) address; } @@ -531,8 +535,6 @@ void IOFreeContiguous(void * _address, vm_size_t size) { IOKernelFreePhysical((mach_vm_address_t) address, size); } - - IOStatisticsAlloc(kIOStatisticsFreeContiguous, size); } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOMemoryDescriptor.cpp b/iokit/Kernel/IOMemoryDescriptor.cpp index 0a11064a1..9b4590945 100644 --- a/iokit/Kernel/IOMemoryDescriptor.cpp +++ b/iokit/Kernel/IOMemoryDescriptor.cpp @@ -1937,7 +1937,7 @@ IOReturn IOGeneralMemoryDescriptor::wireVirtual(IODirection forDirection) assert(!_wireCount); assert(kIOMemoryTypeVirtual == type || kIOMemoryTypeVirtual64 == type || kIOMemoryTypeUIO == type); - if (_pages >= gIOMaximumMappedIOPageCount) + if (_pages > gIOMaximumMappedIOPageCount) return kIOReturnNoResources; dataP = getDataP(_memoryEntries); diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index 60f3ec07a..d4e9fa423 100644 
--- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -190,6 +190,7 @@ static IOPMPowerState ourPowerStates[NUM_POWER_STATES] = {1, kIOPMPowerOn, kIOPMPowerOn, ON_POWER, 0,0,0,0,0,0,0,0} }; +#define kIOPMRootDomainWakeTypeSleepService "SleepService" #define kIOPMRootDomainWakeTypeMaintenance "Maintenance" #define kIOPMRootDomainWakeTypeSleepTimer "SleepTimer" #define kIOPMrootDomainWakeTypeLowBattery "LowBattery" @@ -285,6 +286,8 @@ const OSSymbol *gIOPMStatsApplicationResponseTimedOut; const OSSymbol *gIOPMStatsApplicationResponseCancel; const OSSymbol *gIOPMStatsApplicationResponseSlow; +#define kBadPMFeatureID 0 + /* * PMSettingHandle * Opaque handle passed to clients of registerPMSettingController() @@ -796,13 +799,15 @@ static SYSCTL_INT(_debug, OID_AUTO, darkwake, CTLFLAG_RW, &gDarkWakeFlags, 0, "" static const OSSymbol * gIOPMSettingAutoWakeSecondsKey; static const OSSymbol * gIOPMSettingDebugWakeRelativeKey; static const OSSymbol * gIOPMSettingMaintenanceWakeCalendarKey; +static const OSSymbol * gIOPMSettingSleepServiceWakeCalendarKey; +static const OSSymbol * gIOPMSettingSilentRunningKey; //****************************************************************************** // start // //****************************************************************************** -#define kRootDomainSettingsCount 16 +#define kRootDomainSettingsCount 17 bool IOPMrootDomain::start( IOService * nub ) { @@ -815,8 +820,9 @@ bool IOPMrootDomain::start( IOService * nub ) gRootDomain = this; gIOPMSettingAutoWakeSecondsKey = OSSymbol::withCString(kIOPMSettingAutoWakeSecondsKey); gIOPMSettingDebugWakeRelativeKey = OSSymbol::withCString(kIOPMSettingDebugWakeRelativeKey); - gIOPMSettingMaintenanceWakeCalendarKey = - OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey); + gIOPMSettingMaintenanceWakeCalendarKey = OSSymbol::withCString(kIOPMSettingMaintenanceWakeCalendarKey); + gIOPMSettingSleepServiceWakeCalendarKey = OSSymbol::withCString(kIOPMSettingSleepServiceWakeCalendarKey); + gIOPMSettingSilentRunningKey = OSSymbol::withCStringNoCopy(kIOPMSettingSilentRunningKey); gIOPMStatsApplicationResponseTimedOut = OSSymbol::withCString(kIOPMStatsResponseTimedOut); gIOPMStatsApplicationResponseCancel = OSSymbol::withCString(kIOPMStatsResponseCancel); @@ -842,7 +848,8 @@ bool IOPMrootDomain::start( IOService * nub ) OSSymbol::withCString(kIOPMSettingDisplaySleepUsesDimKey), OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey), OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey), - OSSymbol::withCString(kIOPMStateConsoleShutdown) + OSSymbol::withCString(kIOPMStateConsoleShutdown), + gIOPMSettingSilentRunningKey }; PE_parse_boot_argn("darkwake", &gDarkWakeFlags, sizeof(gDarkWakeFlags)); @@ -927,7 +934,12 @@ bool IOPMrootDomain::start( IOService * nub ) (const OSObject **)settingsArr, kRootDomainSettingsCount, 0); - + + // List of PM settings that should not automatically publish itself + // as a feature when registered by a listener. 
+ noPublishPMSettings = OSArray::withObjects( + (const OSObject **) &gIOPMSettingSilentRunningKey, 1, 0); + fPMSettingsDict = OSDictionary::withCapacity(5); PMinit(); // creates gIOPMWorkLoop @@ -1160,6 +1172,14 @@ IOReturn IOPMrootDomain::setProperties( OSObject * props_obj ) { setProperty(kIOPMDestroyFVKeyOnStandbyKey, b); } + if ((b = OSDynamicCast(OSBoolean, dict->getObject(kIOPMAutoPowerOffEnabledKey)))) + { + setProperty(kIOPMAutoPowerOffEnabledKey, b); + } + if ((n = OSDynamicCast(OSNumber, dict->getObject(kIOPMAutoPowerOffDelayKey)))) + { + setProperty(kIOPMAutoPowerOffDelayKey, n); + } // Relay our allowed PM settings onto our registered PM clients for(i = 0; i < allowedPMSettings->getCount(); i++) { @@ -2030,6 +2050,9 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) getPlatform()->PMLog(kIOPMrootDomainClass, kPMLogSystemWake, 0, 0); lowBatteryCondition = false; lastSleepReason = 0; + + _lastDebugWakeSeconds = _debugWakeSeconds; + _debugWakeSeconds = 0; // And start logging the wake event here // TODO: Publish the wakeReason string as an integer @@ -2041,7 +2064,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) recordAndReleasePMEvent( details ); - #ifndef __LP64__ systemWake(); #endif @@ -2070,38 +2092,58 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) OSNumber * hibOptions = OSDynamicCast( OSNumber, getProperty(kIOHibernateOptionsKey)); - if (hibernateAborted || - ((hibOptions && - !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake))) || - ((_debugWakeSeconds != 0) && - ((gDarkWakeFlags & kDarkWakeFlagAlarmIsDark) == 0)) || - (wakeType && ( - wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) || - wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm)))) + if (hibernateAborted || ((hibOptions && + !(hibOptions->unsigned32BitValue() & kIOHibernateOptionDarkWake)))) { + // Hibernate aborted, or EFI brought up graphics + wranglerTickled = true; + } + else + if (wakeType && ( + wakeType->isEqualTo(kIOPMRootDomainWakeTypeUser) || + wakeType->isEqualTo(kIOPMRootDomainWakeTypeAlarm))) + { + // User wake or RTC alarm wranglerTickled = true; } else if (wakeType && - wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) + wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer)) { + // SMC standby timer trumps SleepX darkWakeMaintenance = true; darkWakeToSleepASAP = true; + sleepTimerMaintenance = true; + } + else + if ((_lastDebugWakeSeconds != 0) && + ((gDarkWakeFlags & kDarkWakeFlagAlarmIsDark) == 0)) + { + // SleepX before maintenance + wranglerTickled = true; } else if (wakeType && - wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer)) + wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) { darkWakeMaintenance = true; darkWakeToSleepASAP = true; - sleepTimerMaintenance = true; + } + else + if (wakeType && + wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepService)) + { + darkWakeToSleepASAP = true; +// darkWakeMaintenance = true; // ???? + darkWakeSleepService = true; } else { // Unidentified wake source, resume to full wake if debug // alarm is pending. - if (_debugWakeSeconds && (!wakeReason || wakeReason->isEqualTo(""))) + if (_lastDebugWakeSeconds && + (!wakeReason || wakeReason->isEqualTo(""))) wranglerTickled = true; else darkWakeToSleepASAP = true; @@ -2109,11 +2151,18 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) } else { - // Post a HID tickle immediately - except for maintenance wake. 
- - if (hibernateAborted || !wakeType || - !wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance)) + if (wakeType && + wakeType->isEqualTo(kIOPMRootDomainWakeTypeSleepTimer)) { + darkWakeMaintenance = true; + darkWakeToSleepASAP = true; + sleepTimerMaintenance = true; + } + else if (hibernateAborted || !wakeType || + !wakeType->isEqualTo(kIOPMRootDomainWakeTypeMaintenance) || + !wakeReason || !wakeReason->isEqualTo("RTC")) + { + // Post a HID tickle immediately - except for RTC maintenance wake. wranglerTickled = true; } else @@ -2156,9 +2205,6 @@ void IOPMrootDomain::powerChangeDone( unsigned long previousPowerState ) recordAndReleasePMEvent( details ); - if (previousPowerState != ON_STATE) - _debugWakeSeconds = 0; - // Update childPreventSystemSleep flag using the capability computed // by IOSevice::rebuildChildClampBits(). @@ -2898,7 +2944,7 @@ void IOPMrootDomain::publishFeature( } else { // The easy case: no previously existing features listed. We simply // set the OSNumber at key 'feature' and we're on our way. - features->setObject(feature, new_feature_data); + features->setObject(feature, new_feature_data); } new_feature_data->release(); @@ -2937,6 +2983,9 @@ IOReturn IOPMrootDomain::removePublishedFeature( uint32_t removeFeatureID ) OSNumber *osNum = NULL; OSArray *arrayMemberCopy; + if (kBadPMFeatureID == removeFeatureID) + return kIOReturnNotFound; + if(featuresDictLock) IOLockLock(featuresDictLock); OSDictionary *features = @@ -3044,6 +3093,28 @@ exit: return ret; } +//****************************************************************************** +// publishPMSetting (private) +// +// Should only be called by PMSettingObject to publish a PM Setting as a +// supported feature. +//****************************************************************************** + +void IOPMrootDomain::publishPMSetting( + const OSSymbol * feature, uint32_t where, uint32_t * featureID ) +{ + if (noPublishPMSettings && + (noPublishPMSettings->getNextIndexOfObject(feature, 0) != (unsigned int)-1)) + { + // Setting found in noPublishPMSettings array + *featureID = kBadPMFeatureID; + return; + } + + publishFeature( + feature->getCStringNoCopy(), where, featureID); +} + //****************************************************************************** // setPMSetting (private) // @@ -3377,81 +3448,92 @@ void IOPMrootDomain::informCPUStateChange( // evaluateSystemSleepPolicy //****************************************************************************** +#define kIOPlatformSystemSleepPolicyKey "IOPlatformSystemSleepPolicy" + +// Sleep flags +enum { + kIOPMSleepFlagHibernate = 0x00000001, + kIOPMSleepFlagSleepTimerEnable = 0x00000002 +}; + struct IOPMSystemSleepPolicyEntry { uint32_t factorMask; uint32_t factorBits; uint32_t sleepFlags; uint32_t wakeEvents; -}; +} __attribute__((packed)); struct IOPMSystemSleepPolicyTable { - uint8_t signature[4]; + uint32_t signature; uint16_t version; uint16_t entryCount; IOPMSystemSleepPolicyEntry entries[]; -}; +} __attribute__((packed)); -enum { - kIOPMSleepFactorSleepTimerWake = 0x00000001, - kIOPMSleepFactorLidOpen = 0x00000002, - kIOPMSleepFactorACPower = 0x00000004, - kIOPMSleepFactorLowBattery = 0x00000008, - kIOPMSleepFactorDeepSleepNoDelay = 0x00000010, - kIOPMSleepFactorDeepSleepDemand = 0x00000020, - kIOPMSleepFactorDeepSleepDisable = 0x00000040, - kIOPMSleepFactorUSBExternalDevice = 0x00000080, - kIOPMSleepFactorBluetoothHIDDevice = 0x00000100, - kIOPMSleepFactorExternalMediaMounted = 0x00000200, - kIOPMSleepFactorDriverAssertBit5 = 0x00000400, /* Reserved 
for ThunderBolt */ - kIOPMSleepFactorDriverAssertBit6 = 0x00000800, - kIOPMSleepFactorDriverAssertBit7 = 0x00001000 -}; - -bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p ) +bool IOPMrootDomain::evaluateSystemSleepPolicy( + IOPMSystemSleepParameters * params, int sleepPhase ) { const IOPMSystemSleepPolicyTable * pt; OSObject * prop = 0; OSData * policyData; - uint32_t currentFactors; - uint32_t deepSleepDelay = 0; - bool success = false; - - if (getProperty(kIOPMDeepSleepEnabledKey) != kOSBooleanTrue) - return false; - - getSleepOption(kIOPMDeepSleepDelayKey, &deepSleepDelay); - - prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey); - if (!prop) - return false; - - policyData = OSDynamicCast(OSData, prop); - if (!policyData || - (policyData->getLength() < sizeof(IOPMSystemSleepPolicyTable))) - { - goto done; - } - - pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy(); - if ((pt->signature[0] != 'S') || - (pt->signature[1] != 'L') || - (pt->signature[2] != 'P') || - (pt->signature[3] != 'T') || - (pt->version != 1) || - (pt->entryCount == 0)) + uint64_t currentFactors = 0; + uint32_t standbyDelay; + uint32_t powerOffDelay; + uint32_t mismatch; + bool standbyEnabled; + bool powerOffEnabled; + bool found = false; + + // Get platform's sleep policy table + if (!_sleepPolicyHandler) + { + prop = getServiceRoot()->copyProperty(kIOPlatformSystemSleepPolicyKey); + if (!prop) goto done; + } + + // Fetch additional settings + standbyEnabled = (getSleepOption(kIOPMDeepSleepDelayKey, &standbyDelay) + && (getProperty(kIOPMDeepSleepEnabledKey) == kOSBooleanTrue)); + powerOffEnabled = (getSleepOption(kIOPMAutoPowerOffDelayKey, &powerOffDelay) + && (getProperty(kIOPMAutoPowerOffEnabledKey) == kOSBooleanTrue)); + DLOG("standby %d delay %u, powerOff %d delay %u, hibernate %u\n", + standbyEnabled, standbyDelay, powerOffEnabled, powerOffDelay, + hibernateMode); + + // pmset level overrides + if ((hibernateMode & kIOHibernateModeOn) == 0) { - goto done; + standbyEnabled = false; + powerOffEnabled = false; } - - if ((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) != - (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount)) + else if (!(hibernateMode & kIOHibernateModeSleep)) { - goto done; + // Force hibernate (i.e. mode 25) + // If standby is enabled, force standy. + // If poweroff is enabled, force poweroff. 
+ if (standbyEnabled) + currentFactors |= kIOPMSleepFactorStandbyForced; + else if (powerOffEnabled) + currentFactors |= kIOPMSleepFactorAutoPowerOffForced; + else + currentFactors |= kIOPMSleepFactorHibernateForced; } - currentFactors = 0; + // Current factors based on environment and assertions + if (sleepTimerMaintenance) + currentFactors |= kIOPMSleepFactorSleepTimerWake; + if (!clamshellClosed) + currentFactors |= kIOPMSleepFactorLidOpen; + if (acAdaptorConnected) + currentFactors |= kIOPMSleepFactorACPower; + if (lowBatteryCondition) + currentFactors |= kIOPMSleepFactorBatteryLow; + if (!standbyDelay) + currentFactors |= kIOPMSleepFactorStandbyNoDelay; + if (!standbyEnabled) + currentFactors |= kIOPMSleepFactorStandbyDisabled; if (getPMAssertionLevel(kIOPMDriverAssertionUSBExternalDeviceBit) != kIOPMDriverAssertionLevelOff) currentFactors |= kIOPMSleepFactorUSBExternalDevice; @@ -3461,57 +3543,101 @@ bool IOPMrootDomain::evaluateSystemSleepPolicy( IOPMSystemSleepParameters * p ) if (getPMAssertionLevel(kIOPMDriverAssertionExternalMediaMountedBit) != kIOPMDriverAssertionLevelOff) currentFactors |= kIOPMSleepFactorExternalMediaMounted; - if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) != /* AssertionBit5 = Thunderbolt */ + if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit5) != kIOPMDriverAssertionLevelOff) - currentFactors |= kIOPMSleepFactorDriverAssertBit5; - if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit7) != + currentFactors |= kIOPMSleepFactorThunderboltDevice; + if (getPMAssertionLevel(kIOPMDriverAssertionReservedBit8) != kIOPMDriverAssertionLevelOff) - currentFactors |= kIOPMSleepFactorDriverAssertBit7; - if (0 == deepSleepDelay) - currentFactors |= kIOPMSleepFactorDeepSleepNoDelay; - if (!clamshellClosed) - currentFactors |= kIOPMSleepFactorLidOpen; - if (acAdaptorConnected) - currentFactors |= kIOPMSleepFactorACPower; - if (lowBatteryCondition) - currentFactors |= kIOPMSleepFactorLowBattery; - if (sleepTimerMaintenance) - currentFactors |= kIOPMSleepFactorSleepTimerWake; + currentFactors |= kIOPMSleepFactorMagicPacketWakeEnabled; + if (!powerOffEnabled) + currentFactors |= kIOPMSleepFactorAutoPowerOffDisabled; - // pmset overrides - if ((hibernateMode & kIOHibernateModeOn) == 0) - currentFactors |= kIOPMSleepFactorDeepSleepDisable; - else if ((hibernateMode & kIOHibernateModeSleep) == 0) - currentFactors |= kIOPMSleepFactorDeepSleepDemand; - - DLOG("Sleep policy %u entries, current factors 0x%x\n", - pt->entryCount, currentFactors); + DLOG("sleep factors 0x%llx\n", currentFactors); + + // Clear the output params + bzero(params, sizeof(*params)); + + if (_sleepPolicyHandler) + { + if (!_sleepPolicyVars) + { + _sleepPolicyVars = IONew(IOPMSystemSleepPolicyVariables, 1); + if (!_sleepPolicyVars) + goto done; + bzero(_sleepPolicyVars, sizeof(*_sleepPolicyVars)); + } + _sleepPolicyVars->signature = kIOPMSystemSleepPolicySignature; + _sleepPolicyVars->version = kIOPMSystemSleepPolicyVersion; + if (kIOPMSleepPhase1 == sleepPhase) + { + _sleepPolicyVars->currentCapability = _currentCapability; + _sleepPolicyVars->highestCapability = _highestCapability; + _sleepPolicyVars->sleepReason = lastSleepReason; + _sleepPolicyVars->hibernateMode = hibernateMode; + _sleepPolicyVars->standbyDelay = standbyDelay; + _sleepPolicyVars->poweroffDelay = powerOffDelay; + } + _sleepPolicyVars->sleepFactors = currentFactors; + _sleepPolicyVars->sleepPhase = sleepPhase; + + if ((_sleepPolicyHandler(_sleepPolicyTarget, _sleepPolicyVars, params) != + kIOReturnSuccess) || 
(kIOPMSleepTypeInvalid == params->sleepType) || + (params->sleepType >= kIOPMSleepTypeLast) || + (kIOPMSystemSleepParametersVersion != params->version)) + { + MSG("sleep policy handler error\n"); + goto done; + } + + DLOG("sleep params v%u, type %u, flags 0x%x, wake 0x%x, timer %u, poweroff %u\n", + params->version, params->sleepType, params->sleepFlags, + params->ecWakeEvents, params->ecWakeTimer, params->ecPoweroffTimer); + found = true; + goto done; + } + + // Policy table is meaningless without standby enabled + if (!standbyEnabled) + goto done; + + // Validate the sleep policy table + policyData = OSDynamicCast(OSData, prop); + if (!policyData || (policyData->getLength() <= sizeof(IOPMSystemSleepPolicyTable))) + goto done; + + pt = (const IOPMSystemSleepPolicyTable *) policyData->getBytesNoCopy(); + if ((pt->signature != kIOPMSystemSleepPolicySignature) || + (pt->version != 1) || (0 == pt->entryCount)) + goto done; + + if (((policyData->getLength() - sizeof(IOPMSystemSleepPolicyTable)) != + (sizeof(IOPMSystemSleepPolicyEntry) * pt->entryCount))) + goto done; for (uint32_t i = 0; i < pt->entryCount; i++) { - const IOPMSystemSleepPolicyEntry * policyEntry = &pt->entries[i]; + const IOPMSystemSleepPolicyEntry * entry = &pt->entries[i]; + mismatch = (((uint32_t)currentFactors ^ entry->factorBits) & entry->factorMask); - DLOG("factor mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x\n", - policyEntry->factorMask, policyEntry->factorBits, - policyEntry->sleepFlags, policyEntry->wakeEvents); + DLOG("mask 0x%08x, bits 0x%08x, flags 0x%08x, wake 0x%08x, mismatch 0x%08x\n", + entry->factorMask, entry->factorBits, + entry->sleepFlags, entry->wakeEvents, mismatch); + if (mismatch) + continue; - if ((currentFactors ^ policyEntry->factorBits) & policyEntry->factorMask) - continue; // mismatch, try next + DLOG("^ found match\n"); + found = true; - if (p) - { - p->version = 1; - p->sleepFlags = policyEntry->sleepFlags; - p->sleepTimer = 0; - p->wakeEvents = policyEntry->wakeEvents; - if (p->sleepFlags & kIOPMSleepFlagSleepTimerEnable) - { - p->sleepTimer = deepSleepDelay; - } - } + params->version = kIOPMSystemSleepParametersVersion; + params->reserved1 = 1; + if (entry->sleepFlags & kIOPMSleepFlagHibernate) + params->sleepType = kIOPMSleepTypeStandby; + else + params->sleepType = kIOPMSleepTypeNormalSleep; - DLOG("matched policy entry %u\n", i); - success = true; + params->ecWakeEvents = entry->wakeEvents; + if (entry->sleepFlags & kIOPMSleepFlagSleepTimerEnable) + params->ecWakeTimer = standbyDelay; break; } @@ -3519,14 +3645,14 @@ done: if (prop) prop->release(); - return success; + return found; } +static IOPMSystemSleepParameters gEarlySystemSleepParams; + void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void ) { - IOPMSystemSleepParameters params; - - // Evaluate sleep policy before driver sleep phase. + // Evaluate early (priority interest phase), before drivers sleep. 
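The table loop above reduces entry selection to one mask-and-compare: factorMask names the sleep factors an entry cares about, factorBits gives the state those factors must be in, and everything outside the mask is ignored. A small worked example using the kIOPMSleepFactor* constants from IOPMPrivate.h in this patch; the entry itself is hypothetical, not taken from a real policy table:

    #include <IOKit/pwr_mgt/IOPMPrivate.h>

    // Hypothetical entry: "applies when on AC power with the lid closed".
    static bool entry_matches(uint64_t currentFactors)
    {
        uint32_t factorMask = kIOPMSleepFactorACPower | kIOPMSleepFactorLidOpen;  // 0x06
        uint32_t factorBits = kIOPMSleepFactorACPower;                            // 0x04: AC set, lid-open clear

        // Same computation as the loop: XOR exposes factors whose state differs
        // from factorBits, the mask discards factors the entry does not test.
        return 0 == (((uint32_t) currentFactors ^ factorBits) & factorMask);
    }

    // entry_matches(kIOPMSleepFactorACPower | kIOPMSleepFactorUSBExternalDevice) -> true
    //   (0x84 ^ 0x04) & 0x06 == 0x80 & 0x06 == 0; the USB factor is outside the mask.
    // entry_matches(kIOPMSleepFactorACPower | kIOPMSleepFactorLidOpen) -> false
    //   (0x06 ^ 0x04) & 0x06 == 0x02; the open lid disqualifies the entry.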
DLOG("%s\n", __FUNCTION__); removeProperty(kIOPMSystemSleepParametersKey); @@ -3535,12 +3661,37 @@ void IOPMrootDomain::evaluateSystemSleepPolicyEarly( void ) hibernateMode = 0; getSleepOption(kIOHibernateModeKey, &hibernateMode); - if (!hibernateNoDefeat && - evaluateSystemSleepPolicy(¶ms) && - ((params.sleepFlags & kIOPMSleepFlagHibernate) == 0)) + // Save for late evaluation if sleep is aborted + bzero(&gEarlySystemSleepParams, sizeof(gEarlySystemSleepParams)); + + if (evaluateSystemSleepPolicy(&gEarlySystemSleepParams, kIOPMSleepPhase1)) + { + if (!hibernateNoDefeat && + (gEarlySystemSleepParams.sleepType == kIOPMSleepTypeNormalSleep)) + { + // Disable hibernate setup for normal sleep + hibernateDisabled = true; + } + } + + // Publish IOPMSystemSleepType + uint32_t sleepType = gEarlySystemSleepParams.sleepType; + if (sleepType == kIOPMSleepTypeInvalid) + { + // no sleep policy + sleepType = kIOPMSleepTypeNormalSleep; + if (hibernateMode & kIOHibernateModeOn) + sleepType = (hibernateMode & kIOHibernateModeSleep) ? + kIOPMSleepTypeSafeSleep : kIOPMSleepTypeHibernate; + } + else if ((sleepType == kIOPMSleepTypeStandby) && + (gEarlySystemSleepParams.ecPoweroffTimer)) { - hibernateDisabled = true; + // report the lowest possible sleep state + sleepType = kIOPMSleepTypePowerOff; } + + setProperty(kIOPMSystemSleepTypeKey, sleepType, 32); } void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void ) @@ -3548,27 +3699,30 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void ) IOPMSystemSleepParameters params; OSData * paramsData; - // Evaluate sleep policy after drivers but before platform sleep. + // Evaluate sleep policy after sleeping drivers but before platform sleep. DLOG("%s\n", __FUNCTION__); - if (evaluateSystemSleepPolicy(¶ms)) + if (evaluateSystemSleepPolicy(¶ms, kIOPMSleepPhase2)) { if ((hibernateDisabled || hibernateAborted) && - (params.sleepFlags & kIOPMSleepFlagHibernate)) + (params.sleepType != kIOPMSleepTypeNormalSleep)) { - // Should hibernate but unable to or aborted. - // Arm timer for a short sleep and retry or wake fully. + // Final evaluation picked a state requiring hibernation, + // but hibernate setup was skipped. Retry using the early + // sleep parameters. - params.sleepFlags &= ~kIOPMSleepFlagHibernate; - params.sleepFlags |= kIOPMSleepFlagSleepTimerEnable; - params.sleepTimer = 1; + bcopy(&gEarlySystemSleepParams, ¶ms, sizeof(params)); + params.sleepType = kIOPMSleepTypeAbortedSleep; + params.ecWakeTimer = 1; hibernateNoDefeat = true; DLOG("wake in %u secs for hibernateDisabled %d, hibernateAborted %d\n", - params.sleepTimer, hibernateDisabled, hibernateAborted); + params.ecWakeTimer, hibernateDisabled, hibernateAborted); } else + { hibernateNoDefeat = false; + } paramsData = OSData::withBytes(¶ms, sizeof(params)); if (paramsData) @@ -3577,25 +3731,28 @@ void IOPMrootDomain::evaluateSystemSleepPolicyFinal( void ) paramsData->release(); } - if (params.sleepFlags & kIOPMSleepFlagHibernate) + if (params.sleepType >= kIOPMSleepTypeHibernate) { - // Force hibernate + // Disable safe sleep to force the hibernate path gIOHibernateMode &= ~kIOHibernateModeSleep; } } } bool IOPMrootDomain::getHibernateSettings( - uint32_t * hibernateMode, + uint32_t * hibernateModePtr, uint32_t * hibernateFreeRatio, uint32_t * hibernateFreeTime ) { - bool ok = getSleepOption(kIOHibernateModeKey, hibernateMode); + // Called by IOHibernateSystemSleep() after evaluateSystemSleepPolicyEarly() + // has updated the hibernateDisabled flag. 
+ + bool ok = getSleepOption(kIOHibernateModeKey, hibernateModePtr); getSleepOption(kIOHibernateFreeRatioKey, hibernateFreeRatio); getSleepOption(kIOHibernateFreeTimeKey, hibernateFreeTime); if (hibernateDisabled) - *hibernateMode = 0; - DLOG("hibernateMode 0x%x\n", *hibernateMode); + *hibernateModePtr = 0; + DLOG("hibernateMode 0x%x\n", *hibernateModePtr); return ok; } @@ -4224,7 +4381,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( { if (((gDarkWakeFlags & kDarkWakeFlagIgnoreDiskIOInDark) == 0) && (kSystemTransitionWake == _systemTransitionType) && - (_debugWakeSeconds == 0)) + (_lastDebugWakeSeconds == 0)) { OSObject * prop = copyProperty(kIOPMRootDomainWakeTypeKey); if (prop) @@ -4261,7 +4418,7 @@ void IOPMrootDomain::handleOurPowerChangeDone( _systemTransitionType, _systemStateGeneration, _systemMessageClientMask, _desiredCapability, _currentCapability, _pendingCapability, - _debugWakeSeconds); + _lastDebugWakeSeconds); // Update current system capability. @@ -4716,8 +4873,15 @@ IOReturn IOPMrootDomain::setMaintenanceWakeCalendar( data = OSData::withBytesNoCopy((void *) calendar, sizeof(*calendar)); if (!data) return kIOReturnNoMemory; + + if (kPMCalendarTypeMaintenance == calendar->selector) { + ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data); + } else + if (kPMCalendarTypeSleepService == calendar->selector) + { + ret = setPMSetting(gIOPMSettingSleepServiceWakeCalendarKey, data); + } - ret = setPMSetting(gIOPMSettingMaintenanceWakeCalendarKey, data); data->release(); return ret; @@ -5633,6 +5797,11 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) { lastSleepReason = kIOPMSleepReasonMaintenance; setProperty(kRootDomainSleepReasonKey, kIOPMMaintenanceSleepKey); + } + else if (darkWakeSleepService) + { + lastSleepReason = kIOPMSleepReasonSleepServiceExit; + setProperty(kRootDomainSleepReasonKey, kIOPMSleepServiceExitKey); } changePowerStateWithOverrideTo( SLEEP_STATE ); } @@ -5759,6 +5928,31 @@ void IOPMrootDomain::evaluatePolicy( int stimulus, uint32_t arg ) } } +//****************************************************************************** +// evaluateAssertions +// +//****************************************************************************** +void IOPMrootDomain::evaluateAssertions(IOPMDriverAssertionType newAssertions, IOPMDriverAssertionType oldAssertions) +{ + IOPMDriverAssertionType changedBits = newAssertions ^ oldAssertions; + + messageClients(kIOPMMessageDriverAssertionsChanged); + + if (changedBits & kIOPMDriverAssertionPreventDisplaySleepBit) { + + if (wrangler) { + bool value = (newAssertions & kIOPMDriverAssertionPreventDisplaySleepBit) ? 
true : false; + + DLOG("wrangler->setIgnoreIdleTimer\(%d)\n", value); + wrangler->setIgnoreIdleTimer( value ); + } + } + if (changedBits & kIOPMDriverAssertionCPUBit) + evaluatePolicy(kStimulusDarkWakeEvaluate); + + +} + // MARK: - // MARK: Statistics @@ -5930,6 +6124,18 @@ IOReturn IOPMrootDomain::callPlatformFunction( return kIOReturnSuccess; } + else if (functionName && + functionName->isEqualTo(kIOPMInstallSystemSleepPolicyHandlerKey)) + { + if (_sleepPolicyHandler) + return kIOReturnExclusiveAccess; + if (!param1) + return kIOReturnBadArgument; + _sleepPolicyHandler = (IOPMSystemSleepPolicyHandler) param1; + _sleepPolicyTarget = (void *) param2; + setProperty("IOPMSystemSleepPolicyHandler", kOSBooleanTrue); + return kIOReturnSuccess; + } return super::callPlatformFunction( functionName, waitForFunction, param1, param2, param3, param4); @@ -6790,7 +6996,7 @@ PMSettingObject *PMSettingObject::pmSettingObject( for (unsigned int i=0; ipublishFeature( settings[i]->getCStringNoCopy(), + parent_arg->publishPMSetting( settings[i], supportedPowerSources, &pmso->publishedFeatureID[i] ); } } @@ -7267,18 +7473,7 @@ void PMAssertionsTracker::tabulate(void) if ((assertionsKernel != oldKernel) || (assertionsCombined != oldCombined)) { - owner->messageClients(kIOPMMessageDriverAssertionsChanged); - - if (((assertionsCombined & kIOPMDriverAssertionPreventDisplaySleepBit) != 0) - && ((oldCombined & kIOPMDriverAssertionPreventDisplaySleepBit) == 0)) - { - /* We react to a new PreventDisplaySleep assertion by waking the display - * with an activityTickle - */ - owner->evaluatePolicy(kStimulusDarkWakeActivityTickle); - } else { - owner->evaluatePolicy(kStimulusDarkWakeEvaluate); - } + owner->evaluateAssertions(assertionsCombined, oldCombined); } } diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp index 11efcab25..4905ec2cd 100644 --- a/iokit/Kernel/IOServicePM.cpp +++ b/iokit/Kernel/IOServicePM.cpp @@ -2867,6 +2867,24 @@ IOReturn IOService::setIdleTimerPeriod ( unsigned long period ) return kIOReturnSuccess; } +IOReturn IOService::setIgnoreIdleTimer( bool ignore ) +{ + if (!initialized) + return IOPMNotYetInitialized; + + OUR_PMLog(kIOPMRequestTypeIgnoreIdleTimer, ignore, 0); + + IOPMRequest * request = + acquirePMRequest( this, kIOPMRequestTypeIgnoreIdleTimer ); + if (!request) + return kIOReturnNoMemory; + + request->fArg0 = (void *) ignore; + submitPMRequest( request ); + + return kIOReturnSuccess; +} + //****************************************************************************** // [public] nextIdleTimeout // @@ -2987,7 +3005,7 @@ void IOService::idleTimerExpired( void ) // Device was active - do not drop power, restart timer. fDeviceWasActive = false; } - else + else if (!fIdleTimerIgnored) { // No device activity - drop power state by one level. // Decrement the cached tickle power state when possible. 
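The new evaluateAssertions() path above is what ties a driver assertion to the wrangler's idle-timer override: when kIOPMDriverAssertionPreventDisplaySleepBit changes, the display wrangler is told to ignore (or honor again) its idle timer through the kIOPMRequestTypeIgnoreIdleTimer request, which is why idleTimerExpired() now checks fIdleTimerIgnored before dropping power. A usage sketch from the asserting kext's side, assuming the createPMAssertion()/releasePMAssertion() interfaces declared in RootDomain.h (not quoted in this patch):

    #include <IOKit/pwr_mgt/RootDomain.h>

    // Keep the display from idle-sleeping while, e.g., playback is active.
    static IOPMDriverAssertionID gDisplayAssertion;

    static void holdDisplayAwake(IOService * owner)
    {
        gDisplayAssertion = owner->getPMRootDomain()->createPMAssertion(
            kIOPMDriverAssertionPreventDisplaySleepBit,
            kIOPMDriverAssertionLevelOn,
            owner, "example playback");
        // PMAssertionsTracker::tabulate() now funnels the changed bit through
        // evaluateAssertions(), which calls wrangler->setIgnoreIdleTimer(true).
    }

    static void releaseDisplayAssertion(IOService * owner)
    {
        if (gDisplayAssertion) {
            owner->getPMRootDomain()->releasePMAssertion(gDisplayAssertion);
            gDisplayAssertion = 0;   // wrangler idle timer takes effect again
        }
    }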
@@ -5414,7 +5432,7 @@ void IOService::pmTellClientWithResponse ( OSObject * object, void * arg ) getPMRootDomain()->traceDetail( detail ); } - retCode = context->us->messageClient(msgType, object, (void *) &notify); + retCode = context->us->messageClient(msgType, object, (void *) &notify, sizeof(notify)); if ( kIOReturnSuccess == retCode ) { if ( 0 == notify.returnValue ) @@ -5732,7 +5750,7 @@ static void tellKernelClientApplier ( OSObject * object, void * arg ) notify.stateNumber = context->stateNumber; notify.stateFlags = context->stateFlags; - context->us->messageClient(context->messageType, object, &notify); + context->us->messageClient(context->messageType, object, &notify, sizeof(notify)); if ((kIOLogDebugPower & gIOKitDebug) && (OSDynamicCast(_IOServiceInterestNotifier, object))) @@ -6804,6 +6822,10 @@ void IOService::executePMRequest( IOPMRequest * request ) } break; + case kIOPMRequestTypeIgnoreIdleTimer: + fIdleTimerIgnored = request->fArg0 ? 1 : 0; + break; + default: panic("executePMRequest: unknown request type %x", request->getType()); } diff --git a/iokit/Kernel/IOServicePMPrivate.h b/iokit/Kernel/IOServicePMPrivate.h index 00f53cdfe..bd2ec9234 100644 --- a/iokit/Kernel/IOServicePMPrivate.h +++ b/iokit/Kernel/IOServicePMPrivate.h @@ -54,6 +54,7 @@ enum { kIOPMRequestTypeSynchronizePowerTree = 0x0D, kIOPMRequestTypeRequestPowerStateOverride = 0x0E, kIOPMRequestTypeSetIdleTimerPeriod = 0x0F, + kIOPMRequestTypeIgnoreIdleTimer = 0x10, /* Reply Types */ kIOPMRequestTypeReplyStart = 0x80, @@ -240,6 +241,7 @@ private: unsigned int IsPreChange:1; unsigned int DriverCallBusy:1; unsigned int PCDFunctionOverride:1; + unsigned int IdleTimerIgnored:1; // Time of last device activity. AbsoluteTime DeviceActiveTimestamp; @@ -367,6 +369,7 @@ private: #define fIsPreChange pwrMgt->IsPreChange #define fDriverCallBusy pwrMgt->DriverCallBusy #define fPCDFunctionOverride pwrMgt->PCDFunctionOverride +#define fIdleTimerIgnored pwrMgt->IdleTimerIgnored #define fDeviceActiveTimestamp pwrMgt->DeviceActiveTimestamp #define fActivityLock pwrMgt->ActivityLock #define fIdleTimerPeriod pwrMgt->IdleTimerPeriod diff --git a/iokit/Kernel/RootDomainUserClient.cpp b/iokit/Kernel/RootDomainUserClient.cpp index 29c90deef..92097acde 100644 --- a/iokit/Kernel/RootDomainUserClient.cpp +++ b/iokit/Kernel/RootDomainUserClient.cpp @@ -309,6 +309,11 @@ IOReturn RootDomainUserClient::externalMethod( (uint32_t)arguments->scalarInput[0]); break; + case kPMActivityTickle: + fOwner->reportUserInput( ); + ret = kIOReturnSuccess; + break; + /* case kPMMethodCopySystemTimeline: // intentional fallthrough diff --git a/libkern/c++/OSKext.cpp b/libkern/c++/OSKext.cpp index 14f0643c2..68139f092 100644 --- a/libkern/c++/OSKext.cpp +++ b/libkern/c++/OSKext.cpp @@ -4874,8 +4874,9 @@ OSKext::jettisonLinkeditSegment(void) kernel_mach_header_t * machhdr = (kernel_mach_header_t *)kmod_info->address; kernel_segment_command_t * linkedit = NULL; vm_size_t linkeditsize, kextsize; + vm_offset_t linkeditaddr = 0; OSData * data = NULL; - + if (sKeepSymbols || isLibrary() || !isExecutable() || !linkedExecutable) { goto finish; } @@ -4899,7 +4900,10 @@ OSKext::jettisonLinkeditSegment(void) */ linkeditsize = round_page(linkedit->vmsize); kextsize = kmod_info->size - linkeditsize; - + + /* Save linkedit address as removeLinkeditHeaders() will zero it */ + linkeditaddr = trunc_page(linkedit->vmaddr); + data = OSData::withBytesNoCopy((void *)kmod_info->address, kextsize); if (!data) { goto finish; @@ -4921,7 +4925,7 @@ OSKext::jettisonLinkeditSegment(void) /* 
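/*
 * Editor's note: a small sketch of the page-rounding arithmetic used by
 * jettisonLinkeditSegment() above, and of the save-before-invalidate pattern:
 * the segment's address is captured with trunc_page() before the Mach-O
 * headers that describe it are rewritten. PAGE_SIZE and the struct below are
 * illustrative, not the kernel's definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (PAGE_SIZE - 1)

static uint64_t trunc_page_sketch(uint64_t a) { return a & ~PAGE_MASK; }              /* round down */
static uint64_t round_page_sketch(uint64_t a) { return (a + PAGE_MASK) & ~PAGE_MASK; } /* round up */

struct segment { uint64_t vmaddr; uint64_t vmsize; };

int main(void)
{
    struct segment linkedit = { 0x100235000ULL, 0x0001a300ULL };

    /* Capture the values we need before the header is zeroed. */
    uint64_t freed_addr = trunc_page_sketch(linkedit.vmaddr);
    uint64_t freed_size = round_page_sketch(linkedit.vmsize);

    linkedit.vmaddr = linkedit.vmsize = 0;   /* header rewritten/zeroed */

    printf("free 0x%llx bytes at 0x%llx\n",
           (unsigned long long)freed_size, (unsigned long long)freed_addr);
    return 0;
}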
Free the linkedit segment. */ - kext_free(linkedit->vmaddr, linkeditsize); + kext_free(linkeditaddr, linkeditsize); finish: return; diff --git a/libkern/libkern/OSAtomic.h b/libkern/libkern/OSAtomic.h index 72ff30594..d585c4175 100644 --- a/libkern/libkern/OSAtomic.h +++ b/libkern/libkern/OSAtomic.h @@ -638,7 +638,13 @@ extern void OSSpinLockUnlock(volatile OSSpinLock * lock); static __inline__ void OSSynchronizeIO(void) { } - +#if defined(XNU_KERNEL_PRIVATE) +#if defined(__i386__) || defined(__x86_64__) +static inline void OSMemoryBarrier(void) { + __asm__ volatile("mfence" ::: "memory"); +} +#endif +#endif /*XNU_KERNEL_PRIVATE */ #if defined(__cplusplus) } #endif diff --git a/osfmk/console/video_console.c b/osfmk/console/video_console.c index 9c5460016..503fa5053 100644 --- a/osfmk/console/video_console.c +++ b/osfmk/console/video_console.c @@ -125,7 +125,15 @@ struct vc_info vinfo; /* allowed otherwise we won't use the panic dialog even if it is allowed */ boolean_t panicDialogDesired; - +void noroot_icon_test(void); + +int +vc_display_lzss_icon(uint32_t dst_x, uint32_t dst_y, + uint32_t image_width, uint32_t image_height, + const uint8_t *compressed_image, + uint32_t compressed_size, + const uint8_t *clut); + extern int disableConsoleOutput; static boolean_t gc_enabled = FALSE; static boolean_t gc_initialized = FALSE; @@ -2139,6 +2147,150 @@ static void vc_blit_rect_30(int x, int y, int bx, } } + +/* + * Routines to render the lzss image format + */ + +struct lzss_image_state { + uint32_t col; + uint32_t row; + uint32_t width; + uint32_t height; + uint32_t bytes_per_row; + volatile uint32_t * row_start; + const uint8_t* clut; +}; +typedef struct lzss_image_state lzss_image_state; + +// returns 0 if OK, 1 if error +static inline int +vc_decompress_lzss_next_pixel (int next_data, lzss_image_state* state) +{ + uint32_t palette_index = 0; + uint32_t pixel_value = 0; + + palette_index = next_data * 3; + + pixel_value = ( (uint32_t) state->clut[palette_index + 0] << 16) + | ( (uint32_t) state->clut[palette_index + 1] << 8) + | ( (uint32_t) state->clut[palette_index + 2]); + + *(state->row_start + state->col) = pixel_value; + + if (++state->col >= state->width) { + state->col = 0; + if (++state->row >= state->height) { + return 1; + } + state->row_start = (volatile uint32_t *) (((uintptr_t)state->row_start) + state->bytes_per_row); + } + return 0; +} + + +/* + * Blit an lzss compressed image to the framebuffer + * Assumes 32 bit screen (which is everything we ship at the moment) + * The function vc_display_lzss_icon was copied from libkern/mkext.c, then modified. + */ + +/* + * TODO: Does lzss use too much stack? 4096 plus bytes... + * Can probably chop it down by 1/2. + */ + +/************************************************************** + LZSS.C -- A Data Compression Program +*************************************************************** + 4/6/1989 Haruhiko Okumura + Use, distribute, and modify this program freely. + Please send me your improved versions. 
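/*
 * Editor's note: a sketch of the color-lookup-table expansion performed by
 * vc_decompress_lzss_next_pixel() above. Each decoded byte is a palette index;
 * three CLUT bytes (R, G, B) are packed into one 32-bit framebuffer pixel.
 * The tiny palette and the in-memory "framebuffer" are made up for the demo.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t clut_to_pixel(const uint8_t *clut, uint8_t index)
{
    uint32_t p = (uint32_t)index * 3;
    return ((uint32_t)clut[p + 0] << 16) |   /* red   */
           ((uint32_t)clut[p + 1] <<  8) |   /* green */
            (uint32_t)clut[p + 2];           /* blue  */
}

int main(void)
{
    static const uint8_t clut[] = { 0x00,0x00,0x00,  0xff,0xff,0xff,  0x80,0x00,0x00 };
    uint32_t fb[3];
    for (uint8_t i = 0; i < 3; i++)
        fb[i] = clut_to_pixel(clut, i);
    printf("0x%08x 0x%08x 0x%08x\n", fb[0], fb[1], fb[2]);
    return 0;
}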
+ PC-VAN SCIENCE + NIFTY-Serve PAF01022 + CompuServe 74050,1022 + +**************************************************************/ + +#define N 4096 /* size of ring buffer - must be power of 2 */ +#define F 18 /* upper limit for match_length */ +#define THRESHOLD 2 /* encode string into position and length + if match_length is greater than this */ + +// returns 0 if OK, 1 if error +// x and y indicate upper left corner of image location on screen +int +vc_display_lzss_icon(uint32_t dst_x, uint32_t dst_y, + uint32_t image_width, uint32_t image_height, + const uint8_t *compressed_image, + uint32_t compressed_size, + const uint8_t *clut) +{ + uint32_t* image_start; + uint32_t bytes_per_pixel = 4; + uint32_t bytes_per_row = vinfo.v_rowbytes; + + image_start = (uint32_t *) (vinfo.v_baseaddr + (dst_y * bytes_per_row) + (dst_x * bytes_per_pixel)); + + lzss_image_state state = {0, 0, image_width, image_height, bytes_per_row, image_start, clut}; + + int rval = 0; + + const uint8_t *src = compressed_image; + uint32_t srclen = compressed_size; + + /* ring buffer of size N, with extra F-1 bytes to aid string comparison */ + uint8_t text_buf[N + F - 1]; + const uint8_t *srcend = src + srclen; + int i, j, k, r, c; + unsigned int flags; + + srcend = src + srclen; + for (i = 0; i < N - F; i++) + text_buf[i] = ' '; + r = N - F; + flags = 0; + for ( ; ; ) { + if (((flags >>= 1) & 0x100) == 0) { + if (src < srcend) c = *src++; else break; + flags = c | 0xFF00; /* uses higher byte cleverly */ + } /* to count eight */ + if (flags & 1) { + if (src < srcend) c = *src++; else break; + rval = vc_decompress_lzss_next_pixel(c, &state); + if (rval != 0) + return rval; + text_buf[r++] = c; + r &= (N - 1); + } else { + if (src < srcend) i = *src++; else break; + if (src < srcend) j = *src++; else break; + i |= ((j & 0xF0) << 4); + j = (j & 0x0F) + THRESHOLD; + for (k = 0; k <= j; k++) { + c = text_buf[(i + k) & (N - 1)]; + rval = vc_decompress_lzss_next_pixel(c, &state); + if (rval != 0 ) + return rval; + text_buf[r++] = c; + r &= (N - 1); + } + } + } + return 0; +} + +void noroot_icon_test(void) { + boolean_t o_vc_progress_enable = vc_progress_enable; + + vc_progress_enable = 1; + + PE_display_icon( 0, "noroot"); + + vc_progress_enable = o_vc_progress_enable; +} + + void vc_display_icon( vc_progress_element * desc, const unsigned char * data ) { diff --git a/osfmk/i386/AT386/model_dep.c b/osfmk/i386/AT386/model_dep.c index b020ed419..a576ce445 100644 --- a/osfmk/i386/AT386/model_dep.c +++ b/osfmk/i386/AT386/model_dep.c @@ -122,8 +122,11 @@ #include #include +#if DEBUG +#define DPRINTF(x...) kprintf(x) +#else #define DPRINTF(x...) -//#define DPRINTF(x...) kprintf(x) +#endif static void machine_conf(void); @@ -571,7 +574,7 @@ efi_init(void) (void *) (uintptr_t) mptr->VirtualStart, (void *) vm_addr, (void *) vm_size); - pmap_map(vm_addr, phys_addr, phys_addr + round_page(vm_size), + pmap_map_bd(vm_addr, phys_addr, phys_addr + round_page(vm_size), (mptr->Type == kEfiRuntimeServicesCode) ? VM_PROT_READ | VM_PROT_EXECUTE : VM_PROT_READ|VM_PROT_WRITE, (mptr->Type == EfiMemoryMappedIO) ? 
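/*
 * Editor's note: a self-contained sketch of the Okumura-style LZSS scheme the
 * blitter above decodes (ring buffer N=4096, F=18, THRESHOLD=2), writing into
 * a byte buffer instead of the framebuffer. The six-byte sample stream below
 * is hand-made for the demo: three literals followed by one back-reference
 * into the space-initialized ring buffer.
 */
#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

#define N         4096   /* ring buffer size, power of 2 */
#define F         18     /* upper limit for match length */
#define THRESHOLD 2      /* shorter matches are stored as literals */

static size_t lzss_decode(uint8_t *dst, size_t dstlen, const uint8_t *src, size_t srclen)
{
    uint8_t text_buf[N + F - 1];
    const uint8_t *srcend = src + srclen;
    size_t out = 0;
    int i, j, k, r, c;
    unsigned int flags;

    for (i = 0; i < N - F; i++)
        text_buf[i] = ' ';
    r = N - F;
    flags = 0;
    for (;;) {
        if (((flags >>= 1) & 0x100) == 0) {
            if (src < srcend) c = *src++; else break;
            flags = c | 0xFF00;                /* high byte counts eight items */
        }
        if (flags & 1) {                       /* literal byte */
            if (src < srcend) c = *src++; else break;
            if (out < dstlen) dst[out++] = (uint8_t)c;
            text_buf[r++] = (uint8_t)c;
            r &= (N - 1);
        } else {                               /* (position, length) pair */
            if (src < srcend) i = *src++; else break;
            if (src < srcend) j = *src++; else break;
            i |= ((j & 0xF0) << 4);
            j = (j & 0x0F) + THRESHOLD;
            for (k = 0; k <= j; k++) {
                c = text_buf[(i + k) & (N - 1)];
                if (out < dstlen) dst[out++] = (uint8_t)c;
                text_buf[r++] = (uint8_t)c;
                r &= (N - 1);
            }
        }
    }
    return out;
}

int main(void)
{
    const uint8_t sample[] = { 0x07, 'A', 'B', 'C', 0x00, 0x00 };
    uint8_t out[32];
    size_t n = lzss_decode(out, sizeof(out), sample, sizeof(sample));
    printf("%zu bytes: \"%.*s\"\n", n, (int)n, out);   /* "ABC   " */
    return 0;
}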
VM_WIMG_IO : VM_WIMG_USE_DEFAULT); } @@ -580,7 +583,7 @@ efi_init(void) if (args->Version != kBootArgsVersion2) panic("Incompatible boot args version %d revision %d\n", args->Version, args->Revision); - kprintf("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode); + DPRINTF("Boot args version %d revision %d mode %d\n", args->Version, args->Revision, args->efiMode); if (args->efiMode == kBootArgsEfiMode64) { efi_set_tables_64((EFI_SYSTEM_TABLE_64 *) ml_static_ptovirt(args->efiSystemTable)); } else { diff --git a/osfmk/i386/commpage/commpage.c b/osfmk/i386/commpage/commpage.c index cc52576c5..01f8b409e 100644 --- a/osfmk/i386/commpage/commpage.c +++ b/osfmk/i386/commpage/commpage.c @@ -261,8 +261,7 @@ commpage_init_cpu_capabilities( void ) if (tscFreq <= SLOW_TSC_THRESHOLD) /* is TSC too slow for _commpage_nanotime? */ bits |= kSlow; - if (cpuid_features() & CPUID_FEATURE_AES) - bits |= kHasAES; + bits |= (cpuid_features() & CPUID_FEATURE_AES) ? kHasAES : 0; _cpu_capabilities = bits; // set kernel version for use by drivers etc } diff --git a/osfmk/i386/cpu_capabilities.h b/osfmk/i386/cpu_capabilities.h index a820ea7aa..eee6a8173 100644 --- a/osfmk/i386/cpu_capabilities.h +++ b/osfmk/i386/cpu_capabilities.h @@ -58,6 +58,9 @@ #define kUP 0x00008000 /* set if (kNumCPUs == 1) */ #define kNumCPUs 0x00FF0000 /* number of CPUs (see _NumCPUs() below) */ #define kHasAVX1_0 0x01000000 +#define kHasRDRAND 0x02000000 +#define kHasF16C 0x04000000 +#define kHasENFSTRG 0x08000000 #define kNumCPUsShift 16 /* see _NumCPUs() below */ #ifndef __ASSEMBLER__ diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index b92a796e9..a29bfda26 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -35,21 +35,15 @@ #include #include -//#define TOPO_DEBUG 1 -#if TOPO_DEBUG -void debug_topology_print(void); -#define DBG(x...) kprintf("DBG: " x) -#else -#define DBG(x...) -#endif /* TOPO_DEBUG */ - +#define DIVISOR_GUARD(denom) \ + if ((denom) == 0) { \ + kprintf("%s: %d Zero divisor: " #denom, \ + __FILE__, __LINE__); \ + } -void validate_topology(void); +static void debug_topology_print(void); -/* Only for 32bit values */ -#define bit(n) (1U << (n)) -#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) -#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l) +boolean_t topo_dbg = FALSE; x86_pkg_t *x86_pkgs = NULL; uint32_t num_Lx_caches[MAX_CACHE_DEPTH] = { 0 }; @@ -67,6 +61,15 @@ x86_topology_parameters_t topoParms; decl_simple_lock_data(, x86_topo_lock); +static struct cpu_cache { + int level; int type; +} cpu_caches [LCACHE_MAX] = { + [L1D] { 1, CPU_CACHE_TYPE_DATA }, + [L1I] { 1, CPU_CACHE_TYPE_INST }, + [L2U] { 2, CPU_CACHE_TYPE_UNIF }, + [L3U] { 3, CPU_CACHE_TYPE_UNIF }, +}; + static boolean_t cpu_is_hyperthreaded(void) { @@ -107,66 +110,30 @@ x86_cache_alloc(void) static void x86_LLC_info(void) { - uint32_t index; - uint32_t cache_info[4]; - uint32_t cache_level = 0; + int cache_level = 0; uint32_t nCPUsSharing = 1; i386_cpu_info_t *cpuinfo; + struct cpu_cache *cachep; + int i; cpuinfo = cpuid_info(); - do_cpuid(0, cache_info); - - if (cache_info[eax] < 4) { - /* - * Processor does not support deterministic - * cache information. Set LLC sharing to 1, since - * we have no better information. 
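/*
 * Editor's note: a user-space sketch of the debug-print pattern the diff moves
 * cpu_threads.c/cpu_topology.c to: a runtime boolean (set from the "-topo"
 * boot-arg via PE_parse_boot_argn in the kernel) gates a kprintf-style macro,
 * instead of requiring a compile-time TOPO_DEBUG build. Here a command-line
 * argument stands in for the boot-arg; names are illustrative.
 */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>

static bool topo_dbg = false;

#define TOPO_DBG(x...)        \
    do {                      \
        if (topo_dbg)         \
            printf(x);        \
    } while (0)

int main(int argc, char *argv[])
{
    for (int i = 1; i < argc; i++)              /* stand-in for PE_parse_boot_argn("-topo", ...) */
        if (strcmp(argv[i], "-topo") == 0)
            topo_dbg = true;

    TOPO_DBG("Cache Topology Parameters:\n");   /* printed only when -topo was given */
    TOPO_DBG("\tLLC Depth: %d\n", 2);
    return 0;
}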
- */ - if (cpu_is_hyperthreaded()) { - topoParms.nCoresSharingLLC = 1; - topoParms.nLCPUsSharingLLC = 2; - topoParms.maxSharingLLC = 2; - } else { - topoParms.nCoresSharingLLC = 1; - topoParms.nLCPUsSharingLLC = 1; - topoParms.maxSharingLLC = 1; - } - return; - } - - for (index = 0; ; index += 1) { - uint32_t this_level; - - cache_info[eax] = 4; - cache_info[ecx] = index; - cache_info[ebx] = 0; - cache_info[edx] = 0; - - cpuid(cache_info); + for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) { - /* - * See if all levels have been queried. - */ - if (bitfield(cache_info[eax], 4, 0) == 0) - break; - - /* - * Get the current level. - */ - this_level = bitfield(cache_info[eax], 7, 5); + if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0) + continue; /* * Only worry about it if it's a deeper level than * what we've seen before. */ - if (this_level > cache_level) { - cache_level = this_level; + if (cachep->level > cache_level) { + cache_level = cachep->level; /* * Save the number of CPUs sharing this cache. */ - nCPUsSharing = bitfield(cache_info[eax], 25, 14) + 1; + nCPUsSharing = cpuinfo->cache_sharing[i]; } } @@ -204,6 +171,8 @@ initTopoParms(void) cpuinfo = cpuid_info(); + PE_parse_boot_argn("-topo", &topo_dbg, sizeof(topo_dbg)); + /* * We need to start with getting the LLC information correct. */ @@ -212,15 +181,21 @@ initTopoParms(void) /* * Compute the number of threads (logical CPUs) per core. */ + DIVISOR_GUARD(cpuinfo->core_count); topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count; + DIVISOR_GUARD(cpuinfo->cpuid_cores_per_package); topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package; /* * Compute the number of dies per package. */ + DIVISOR_GUARD(topoParms.nCoresSharingLLC); topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC; + DIVISOR_GUARD(topoParms.nPThreadsPerCore); + DIVISOR_GUARD(topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + /* * Compute the number of cores per die. 
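/*
 * Editor's note: a sketch of the reworked x86_LLC_info() logic above: instead
 * of re-issuing CPUID leaf 4, walk a small per-cache table already populated
 * from cpuid_info(), keep the deepest level that is present, and remember how
 * many logical CPUs share it. A DIVISOR_GUARD-style check is shown before the
 * ratio computed afterwards. All cache sizes and sharing counts are made up.
 */
#include <stdio.h>
#include <stdint.h>

enum { L1I, L1D, L2U, L3U, LCACHE_MAX };

struct cache_desc {
    int      level;        /* 1, 2, 3 ...; 0 means "slot unused"      */
    uint32_t size;         /* bytes; 0 means the CPU lacks this cache */
    uint32_t sharing;      /* logical CPUs sharing this cache         */
};

#define DIVISOR_GUARD(d)                                              \
    do {                                                              \
        if ((d) == 0)                                                 \
            fprintf(stderr, "%s:%d zero divisor: " #d "\n",           \
                    __FILE__, __LINE__);                              \
    } while (0)

int main(void)
{
    struct cache_desc caches[LCACHE_MAX] = {
        [L1I] = { 1,  32*1024, 2 },
        [L1D] = { 1,  32*1024, 2 },
        [L2U] = { 2, 256*1024, 2 },
        [L3U] = { 3, 8*1024*1024, 8 },
    };
    int llc_level = 0;
    uint32_t ncpus_sharing_llc = 1;

    for (int i = 0; i < LCACHE_MAX; i++) {
        if (caches[i].level == 0 || caches[i].size == 0)
            continue;                          /* cache not present on this CPU */
        if (caches[i].level > llc_level) {     /* deeper than anything seen yet */
            llc_level = caches[i].level;
            ncpus_sharing_llc = caches[i].sharing;
        }
    }

    uint32_t thread_count = 8;
    DIVISOR_GUARD(ncpus_sharing_llc);
    printf("LLC is L%d, shared by %u CPUs; %u LLC domains\n",
           llc_level, ncpus_sharing_llc, thread_count / ncpus_sharing_llc);
    return 0;
}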
*/ @@ -245,27 +220,27 @@ initTopoParms(void) topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage; topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage; - DBG("\nCache Topology Parameters:\n"); - DBG("\tLLC Depth: %d\n", topoParms.LLCDepth); - DBG("\tCores Sharing LLC: %d\n", topoParms.nCoresSharingLLC); - DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC); - DBG("\tmax Sharing of LLC: %d\n", topoParms.maxSharingLLC); - - DBG("\nLogical Topology Parameters:\n"); - DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore); - DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie); - DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie); - DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage); - DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage); - DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage); - - DBG("\nPhysical Topology Parameters:\n"); - DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore); - DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie); - DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie); - DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage); - DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage); - DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage); + TOPO_DBG("\nCache Topology Parameters:\n"); + TOPO_DBG("\tLLC Depth: %d\n", topoParms.LLCDepth); + TOPO_DBG("\tCores Sharing LLC: %d\n", topoParms.nCoresSharingLLC); + TOPO_DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC); + TOPO_DBG("\tmax Sharing of LLC: %d\n", topoParms.maxSharingLLC); + + TOPO_DBG("\nLogical Topology Parameters:\n"); + TOPO_DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore); + TOPO_DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie); + TOPO_DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie); + TOPO_DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage); + TOPO_DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage); + TOPO_DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage); + + TOPO_DBG("\nPhysical Topology Parameters:\n"); + TOPO_DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore); + TOPO_DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie); + TOPO_DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie); + TOPO_DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage); + TOPO_DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage); + TOPO_DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage); topoParmsInited = TRUE; } @@ -291,50 +266,29 @@ x86_cache_list(void) x86_cpu_cache_t *root = NULL; x86_cpu_cache_t *cur = NULL; x86_cpu_cache_t *last = NULL; - uint32_t index; - uint32_t cache_info[4]; - uint32_t nsets; - - do_cpuid(0, cache_info); - - if (cache_info[eax] < 4) { - /* - * Processor does not support deterministic - * cache information. Don't report anything - */ - return NULL; - } - - for (index = 0; ; index += 1) { - cache_info[eax] = 4; - cache_info[ecx] = index; - cache_info[ebx] = 0; - cache_info[edx] = 0; - - cpuid(cache_info); + struct cpu_cache *cachep; + int i; - /* - * See if all levels have been queried. - */ - if (bitfield(cache_info[eax], 4, 0) == 0) - break; + /* + * Cons up a list driven not by CPUID leaf 4 (deterministic cache params) + * but by the table above plus parameters already cracked from cpuid... 
+ */ + for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) { + if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0) + continue; + cur = x86_cache_alloc(); - if (cur == NULL) { + if (cur == NULL) break; - } - cur->type = bitfield(cache_info[eax], 4, 0); - cur->level = bitfield(cache_info[eax], 7, 5); - cur->nlcpus = (bitfield(cache_info[eax], 25, 14) + 1); - if (cpuid_info()->cpuid_model == 26) - cur->nlcpus /= cpu_is_hyperthreaded() ? 1 : 2; - cur->maxcpus = (bitfield(cache_info[eax], 25, 14) + 1); - cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1; - cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1; - cur->ways = bitfield(cache_info[ebx], 31, 22) + 1; - nsets = bitfield(cache_info[ecx], 31, 0) + 1; - cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets; + cur->type = cachep->type; + cur->level = cachep->level; + cur->nlcpus = 0; + cur->maxcpus = cpuid_info()->cache_sharing[i]; + cur->partitions = cpuid_info()->cache_partitions[i]; + cur->cache_size = cpuid_info()->cache_size[i]; + cur->line_size = cpuid_info()->cache_linesize; if (last == NULL) { root = cur; @@ -343,14 +297,12 @@ x86_cache_list(void) last->next = cur; last = cur; } - - cur->nlcpus = 0; num_Lx_caches[cur->level - 1] += 1; } - - return(root); + return root; } + static x86_cpu_cache_t * x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher) { @@ -361,7 +313,6 @@ x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher) if (cur_cache->maxcpus == matcher->maxcpus && cur_cache->type == matcher->type && cur_cache->level == matcher->level - && cur_cache->ways == matcher->ways && cur_cache->partitions == matcher->partitions && cur_cache->line_size == matcher->line_size && cur_cache->cache_size == matcher->cache_size) @@ -1060,6 +1011,9 @@ validate_topology(void) uint32_t nCores; uint32_t nCPUs; + if (topo_dbg) + debug_topology_print(); + /* * XXX * @@ -1091,13 +1045,13 @@ validate_topology(void) panic("Die %d points to package %d, should be %d", die->pdie_num, die->package->lpkg_num, pkg->lpkg_num); - DBG("Die(%d)->package %d\n", + TOPO_DBG("Die(%d)->package %d\n", die->pdie_num, pkg->lpkg_num); /* * Make sure that the die has the correct number of cores. */ - DBG("Die(%d)->cores: ", die->pdie_num); + TOPO_DBG("Die(%d)->cores: ", die->pdie_num); nCores = 0; core = die->cores; while (core != NULL) { @@ -1108,10 +1062,10 @@ validate_topology(void) panic("Core %d points to die %d, should be %d", core->pcore_num, core->die->pdie_num, die->pdie_num); nCores += 1; - DBG("%d ", core->pcore_num); + TOPO_DBG("%d ", core->pcore_num); core = core->next_in_die; } - DBG("\n"); + TOPO_DBG("\n"); if (nCores != topoParms.nLCoresPerDie) panic("Should have %d Cores, but only found %d for Die %d", @@ -1120,7 +1074,7 @@ validate_topology(void) /* * Make sure that the die has the correct number of CPUs. 
*/ - DBG("Die(%d)->lcpus: ", die->pdie_num); + TOPO_DBG("Die(%d)->lcpus: ", die->pdie_num); nCPUs = 0; lcpu = die->lcpus; while (lcpu != NULL) { @@ -1131,10 +1085,10 @@ validate_topology(void) panic("CPU %d points to die %d, should be %d", lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num); nCPUs += 1; - DBG("%d ", lcpu->cpu_num); + TOPO_DBG("%d ", lcpu->cpu_num); lcpu = lcpu->next_in_die; } - DBG("\n"); + TOPO_DBG("\n"); if (nCPUs != topoParms.nLThreadsPerDie) panic("Should have %d Threads, but only found %d for Die %d", @@ -1160,7 +1114,7 @@ validate_topology(void) if (core->package != pkg) panic("Core %d points to package %d, should be %d", core->pcore_num, core->package->lpkg_num, pkg->lpkg_num); - DBG("Core(%d)->package %d\n", + TOPO_DBG("Core(%d)->package %d\n", core->pcore_num, pkg->lpkg_num); /* @@ -1168,7 +1122,7 @@ validate_topology(void) */ nCPUs = 0; lcpu = core->lcpus; - DBG("Core(%d)->lcpus: ", core->pcore_num); + TOPO_DBG("Core(%d)->lcpus: ", core->pcore_num); while (lcpu != NULL) { if (lcpu->core == NULL) panic("CPU(%d)->core is NULL", @@ -1176,11 +1130,11 @@ validate_topology(void) if (lcpu->core != core) panic("CPU %d points to core %d, should be %d", lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num); - DBG("%d ", lcpu->cpu_num); + TOPO_DBG("%d ", lcpu->cpu_num); nCPUs += 1; lcpu = lcpu->next_in_core; } - DBG("\n"); + TOPO_DBG("\n"); if (nCPUs != topoParms.nLThreadsPerCore) panic("Should have %d Threads, but only found %d for Core %d", @@ -1205,7 +1159,7 @@ validate_topology(void) if (lcpu->package != pkg) panic("CPU %d points to package %d, should be %d", lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num); - DBG("CPU(%d)->package %d\n", + TOPO_DBG("CPU(%d)->package %d\n", lcpu->cpu_num, pkg->lpkg_num); nCPUs += 1; lcpu = lcpu->next_in_pkg; @@ -1219,11 +1173,10 @@ validate_topology(void) } } -#if TOPO_DEBUG /* * Prints out the topology */ -void +static void debug_topology_print(void) { x86_pkg_t *pkg; @@ -1276,4 +1229,3 @@ debug_topology_print(void) pkg = pkg->next; } } -#endif /* TOPO_DEBUG */ diff --git a/osfmk/i386/cpu_threads.h b/osfmk/i386/cpu_threads.h index fc7ef83e6..a576ef70d 100644 --- a/osfmk/i386/cpu_threads.h +++ b/osfmk/i386/cpu_threads.h @@ -75,4 +75,13 @@ extern void x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu); extern x86_topology_parameters_t topoParms; +extern boolean_t topo_dbg; +#define TOPO_DBG(x...) \ + do { \ + if (topo_dbg) \ + kprintf(x); \ + } while (0) \ + +extern void validate_topology(void); + #endif /* _I386_CPU_THREADS_H_ */ diff --git a/osfmk/i386/cpu_topology.c b/osfmk/i386/cpu_topology.c index 24c4f5c81..6be77e6ff 100644 --- a/osfmk/i386/cpu_topology.c +++ b/osfmk/i386/cpu_topology.c @@ -38,15 +38,6 @@ #include #include -//#define TOPO_DEBUG 1 -#if TOPO_DEBUG -#define DBG(x...) kprintf("DBG: " x) -#else -#define DBG(x...) -#endif -void debug_topology_print(void); -void validate_topology(void); - __private_extern__ void qsort( void * array, size_t nmembers, @@ -85,15 +76,16 @@ cpu_topology_sort(int ncpus) /* Lights out for this */ istate = ml_set_interrupts_enabled(FALSE); -#ifdef TOPO_DEBUG - DBG("cpu_topology_start() %d cpu%s registered\n", - ncpus, (ncpus > 1) ? "s" : ""); - for (i = 0; i < ncpus; i++) { - cpu_data_t *cpup = cpu_datap(i); - DBG("\tcpu_data[%d]:0x%08x local apic 0x%x\n", - i, (unsigned) cpup, cpup->cpu_phys_number); + if (topo_dbg) { + TOPO_DBG("cpu_topology_start() %d cpu%s registered\n", + ncpus, (ncpus > 1) ? 
"s" : ""); + for (i = 0; i < ncpus; i++) { + cpu_data_t *cpup = cpu_datap(i); + TOPO_DBG("\tcpu_data[%d]:%p local apic 0x%x\n", + i, (void *) cpup, cpup->cpu_phys_number); + } } -#endif + /* * Re-order the cpu_data_ptr vector sorting by physical id. * Skip the boot processor, it's required to be correct. @@ -104,14 +96,14 @@ cpu_topology_sort(int ncpus) sizeof(cpu_data_t *), lapicid_cmp); } -#ifdef TOPO_DEBUG - DBG("cpu_topology_start() after sorting:\n"); - for (i = 0; i < ncpus; i++) { - cpu_data_t *cpup = cpu_datap(i); - DBG("\tcpu_data[%d]:0x%08x local apic 0x%x\n", - i, (unsigned) cpup, cpup->cpu_phys_number); + if (topo_dbg) { + TOPO_DBG("cpu_topology_start() after sorting:\n"); + for (i = 0; i < ncpus; i++) { + cpu_data_t *cpup = cpu_datap(i); + TOPO_DBG("\tcpu_data[%d]:%p local apic 0x%x\n", + i, (void *) cpup, cpup->cpu_phys_number); + } } -#endif /* * Fix up logical numbers and reset the map kept by the lapic code. @@ -142,13 +134,10 @@ cpu_topology_sort(int ncpus) x86_set_pkg_numbers(pkg, &cpup->lcpu); } -#if TOPO_DEBUG - debug_topology_print(); -#endif /* TOPO_DEBUG */ validate_topology(); ml_set_interrupts_enabled(istate); - DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1); + TOPO_DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1); /* * Let the CPU Power Management know that the topology is stable. @@ -161,7 +150,7 @@ cpu_topology_sort(int ncpus) * for their LLC cache. Each affinity set possesses a processor set * into which each logical processor is added. */ - DBG("cpu_topology_start() creating affinity sets:\n"); + TOPO_DBG("cpu_topology_start() creating affinity sets:\n"); for (i = 0; i < ncpus; i++) { cpu_data_t *cpup = cpu_datap(i); x86_lcpu_t *lcpup = cpu_to_lcpu(i); @@ -184,11 +173,11 @@ cpu_topology_sort(int ncpus) pset_create(pset_node_root()); if (aset->pset == PROCESSOR_SET_NULL) panic("cpu_topology_start: pset_create"); - DBG("\tnew set %p(%d) pset %p for cache %p\n", + TOPO_DBG("\tnew set %p(%d) pset %p for cache %p\n", aset, aset->num, aset->pset, aset->cache); } - DBG("\tprocessor_init set %p(%d) lcpup %p(%d) cpu %p processor %p\n", + TOPO_DBG("\tprocessor_init set %p(%d) lcpup %p(%d) cpu %p processor %p\n", aset, aset->num, lcpup, lcpup->cpu_num, cpup, cpup->cpu_processor); if (i != master_cpu) @@ -213,10 +202,10 @@ cpu_topology_start_cpu( int cpunum ) int i = cpunum; /* Decide whether to start a CPU, and actually start it */ - DBG("cpu_topology_start() processor_start():\n"); + TOPO_DBG("cpu_topology_start() processor_start():\n"); if( i < ncpus) { - DBG("\tlcpu %d\n", cpu_datap(i)->cpu_number); + TOPO_DBG("\tlcpu %d\n", cpu_datap(i)->cpu_number); processor_start(cpu_datap(i)->cpu_processor); return KERN_SUCCESS; } @@ -230,7 +219,7 @@ lapicid_cmp(const void *x, const void *y) cpu_data_t *cpu_x = *((cpu_data_t **)(uintptr_t)x); cpu_data_t *cpu_y = *((cpu_data_t **)(uintptr_t)y); - DBG("lapicid_cmp(%p,%p) (%d,%d)\n", + TOPO_DBG("lapicid_cmp(%p,%p) (%d,%d)\n", x, y, cpu_x->cpu_phys_number, cpu_y->cpu_phys_number); if (cpu_x->cpu_phys_number < cpu_y->cpu_phys_number) return -1; diff --git a/osfmk/i386/cpuid.c b/osfmk/i386/cpuid.c index c6891aefb..b72a4b14f 100644 --- a/osfmk/i386/cpuid.c +++ b/osfmk/i386/cpuid.c @@ -45,6 +45,18 @@ #include #endif +static boolean_t cpuid_dbg +#if DEBUG + = TRUE; +#else + = FALSE; +#endif +#define DBG(x...) \ + do { \ + if (cpuid_dbg) \ + kprintf(x); \ + } while (0) \ + #define min(a,b) ((a) < (b) ? 
(a) : (b)) #define quad(hi,lo) (((uint64_t)(hi)) << 32 | (lo)) @@ -231,6 +243,8 @@ static i386_cpu_info_t cpuid_cpu_info; static void cpuid_fn(uint32_t selector, uint32_t *result) { do_cpuid(selector, result); + DBG("cpuid_fn(0x%08x) eax:0x%08x ebx:0x%08x ecx:0x%08x edx:0x%08x\n", + selector, result[0], result[1], result[2], result[3]); } #else static void cpuid_fn(uint32_t selector, uint32_t *result) @@ -248,9 +262,15 @@ static void cpuid_fn(uint32_t selector, uint32_t *result) } else { do_cpuid(selector, result); } + DBG("cpuid_fn(0x%08x) eax:0x%08x ebx:0x%08x ecx:0x%08x edx:0x%08x\n", + selector, result[0], result[1], result[2], result[3]); } #endif +static const char *cache_type_str[LCACHE_MAX] = { + "Lnone", "L1I", "L1D", "L2U", "L3U" +}; + /* this function is Intel-specific */ static void cpuid_set_cache_info( i386_cpu_info_t * info_p ) @@ -263,6 +283,8 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) unsigned int j; boolean_t cpuid_deterministic_supported = FALSE; + DBG("cpuid_set_cache_info(%p)\n", info_p); + bzero( linesizes, sizeof(linesizes) ); /* Get processor cache descriptor info using leaf 2. We don't use @@ -311,7 +333,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) reg[eax] = 4; /* cpuid request 4 */ reg[ecx] = index; /* index starting at 0 */ cpuid(reg); -//kprintf("cpuid(4) index=%d eax=%p\n", index, reg[eax]); + DBG("cpuid(4) index=%d eax=0x%x\n", index, reg[eax]); cache_type = bitfield32(reg[eax], 4, 0); if (cache_type == 0) break; /* no more caches */ @@ -354,6 +376,13 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) info_p->cache_partitions[type] = cache_partitions; linesizes[type] = cache_linesize; + DBG(" cache_size[%s] : %d\n", + cache_type_str[type], cache_size); + DBG(" cache_sharing[%s] : %d\n", + cache_type_str[type], cache_sharing); + DBG(" cache_partitions[%s]: %d\n", + cache_type_str[type], cache_partitions); + /* * Overwrite associativity determined via * CPUID.0x80000006 -- this leaf is more @@ -389,6 +418,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) vm_cache_geometry_colors = colors; } } + DBG(" vm_cache_geometry_colors: %d\n", vm_cache_geometry_colors); /* * If deterministic cache parameters are not available, use @@ -403,6 +433,13 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) info_p->cache_partitions[L2U] = 1; linesizes[L2U] = info_p->cpuid_cache_linesize; + + DBG(" cache_size[L2U] : %d\n", + info_p->cache_size[L2U]); + DBG(" cache_sharing[L2U] : 1\n"); + DBG(" cache_partitions[L2U]: 1\n"); + DBG(" linesizes[L2U] : %d\n", + info_p->cpuid_cache_linesize); } /* @@ -414,16 +451,19 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) else if (linesizes[L1D]) info_p->cache_linesize = linesizes[L1D]; else panic("no linesize"); + DBG(" cache_linesize : %d\n", info_p->cache_linesize); /* * Extract and publish TLB information from Leaf 2 descriptors. 
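/*
 * Editor's note: a sketch of the deterministic cache-parameter decode that
 * cpuid_set_cache_info() performs above with CPUID leaf 4. Bitfields of the
 * returned registers give type, level, sharing, ways, partitions, line size
 * and set count; the cache size is their product. The register values below
 * are hand-made sample numbers (a 32 KB, 8-way, 64-byte-line data cache),
 * not values read from hardware.
 */
#include <stdio.h>
#include <stdint.h>

/* Extract bits h..l of x; assumes the field is narrower than 32 bits. */
static uint32_t bitfield32(uint32_t x, int h, int l)
{
    return (x >> l) & ((1u << (h - l + 1)) - 1);
}

int main(void)
{
    uint32_t eax = 0x00004021;   /* type=1 (data), level=1, 2 threads sharing */
    uint32_t ebx = 0x01C0003F;   /* ways-1=7, partitions-1=0, linesize-1=63   */
    uint32_t ecx = 63;           /* sets - 1                                  */

    uint32_t type       = bitfield32(eax, 4, 0);
    uint32_t level      = bitfield32(eax, 7, 5);
    uint32_t sharing    = bitfield32(eax, 25, 14) + 1;
    uint32_t linesize   = bitfield32(ebx, 11, 0) + 1;
    uint32_t partitions = bitfield32(ebx, 21, 12) + 1;
    uint32_t ways       = bitfield32(ebx, 31, 22) + 1;
    uint32_t sets       = ecx + 1;

    printf("L%u type %u: %u bytes (%u ways x %u parts x %u B lines x %u sets), "
           "shared by %u threads\n",
           level, type, linesize * ways * partitions * sets,
           ways, partitions, linesize, sets, sharing);
    return 0;
}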
*/ + DBG(" %ld leaf2 descriptors:\n", sizeof(info_p->cache_info)); for (i = 1; i < sizeof(info_p->cache_info); i++) { cpuid_cache_descriptor_t *descp; int id; int level; int page; + DBG(" 0x%02x", info_p->cache_info[i]); descp = cpuid_leaf2_find(info_p->cache_info[i]); if (descp == NULL) continue; @@ -458,6 +498,7 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p ) info_p->cpuid_stlb = descp->entries; } } + DBG("\n"); } static void @@ -466,6 +507,8 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) uint32_t reg[4]; char str[128], *p; + DBG("cpuid_set_generic_info(%p)\n", info_p); + /* do cpuid 0 to get vendor */ cpuid_fn(0, reg); info_p->cpuid_max_basic = reg[eax]; @@ -575,11 +618,30 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) quad(reg[ecx], reg[edx]); } + DBG(" max_basic : %d\n", info_p->cpuid_max_basic); + DBG(" max_ext : 0x%08x\n", info_p->cpuid_max_ext); + DBG(" vendor : %s\n", info_p->cpuid_vendor); + DBG(" brand_string : %s\n", info_p->cpuid_brand_string); + DBG(" signature : 0x%08x\n", info_p->cpuid_signature); + DBG(" stepping : %d\n", info_p->cpuid_stepping); + DBG(" model : %d\n", info_p->cpuid_model); + DBG(" family : %d\n", info_p->cpuid_family); + DBG(" type : %d\n", info_p->cpuid_type); + DBG(" extmodel : %d\n", info_p->cpuid_extmodel); + DBG(" extfamily : %d\n", info_p->cpuid_extfamily); + DBG(" brand : %d\n", info_p->cpuid_brand); + DBG(" features : 0x%016llx\n", info_p->cpuid_features); + DBG(" extfeatures : 0x%016llx\n", info_p->cpuid_extfeatures); + DBG(" logical_per_package : %d\n", info_p->cpuid_logical_per_package); + DBG(" microcode_version : 0x%08x\n", info_p->cpuid_microcode_version); + /* Fold in the Invariant TSC feature bit, if present */ if (info_p->cpuid_max_ext >= 0x80000007) { cpuid_fn(0x80000007, reg); info_p->cpuid_extfeatures |= reg[edx] & (uint32_t)CPUID_EXTFEATURE_TSCI; + DBG(" extfeatures : 0x%016llx\n", + info_p->cpuid_extfeatures); } if (info_p->cpuid_max_basic >= 0x5) { @@ -594,6 +656,12 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) cmp->extensions = reg[ecx]; cmp->sub_Cstates = reg[edx]; info_p->cpuid_mwait_leafp = cmp; + + DBG(" Monitor/Mwait Leaf:\n"); + DBG(" linesize_min : %d\n", cmp->linesize_min); + DBG(" linesize_max : %d\n", cmp->linesize_max); + DBG(" extensions : %d\n", cmp->extensions); + DBG(" sub_Cstates : 0x%08x\n", cmp->sub_Cstates); } if (info_p->cpuid_max_basic >= 0x6) { @@ -614,6 +682,18 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) ctp->hardware_feedback = bitfield32(reg[ecx], 1, 1); ctp->energy_policy = bitfield32(reg[ecx], 2, 2); info_p->cpuid_thermal_leafp = ctp; + + DBG(" Thermal/Power Leaf:\n"); + DBG(" sensor : %d\n", ctp->sensor); + DBG(" dynamic_acceleration : %d\n", ctp->dynamic_acceleration); + DBG(" invariant_APIC_timer : %d\n", ctp->invariant_APIC_timer); + DBG(" core_power_limits : %d\n", ctp->core_power_limits); + DBG(" fine_grain_clock_mod : %d\n", ctp->fine_grain_clock_mod); + DBG(" package_thermal_intr : %d\n", ctp->package_thermal_intr); + DBG(" thresholds : %d\n", ctp->thresholds); + DBG(" ACNT_MCNT : %d\n", ctp->ACNT_MCNT); + DBG(" hardware_feedback : %d\n", ctp->hardware_feedback); + DBG(" energy_policy : %d\n", ctp->energy_policy); } if (info_p->cpuid_max_basic >= 0xa) { @@ -631,6 +711,15 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) capp->fixed_number = bitfield32(reg[edx], 4, 0); capp->fixed_width = bitfield32(reg[edx], 12, 5); info_p->cpuid_arch_perf_leafp = capp; + + DBG(" Architectural Performance Monitoring Leaf:\n"); + DBG(" version : %d\n", capp->version); + DBG(" number : 
%d\n", capp->number); + DBG(" width : %d\n", capp->width); + DBG(" events_number : %d\n", capp->events_number); + DBG(" events : %d\n", capp->events); + DBG(" fixed_number : %d\n", capp->fixed_number); + DBG(" fixed_width : %d\n", capp->fixed_width); } if (info_p->cpuid_max_basic >= 0xd) { @@ -640,6 +729,12 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p) */ cpuid_fn(0xd, info_p->cpuid_xsave_leaf.extended_state); info_p->cpuid_xsave_leafp = xsp; + + DBG(" XSAVE Leaf:\n"); + DBG(" EAX : 0x%x\n", xsp->extended_state[eax]); + DBG(" EBX : 0x%x\n", xsp->extended_state[ebx]); + DBG(" ECX : 0x%x\n", xsp->extended_state[ecx]); + DBG(" EDX : 0x%x\n", xsp->extended_state[edx]); } return; @@ -684,6 +779,7 @@ cpuid_set_cpufamily(i386_cpu_info_t *info_p) } info_p->cpuid_cpufamily = cpufamily; + DBG("cpuid_set_cpufamily(%p) returning 0x%x\n", info_p, cpufamily); return cpufamily; } /* @@ -694,7 +790,9 @@ void cpuid_set_info(void) { i386_cpu_info_t *info_p = &cpuid_cpu_info; - + + PE_parse_boot_argn("-cpuid", &cpuid_dbg, sizeof(cpuid_dbg)); + bzero((void *)info_p, sizeof(cpuid_cpu_info)); cpuid_set_generic_info(info_p); @@ -734,11 +832,14 @@ cpuid_set_info(void) info_p->core_count = info_p->cpuid_cores_per_package; info_p->thread_count = info_p->cpuid_logical_per_package; } + DBG("cpuid_set_info():\n"); + DBG(" core_count : %d\n", info_p->core_count); + DBG(" thread_count : %d\n", info_p->thread_count); cpuid_cpu_info.cpuid_model_string = ""; /* deprecated */ } -static struct { +static struct table { uint64_t mask; const char *name; } feature_map[] = { @@ -811,6 +912,28 @@ extfeature_map[] = { {0, 0} }; +static char * +cpuid_get_names(struct table *map, uint64_t bits, char *buf, unsigned buf_len) +{ + size_t len = 0; + char *p = buf; + int i; + + for (i = 0; map[i].mask != 0; i++) { + if ((bits & map[i].mask) == 0) + continue; + if (len && ((size_t) (p - buf) < (buf_len - 1))) + *p++ = ' '; + len = min(strlen(map[i].name), (size_t)((buf_len-1)-(p-buf))); + if (len == 0) + break; + bcopy(map[i].name, p, len); + p += len; + } + *p = '\0'; + return buf; +} + i386_cpu_info_t * cpuid_info(void) { @@ -825,58 +948,24 @@ cpuid_info(void) char * cpuid_get_feature_names(uint64_t features, char *buf, unsigned buf_len) { - size_t len = 0; - char *p = buf; - int i; - - for (i = 0; feature_map[i].mask != 0; i++) { - if ((features & feature_map[i].mask) == 0) - continue; - if (len && ((size_t)(p - buf) < (buf_len - 1))) - *p++ = ' '; - - len = min(strlen(feature_map[i].name), (size_t) ((buf_len-1) - (p-buf))); - if (len == 0) - break; - bcopy(feature_map[i].name, p, len); - p += len; - } - *p = '\0'; - return buf; + return cpuid_get_names(feature_map, features, buf, buf_len); } char * cpuid_get_extfeature_names(uint64_t extfeatures, char *buf, unsigned buf_len) { - size_t len = 0; - char *p = buf; - int i; - - for (i = 0; extfeature_map[i].mask != 0; i++) { - if ((extfeatures & extfeature_map[i].mask) == 0) - continue; - if (len && ((size_t) (p - buf) < (buf_len - 1))) - *p++ = ' '; - len = min(strlen(extfeature_map[i].name), (size_t) ((buf_len-1)-(p-buf))); - if (len == 0) - break; - bcopy(extfeature_map[i].name, p, len); - p += len; - } - *p = '\0'; - return buf; + return cpuid_get_names(extfeature_map, extfeatures, buf, buf_len); } - void cpuid_feature_display( const char *header) { char buf[256]; - kprintf("%s: %s\n", header, - cpuid_get_feature_names(cpuid_features(), - buf, sizeof(buf))); + kprintf("%s: %s", header, + cpuid_get_feature_names(cpuid_features(), buf, sizeof(buf))); + kprintf("\n"); if 
(cpuid_features() & CPUID_FEATURE_HTT) { #define s_if_plural(n) ((n > 1) ? "s" : "") kprintf(" HTT: %d core%s per package;" @@ -962,7 +1051,7 @@ cpuid_extfeatures(void) return cpuid_info()->cpuid_extfeatures; } - + #if MACH_KDB /* diff --git a/osfmk/i386/cpuid.h b/osfmk/i386/cpuid.h index 51bd428f6..c95328961 100644 --- a/osfmk/i386/cpuid.h +++ b/osfmk/i386/cpuid.h @@ -94,24 +94,34 @@ #define CPUID_FEATURE_TM2 _HBit(8) /* Thermal Monitor 2 */ #define CPUID_FEATURE_SSSE3 _HBit(9) /* Supplemental SSE3 instructions */ #define CPUID_FEATURE_CID _HBit(10) /* L1 Context ID */ +#define CPUID_FEATURE_SEGLIM64 _HBit(11) /* 64-bit segment limit checking */ #define CPUID_FEATURE_CX16 _HBit(13) /* CmpXchg16b instruction */ #define CPUID_FEATURE_xTPR _HBit(14) /* Send Task PRiority msgs */ #define CPUID_FEATURE_PDCM _HBit(15) /* Perf/Debug Capability MSR */ +#define CPUID_FEATURE_PCID _HBit(17) /* ASID-PCID support */ #define CPUID_FEATURE_DCA _HBit(18) /* Direct Cache Access */ #define CPUID_FEATURE_SSE4_1 _HBit(19) /* Streaming SIMD extensions 4.1 */ #define CPUID_FEATURE_SSE4_2 _HBit(20) /* Streaming SIMD extensions 4.2 */ #define CPUID_FEATURE_xAPIC _HBit(21) /* Extended APIC Mode */ #define CPUID_FEATURE_MOVBE _HBit(22) /* MOVBE instruction */ #define CPUID_FEATURE_POPCNT _HBit(23) /* POPCNT instruction */ +#define CPUID_FEATURE_TSCTMR _HBit(24) /* TSC deadline timer */ #define CPUID_FEATURE_AES _HBit(25) /* AES instructions */ #define CPUID_FEATURE_XSAVE _HBit(26) /* XSAVE instructions */ #define CPUID_FEATURE_OSXSAVE _HBit(27) /* XGETBV/XSETBV instructions */ -#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ -#define CPUID_FEATURE_SEGLIM64 _HBit(11) /* 64-bit segment limit checking */ -#define CPUID_FEATURE_PCID _HBit(17) /* ASID-PCID support */ -#define CPUID_FEATURE_TSCTMR _HBit(24) /* TSC deadline timer */ #define CPUID_FEATURE_AVX1_0 _HBit(28) /* AVX 1.0 instructions */ +#define CPUID_FEATURE_VMM _HBit(31) /* VMM (Hypervisor) present */ +#define CPUID_FEATURE_RDRAND _HBit(29) /* RDRAND instruction */ +#define CPUID_FEATURE_F16C _HBit(30) /* Float16 convert instructions */ + +/* + * Leaf 7, subleaf 0 additional features. 
+ * Bits returned in %ebx to a CPUID request with {%eax,%ecx} of (0x7,0x0}: + */ +#define CPUID_LEAF7_FEATURE_RDWRFSGS _Bit(0) /* FS/GS base read/write */ +#define CPUID_LEAF7_FEATURE_SMEP _Bit(7) /* Supervisor Mode Execute Protect */ +#define CPUID_LEAF7_FEATURE_ENFSTRG _Bit(9) /* ENhanced Fast STRinG copy */ /* * The CPUID_EXTFEATURE_XXX values define 64-bit values @@ -150,6 +160,7 @@ #define CPUID_MODEL_SANDYBRIDGE 0x2A #define CPUID_MODEL_JAKETOWN 0x2D + #ifndef ASSEMBLER #include #include @@ -322,6 +333,7 @@ typedef struct { cpuid_thermal_leaf_t *cpuid_thermal_leafp; cpuid_arch_perf_leaf_t *cpuid_arch_perf_leafp; cpuid_xsave_leaf_t *cpuid_xsave_leafp; + uint32_t cpuid_leaf7_features; } i386_cpu_info_t; #ifdef __cplusplus @@ -338,9 +350,11 @@ extern void cpuid_feature_display(const char *); extern void cpuid_extfeature_display(const char *); extern char * cpuid_get_feature_names(uint64_t, char *, unsigned); extern char * cpuid_get_extfeature_names(uint64_t, char *, unsigned); +extern char * cpuid_get_leaf7_feature_names(uint64_t, char *, unsigned); extern uint64_t cpuid_features(void); extern uint64_t cpuid_extfeatures(void); +extern uint64_t cpuid_leaf7_features(void); extern uint32_t cpuid_family(void); extern uint32_t cpuid_cpufamily(void); diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 7227b93a2..478eb2b4e 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -485,7 +485,7 @@ fpu_set_fxstate( struct x86_fx_thread_state *new_ifps; x86_float_state64_t *state; pcb_t pcb; - size_t state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state); + size_t state_size = sizeof(struct x86_fx_thread_state); boolean_t old_valid; if (fp_kind == FP_NO) return KERN_FAILURE; @@ -538,11 +538,29 @@ fpu_set_fxstate( panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act); } #endif + /* + * Clear any reserved bits in the MXCSR to prevent a GPF + * when issuing an FXRSTOR. + */ + + state->fpu_mxcsr &= mxcsr_capability_mask; bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size); if (fpu_YMM_present) { struct x86_avx_thread_state *iavx = (void *) ifps; + uint32_t fpu_nyreg = 0; + + if (f == x86_AVX_STATE32) + fpu_nyreg = 8; + else if (f == x86_AVX_STATE64) + fpu_nyreg = 16; + + if (fpu_nyreg) { + x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; + bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG)); + } + iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; /* Sanitize XSAVE header */ bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); @@ -561,11 +579,6 @@ fpu_set_fxstate( set_ts(); ml_set_interrupts_enabled(istate); } - /* - * Clear any reserved bits in the MXCSR to prevent a GPF - * when issuing an FXRSTOR. - */ - ifps->fx_MXCSR &= mxcsr_capability_mask; simple_unlock(&pcb->lock); @@ -591,7 +604,7 @@ fpu_get_fxstate( x86_float_state64_t *state; kern_return_t ret = KERN_FAILURE; pcb_t pcb; - size_t state_size = (((f == x86_AVX_STATE32) || (f == x86_AVX_STATE64)) && (fpu_YMM_present == TRUE)) ? 
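/*
 * Editor's note: a minimal user-space sketch of querying CPUID leaf 7,
 * subleaf 0 as described in the comment above, then testing two of the %ebx
 * feature bits the diff defines (SMEP, enhanced fast string copy). The inline
 * asm is x86-only; the code first checks that leaf 7 is supported at all.
 */
#include <stdio.h>
#include <stdint.h>

#if defined(__i386__) || defined(__x86_64__)
static void cpuid_q(uint32_t leaf, uint32_t subleaf, uint32_t out[4])
{
    __asm__ volatile("cpuid"
                     : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
                     : "a"(leaf), "c"(subleaf));
}

int main(void)
{
    uint32_t r[4];

    cpuid_q(0, 0, r);                 /* r[0] = highest supported basic leaf */
    if (r[0] < 7) {
        printf("leaf 7 not supported\n");
        return 0;
    }
    cpuid_q(7, 0, r);
    printf("SMEP    : %s\n", (r[1] & (1u << 7)) ? "yes" : "no");
    printf("ENFSTRG : %s\n", (r[1] & (1u << 9)) ? "yes" : "no");
    return 0;
}
#else
int main(void) { printf("x86 only\n"); return 0; }
#endif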
sizeof(struct x86_avx_thread_state) : sizeof(struct x86_fx_thread_state); + size_t state_size = sizeof(struct x86_fx_thread_state); if (fp_kind == FP_NO) return KERN_FAILURE; @@ -633,6 +646,21 @@ fpu_get_fxstate( } if (ifps->fp_valid) { bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size); + if (fpu_YMM_present) { + struct x86_avx_thread_state *iavx = (void *) ifps; + uint32_t fpu_nyreg = 0; + + if (f == x86_AVX_STATE32) + fpu_nyreg = 8; + else if (f == x86_AVX_STATE64) + fpu_nyreg = 16; + + if (fpu_nyreg) { + x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; + bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG)); + } + } + ret = KERN_SUCCESS; } simple_unlock(&pcb->lock); diff --git a/osfmk/i386/hibernate_restore.c b/osfmk/i386/hibernate_restore.c index bf0508a69..47a5b9c7a 100644 --- a/osfmk/i386/hibernate_restore.c +++ b/osfmk/i386/hibernate_restore.c @@ -33,9 +33,6 @@ extern pd_entry_t BootstrapPTD[2048]; -#define TWO_MEG_MASK 0xFFFFFFFFFFE00000ULL -#define FOUR_K_MASK 0xFFFFFFFFFFFFF000ULL - // src is virtually mapped, not page aligned, // dst is a physical 4k page aligned ptr, len is one 4K page // src & dst will not overlap @@ -63,63 +60,41 @@ hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t p void hibprintf(const char *fmt, ...); -void -pal_hib_window_setup(ppnum_t page) -{ - uint64_t *pp; - uint64_t phys = ptoa_64(page); - int i; - - BootstrapPTD[2047] = (phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE; - - invlpg(HIB_PTES); - - pp = (uint64_t *)(uintptr_t)(HIB_PTES + (phys & I386_LPGMASK)); - - for(i=0;i<512;i++) - *pp = 0; - - pp[0] = phys | INTEL_PTE_VALID | INTEL_PTE_WRITE; - BootstrapPTD[2047] = phys | INTEL_PTE_VALID | INTEL_PTE_WRITE; - - invlpg(HIB_PTES); -} - uintptr_t -pal_hib_map(uintptr_t v, uint64_t p) +pal_hib_map(uintptr_t virt, uint64_t phys) { - int index; - - switch(v) { - case DEST_COPY_AREA: - index = 1; - break; - case SRC_COPY_AREA: - index = 2; - break; - case COPY_PAGE_AREA: - index = 3; - break; - default: - index = -1; - asm("cli;hlt;"); - } - - uint64_t *ptes = (uint64_t *)HIB_PTES; - - /* Outside 1-1 4G map so set up the mappings for the dest page using 2MB pages */ - ptes[index] = (p & FOUR_K_MASK) | INTEL_PTE_VALID | INTEL_PTE_WRITE; - - /* Invalidate the page tables for this */ - invlpg((uintptr_t)v); - - return v; + uintptr_t index; + + switch (virt) + { + case DEST_COPY_AREA: + case SRC_COPY_AREA: + case COPY_PAGE_AREA: + case BITMAP_AREA: + case IMAGE_AREA: + case IMAGE2_AREA: + break; + default: + asm("cli;hlt;"); + break; + } + + index = (virt >> I386_LPGSHIFT); + virt += (uintptr_t)(phys & I386_LPGMASK); + phys = ((phys & ~((uint64_t)I386_LPGMASK)) | INTEL_PTE_PS | INTEL_PTE_VALID | INTEL_PTE_WRITE); + BootstrapPTD[index] = phys; + invlpg(virt); + BootstrapPTD[index + 1] = (phys + I386_LPGBYTES); + invlpg(virt + I386_LPGBYTES); + + return (virt); } void hibernateRestorePALState(uint32_t *arg) { - (void)arg; + (void)arg; } + void pal_hib_patchup(void) { diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c index 596888b5f..560a88ffc 100644 --- a/osfmk/i386/i386_init.c +++ b/osfmk/i386/i386_init.c @@ -199,7 +199,7 @@ x86_64_post_sleep(uint64_t new_cr3) // Set up the physical mapping - NPHYSMAP GB of memory mapped at a high address // NPHYSMAP is determined by the maximum supported RAM size plus 4GB to account // the PCI hole (which is less 4GB but not more). 
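/*
 * Editor's note: a sketch of the arithmetic in the reworked pal_hib_map()
 * above: the preallocated virtual window selects a page-directory slot by
 * shifting out the 2 MB large-page bits, the physical address is aligned down
 * to 2 MB and mapped with two consecutive large-page entries, and the caller
 * gets back the window address plus the offset into the large page. The PTE
 * flag values and the sample addresses are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define LPGSHIFT   21                        /* 2 MB large pages */
#define LPGBYTES   (1ULL << LPGSHIFT)
#define LPGMASK    (LPGBYTES - 1)

#define PTE_VALID  (1ULL << 0)               /* stand-ins for INTEL_PTE_* */
#define PTE_WRITE  (1ULL << 1)
#define PTE_PS     (1ULL << 7)

static uint64_t page_dir[2048];              /* stand-in for BootstrapPTD */

static uintptr_t hib_map_sketch(uintptr_t virt, uint64_t phys)
{
    uintptr_t index = virt >> LPGSHIFT;      /* which 2 MB slot the window uses */

    virt += (uintptr_t)(phys & LPGMASK);     /* keep the offset within the page */
    phys  = (phys & ~(uint64_t)LPGMASK) | PTE_PS | PTE_VALID | PTE_WRITE;

    page_dir[index]     = phys;              /* map 2 MB ...                    */
    page_dir[index + 1] = phys + LPGBYTES;   /* ... plus the following 2 MB     */
    /* the kernel would invlpg both window addresses here */

    return virt;
}

int main(void)
{
    uintptr_t window = 0xFF800000u;          /* sample preallocated window */
    uint64_t  target = 0x12345678ULL;        /* sample physical address    */
    printf("mapped at 0x%lx (slot %lu)\n",
           (unsigned long)hib_map_sketch(window, target),
           (unsigned long)(window >> LPGSHIFT));
    return 0;
}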
-#define NPHYSMAP MAX(K64_MAXMEM/GB + 4, 4) + // Compile-time guard: extern int maxphymapsupported[NPHYSMAP <= PTE_PER_PAGE ? 1 : -1]; static void @@ -335,9 +335,22 @@ vstart(vm_offset_t boot_args_start) cpu = 0; cpu_data_alloc(TRUE); + + + /* + * Setup boot args given the physical start address. + */ + kernelBootArgs = (boot_args *) + ml_static_ptovirt(boot_args_start); + DBG("i386_init(0x%lx) kernelBootArgs=%p\n", + (unsigned long)boot_args_start, kernelBootArgs); + + PE_init_platform(FALSE, kernelBootArgs); + postcode(PE_INIT_PLATFORM_D); } else { /* Find our logical cpu number */ cpu = lapic_to_cpu[(LAPIC_READ(ID)>>LAPIC_ID_SHIFT) & LAPIC_ID_MASK]; + DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE)); #ifdef __x86_64__ if (cpuid_extfeatures() & CPUID_EXTFEATURE_XD) { wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_NXE); @@ -373,7 +386,7 @@ vstart(vm_offset_t boot_args_start) } if (is_boot_cpu) - i386_init(boot_args_start); + i386_init(); else i386_init_slave(); /*NOTREACHED*/ @@ -406,7 +419,7 @@ vstart(vm_offset_t boot_args_start) * set up. */ void -i386_init(vm_offset_t boot_args_start) +i386_init(void) { unsigned int maxmem; uint64_t maxmemtouse; @@ -423,16 +436,6 @@ i386_init(vm_offset_t boot_args_start) mca_cpu_init(); #endif - /* - * Setup boot args given the physical start address. - */ - kernelBootArgs = (boot_args *) - ml_static_ptovirt(boot_args_start); - DBG("i386_init(0x%lx) kernelBootArgs=%p\n", - (unsigned long)boot_args_start, kernelBootArgs); - - PE_init_platform(FALSE, kernelBootArgs); - postcode(PE_INIT_PLATFORM_D); kernel_early_bootstrap(); diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index 989895eb0..866dfa1fb 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -99,8 +99,10 @@ ppnum_t lowest_lo = 0; ppnum_t lowest_hi = 0; ppnum_t highest_hi = 0; +enum {PMAP_MAX_RESERVED_RANGES = 32}; uint32_t pmap_reserved_pages_allocated = 0; -uint32_t pmap_last_reserved_range = 0xFFFFFFFF; +uint32_t pmap_reserved_range_indices[PMAP_MAX_RESERVED_RANGES]; +uint32_t pmap_last_reserved_range_index = 0; uint32_t pmap_reserved_ranges = 0; extern unsigned int bsd_mbuf_cluster_reserve(boolean_t *); @@ -161,7 +163,7 @@ i386_vm_init(uint64_t maxmem, uint32_t maxdmaaddr; uint32_t mbuf_reserve = 0; boolean_t mbuf_override = FALSE; - + boolean_t coalescing_permitted; #if DEBUG kprintf("Boot args revision: %d version: %d", args->Revision, args->Version); @@ -256,6 +258,12 @@ i386_vm_init(uint64_t maxmem, } base = (ppnum_t) (mptr->PhysicalStart >> I386_PGSHIFT); top = (ppnum_t) (((mptr->PhysicalStart) >> I386_PGSHIFT) + mptr->NumberOfPages - 1); + +#if MR_RSV_TEST + static uint32_t nmr = 0; + if ((base > 0x20000) && (nmr++ < 4)) + mptr->Attribute |= EFI_MEMORY_KERN_RESERVED; +#endif region_bytes = (uint64_t)(mptr->NumberOfPages << I386_PGSHIFT); pmap_type = mptr->Type; @@ -347,6 +355,19 @@ i386_vm_init(uint64_t maxmem, prev_pmptr = 0; continue; } + /* + * A range may be marked with with the + * EFI_MEMORY_KERN_RESERVED attribute + * on some systems, to indicate that the range + * must not be made available to devices. 
+ */ + + if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) { + if (++pmap_reserved_ranges > PMAP_MAX_RESERVED_RANGES) { + panic("Too many reserved ranges %u\n", pmap_reserved_ranges); + } + } + if (top < fap) { /* * entire range below first_avail @@ -361,21 +382,11 @@ i386_vm_init(uint64_t maxmem, pmptr->end = top; - /* - * A range may be marked with with the - * EFI_MEMORY_KERN_RESERVED attribute - * on some systems, to indicate that the range - * must not be made available to devices. - * Simplifying assumptions are made regarding - * the placement of the range. - */ - if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) - pmap_reserved_ranges++; if ((mptr->Attribute & EFI_MEMORY_KERN_RESERVED) && (top < I386_KERNEL_IMAGE_BASE_PAGE)) { pmptr->alloc = pmptr->base; - pmap_last_reserved_range = pmap_memory_region_count; + pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; } else { /* @@ -384,6 +395,7 @@ i386_vm_init(uint64_t maxmem, pmptr->alloc = top; } pmptr->type = pmap_type; + pmptr->attribute = mptr->Attribute; } else if ( (base < fap) && (top > fap) ) { /* @@ -394,39 +406,48 @@ i386_vm_init(uint64_t maxmem, pmptr->base = base; pmptr->alloc = pmptr->end = (fap - 1); pmptr->type = pmap_type; + pmptr->attribute = mptr->Attribute; /* * we bump these here inline so the accounting * below works correctly */ pmptr++; pmap_memory_region_count++; + pmptr->alloc = pmptr->base = fap; pmptr->type = pmap_type; + pmptr->attribute = mptr->Attribute; pmptr->end = top; - } - else { + + if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) + pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; + } else { /* * entire range useable */ pmptr->alloc = pmptr->base = base; pmptr->type = pmap_type; + pmptr->attribute = mptr->Attribute; pmptr->end = top; + if (mptr->Attribute & EFI_MEMORY_KERN_RESERVED) + pmap_reserved_range_indices[pmap_last_reserved_range_index++] = pmap_memory_region_count; } if (i386_ptob(pmptr->end) > avail_end ) avail_end = i386_ptob(pmptr->end); avail_remaining += (pmptr->end - pmptr->base); - + coalescing_permitted = (prev_pmptr && (pmptr->attribute == prev_pmptr->attribute) && ((pmptr->attribute & EFI_MEMORY_KERN_RESERVED) == 0)); /* * Consolidate contiguous memory regions, if possible */ if (prev_pmptr && - pmptr->type == prev_pmptr->type && - pmptr->base == pmptr->alloc && - pmptr->base == (prev_pmptr->end + 1)) + (pmptr->type == prev_pmptr->type) && + (coalescing_permitted) && + (pmptr->base == pmptr->alloc) && + (pmptr->base == (prev_pmptr->end + 1))) { - if(prev_pmptr->end == prev_pmptr->alloc) + if (prev_pmptr->end == prev_pmptr->alloc) prev_pmptr->alloc = pmptr->base; prev_pmptr->end = pmptr->end; } else { @@ -603,11 +624,12 @@ boolean_t pmap_next_page_reserved(ppnum_t *); */ boolean_t pmap_next_page_reserved(ppnum_t *pn) { - if (pmap_reserved_ranges && pmap_last_reserved_range != 0xFFFFFFFF) { + if (pmap_reserved_ranges) { uint32_t n; pmap_memory_region_t *region; - for (n = 0; n <= pmap_last_reserved_range; n++) { - region = &pmap_memory_regions[n]; + for (n = 0; n < pmap_last_reserved_range_index; n++) { + uint32_t reserved_index = pmap_reserved_range_indices[n]; + region = &pmap_memory_regions[reserved_index]; if (region->alloc < region->end) { *pn = region->alloc++; avail_remaining--; @@ -619,6 +641,11 @@ pmap_next_page_reserved(ppnum_t *pn) { lowest_lo = *pn; pmap_reserved_pages_allocated++; +#if DEBUG + if (region->alloc == region->end) { + kprintf("Exhausted reserved range index: %u, base: 0x%x end: 0x%x, 
type: 0x%x, attribute: 0x%llx\n", reserved_index, region->base, region->end, region->type, region->attribute); + } +#endif return TRUE; } } diff --git a/osfmk/i386/locks_i386.c b/osfmk/i386/locks_i386.c index ef38300e4..048dc704d 100644 --- a/osfmk/i386/locks_i386.c +++ b/osfmk/i386/locks_i386.c @@ -370,9 +370,10 @@ usimple_lock( if (uslock_acquired == FALSE) { uint32_t lock_cpu; + uintptr_t lowner = (uintptr_t)l->interlock.lock_data; spinlock_timed_out = l; - lock_cpu = spinlock_timeout_NMI((uintptr_t)l->interlock.lock_data); - panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x", l, (uintptr_t)l->interlock.lock_data, current_thread(), lock_cpu); + lock_cpu = spinlock_timeout_NMI(lowner); + panic("Spinlock acquisition timed out: lock=%p, lock owner thread=0x%lx, current_thread: %p, lock owner active on CPU 0x%x, current owner: 0x%lx", l, lowner, current_thread(), lock_cpu, (uintptr_t)l->interlock.lock_data); } } USLDBG(usld_lock_post(l, pc)); diff --git a/osfmk/i386/misc_protos.h b/osfmk/i386/misc_protos.h index 724490e22..474733e96 100644 --- a/osfmk/i386/misc_protos.h +++ b/osfmk/i386/misc_protos.h @@ -38,7 +38,8 @@ struct boot_args; struct cpu_data; extern void vstart(vm_offset_t); -extern void i386_init(vm_offset_t); +extern void i386_init(void); + extern void i386_vm_init( uint64_t, boolean_t, diff --git a/osfmk/i386/mp.c b/osfmk/i386/mp.c index e90a298f9..f4221f964 100644 --- a/osfmk/i386/mp.c +++ b/osfmk/i386/mp.c @@ -147,7 +147,7 @@ static volatile long mp_rv_complete __attribute__((aligned(64))); volatile uint64_t debugger_entry_time; volatile uint64_t debugger_exit_time; #if MACH_KDP - +#include extern int kdp_snapshot; static struct _kdp_xcpu_call_func { kdp_x86_xcpu_func_t func; @@ -579,12 +579,12 @@ NMIInterruptHandler(x86_saved_state_t *regs) goto NMExit; if (spinlock_timed_out) { - char pstr[160]; + char pstr[192]; snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu); panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs); } else if (pmap_tlb_flush_timeout == TRUE) { char pstr[128]; - snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:%d\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid); + snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid); panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs); } @@ -1315,6 +1315,7 @@ i386_activate_cpu(void) cdp->cpu_running = TRUE; started_cpu(); simple_unlock(&x86_topo_lock); + flush_tlb_raw(); } extern void etimer_timer_expire(void *arg); @@ -1372,6 +1373,13 @@ mp_kdp_enter(void) */ mp_kdp_state = ml_set_interrupts_enabled(FALSE); my_cpu = cpu_number(); + + if (my_cpu == (unsigned) debugger_cpu) { + kprintf("\n\nRECURSIVE DEBUGGER ENTRY DETECTED\n\n"); + kdp_reset(); + return; + } + cpu_datap(my_cpu)->debugger_entry_time = mach_absolute_time(); simple_lock(&mp_kdp_lock); diff --git a/osfmk/i386/pal_hibernate.h b/osfmk/i386/pal_hibernate.h index 025e56ea4..a1fefe4e5 100644 --- a/osfmk/i386/pal_hibernate.h +++ b/osfmk/i386/pal_hibernate.h @@ -28,15 +28,17 @@ #ifndef _I386_PAL_HIBERNATE_H #define _I386_PAL_HIBERNATE_H 
-#define HIB_PTES (4*GB - 1*I386_LPGBYTES) /*4GB - 2m */ -#define DEST_COPY_AREA (HIB_PTES + 1*I386_PGBYTES) -#define SRC_COPY_AREA (HIB_PTES + 2*I386_PGBYTES) -#define COPY_PAGE_AREA (HIB_PTES + 3*I386_PGBYTES) +#define HIB_MAP_SIZE (2*I386_LPGBYTES) +#define DEST_COPY_AREA (4*GB - HIB_MAP_SIZE) /*4GB - 2*2m */ +#define SRC_COPY_AREA (DEST_COPY_AREA - HIB_MAP_SIZE) +#define COPY_PAGE_AREA (SRC_COPY_AREA - HIB_MAP_SIZE) +#define BITMAP_AREA (COPY_PAGE_AREA - HIB_MAP_SIZE) +#define IMAGE_AREA (BITMAP_AREA - HIB_MAP_SIZE) +#define IMAGE2_AREA (IMAGE_AREA - HIB_MAP_SIZE) #define HIB_BASE sectINITPTB #define HIB_ENTRYPOINT acpi_wake_prot_entry -void pal_hib_window_setup(ppnum_t page); uintptr_t pal_hib_map(uintptr_t v, uint64_t p); void hibernateRestorePALState(uint32_t *src); void pal_hib_patchup(void); diff --git a/osfmk/i386/phys.c b/osfmk/i386/phys.c index c8a2f5206..bfbb48d4b 100644 --- a/osfmk/i386/phys.c +++ b/osfmk/i386/phys.c @@ -229,6 +229,9 @@ kvtophys( return ((addr64_t)pa); } +extern pt_entry_t *debugger_ptep; +extern vm_map_offset_t debugger_window_kva; + __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t bytes) { void *src, *dst; @@ -243,6 +246,36 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b #elif defined(__x86_64__) src = PHYSMAP_PTOV(src64); dst = PHYSMAP_PTOV(dst64); + + addr64_t debug_pa = 0; + + /* If either destination or source are outside the + * physical map, establish a physical window onto the target frame. + */ + assert(physmap_enclosed(src64) || physmap_enclosed(dst64)); + + if (physmap_enclosed(src64) == FALSE) { + src = (void *)(debugger_window_kva | (src64 & INTEL_OFFMASK)); + debug_pa = src64 & PG_FRAME; + } else if (physmap_enclosed(dst64) == FALSE) { + dst = (void *)(debugger_window_kva | (dst64 & INTEL_OFFMASK)); + debug_pa = dst64 & PG_FRAME; + } + /* DRK: debugger only routine, we don't bother checking for an + * identical mapping. + */ + if (debug_pa) { + if (debugger_window_kva == 0) + panic("%s: invoked in non-debug mode", __FUNCTION__); + /* Establish a cache-inhibited physical window; some platforms + * may not cover arbitrary ranges with MTRRs + */ + pmap_store_pte(debugger_ptep, debug_pa | INTEL_PTE_NCACHE | INTEL_PTE_RW | INTEL_PTE_REF| INTEL_PTE_MOD | INTEL_PTE_VALID); + flush_tlb_raw(); +#if DEBUG + kprintf("Remapping debugger physical window at %p to 0x%llx\n", (void *)debugger_window_kva, debug_pa); +#endif + } #endif /* ensure we stay within a page */ if (((((uint32_t)src64 & (I386_PGBYTES-1)) + bytes) > I386_PGBYTES) || ((((uint32_t)dst64 & (I386_PGBYTES-1)) + bytes) > I386_PGBYTES) ) { @@ -251,17 +284,17 @@ __private_extern__ void ml_copy_phys(addr64_t src64, addr64_t dst64, vm_size_t b switch (bytes) { case 1: - *((uint8_t *) dst) = *((uint8_t *) src); + *((uint8_t *) dst) = *((volatile uint8_t *) src); break; case 2: - *((uint16_t *) dst) = *((uint16_t *) src); + *((uint16_t *) dst) = *((volatile uint16_t *) src); break; case 4: - *((uint32_t *) dst) = *((uint32_t *) src); + *((uint32_t *) dst) = *((volatile uint32_t *) src); break; /* Should perform two 32-bit reads */ case 8: - *((uint64_t *) dst) = *((uint64_t *) src); + *((uint64_t *) dst) = *((volatile uint64_t *) src); break; default: bcopy(src, dst, bytes); diff --git a/osfmk/i386/pmCPU.c b/osfmk/i386/pmCPU.c index 22eafd1b8..1f064b614 100644 --- a/osfmk/i386/pmCPU.c +++ b/osfmk/i386/pmCPU.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2010 Apple Inc. All rights reserved. + * Copyright (c) 2004-2011 Apple Inc. 
All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -54,15 +54,17 @@ extern int disableConsoleOutput; -decl_simple_lock_data(,pm_init_lock); +#define DELAY_UNSET 0xFFFFFFFFFFFFFFFFULL /* * The following is set when the KEXT loads and initializes. */ pmDispatch_t *pmDispatch = NULL; -static uint32_t pmInitDone = 0; -static boolean_t earlyTopology = FALSE; +static uint32_t pmInitDone = 0; +static boolean_t earlyTopology = FALSE; +static uint64_t earlyMaxBusDelay = DELAY_UNSET; +static uint64_t earlyMaxIntDelay = DELAY_UNSET; /* @@ -71,16 +73,6 @@ static boolean_t earlyTopology = FALSE; void power_management_init(void) { - static boolean_t initialized = FALSE; - - /* - * Initialize the lock for the KEXT initialization. - */ - if (!initialized) { - simple_lock_init(&pm_init_lock, 0); - initialized = TRUE; - } - if (pmDispatch != NULL && pmDispatch->cstateInit != NULL) (*pmDispatch->cstateInit)(); } @@ -101,6 +93,20 @@ machine_idle(void) DBGLOG(cpu_handle, cpu_number(), MP_IDLE); MARK_CPU_IDLE(cpu_number()); + if (pmInitDone) { + /* + * Handle case where ml_set_maxbusdelay() or ml_set_maxintdelay() + * were called prior to the CPU PM kext being registered. We do + * this here since we know at this point since it'll be at idle + * where the decision using these values will be used. + */ + if (earlyMaxBusDelay != DELAY_UNSET) + ml_set_maxbusdelay((uint32_t)(earlyMaxBusDelay & 0xFFFFFFFF)); + + if (earlyMaxIntDelay != DELAY_UNSET) + ml_set_maxintdelay(earlyMaxIntDelay); + } + if (pmInitDone && pmDispatch != NULL && pmDispatch->MachineIdle != NULL) @@ -201,8 +207,12 @@ pmMarkAllCPUsOff(void) static void pmInitComplete(void) { - if (earlyTopology && pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL) + if (earlyTopology + && pmDispatch != NULL + && pmDispatch->pmCPUStateInit != NULL) { (*pmDispatch->pmCPUStateInit)(); + earlyTopology = FALSE; + } pmInitDone = 1; } @@ -284,7 +294,7 @@ pmCPUGetDeadline(cpu_data_t *cpu) { uint64_t deadline = 0; - if (pmInitDone + if (pmInitDone && pmDispatch != NULL && pmDispatch->GetDeadline != NULL) deadline = (*pmDispatch->GetDeadline)(&cpu->lcpu); @@ -448,7 +458,8 @@ ml_get_maxsnoop(void) { uint64_t max_snoop = 0; - if (pmDispatch != NULL + if (pmInitDone + && pmDispatch != NULL && pmDispatch->getMaxSnoop != NULL) max_snoop = pmDispatch->getMaxSnoop(); @@ -461,7 +472,8 @@ ml_get_maxbusdelay(void) { uint64_t max_delay = 0; - if (pmDispatch != NULL + if (pmInitDone + && pmDispatch != NULL && pmDispatch->getMaxBusDelay != NULL) max_delay = pmDispatch->getMaxBusDelay(); @@ -482,8 +494,11 @@ ml_set_maxbusdelay(uint32_t mdelay) uint64_t maxdelay = mdelay; if (pmDispatch != NULL - && pmDispatch->setMaxBusDelay != NULL) + && pmDispatch->setMaxBusDelay != NULL) { + earlyMaxBusDelay = DELAY_UNSET; pmDispatch->setMaxBusDelay(maxdelay); + } else + earlyMaxBusDelay = maxdelay; } uint64_t @@ -505,8 +520,11 @@ void ml_set_maxintdelay(uint64_t mdelay) { if (pmDispatch != NULL - && pmDispatch->setMaxIntDelay != NULL) + && pmDispatch->setMaxIntDelay != NULL) { + earlyMaxIntDelay = DELAY_UNSET; pmDispatch->setMaxIntDelay(mdelay); + } else + earlyMaxIntDelay = mdelay; } boolean_t @@ -811,6 +829,12 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, if (cpuFuncs != NULL) { pmDispatch = cpuFuncs; + if (earlyTopology + && pmDispatch->pmCPUStateInit != NULL) { + (*pmDispatch->pmCPUStateInit)(); + earlyTopology = FALSE; + } + if (pmDispatch->pmIPIHandler != NULL) { lapic_set_pm_func((i386_intr_func_t)pmDispatch->pmIPIHandler); } diff --git 
a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index b672bdc6b..c31ab2d8a 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -564,56 +564,6 @@ pmap_map( return(virt); } -/* - * Back-door routine for mapping kernel VM at initialization. - * Useful for mapping memory outside the range - * Sets no-cache, A, D. - * Otherwise like pmap_map. - */ -vm_offset_t -pmap_map_bd( - vm_offset_t virt, - vm_map_offset_t start_addr, - vm_map_offset_t end_addr, - vm_prot_t prot, - unsigned int flags) -{ - pt_entry_t template; - pt_entry_t *pte; - spl_t spl; - - template = pa_to_pte(start_addr) - | INTEL_PTE_REF - | INTEL_PTE_MOD - | INTEL_PTE_WIRED - | INTEL_PTE_VALID; - - if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) { - template |= INTEL_PTE_NCACHE; - if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT))) - template |= INTEL_PTE_PTA; - } - - if (prot & VM_PROT_WRITE) - template |= INTEL_PTE_WRITE; - - while (start_addr < end_addr) { - spl = splhigh(); - pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); - if (pte == PT_ENTRY_NULL) { - panic("pmap_map_bd: Invalid kernel address\n"); - } - pmap_store_pte(pte, template); - splx(spl); - pte_increment_pa(template); - virt += PAGE_SIZE; - start_addr += PAGE_SIZE; - } - - flush_tlb(); - return(virt); -} - extern pmap_paddr_t first_avail; extern vm_offset_t virtual_avail, virtual_end; extern pmap_paddr_t avail_start, avail_end; @@ -1060,9 +1010,7 @@ pmap_init(void) if (pn > last_managed_page) last_managed_page = pn; - if (pn < lowest_lo) - pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; - else if (pn >= lowest_hi && pn <= highest_hi) + if (pn >= lowest_hi && pn <= highest_hi) pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; } } diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h index 44b6bf742..a168562c9 100644 --- a/osfmk/i386/pmap.h +++ b/osfmk/i386/pmap.h @@ -507,7 +507,12 @@ extern uint32_t pmap_kernel_text_ps; #define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK)) #define PHYSMAP_BASE KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0) +#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4)) #define PHYSMAP_PTOV(x) ((void *)(((uint64_t)(x)) + PHYSMAP_BASE)) + +static inline boolean_t physmap_enclosed(addr64_t a) { + return (a < (NPHYSMAP * GB)); +} #endif typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */ @@ -579,10 +584,11 @@ extern void pmap_put_mapwindow(mapwindow_t *map); #endif typedef struct pmap_memory_regions { - ppnum_t base; - ppnum_t end; - ppnum_t alloc; - uint32_t type; + ppnum_t base; + ppnum_t end; + ppnum_t alloc; + uint32_t type; + uint64_t attribute; } pmap_memory_region_t; extern unsigned pmap_memory_region_count; diff --git a/osfmk/i386/pmap_common.c b/osfmk/i386/pmap_common.c index d81248dae..576b9c089 100644 --- a/osfmk/i386/pmap_common.c +++ b/osfmk/i386/pmap_common.c @@ -137,7 +137,7 @@ pmap_is_noencrypt(ppnum_t pn) pai = ppn_to_pai(pn); if (!IS_MANAGED_PAGE(pai)) - return (TRUE); + return (FALSE); if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) return (TRUE); @@ -171,11 +171,17 @@ pmap_clear_noencrypt(ppnum_t pn) pai = ppn_to_pai(pn); if (IS_MANAGED_PAGE(pai)) { - LOCK_PVH(pai); + /* + * synchronization at VM layer prevents PHYS_NOENCRYPT + * from changing state, so we don't need the lock to inspect + */ + if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) { + LOCK_PVH(pai); - pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT; + pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT; - UNLOCK_PVH(pai); + UNLOCK_PVH(pai); + } } } diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c index d7e63d6b0..9061d73cf 100644 --- 
a/osfmk/i386/pmap_x86_common.c +++ b/osfmk/i386/pmap_x86_common.c @@ -1429,3 +1429,52 @@ pmap_change_wiring( PMAP_UNLOCK(map); } + +/* + * "Backdoor" direct map routine for early mappings. + * Useful for mapping memory outside the range + * Sets A, D and NC if requested + */ + +vm_offset_t +pmap_map_bd( + vm_offset_t virt, + vm_map_offset_t start_addr, + vm_map_offset_t end_addr, + vm_prot_t prot, + unsigned int flags) +{ + pt_entry_t template; + pt_entry_t *pte; + spl_t spl; + vm_offset_t base = virt; + template = pa_to_pte(start_addr) + | INTEL_PTE_REF + | INTEL_PTE_MOD + | INTEL_PTE_WIRED + | INTEL_PTE_VALID; + + if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) { + template |= INTEL_PTE_NCACHE; + if (!(flags & (VM_MEM_GUARDED))) + template |= INTEL_PTE_PTA; + } + if (prot & VM_PROT_WRITE) + template |= INTEL_PTE_WRITE; + + while (start_addr < end_addr) { + spl = splhigh(); + pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); + if (pte == PT_ENTRY_NULL) { + panic("pmap_map_bd: Invalid kernel address\n"); + } + pmap_store_pte(pte, template); + splx(spl); + pte_increment_pa(template); + virt += PAGE_SIZE; + start_addr += PAGE_SIZE; + } + flush_tlb_raw(); + PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr); + return(virt); +} diff --git a/osfmk/i386/proc_reg.h b/osfmk/i386/proc_reg.h index b35d5c0a0..05dd961f1 100644 --- a/osfmk/i386/proc_reg.h +++ b/osfmk/i386/proc_reg.h @@ -145,21 +145,23 @@ /* * CR4 */ -#define CR4_OSXSAVE 0x00040000 /* OS supports XSAVE */ -#define CR4_PCIDE 0x00020000 /* PCID Enable */ -#define CR4_SMXE 0x00004000 /* Enable SMX operation */ -#define CR4_VMXE 0x00002000 /* Enable VMX operation */ -#define CR4_OSXMM 0x00000400 /* SSE/SSE2 exceptions supported in OS */ -#define CR4_OSFXS 0x00000200 /* SSE/SSE2 OS supports FXSave */ -#define CR4_PCE 0x00000100 /* Performance-Monitor Count Enable */ -#define CR4_PGE 0x00000080 /* Page Global Enable */ -#define CR4_MCE 0x00000040 /* Machine Check Exceptions */ -#define CR4_PAE 0x00000020 /* Physical Address Extensions */ -#define CR4_PSE 0x00000010 /* Page Size Extensions */ -#define CR4_DE 0x00000008 /* Debugging Extensions */ -#define CR4_TSD 0x00000004 /* Time Stamp Disable */ -#define CR4_PVI 0x00000002 /* Protected-mode Virtual Interrupts */ -#define CR4_VME 0x00000001 /* Virtual-8086 Mode Extensions */ +#define CR4_SMEP 0x00100000 /* Supervisor-Mode Execute Protect */ +#define CR4_OSXSAVE 0x00040000 /* OS supports XSAVE */ +#define CR4_PCIDE 0x00020000 /* PCID Enable */ +#define CR4_RDWRFSGS 0x00010000 /* RDWRFSGS Enable */ +#define CR4_SMXE 0x00004000 /* Enable SMX operation */ +#define CR4_VMXE 0x00002000 /* Enable VMX operation */ +#define CR4_OSXMM 0x00000400 /* SSE/SSE2 exception support in OS */ +#define CR4_OSFXS 0x00000200 /* SSE/SSE2 OS supports FXSave */ +#define CR4_PCE 0x00000100 /* Performance-Monitor Count Enable */ +#define CR4_PGE 0x00000080 /* Page Global Enable */ +#define CR4_MCE 0x00000040 /* Machine Check Exceptions */ +#define CR4_PAE 0x00000020 /* Physical Address Extensions */ +#define CR4_PSE 0x00000010 /* Page Size Extensions */ +#define CR4_DE 0x00000008 /* Debugging Extensions */ +#define CR4_TSD 0x00000004 /* Time Stamp Disable */ +#define CR4_PVI 0x00000002 /* Protected-mode Virtual Interrupts */ +#define CR4_VME 0x00000001 /* Virtual-8086 Mode Extensions */ /* * XCR0 - XFEATURE_ENABLED_MASK (a.k.a. 
XFEM) register diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index 55be4fc75..a07b1b8cb 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -145,6 +145,7 @@ perfCallback tempDTraceTrapHook = NULL; /* Pointer to DTrace fbt trap hook routi extern boolean_t dtrace_tally_fault(user_addr_t); #endif + void thread_syscall_return( kern_return_t ret) @@ -679,10 +680,11 @@ kernel_trap( is_user = -1; } #else - if (vaddr < VM_MAX_USER_PAGE_ADDRESS) { + if (__probable(vaddr < VM_MAX_USER_PAGE_ADDRESS)) { /* fault occurred in userspace */ map = thread->map; is_user = -1; + /* * If we're not sharing cr3 with the user * and we faulted in copyio, @@ -699,6 +701,7 @@ kernel_trap( #endif } } + KERNEL_DEBUG_CONSTANT( (MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, (unsigned)(vaddr >> 32), (unsigned)vaddr, is_user, kern_ip, 0); @@ -744,22 +747,6 @@ kernel_trap( goto debugger_entry; #endif case T_PAGE_FAULT: - /* - * If the current map is a submap of the kernel map, - * and the address is within that map, fault on that - * map. If the same check is done in vm_fault - * (vm_map_lookup), we may deadlock on the kernel map - * lock. - */ - - prot = VM_PROT_READ; - - if (code & T_PF_WRITE) - prot |= VM_PROT_WRITE; -#if PAE - if (code & T_PF_EXECUTE) - prot |= VM_PROT_EXECUTE; -#endif #if MACH_KDB /* @@ -792,6 +779,16 @@ kernel_trap( } #endif /* CONFIG_DTRACE */ + + prot = VM_PROT_READ; + + if (code & T_PF_WRITE) + prot |= VM_PROT_WRITE; +#if PAE + if (code & T_PF_EXECUTE) + prot |= VM_PROT_EXECUTE; +#endif + result = vm_fault(map, vm_map_trunc_page(vaddr), prot, @@ -863,9 +860,6 @@ FALL_THROUGH: kprintf("kernel_trap() ignoring spurious trap 15\n"); return; } -#if defined(__x86_64__) && DEBUG - kprint_state(saved_state); -#endif debugger_entry: /* Ensure that the i386_kernel_state at the base of the * current thread's stack (if any) is synchronized with the @@ -959,6 +953,8 @@ panic_trap(x86_saved_state32_t *regs) cr0 = 0; } #else + + static void panic_trap(x86_saved_state64_t *regs) { @@ -981,6 +977,7 @@ panic_trap(x86_saved_state64_t *regs) if (regs->isf.trapno < TRAP_TYPES) trapname = trap_type[regs->isf.trapno]; + #undef panic panic("Kernel trap at 0x%016llx, type %d=%s, registers:\n" "CR0: 0x%016llx, CR2: 0x%016llx, CR3: 0x%016llx, CR4: 0x%016llx\n" @@ -989,7 +986,7 @@ panic_trap(x86_saved_state64_t *regs) "R8: 0x%016llx, R9: 0x%016llx, R10: 0x%016llx, R11: 0x%016llx\n" "R12: 0x%016llx, R13: 0x%016llx, R14: 0x%016llx, R15: 0x%016llx\n" "RFL: 0x%016llx, RIP: 0x%016llx, CS: 0x%016llx, SS: 0x%016llx\n" - "CR2: 0x%016llx, Error code: 0x%016llx, Faulting CPU: 0x%x\n", + "CR2: 0x%016llx, Error code: 0x%016llx, Faulting CPU: 0x%x%s\n", regs->isf.rip, regs->isf.trapno, trapname, cr0, cr2, cr3, cr4, regs->rax, regs->rbx, regs->rcx, regs->rdx, @@ -997,7 +994,8 @@ panic_trap(x86_saved_state64_t *regs) regs->r8, regs->r9, regs->r10, regs->r11, regs->r12, regs->r13, regs->r14, regs->r15, regs->isf.rflags, regs->isf.rip, regs->isf.cs & 0xFFFF, - regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu); + regs->isf.ss & 0xFFFF,regs->cr2, regs->isf.err, regs->isf.cpu, + ""); /* * This next statement is not executed, * but it's needed to stop the compiler using tail call optimization diff --git a/osfmk/ipc/ipc_init.c b/osfmk/ipc/ipc_init.c index 72f01383c..cc0970e32 100644 --- a/osfmk/ipc/ipc_init.c +++ b/osfmk/ipc/ipc_init.c @@ -189,7 +189,6 @@ ipc_bootstrap(void) IKM_SAVED_KMSG_SIZE, "ipc kmsgs"); zone_change(ipc_kmsg_zone, Z_CALLERACCT, FALSE); - zone_change(ipc_kmsg_zone, Z_NOENCRYPT, TRUE); 
#if CONFIG_MACF_MACH ipc_labelh_zone = diff --git a/osfmk/kdp/kdp.h b/osfmk/kdp/kdp.h index ab846c753..51f77134a 100644 --- a/osfmk/kdp/kdp.h +++ b/osfmk/kdp/kdp.h @@ -47,3 +47,5 @@ kdp_reset(void); void kdp_init(void); + +void kdp_machine_init(void); diff --git a/osfmk/kdp/kdp_udp.c b/osfmk/kdp/kdp_udp.c index 22bf8978a..caa07dfa5 100644 --- a/osfmk/kdp/kdp_udp.c +++ b/osfmk/kdp/kdp_udp.c @@ -1405,7 +1405,9 @@ kdp_reset(void) kdp.reply_port = kdp.exception_port = 0; kdp.is_halted = kdp.is_conn = FALSE; kdp.exception_seq = kdp.conn_seq = 0; - kdp.session_key = 0; + kdp.session_key = 0; + pkt.input = manual_pkt.input = FALSE; + pkt.len = pkt.off = manual_pkt.len = 0; } struct corehdr * @@ -2013,8 +2015,6 @@ kdp_panic_dump(void) panic_dump_exit: abort_panic_transfer(); - pkt.input = FALSE; - pkt.len = 0; kdp_reset(); return; } @@ -2122,6 +2122,9 @@ kdp_init(void) if (debug_boot_arg & DB_REBOOT_POST_CORE) kdp_flag |= REBOOT_POST_CORE; +#if defined(__x86_64__) + kdp_machine_init(); +#endif #if CONFIG_SERIAL_KDP char kdpname[80]; struct in_addr ipaddr; diff --git a/osfmk/kdp/ml/i386/kdp_x86_common.c b/osfmk/kdp/ml/i386/kdp_x86_common.c index 221d683ac..6016e4835 100644 --- a/osfmk/kdp/ml/i386/kdp_x86_common.c +++ b/osfmk/kdp/ml/i386/kdp_x86_common.c @@ -38,6 +38,8 @@ #include #include +#include + #include #include @@ -374,3 +376,36 @@ kdp_machine_msr64_write(kdp_writemsr64_req_t *rq, caddr_t data, uint16_t lcpu) wrmsr64(msr, *value); return KDPERR_NO_ERROR; } + +pt_entry_t *debugger_ptep; +vm_map_offset_t debugger_window_kva; + +/* Establish a pagetable window that can be remapped on demand. + * This is utilized by the debugger to address regions outside + * the physical map. + */ + +void +kdp_machine_init(void) { + if (debug_boot_arg == 0) + return; + + vm_map_entry_t e; + kern_return_t kr = vm_map_find_space(kernel_map, + &debugger_window_kva, + PAGE_SIZE, 0, + VM_MAKE_TAG(VM_MEMORY_IOKIT), &e); + + if (kr != KERN_SUCCESS) { + panic("%s: vm_map_find_space failed with %d\n", __FUNCTION__, kr); + } + + vm_map_unlock(kernel_map); + + debugger_ptep = pmap_pte(kernel_pmap, debugger_window_kva); + + if (debugger_ptep == NULL) { + pmap_expand(kernel_pmap, debugger_window_kva); + debugger_ptep = pmap_pte(kernel_pmap, debugger_window_kva); + } +} diff --git a/osfmk/kern/debug.c b/osfmk/kern/debug.c index b6d146746..1dd1aee28 100644 --- a/osfmk/kern/debug.c +++ b/osfmk/kern/debug.c @@ -235,6 +235,14 @@ panic(const char *str, ...) thread_t thread; wait_queue_t wq; + if (kdebug_enable) { + ml_set_interrupts_enabled(TRUE); + kdbg_dump_trace_to_file("/var/tmp/panic.trace"); + } + + s = splhigh(); + disable_preemption(); + #if defined(__i386__) || defined(__x86_64__) /* Attempt to display the unparsed panic string */ const char *tstr = str; @@ -244,11 +252,6 @@ panic(const char *str, ...) 
kprintf("%c", *tstr++); kprintf("\n"); #endif - if (kdebug_enable) - kdbg_dump_trace_to_file("/var/tmp/panic.trace"); - - s = splhigh(); - disable_preemption(); panic_safe(); diff --git a/osfmk/kern/startup.c b/osfmk/kern/startup.c index 47290e3d8..cb31f783c 100644 --- a/osfmk/kern/startup.c +++ b/osfmk/kern/startup.c @@ -69,7 +69,6 @@ #include #include #include -#include #include #include @@ -326,11 +325,6 @@ kernel_bootstrap_thread(void) device_service_create(); kth_started = 1; - -#if MACH_KDP - kernel_bootstrap_kprintf("calling kdp_init\n"); - kdp_init(); -#endif #if (defined(__i386__) || defined(__x86_64__)) && NCOPY_WINDOWS > 0 /* @@ -340,6 +334,13 @@ kernel_bootstrap_thread(void) cpu_physwindow_init(0); #endif + vm_kernel_reserved_entry_init(); + +#if MACH_KDP + kernel_bootstrap_kprintf("calling kdp_init\n"); + kdp_init(); +#endif + #if CONFIG_COUNTERS pmc_bootstrap(); #endif diff --git a/osfmk/kern/thread.c b/osfmk/kern/thread.c index 84f7cf817..3c3e5ce07 100644 --- a/osfmk/kern/thread.c +++ b/osfmk/kern/thread.c @@ -279,8 +279,6 @@ thread_init(void) THREAD_CHUNK * sizeof(struct thread), "threads"); - zone_change(thread_zone, Z_NOENCRYPT, TRUE); - lck_grp_attr_setdefault(&thread_lck_grp_attr); lck_grp_init(&thread_lck_grp, "thread", &thread_lck_grp_attr); lck_attr_setdefault(&thread_lck_attr); diff --git a/osfmk/kern/zalloc.c b/osfmk/kern/zalloc.c index c6bf2f01e..9d1afa5d6 100644 --- a/osfmk/kern/zalloc.c +++ b/osfmk/kern/zalloc.c @@ -95,6 +95,7 @@ #include #include +#include #include /* @@ -214,27 +215,47 @@ MACRO_END #endif /* ZONE_DEBUG */ /* - * Support for garbage collection of unused zone pages: + * Support for garbage collection of unused zone pages + * + * The kernel virtually allocates the "zone map" submap of the kernel + * map. When an individual zone needs more storage, memory is allocated + * out of the zone map, and the two-level "zone_page_table" is + * on-demand expanded so that it has entries for those pages. + * zone_page_init()/zone_page_alloc() initialize "alloc_count" + * to the number of zone elements that occupy the zone page (which may + * be a minimum of 1, including if a zone element spans multiple + * pages). + * + * Asynchronously, the zone_gc() logic attempts to walk zone free + * lists to see if all the elements on a zone page are free. If + * "collect_count" (which it increments during the scan) matches + * "alloc_count", the zone page is a candidate for collection and the + * physical page is returned to the VM system. During this process, the + * first word of the zone page is re-used to maintain a linked list of + * to-be-collected zone pages. */ +typedef uint32_t zone_page_index_t; +#define ZONE_PAGE_INDEX_INVALID ((zone_page_index_t)0xFFFFFFFFU) struct zone_page_table_entry { - struct zone_page_table_entry *link; - short alloc_count; - short collect_count; + volatile uint16_t alloc_count; + volatile uint16_t collect_count; }; +#define ZONE_PAGE_USED 0 +#define ZONE_PAGE_UNUSED 0xffff + /* Forwards */ void zone_page_init( vm_offset_t addr, - vm_size_t size, - int value); + vm_size_t size); void zone_page_alloc( vm_offset_t addr, vm_size_t size); void zone_page_free_element( - struct zone_page_table_entry **free_pages, + zone_page_index_t *free_page_list, vm_offset_t addr, vm_size_t size); @@ -271,6 +292,7 @@ zone_t zinfo_zone = ZONE_NULL; /* zone of per-task zone info */ /* * The VM system gives us an initial chunk of memory. * It has to be big enough to allocate the zone_zone + * all the way through the pmap zone. 
*/ vm_offset_t zdata; @@ -304,23 +326,23 @@ MACRO_END #define lock_try_zone(zone) lck_mtx_try_lock_spin(&zone->lock) -kern_return_t zget_space( - zone_t zone, - vm_offset_t size, - vm_offset_t *result); - -decl_simple_lock_data(,zget_space_lock) -vm_offset_t zalloc_next_space; -vm_offset_t zalloc_end_of_space; -vm_size_t zalloc_wasted_space; - /* * Garbage collection map information */ -struct zone_page_table_entry * zone_page_table; +#define ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE (32) +struct zone_page_table_entry * volatile zone_page_table[ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE]; +vm_size_t zone_page_table_used_size; vm_offset_t zone_map_min_address; vm_offset_t zone_map_max_address; unsigned int zone_pages; +unsigned int zone_page_table_second_level_size; /* power of 2 */ +unsigned int zone_page_table_second_level_shift_amount; + +#define zone_page_table_first_level_slot(x) ((x) >> zone_page_table_second_level_shift_amount) +#define zone_page_table_second_level_slot(x) ((x) & (zone_page_table_second_level_size - 1)) + +void zone_page_table_expand(zone_page_index_t pindex); +struct zone_page_table_entry *zone_page_table_lookup(zone_page_index_t pindex); /* * Exclude more than one concurrent garbage collection @@ -343,10 +365,6 @@ lck_mtx_ext_t zone_lck_ext; ((vm_offset_t)(zone_virtual_addr((vm_map_address_t)addr)) + size -1) < zone_map_max_address) #endif -#define ZONE_PAGE_USED 0 -#define ZONE_PAGE_UNUSED -1 - - /* * Protects first_zone, last_zone, num_zones, * and the next_zone field of zones. @@ -1029,9 +1047,10 @@ zinit( zone_t z; if (zone_zone == ZONE_NULL) { - if (zget_space(NULL, sizeof(struct zone), (vm_offset_t *)&z) - != KERN_SUCCESS) - return(ZONE_NULL); + + z = (struct zone *)zdata; + zdata += sizeof(*z); + zdata_size -= sizeof(*z); } else z = (zone_t) zalloc(zone_zone); if (z == ZONE_NULL) @@ -1061,7 +1080,11 @@ zinit( alloc = PAGE_SIZE; else #endif - { vm_size_t best, waste; unsigned int i; +#if defined(__LP64__) + if (((alloc % size) != 0) || (alloc > PAGE_SIZE * 8)) +#endif + { + vm_size_t best, waste; unsigned int i; best = PAGE_SIZE; waste = best % size; @@ -1103,7 +1126,10 @@ use_this_allocation: z->async_pending = FALSE; z->caller_acct = TRUE; z->noencrypt = FALSE; - + z->no_callout = FALSE; + z->async_prio_refill = FALSE; + z->prio_refill_watermark = 0; + z->zone_replenish_thread = NULL; #if CONFIG_ZLEAKS z->num_allocs = 0; z->num_frees = 0; @@ -1172,18 +1198,98 @@ use_this_allocation: return(z); } +unsigned zone_replenish_loops, zone_replenish_wakeups, zone_replenish_wakeups_initiated; + +static void zone_replenish_thread(zone_t); + +/* High priority VM privileged thread used to asynchronously refill a designated + * zone, such as the reserved VM map entry zone. 
+ */ +static void zone_replenish_thread(zone_t z) { + vm_size_t free_size; + current_thread()->options |= TH_OPT_VMPRIV; + + for (;;) { + lock_zone(z); + assert(z->prio_refill_watermark != 0); + while ((free_size = (z->cur_size - (z->count * z->elem_size))) < (z->prio_refill_watermark * z->elem_size)) { + assert(z->doing_alloc == FALSE); + assert(z->async_prio_refill == TRUE); + + unlock_zone(z); + int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; + vm_offset_t space, alloc_size; + kern_return_t kr; + + if (vm_pool_low()) + alloc_size = round_page(z->elem_size); + else + alloc_size = z->alloc_size; + + if (z->noencrypt) + zflags |= KMA_NOENCRYPT; + + kr = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); + + if (kr == KERN_SUCCESS) { +#if ZONE_ALIAS_ADDR + if (alloc_size == PAGE_SIZE) + space = zone_alias_addr(space); +#endif + zcram(z, space, alloc_size); + } else if (kr == KERN_RESOURCE_SHORTAGE) { + VM_PAGE_WAIT(); + } else if (kr == KERN_NO_SPACE) { + kr = kernel_memory_allocate(kernel_map, &space, alloc_size, 0, zflags); + if (kr == KERN_SUCCESS) { +#if ZONE_ALIAS_ADDR + if (alloc_size == PAGE_SIZE) + space = zone_alias_addr(space); +#endif + zcram(z, space, alloc_size); + } else { + assert_wait_timeout(&z->zone_replenish_thread, THREAD_UNINT, 1, 100 * NSEC_PER_USEC); + thread_block(THREAD_CONTINUE_NULL); + } + } + + lock_zone(z); + zone_replenish_loops++; + } + + unlock_zone(z); + assert_wait(&z->zone_replenish_thread, THREAD_UNINT); + thread_block(THREAD_CONTINUE_NULL); + zone_replenish_wakeups++; + } +} + +void +zone_prio_refill_configure(zone_t z, vm_size_t low_water_mark) { + z->prio_refill_watermark = low_water_mark; + + z->async_prio_refill = TRUE; + OSMemoryBarrier(); + kern_return_t tres = kernel_thread_start_priority((thread_continue_t)zone_replenish_thread, z, MAXPRI_KERNEL, &z->zone_replenish_thread); + + if (tres != KERN_SUCCESS) { + panic("zone_prio_refill_configure, thread create: 0x%x", tres); + } + + thread_deallocate(z->zone_replenish_thread); +} /* * Cram the given memory into the specified zone. */ void zcram( - register zone_t zone, - void *newaddr, + zone_t zone, + vm_offset_t newmem, vm_size_t size) { - register vm_size_t elem_size; - vm_offset_t newmem = (vm_offset_t) newaddr; + vm_size_t elem_size; + boolean_t from_zm = FALSE; /* Basic sanity checks */ assert(zone != ZONE_NULL && newmem != (vm_offset_t)0); @@ -1192,10 +1298,16 @@ zcram( elem_size = zone->elem_size; + if (from_zone_map(newmem, size)) + from_zm = TRUE; + + if (from_zm) + zone_page_init(newmem, size); + lock_zone(zone); while (size >= elem_size) { ADD_TO_ZONE(zone, newmem); - if (from_zone_map(newmem, elem_size)) + if (from_zm) zone_page_alloc(newmem, elem_size); zone->count++; /* compensate for ADD_TO_ZONE */ size -= elem_size; @@ -1205,95 +1317,6 @@ zcram( unlock_zone(zone); } -/* - * Contiguous space allocator for non-paged zones. Allocates "size" amount - * of memory from zone_map. - */ - -kern_return_t -zget_space( - zone_t zone, - vm_offset_t size, - vm_offset_t *result) -{ - vm_offset_t new_space = 0; - vm_size_t space_to_add = 0; - - simple_lock(&zget_space_lock); - while ((zalloc_next_space + size) > zalloc_end_of_space) { - /* - * Add at least one page to allocation area. - */ - - space_to_add = round_page(size); - - if (new_space == 0) { - kern_return_t retval; - int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; - - /* - * Memory cannot be wired down while holding - * any locks that the pageout daemon might - * need to free up pages. 
[Making the zget_space - * lock a complex lock does not help in this - * regard.] - * - * Unlock and allocate memory. Because several - * threads might try to do this at once, don't - * use the memory before checking for available - * space again. - */ - - simple_unlock(&zget_space_lock); - - if (zone == NULL || zone->noencrypt) - zflags |= KMA_NOENCRYPT; - - retval = kernel_memory_allocate(zone_map, &new_space, space_to_add, 0, zflags); - if (retval != KERN_SUCCESS) - return(retval); -#if ZONE_ALIAS_ADDR - if (space_to_add == PAGE_SIZE) - new_space = zone_alias_addr(new_space); -#endif - zone_page_init(new_space, space_to_add, - ZONE_PAGE_USED); - simple_lock(&zget_space_lock); - continue; - } - - - /* - * Memory was allocated in a previous iteration. - * - * Check whether the new region is contiguous - * with the old one. - */ - - if (new_space != zalloc_end_of_space) { - /* - * Throw away the remainder of the - * old space, and start a new one. - */ - zalloc_wasted_space += - zalloc_end_of_space - zalloc_next_space; - zalloc_next_space = new_space; - } - - zalloc_end_of_space = new_space + space_to_add; - - new_space = 0; - } - *result = zalloc_next_space; - zalloc_next_space += size; - simple_unlock(&zget_space_lock); - - if (new_space != 0) - kmem_free(zone_map, new_space, space_to_add); - - return(KERN_SUCCESS); -} - /* * Steal memory for the zone package. Called from @@ -1302,8 +1325,9 @@ zget_space( void zone_steal_memory(void) { - zdata_size = round_page(128*sizeof(struct zone)); - zdata = (vm_offset_t)((char *)pmap_steal_memory(zdata_size) - (char *)0); + /* Request enough early memory to get to the pmap zone */ + zdata_size = 12 * sizeof(struct zone); + zdata = (vm_offset_t)pmap_steal_memory(round_page(zdata_size)); } @@ -1334,7 +1358,7 @@ zfill( return 0; zone_change(zone, Z_FOREIGN, TRUE); - zcram(zone, (void *)memory, size); + zcram(zone, memory, size); nalloc = (int)(size / zone->elem_size); assert(nalloc >= nelem); @@ -1349,8 +1373,6 @@ zfill( void zone_bootstrap(void) { - vm_size_t zone_zone_size; - vm_offset_t zone_zone_space; char temp_buf[16]; #if 6094439 @@ -1417,11 +1439,6 @@ zone_bootstrap(void) last_zone = &first_zone; num_zones = 0; - simple_lock_init(&zget_space_lock, 0); - zalloc_next_space = zdata; - zalloc_end_of_space = zdata + zdata_size; - zalloc_wasted_space = 0; - /* assertion: nobody else called zinit before us */ assert(zone_zone == ZONE_NULL); zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone), @@ -1430,9 +1447,7 @@ zone_bootstrap(void) zone_change(zone_zone, Z_CALLERACCT, FALSE); zone_change(zone_zone, Z_NOENCRYPT, TRUE); - zone_zone_size = zalloc_end_of_space - zalloc_next_space; - zget_space(NULL, zone_zone_size, &zone_zone_space); - zcram(zone_zone, (void *)zone_zone_space, zone_zone_size); + zcram(zone_zone, zdata, zdata_size); /* initialize fake zones and zone info if tracking by task */ if (zinfo_per_task) { @@ -1475,7 +1490,6 @@ zone_init( kern_return_t retval; vm_offset_t zone_min; vm_offset_t zone_max; - vm_size_t zone_table_size; retval = kmem_suballoc(kernel_map, &zone_min, max_zonemap_size, FALSE, VM_FLAGS_ANYWHERE | VM_FLAGS_PERMANENT, @@ -1487,23 +1501,30 @@ zone_init( /* * Setup garbage collection information: */ - zone_table_size = atop_kernel(zone_max - zone_min) * - sizeof(struct zone_page_table_entry); - if (kmem_alloc_kobject(zone_map, (vm_offset_t *) &zone_page_table, - zone_table_size) != KERN_SUCCESS) - panic("zone_init"); - zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size); - zone_pages = 
(unsigned int)atop_kernel(zone_max - zone_min); zone_map_min_address = zone_min; zone_map_max_address = zone_max; + + zone_pages = (unsigned int)atop_kernel(zone_max - zone_min); + zone_page_table_used_size = sizeof(zone_page_table); + + zone_page_table_second_level_size = 1; + zone_page_table_second_level_shift_amount = 0; + + /* + * Find the power of 2 for the second level that allows + * the first level to fit in ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE + * slots. + */ + while ((zone_page_table_first_level_slot(zone_pages-1)) >= ZONE_PAGE_TABLE_FIRST_LEVEL_SIZE) { + zone_page_table_second_level_size <<= 1; + zone_page_table_second_level_shift_amount++; + } lck_grp_attr_setdefault(&zone_lck_grp_attr); lck_grp_init(&zone_lck_grp, "zones", &zone_lck_grp_attr); lck_attr_setdefault(&zone_lck_attr); lck_mtx_init_ext(&zone_gc_lock, &zone_lck_ext, &zone_lck_grp, &zone_lck_attr); - zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED); - #if CONFIG_ZLEAKS /* * Initialize the zone leak monitor @@ -1512,6 +1533,68 @@ zone_init( #endif /* CONFIG_ZLEAKS */ } +void +zone_page_table_expand(zone_page_index_t pindex) +{ + unsigned int first_index; + struct zone_page_table_entry * volatile * first_level_ptr; + + assert(pindex < zone_pages); + + first_index = zone_page_table_first_level_slot(pindex); + first_level_ptr = &zone_page_table[first_index]; + + if (*first_level_ptr == NULL) { + /* + * We were able to verify the old first-level slot + * had NULL, so attempt to populate it. + */ + + vm_offset_t second_level_array = 0; + vm_size_t second_level_size = round_page(zone_page_table_second_level_size * sizeof(struct zone_page_table_entry)); + zone_page_index_t i; + struct zone_page_table_entry *entry_array; + + if (kmem_alloc_kobject(zone_map, &second_level_array, + second_level_size) != KERN_SUCCESS) { + panic("zone_page_table_expand"); + } + + /* + * zone_gc() may scan the "zone_page_table" directly, + * so make sure any slots have a valid unused state. 
+ */ + entry_array = (struct zone_page_table_entry *)second_level_array; + for (i=0; i < zone_page_table_second_level_size; i++) { + entry_array[i].alloc_count = ZONE_PAGE_UNUSED; + entry_array[i].collect_count = 0; + } + + if (OSCompareAndSwapPtr(NULL, entry_array, first_level_ptr)) { + /* Old slot was NULL, replaced with expanded level */ + OSAddAtomicLong(second_level_size, &zone_page_table_used_size); + } else { + /* Old slot was not NULL, someone else expanded first */ + kmem_free(zone_map, second_level_array, second_level_size); + } + } else { + /* Old slot was not NULL, already been expanded */ + } +} + +struct zone_page_table_entry * +zone_page_table_lookup(zone_page_index_t pindex) +{ + unsigned int first_index = zone_page_table_first_level_slot(pindex); + struct zone_page_table_entry *second_level = zone_page_table[first_index]; + + if (second_level) { + return &second_level[zone_page_table_second_level_slot(pindex)]; + } + + return NULL; +} + extern volatile SInt32 kfree_nop_count; #pragma mark - @@ -1530,6 +1613,7 @@ zalloc_canblock( uintptr_t zbt[MAX_ZTRACE_DEPTH]; /* used in zone leak logging and zone leak detection */ int numsaved = 0; int i; + boolean_t zone_replenish_wakeup = FALSE; #if CONFIG_ZLEAKS uint32_t zleak_tracedepth = 0; /* log this allocation if nonzero */ @@ -1564,10 +1648,10 @@ zalloc_canblock( REMOVE_FROM_ZONE(zone, addr, vm_offset_t); - while ((addr == 0) && canblock && (zone->doing_gc)) { - zone->waiting = TRUE; - zone_sleep(zone); - REMOVE_FROM_ZONE(zone, addr, vm_offset_t); + if (zone->async_prio_refill && + ((zone->cur_size - (zone->count * zone->elem_size)) < (zone->prio_refill_watermark * zone->elem_size))) { + zone_replenish_wakeup = TRUE; + zone_replenish_wakeups_initiated++; } while ((addr == 0) && canblock) { @@ -1581,8 +1665,20 @@ zalloc_canblock( */ zone->waiting = TRUE; zone_sleep(zone); - } - else { + } else if (zone->doing_gc) { + /* zone_gc() is running. Since we need an element + * from the free list that is currently being + * collected, set the waiting bit and try to + * interrupt the GC process, and try again + * when we obtain the lock. 
+ */ + zone->waiting = TRUE; + zone_sleep(zone); + } else { + vm_offset_t space; + vm_size_t alloc_size; + int retry = 0; + if ((zone->cur_size + zone->elem_size) > zone->max_size) { if (zone->exhaustible) @@ -1608,141 +1704,85 @@ zalloc_canblock( zone->doing_alloc = TRUE; unlock_zone(zone); - if (zone->collectable) { - vm_offset_t space; - vm_size_t alloc_size; - int retry = 0; - - for (;;) { - int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; - - if (vm_pool_low() || retry >= 1) - alloc_size = - round_page(zone->elem_size); - else - alloc_size = zone->alloc_size; - - if (zone->noencrypt) - zflags |= KMA_NOENCRYPT; - - retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); - if (retval == KERN_SUCCESS) { + for (;;) { + int zflags = KMA_KOBJECT|KMA_NOPAGEWAIT; + + if (vm_pool_low() || retry >= 1) + alloc_size = + round_page(zone->elem_size); + else + alloc_size = zone->alloc_size; + + if (zone->noencrypt) + zflags |= KMA_NOENCRYPT; + + retval = kernel_memory_allocate(zone_map, &space, alloc_size, 0, zflags); + if (retval == KERN_SUCCESS) { #if ZONE_ALIAS_ADDR - if (alloc_size == PAGE_SIZE) - space = zone_alias_addr(space); + if (alloc_size == PAGE_SIZE) + space = zone_alias_addr(space); #endif - + #if CONFIG_ZLEAKS - if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) { - if (zone_map->size >= zleak_global_tracking_threshold) { - kern_return_t kr; - - kr = zleak_activate(); - if (kr != KERN_SUCCESS) { - printf("Failed to activate live zone leak debugging (%d).\n", kr); - } + if ((zleak_state & (ZLEAK_STATE_ENABLED | ZLEAK_STATE_ACTIVE)) == ZLEAK_STATE_ENABLED) { + if (zone_map->size >= zleak_global_tracking_threshold) { + kern_return_t kr; + + kr = zleak_activate(); + if (kr != KERN_SUCCESS) { + printf("Failed to activate live zone leak debugging (%d).\n", kr); } } - - if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) { - if (zone->cur_size > zleak_per_zone_tracking_threshold) { - zone->zleak_on = TRUE; - } - } -#endif /* CONFIG_ZLEAKS */ - - zone_page_init(space, alloc_size, - ZONE_PAGE_USED); - zcram(zone, (void *)space, alloc_size); - - break; - } else if (retval != KERN_RESOURCE_SHORTAGE) { - retry++; - - if (retry == 2) { - zone_gc(); - printf("zalloc did gc\n"); - zone_display_zprint(); - } - if (retry == 3) { - panic_include_zprint = TRUE; -#if CONFIG_ZLEAKS - if ((zleak_state & ZLEAK_STATE_ACTIVE)) { - panic_include_ztrace = TRUE; - } -#endif /* CONFIG_ZLEAKS */ - /* TODO: Change this to something more descriptive, perhaps - * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE). 
- */ - panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count); - } - } else { - break; } - } - lock_zone(zone); - zone->doing_alloc = FALSE; - if (zone->waiting) { - zone->waiting = FALSE; - zone_wakeup(zone); - } - REMOVE_FROM_ZONE(zone, addr, vm_offset_t); - if (addr == 0 && - retval == KERN_RESOURCE_SHORTAGE) { - unlock_zone(zone); - VM_PAGE_WAIT(); - lock_zone(zone); - } - } else { - vm_offset_t space; - retval = zget_space(zone, zone->elem_size, &space); - - lock_zone(zone); - zone->doing_alloc = FALSE; - if (zone->waiting) { - zone->waiting = FALSE; - thread_wakeup((event_t)zone); - } - if (retval == KERN_SUCCESS) { - zone->count++; - zone->sum_count++; - zone->cur_size += zone->elem_size; -#if ZONE_DEBUG - if (zone_debug_enabled(zone)) { - enqueue_tail(&zone->active_zones, (queue_entry_t)space); + if ((zleak_state & ZLEAK_STATE_ACTIVE) && !(zone->zleak_on)) { + if (zone->cur_size > zleak_per_zone_tracking_threshold) { + zone->zleak_on = TRUE; + } } -#endif - unlock_zone(zone); - zone_page_alloc(space, zone->elem_size); -#if ZONE_DEBUG - if (zone_debug_enabled(zone)) - space += ZONE_DEBUG_OFFSET; -#endif - addr = space; - goto success; - } - if (retval == KERN_RESOURCE_SHORTAGE) { - unlock_zone(zone); +#endif /* CONFIG_ZLEAKS */ - VM_PAGE_WAIT(); - lock_zone(zone); - } else { - /* - * Equivalent to a 'retry fail 3', we're out of address space in the zone_map - * (if it returned KERN_NO_SPACE) - */ - if (retval == KERN_NO_SPACE) { + zcram(zone, space, alloc_size); + + break; + } else if (retval != KERN_RESOURCE_SHORTAGE) { + retry++; + + if (retry == 2) { + zone_gc(); + printf("zalloc did gc\n"); + zone_display_zprint(); + } + if (retry == 3) { panic_include_zprint = TRUE; #if CONFIG_ZLEAKS - if ((zleak_state & ZLEAK_STATE_ACTIVE)) { + if ((zleak_state & ZLEAK_STATE_ACTIVE)) { panic_include_ztrace = TRUE; } -#endif /* CONFIG_ZLEAKS */ +#endif /* CONFIG_ZLEAKS */ + /* TODO: Change this to something more descriptive, perhaps + * 'zone_map exhausted' only if we get retval 3 (KERN_NO_SPACE). 
+ */ + panic("zalloc: \"%s\" (%d elements) retry fail %d, kfree_nop_count: %d", zone->zone_name, zone->count, retval, (int)kfree_nop_count); } - panic("zalloc: \"%s\" (%d elements) zget_space returned %d", zone->zone_name, zone->count, retval); + } else { + break; } } + lock_zone(zone); + zone->doing_alloc = FALSE; + if (zone->waiting) { + zone->waiting = FALSE; + zone_wakeup(zone); + } + REMOVE_FROM_ZONE(zone, addr, vm_offset_t); + if (addr == 0 && + retval == KERN_RESOURCE_SHORTAGE) { + unlock_zone(zone); + + VM_PAGE_WAIT(); + lock_zone(zone); + } } if (addr == 0) REMOVE_FROM_ZONE(zone, addr, vm_offset_t); @@ -1832,7 +1872,7 @@ empty_slot: zcurrent = 0; } - if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { + if ((addr == 0) && !canblock && (zone->async_pending == FALSE) && (zone->no_callout == FALSE) && (zone->exhaustible == FALSE) && (!vm_pool_low())) { zone->async_pending = TRUE; unlock_zone(zone); thread_call_enter(&zone->call_async_alloc); @@ -1855,7 +1895,9 @@ empty_slot: unlock_zone(zone); -success: + if (zone_replenish_wakeup) + thread_wakeup(&zone->zone_replenish_thread); + TRACE_MACHLEAKS(ZALLOC_CODE, ZALLOC_CODE_2, zone->elem_size, addr); if (addr) { @@ -2179,6 +2221,9 @@ zone_change( case Z_CALLERACCT: zone->caller_acct = value; break; + case Z_NOCALLOUT: + zone->no_callout = value; + break; #if MACH_ASSERT default: panic("Zone_change: Wrong Item Type!"); @@ -2222,8 +2267,7 @@ zprealloc( if (size != 0) { if (kmem_alloc_kobject(zone_map, &addr, size) != KERN_SUCCESS) panic("zprealloc"); - zone_page_init(addr, size, ZONE_PAGE_USED); - zcram(zone, (void *)addr, size); + zcram(zone, addr, size); } } @@ -2237,7 +2281,7 @@ zone_page_collectable( vm_size_t size) { struct zone_page_table_entry *zp; - natural_t i, j; + zone_page_index_t i, j; #if ZONE_ALIAS_ADDR addr = zone_virtual_addr(addr); @@ -2247,12 +2291,14 @@ zone_page_collectable( panic("zone_page_collectable"); #endif - i = (natural_t)atop_kernel(addr-zone_map_min_address); - j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); + i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); + j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); - for (zp = zone_page_table + i; i <= j; zp++, i++) + for (; i <= j; i++) { + zp = zone_page_table_lookup(i); if (zp->collect_count == zp->alloc_count) return (TRUE); + } return (FALSE); } @@ -2263,7 +2309,7 @@ zone_page_keep( vm_size_t size) { struct zone_page_table_entry *zp; - natural_t i, j; + zone_page_index_t i, j; #if ZONE_ALIAS_ADDR addr = zone_virtual_addr(addr); @@ -2273,11 +2319,13 @@ zone_page_keep( panic("zone_page_keep"); #endif - i = (natural_t)atop_kernel(addr-zone_map_min_address); - j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); + i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); + j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); - for (zp = zone_page_table + i; i <= j; zp++, i++) + for (; i <= j; i++) { + zp = zone_page_table_lookup(i); zp->collect_count = 0; + } } void @@ -2286,7 +2334,7 @@ zone_page_collect( vm_size_t size) { struct zone_page_table_entry *zp; - natural_t i, j; + zone_page_index_t i, j; #if ZONE_ALIAS_ADDR addr = zone_virtual_addr(addr); @@ -2296,21 +2344,22 @@ zone_page_collect( panic("zone_page_collect"); #endif - i = (natural_t)atop_kernel(addr-zone_map_min_address); - j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); + i = 
(zone_page_index_t)atop_kernel(addr-zone_map_min_address); + j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); - for (zp = zone_page_table + i; i <= j; zp++, i++) + for (; i <= j; i++) { + zp = zone_page_table_lookup(i); ++zp->collect_count; + } } void zone_page_init( vm_offset_t addr, - vm_size_t size, - int value) + vm_size_t size) { struct zone_page_table_entry *zp; - natural_t i, j; + zone_page_index_t i, j; #if ZONE_ALIAS_ADDR addr = zone_virtual_addr(addr); @@ -2320,11 +2369,16 @@ zone_page_init( panic("zone_page_init"); #endif - i = (natural_t)atop_kernel(addr-zone_map_min_address); - j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); + i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); + j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); + + for (; i <= j; i++) { + /* make sure entry exists before marking unused */ + zone_page_table_expand(i); - for (zp = zone_page_table + i; i <= j; zp++, i++) { - zp->alloc_count = value; + zp = zone_page_table_lookup(i); + assert(zp); + zp->alloc_count = ZONE_PAGE_UNUSED; zp->collect_count = 0; } } @@ -2335,7 +2389,7 @@ zone_page_alloc( vm_size_t size) { struct zone_page_table_entry *zp; - natural_t i, j; + zone_page_index_t i, j; #if ZONE_ALIAS_ADDR addr = zone_virtual_addr(addr); @@ -2345,29 +2399,32 @@ zone_page_alloc( panic("zone_page_alloc"); #endif - i = (natural_t)atop_kernel(addr-zone_map_min_address); - j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); + i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); + j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); + + for (; i <= j; i++) { + zp = zone_page_table_lookup(i); + assert(zp); - for (zp = zone_page_table + i; i <= j; zp++, i++) { /* - * Set alloc_count to (ZONE_PAGE_USED + 1) if + * Set alloc_count to ZONE_PAGE_USED if * it was previously set to ZONE_PAGE_UNUSED. 
*/ if (zp->alloc_count == ZONE_PAGE_UNUSED) - zp->alloc_count = 1; - else - ++zp->alloc_count; + zp->alloc_count = ZONE_PAGE_USED; + + ++zp->alloc_count; } } void zone_page_free_element( - struct zone_page_table_entry **free_pages, + zone_page_index_t *free_page_list, vm_offset_t addr, vm_size_t size) { struct zone_page_table_entry *zp; - natural_t i, j; + zone_page_index_t i, j; #if ZONE_ALIAS_ADDR addr = zone_virtual_addr(addr); @@ -2377,18 +2434,28 @@ zone_page_free_element( panic("zone_page_free_element"); #endif - i = (natural_t)atop_kernel(addr-zone_map_min_address); - j = (natural_t)atop_kernel((addr+size-1) - zone_map_min_address); + i = (zone_page_index_t)atop_kernel(addr-zone_map_min_address); + j = (zone_page_index_t)atop_kernel((addr+size-1) - zone_map_min_address); + + for (; i <= j; i++) { + zp = zone_page_table_lookup(i); - for (zp = zone_page_table + i; i <= j; zp++, i++) { if (zp->collect_count > 0) --zp->collect_count; if (--zp->alloc_count == 0) { + vm_address_t free_page_address; + zp->alloc_count = ZONE_PAGE_UNUSED; zp->collect_count = 0; - zp->link = *free_pages; - *free_pages = zp; + + /* + * This element was the last one on this page, re-use the page's + * storage for a page freelist + */ + free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)i); + *(zone_page_index_t *)free_page_address = *free_page_list; + *free_page_list = i; } } } @@ -2451,7 +2518,7 @@ zone_gc(void) unsigned int max_zones; zone_t z; unsigned int i; - struct zone_page_table_entry *zp, *zone_free_pages; + zone_page_index_t zone_free_page_head; lck_mtx_lock(&zone_gc_lock); @@ -2461,11 +2528,15 @@ zone_gc(void) simple_unlock(&all_zones_lock); #if MACH_ASSERT - for (i = 0; i < zone_pages; i++) - assert(zone_page_table[i].collect_count == 0); + for (i = 0; i < zone_pages; i++) { + struct zone_page_table_entry *zp; + + zp = zone_page_table_lookup(i); + assert(!zp || (zp->collect_count == 0)); + } #endif /* MACH_ASSERT */ - zone_free_pages = NULL; + zone_free_page_head = ZONE_PAGE_INDEX_INVALID; for (i = 0; i < max_zones; i++, z = z->next_zone) { unsigned int n, m; @@ -2546,6 +2617,7 @@ zone_gc(void) if (++n >= 50) { if (z->waiting == TRUE) { + /* z->waiting checked without lock held, rechecked below after locking */ lock_zone(z); if (keep != NULL) { @@ -2586,6 +2658,11 @@ zone_gc(void) ADD_LIST_TO_ZONE(z, keep, tail); + if (z->waiting) { + z->waiting = FALSE; + zone_wakeup(z); + } + unlock_zone(z); } @@ -2601,11 +2678,20 @@ zone_gc(void) n = 0; tail = keep = NULL; while (elt != NULL) { if (zone_page_collectable((vm_offset_t)elt, elt_size)) { + struct zone_free_element *next_elt = elt->next; + size_freed += elt_size; - zone_page_free_element(&zone_free_pages, + + /* + * If this is the last allocation on the page(s), + * we may use their storage to maintain the linked + * list of free-able pages. So store elt->next because + * "elt" may be scribbled over. + */ + zone_page_free_element(&zone_free_page_head, (vm_offset_t)elt, elt_size); - elt = elt->next; + elt = next_elt; ++zgc_stats.elems_freed; } @@ -2680,13 +2766,17 @@ zone_gc(void) * Reclaim the pages we are freeing. 
*/ - while ((zp = zone_free_pages) != NULL) { - zone_free_pages = zp->link; + while (zone_free_page_head != ZONE_PAGE_INDEX_INVALID) { + zone_page_index_t zind = zone_free_page_head; + vm_address_t free_page_address; #if ZONE_ALIAS_ADDR z = (zone_t)zone_virtual_addr((vm_map_address_t)z); #endif - kmem_free(zone_map, zone_map_min_address + PAGE_SIZE * - (zp - zone_page_table), PAGE_SIZE); + /* Use the first word of the page about to be freed to find the next free page */ + free_page_address = zone_map_min_address + PAGE_SIZE * ((vm_size_t)zind); + zone_free_page_head = *(zone_page_index_t *)free_page_address; + + kmem_free(zone_map, free_page_address, PAGE_SIZE); ++zgc_stats.pgs_freed; } diff --git a/osfmk/kern/zalloc.h b/osfmk/kern/zalloc.h index d7d722239..81322fd9f 100644 --- a/osfmk/kern/zalloc.h +++ b/osfmk/kern/zalloc.h @@ -114,7 +114,9 @@ struct zone { #endif /* ZONE_DEBUG */ /* boolean_t */ caller_acct: 1, /* do we account allocation/free to the caller? */ /* boolean_t */ doing_gc :1, /* garbage collect in progress? */ - /* boolean_t */ noencrypt :1; + /* boolean_t */ noencrypt :1, + /* boolean_t */ no_callout:1, + /* boolean_t */ async_prio_refill:1; int index; /* index into zone_info arrays for this zone */ struct zone * next_zone; /* Link for all-zones list */ call_entry_data_t call_async_alloc; /* callout for asynchronous alloc */ @@ -128,6 +130,8 @@ struct zone { uint32_t num_frees; /* free stats for zleak benchmarks */ uint32_t zleak_capture; /* per-zone counter for capturing every N allocations */ #endif /* CONFIG_ZLEAKS */ + vm_size_t prio_refill_watermark; + thread_t zone_replenish_thread; }; /* @@ -232,7 +236,7 @@ extern void * zget( /* Fill zone with memory */ extern void zcram( zone_t zone, - void *newmem, + vm_offset_t newmem, vm_size_t size); /* Initially fill zone with specified number of elements */ @@ -245,7 +249,7 @@ extern void zone_change( zone_t zone, unsigned int item, boolean_t value); - +extern void zone_prio_refill_configure(zone_t, vm_size_t); /* Item definitions */ #define Z_EXHAUST 1 /* Make zone exhaustible */ #define Z_COLLECT 2 /* Make zone collectable */ @@ -253,7 +257,9 @@ extern void zone_change( #define Z_FOREIGN 4 /* Allow collectable zone to contain foreign elements */ #define Z_CALLERACCT 5 /* Account alloc/free against the caller */ #define Z_NOENCRYPT 6 /* Don't encrypt zone during hibernation */ - +#define Z_NOCALLOUT 7 /* Don't asynchronously replenish the zone via + * callouts + */ /* Preallocate space for zone from zone map */ extern void zprealloc( zone_t zone, diff --git a/osfmk/mach/i386/thread_state.h b/osfmk/mach/i386/thread_state.h index dba05b947..2d542d3dd 100644 --- a/osfmk/mach/i386/thread_state.h +++ b/osfmk/mach/i386/thread_state.h @@ -33,7 +33,7 @@ #define _MACH_I386_THREAD_STATE_H_ /* Size of maximum exported thread state in words */ -#define I386_THREAD_STATE_MAX (144) /* Size of biggest state possible */ +#define I386_THREAD_STATE_MAX (224) /* Size of biggest state possible */ #if defined (__i386__) || defined(__x86_64__) #define THREAD_STATE_MAX I386_THREAD_STATE_MAX diff --git a/osfmk/vm/vm_init.h b/osfmk/vm/vm_init.h index 06beca0ee..b405952d1 100644 --- a/osfmk/vm/vm_init.h +++ b/osfmk/vm/vm_init.h @@ -34,6 +34,6 @@ extern void vm_mem_bootstrap(void) __attribute__((section("__TEXT, initcode"))); extern void vm_mem_init(void) __attribute__((section("__TEXT, initcode"))); -extern void vm_map_steal_memory(void); +extern void vm_map_steal_memory(void) __attribute__((section("__TEXT, initcode")));; #endif /* VM_INIT_H */ 
diff --git a/osfmk/vm/vm_map.c b/osfmk/vm/vm_map.c index 604bc202f..0ce07a4d9 100644 --- a/osfmk/vm/vm_map.c +++ b/osfmk/vm/vm_map.c @@ -121,7 +121,7 @@ static boolean_t vm_map_range_check( vm_map_entry_t *entry); static vm_map_entry_t _vm_map_entry_create( - struct vm_map_header *map_header); + struct vm_map_header *map_header, boolean_t map_locked); static void _vm_map_entry_dispose( struct vm_map_header *map_header, @@ -303,8 +303,9 @@ __private_extern__ void default_freezer_mapping_free(void**, boolean_t all); * wire count; it's used for map splitting and zone changing in * vm_map_copyout. */ -#define vm_map_entry_copy(NEW,OLD) \ -MACRO_BEGIN \ +#define vm_map_entry_copy(NEW,OLD) \ +MACRO_BEGIN \ +boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \ *(NEW) = *(OLD); \ (NEW)->is_shared = FALSE; \ (NEW)->needs_wakeup = FALSE; \ @@ -312,9 +313,15 @@ MACRO_BEGIN \ (NEW)->wired_count = 0; \ (NEW)->user_wired_count = 0; \ (NEW)->permanent = FALSE; \ + (NEW)->from_reserved_zone = _vmec_reserved; \ MACRO_END -#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD)) +#define vm_map_entry_copy_full(NEW,OLD) \ +MACRO_BEGIN \ +boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \ +(*(NEW) = *(OLD)); \ +(NEW)->from_reserved_zone = _vmecf_reserved; \ +MACRO_END /* * Decide if we want to allow processes to execute from their data or stack areas. @@ -419,7 +426,8 @@ override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ static zone_t vm_map_zone; /* zone for vm_map structures */ static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ -static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ +static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking + * allocations */ static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ @@ -435,7 +443,6 @@ static void *map_data; static vm_size_t map_data_size; static void *kentry_data; static vm_size_t kentry_data_size; -static int kentry_count = 2048; /* to init kentry_data_size */ #if CONFIG_EMBEDDED #define NO_COALESCE_LIMIT 0 @@ -603,7 +610,7 @@ lck_attr_t vm_map_lck_attr; * * vm_map_zone: used to allocate maps. * vm_map_entry_zone: used to allocate map entries. - * vm_map_kentry_zone: used to allocate map entries for the kernel. + * vm_map_entry_reserved_zone: fallback zone for kernel map entries * * The kernel allocates map entries from a special zone that is initially * "crammed" with memory. 
It would be difficult (perhaps impossible) for @@ -615,37 +622,46 @@ void vm_map_init( void) { + vm_size_t entry_zone_alloc_size; vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, PAGE_SIZE, "maps"); zone_change(vm_map_zone, Z_NOENCRYPT, TRUE); - +#if defined(__LP64__) + entry_zone_alloc_size = PAGE_SIZE * 5; +#else + entry_zone_alloc_size = PAGE_SIZE * 6; +#endif + vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - 1024*1024, PAGE_SIZE*5, - "non-kernel map entries"); + 1024*1024, entry_zone_alloc_size, + "VM map entries"); zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE); + zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE); - vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), - kentry_data_size, kentry_data_size, - "kernel map entries"); - zone_change(vm_map_kentry_zone, Z_NOENCRYPT, TRUE); + vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), + kentry_data_size * 64, kentry_data_size, + "Reserved VM map entries"); + zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE); vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), - 16*1024, PAGE_SIZE, "map copies"); + 16*1024, PAGE_SIZE, "VM map copies"); zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE); /* * Cram the map and kentry zones with initial data. - * Set kentry_zone non-collectible to aid zone_gc(). + * Set reserved_zone non-collectible to aid zone_gc(). */ zone_change(vm_map_zone, Z_COLLECT, FALSE); - zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE); - zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE); - zone_change(vm_map_kentry_zone, Z_FOREIGN, TRUE); - zone_change(vm_map_kentry_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ + + zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE); + zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE); + zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE); + zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE); + zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ - zcram(vm_map_zone, map_data, map_data_size); - zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); + zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size); + zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size); lck_grp_attr_setdefault(&vm_map_lck_grp_attr); lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); @@ -656,26 +672,30 @@ void vm_map_steal_memory( void) { + uint32_t kentry_initial_pages; + map_data_size = round_page(10 * sizeof(struct _vm_map)); map_data = pmap_steal_memory(map_data_size); -#if 0 /* - * Limiting worst case: vm_map_kentry_zone needs to map each "available" - * physical page (i.e. that beyond the kernel image and page tables) - * individually; we guess at most one entry per eight pages in the - * real world. This works out to roughly .1 of 1% of physical memory, - * or roughly 1900 entries (64K) for a 64M machine with 4K pages. + * kentry_initial_pages corresponds to the number of kernel map entries + * required during bootstrap until the asynchronous replenishment + * scheme is activated and/or entries are available from the general + * map entry pool. 
*/ +#if defined(__LP64__) + kentry_initial_pages = 10; +#else + kentry_initial_pages = 6; #endif - kentry_count = pmap_free_pages() / 8; - - - kentry_data_size = - round_page(kentry_count * sizeof(struct vm_map_entry)); + kentry_data_size = kentry_initial_pages * PAGE_SIZE; kentry_data = pmap_steal_memory(kentry_data_size); } +void vm_kernel_reserved_entry_init(void) { + zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry)); +} + /* * vm_map_create: * @@ -742,27 +762,41 @@ vm_map_create( * Allocates a VM map entry for insertion in the * given map (or map copy). No fields are filled. */ -#define vm_map_entry_create(map) \ - _vm_map_entry_create(&(map)->hdr) +#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked) -#define vm_map_copy_entry_create(copy) \ - _vm_map_entry_create(&(copy)->cpy_hdr) +#define vm_map_copy_entry_create(copy, map_locked) \ + _vm_map_entry_create(&(copy)->cpy_hdr, map_locked) +unsigned reserved_zalloc_count, nonreserved_zalloc_count; static vm_map_entry_t _vm_map_entry_create( - register struct vm_map_header *map_header) + struct vm_map_header *map_header, boolean_t __unused map_locked) { - register zone_t zone; - register vm_map_entry_t entry; + zone_t zone; + vm_map_entry_t entry; - if (map_header->entries_pageable) - zone = vm_map_entry_zone; - else - zone = vm_map_kentry_zone; + zone = vm_map_entry_zone; + + assert(map_header->entries_pageable ? !map_locked : TRUE); + + if (map_header->entries_pageable) { + entry = (vm_map_entry_t) zalloc(zone); + } + else { + entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE); + + if (entry == VM_MAP_ENTRY_NULL) { + zone = vm_map_entry_reserved_zone; + entry = (vm_map_entry_t) zalloc(zone); + OSAddAtomic(1, &reserved_zalloc_count); + } else + OSAddAtomic(1, &nonreserved_zalloc_count); + } - entry = (vm_map_entry_t) zalloc(zone); if (entry == VM_MAP_ENTRY_NULL) panic("vm_map_entry_create"); + entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone); + vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE); return(entry); @@ -791,10 +825,17 @@ _vm_map_entry_dispose( { register zone_t zone; - if (map_header->entries_pageable) + if (map_header->entries_pageable || !(entry->from_reserved_zone)) zone = vm_map_entry_zone; else - zone = vm_map_kentry_zone; + zone = vm_map_entry_reserved_zone; + + if (!map_header->entries_pageable) { + if (zone == vm_map_entry_zone) + OSAddAtomic(-1, &nonreserved_zalloc_count); + else + OSAddAtomic(-1, &reserved_zalloc_count); + } zfree(zone, entry); } @@ -1160,7 +1201,7 @@ vm_map_find_space( size += PAGE_SIZE_64; } - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, FALSE); /* * Look for the first possible address; if there's already @@ -3036,7 +3077,7 @@ _vm_map_clip_start( * address. */ - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); new_entry->vme_end = start; @@ -3129,7 +3170,7 @@ _vm_map_clip_end( * AFTER the specified entry */ - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy_full(new_entry, entry); assert(entry->vme_start < end); @@ -7227,9 +7268,7 @@ StartAgain: ; /* * Find the zone that the copies were allocated from */ - old_zone = (copy->cpy_hdr.entries_pageable) - ? 
vm_map_entry_zone - : vm_map_kentry_zone; + entry = vm_map_copy_first_entry(copy); /* @@ -7243,13 +7282,14 @@ StartAgain: ; * Copy each entry. */ while (entry != vm_map_copy_to_entry(copy)) { - new = vm_map_copy_entry_create(copy); + new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_entry_copy_full(new, entry); new->use_pmap = FALSE; /* clr address space specifics */ vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new); next = entry->vme_next; + old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone; zfree(old_zone, entry); entry = next; } @@ -7511,7 +7551,7 @@ vm_map_copyin_common( copy->offset = src_addr; copy->size = len; - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); #define RETURN(x) \ MACRO_BEGIN \ @@ -7633,7 +7673,7 @@ vm_map_copyin_common( version.main_timestamp = src_map->timestamp; vm_map_unlock(src_map); - new_entry = vm_map_copy_entry_create(copy); + new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); vm_map_lock(src_map); if ((version.main_timestamp + 1) != src_map->timestamp) { @@ -8221,7 +8261,8 @@ vm_map_fork_share( * Mark both entries as shared. */ - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel + * map or descendants */ vm_map_entry_copy(new_entry, old_entry); old_entry->is_shared = TRUE; new_entry->is_shared = TRUE; @@ -8394,7 +8435,7 @@ vm_map_fork( goto slow_vm_map_fork_copy; } - new_entry = vm_map_entry_create(new_map); + new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */ vm_map_entry_copy(new_entry, old_entry); /* clear address space specifics */ new_entry->use_pmap = FALSE; @@ -11146,7 +11187,7 @@ vm_map_entry_insert( assert(insp_entry != (vm_map_entry_t)0); - new_entry = vm_map_entry_create(map); + new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable); new_entry->vme_start = start; new_entry->vme_end = end; @@ -11349,7 +11390,7 @@ vm_map_remap_extract( offset = src_entry->offset + (src_start - src_entry->vme_start); - new_entry = _vm_map_entry_create(map_header); + new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); vm_map_entry_copy(new_entry, src_entry); new_entry->use_pmap = FALSE; /* clr address space specifics */ diff --git a/osfmk/vm/vm_map.h b/osfmk/vm/vm_map.h index d8ab731e9..f88bd545d 100644 --- a/osfmk/vm/vm_map.h +++ b/osfmk/vm/vm_map.h @@ -237,7 +237,8 @@ struct vm_map_entry { /* boolean_t */ superpage_size:3,/* use superpages of a certain size */ /* boolean_t */ zero_wired_pages:1, /* zero out the wired pages of this entry it is being deleted without unwiring them */ /* boolean_t */ used_for_jit:1, - /* unsigned char */ pad:1; /* available bits */ + /* boolean_t */ from_reserved_zone:1; /* Allocated from + * kernel reserved zone */ unsigned short wired_count; /* can be paged if = 0 */ unsigned short user_wired_count; /* for vm_wire */ }; @@ -458,6 +459,8 @@ struct vm_map_copy { /* Initialize the module */ extern void vm_map_init(void) __attribute__((section("__TEXT, initcode"))); +extern void vm_kernel_reserved_entry_init(void) __attribute__((section("__TEXT, initcode"))); + /* Allocate a range in the specified virtual address map and * return the entry allocated for that range. 
*/ extern kern_return_t vm_map_find_space( diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c index 0761db5ef..28b3cb172 100644 --- a/osfmk/vm/vm_pageout.c +++ b/osfmk/vm/vm_pageout.c @@ -4724,6 +4724,9 @@ process_upl_to_commit: else { m->absent = FALSE; dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); + + if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) + dwp->dw_mask |= DW_vm_page_activate; } } else dwp->dw_mask |= DW_vm_page_unwire; diff --git a/osfmk/vm/vm_resident.c b/osfmk/vm/vm_resident.c index 397914b0c..0c0a34e04 100644 --- a/osfmk/vm/vm_resident.c +++ b/osfmk/vm/vm_resident.c @@ -136,7 +136,7 @@ static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr); vm_offset_t virtual_space_start; vm_offset_t virtual_space_end; -int vm_page_pages; +uint32_t vm_page_pages; /* * The vm_page_lookup() routine, which provides for fast @@ -1473,6 +1473,14 @@ vm_page_init( boolean_t lopage) { assert(phys_page); + +#if DEBUG + if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) { + if (!(pmap_valid_page(phys_page))) { + panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page); + } + } +#endif *mem = vm_page_template; mem->phys_page = phys_page; #if 0 @@ -1628,7 +1636,7 @@ void vm_page_more_fictitious(void) vm_page_wait(THREAD_UNINT); return; } - zcram(vm_page_zone, (void *) addr, PAGE_SIZE); + zcram(vm_page_zone, addr, PAGE_SIZE); lck_mtx_unlock(&vm_page_alloc_lock); } @@ -1717,6 +1725,7 @@ vm_page_grablo(void) assert(!mem->free); assert(!mem->pmapped); assert(!mem->wpmapped); + assert(!pmap_is_noencrypt(mem->phys_page)); mem->pageq.next = NULL; mem->pageq.prev = NULL; @@ -1779,6 +1788,7 @@ return_page_from_cpu_list: assert(!mem->inactive); assert(!mem->throttled); assert(!mem->speculative); + assert(!pmap_is_noencrypt(mem->phys_page)); return mem; } @@ -1895,6 +1905,7 @@ return_page_from_cpu_list: assert(!mem->encrypted); assert(!mem->pmapped); assert(!mem->wpmapped); + assert(!pmap_is_noencrypt(mem->phys_page)); } PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next; PROCESSOR_DATA(current_processor(), start_color) = color; @@ -1954,6 +1965,8 @@ vm_page_release( // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */ + pmap_clear_noencrypt(mem->phys_page); + lck_mtx_lock_spin(&vm_page_queue_free_lock); #if DEBUG if (mem->free) @@ -2372,6 +2385,8 @@ vm_page_free_list( vm_page_t, pageq); pg_count++; + + pmap_clear_noencrypt(mem->phys_page); } } else { assert(mem->phys_page == vm_page_fictitious_addr || @@ -4776,7 +4791,7 @@ hibernate_flush_memory() sync_internal(); (void)(*consider_buffer_cache_collect)(1); - consider_zone_gc(1); + consider_zone_gc(TRUE); KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0); } diff --git a/osfmk/x86_64/idt64.s b/osfmk/x86_64/idt64.s index 50bc8b991..fe6cb1295 100644 --- a/osfmk/x86_64/idt64.s +++ b/osfmk/x86_64/idt64.s @@ -268,13 +268,14 @@ L_32bit_dispatch: /* 32-bit user task */ mov %eax, R32_EIP(%rsp) mov ISC32_RFLAGS(%rsp), %eax mov %eax, R32_EFLAGS(%rsp) + mov ISC32_CS(%rsp), %esi /* %esi := %cs for later */ + + mov %esi, R32_CS(%rsp) mov ISC32_RSP(%rsp), %eax mov %eax, R32_UESP(%rsp) mov ISC32_SS(%rsp), %eax mov %eax, R32_SS(%rsp) L_32bit_dispatch_after_fault: - mov ISC32_CS(%rsp), %esi /* %esi := %cs for later */ - mov %esi, R32_CS(%rsp) mov ISC32_TRAPNO(%rsp), %ebx /* %ebx := trapno for later */ mov %ebx, R32_TRAPNO(%rsp) mov ISC32_ERR(%rsp), %eax diff --git a/osfmk/x86_64/loose_ends.c 
b/osfmk/x86_64/loose_ends.c index 3d75d8eab..10a086542 100644 --- a/osfmk/x86_64/loose_ends.c +++ b/osfmk/x86_64/loose_ends.c @@ -248,30 +248,36 @@ ml_phys_read_data(pmap_paddr_t paddr, int size) { unsigned int result; + if (!physmap_enclosed(paddr)) + panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr); + switch (size) { - unsigned char s1; - unsigned short s2; + unsigned char s1; + unsigned short s2; case 1: - s1 = *(unsigned char *)PHYSMAP_PTOV(paddr); - result = s1; - break; + s1 = *(volatile unsigned char *)PHYSMAP_PTOV(paddr); + result = s1; + break; case 2: - s2 = *(unsigned short *)PHYSMAP_PTOV(paddr); - result = s2; - break; + s2 = *(volatile unsigned short *)PHYSMAP_PTOV(paddr); + result = s2; + break; case 4: - default: - result = *(unsigned int *)PHYSMAP_PTOV(paddr); - break; + result = *(volatile unsigned int *)PHYSMAP_PTOV(paddr); + break; + default: + panic("Invalid size %d for ml_phys_read_data\n", size); + break; } - return result; } static unsigned long long ml_phys_read_long_long(pmap_paddr_t paddr ) { - return *(unsigned long long *)PHYSMAP_PTOV(paddr); + if (!physmap_enclosed(paddr)) + panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr); + return *(volatile unsigned long long *)PHYSMAP_PTOV(paddr); } unsigned int ml_phys_read( vm_offset_t paddr) @@ -333,24 +339,32 @@ unsigned long long ml_phys_read_double_64(addr64_t paddr64) static inline void ml_phys_write_data(pmap_paddr_t paddr, unsigned long data, int size) { + if (!physmap_enclosed(paddr)) + panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr); + switch (size) { case 1: - *(unsigned char *)PHYSMAP_PTOV(paddr) = (unsigned char)data; + *(volatile unsigned char *)PHYSMAP_PTOV(paddr) = (unsigned char)data; break; case 2: - *(unsigned short *)PHYSMAP_PTOV(paddr) = (unsigned short)data; + *(volatile unsigned short *)PHYSMAP_PTOV(paddr) = (unsigned short)data; break; case 4: - default: - *(unsigned int *)PHYSMAP_PTOV(paddr) = (unsigned int)data; + *(volatile unsigned int *)PHYSMAP_PTOV(paddr) = (unsigned int)data; break; + default: + panic("Invalid size %d for ml_phys_write_data\n", size); + break; } } static void ml_phys_write_long_long(pmap_paddr_t paddr, unsigned long long data) { - *(unsigned long long *)PHYSMAP_PTOV(paddr) = data; + if (!physmap_enclosed(paddr)) + panic("%s: 0x%llx out of bounds\n", __FUNCTION__, paddr); + + *(volatile unsigned long long *)PHYSMAP_PTOV(paddr) = data; } void ml_phys_write_byte(vm_offset_t paddr, unsigned int data) @@ -408,9 +422,8 @@ void ml_phys_write_double_64(addr64_t paddr64, unsigned long long data) * * * Read the memory location at physical address paddr. - * This is a part of a device probe, so there is a good chance we will - * have a machine check here. So we have to be able to handle that. - * We assume that machine checks are enabled both in MSR and HIDs + * *Does not* recover from machine checks, unlike the PowerPC implementation. + * Should probably be deprecated. */ boolean_t diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c index 2dadb540c..014df101f 100644 --- a/osfmk/x86_64/pmap.c +++ b/osfmk/x86_64/pmap.c @@ -286,55 +286,6 @@ pmap_map( return(virt); } -/* - * Back-door routine for mapping kernel VM at initialization. - * Useful for mapping memory outside the range - * Sets no-cache, A, D. - * Otherwise like pmap_map. 
- */ -vm_offset_t -pmap_map_bd( - vm_offset_t virt, - vm_map_offset_t start_addr, - vm_map_offset_t end_addr, - vm_prot_t prot, - unsigned int flags) -{ - pt_entry_t template; - pt_entry_t *pte; - spl_t spl; - vm_offset_t base = virt; - template = pa_to_pte(start_addr) - | INTEL_PTE_REF - | INTEL_PTE_MOD - | INTEL_PTE_WIRED - | INTEL_PTE_VALID; - - if (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) { - template |= INTEL_PTE_NCACHE; - if (!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT))) - template |= INTEL_PTE_PTA; - } - if (prot & VM_PROT_WRITE) - template |= INTEL_PTE_WRITE; - - while (start_addr < end_addr) { - spl = splhigh(); - pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); - if (pte == PT_ENTRY_NULL) { - panic("pmap_map_bd: Invalid kernel address\n"); - } - pmap_store_pte(pte, template); - splx(spl); - pte_increment_pa(template); - virt += PAGE_SIZE; - start_addr += PAGE_SIZE; - } - (void)base; - PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr); - return(virt); -} - extern char *first_avail; extern vm_offset_t virtual_avail, virtual_end; extern pmap_paddr_t avail_start, avail_end; @@ -472,6 +423,7 @@ pmap_bootstrap( if (pmap_pcid_ncpus) printf("PMAP: PCID enabled\n"); + boot_args *args = (boot_args *)PE_state.bootArgs; if (args->efiMode == kBootArgsEfiMode32) { printf("EFI32: kernel virtual space limited to 4GB\n"); @@ -597,9 +549,7 @@ pmap_init(void) if (pn > last_managed_page) last_managed_page = pn; - if (pn < lowest_lo) - pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; - else if (pn >= lowest_hi && pn <= highest_hi) + if (pn >= lowest_hi && pn <= highest_hi) pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; } } diff --git a/pexpert/i386/pe_init.c b/pexpert/i386/pe_init.c index fc29c1a65..80335544f 100644 --- a/pexpert/i386/pe_init.c +++ b/pexpert/i386/pe_init.c @@ -44,6 +44,12 @@ /* extern references */ extern void pe_identify_machine(void * args); +extern int +vc_display_lzss_icon(uint32_t dst_x, uint32_t dst_y, + uint32_t image_width, uint32_t image_height, + const uint8_t *compressed_image, + uint32_t compressed_size, + const uint8_t *clut); /* private globals */ PE_state_t PE_state; @@ -54,6 +60,9 @@ clock_frequency_info_t gPEClockFrequencyInfo; void *gPEEFISystemTable; void *gPEEFIRuntimeServices; +static boot_icon_element* norootIcon_lzss; +static const uint8_t* norootClut_lzss; + int PE_initialize_console( PE_Video * info, int op ) { static int last_console = -1; @@ -104,12 +113,16 @@ void PE_init_iokit(void) } DriversPackageProp; boolean_t bootClutInitialized = FALSE; - boolean_t norootInitialized = FALSE; + boolean_t noroot_rle_Initialized = FALSE; + DTEntry entry; unsigned int size; uint32_t *map; boot_progress_element *bootPict; + norootIcon_lzss = NULL; + norootClut_lzss = NULL; + PE_init_kprintf(TRUE); PE_init_printf(TRUE); @@ -120,34 +133,45 @@ void PE_init_iokit(void) */ if( kSuccess == DTLookupEntry(NULL, "/chosen/memory-map", &entry)) { - if( kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size)) { - if (sizeof(appleClut8) <= map[1]) { - bcopy( (void *)ml_static_ptovirt(map[0]), appleClut8, sizeof(appleClut8) ); - bootClutInitialized = TRUE; - } - } - - if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) { - bootPict = (boot_progress_element *) ml_static_ptovirt(map[0]); - default_noroot.width = bootPict->width; - default_noroot.height = bootPict->height; - default_noroot.dx = 0; - default_noroot.dy = bootPict->yOffset; - default_noroot_data = &bootPict->data[0]; - norootInitialized = TRUE; - } + if( 
kSuccess == DTGetProperty(entry, "BootCLUT", (void **) &map, &size)) { + if (sizeof(appleClut8) <= map[1]) { + bcopy( (void *)ml_static_ptovirt(map[0]), appleClut8, sizeof(appleClut8) ); + bootClutInitialized = TRUE; + } + } + + if( kSuccess == DTGetProperty(entry, "Pict-FailedBoot", (void **) &map, &size)) { + bootPict = (boot_progress_element *) ml_static_ptovirt(map[0]); + default_noroot.width = bootPict->width; + default_noroot.height = bootPict->height; + default_noroot.dx = 0; + default_noroot.dy = bootPict->yOffset; + default_noroot_data = &bootPict->data[0]; + noroot_rle_Initialized = TRUE; + } + + if( kSuccess == DTGetProperty(entry, "FailedCLUT", (void **) &map, &size)) { + norootClut_lzss = (uint8_t*) ml_static_ptovirt(map[0]); + } + + if( kSuccess == DTGetProperty(entry, "FailedImage", (void **) &map, &size)) { + norootIcon_lzss = (boot_icon_element *) ml_static_ptovirt(map[0]); + if (norootClut_lzss == NULL) { + printf("ERROR: No FailedCLUT provided for noroot icon!\n"); + } + } } if (!bootClutInitialized) { - bcopy( (void *) (uintptr_t) bootClut, (void *) appleClut8, sizeof(appleClut8) ); + bcopy( (void *) (uintptr_t) bootClut, (void *) appleClut8, sizeof(appleClut8) ); } - if (!norootInitialized) { - default_noroot.width = kFailedBootWidth; - default_noroot.height = kFailedBootHeight; - default_noroot.dx = 0; - default_noroot.dy = kFailedBootOffset; - default_noroot_data = failedBootPict; + if (!noroot_rle_Initialized) { + default_noroot.width = kFailedBootWidth; + default_noroot.height = kFailedBootHeight; + default_noroot.dx = 0; + default_noroot.dy = kFailedBootOffset; + default_noroot_data = failedBootPict; } /* @@ -214,8 +238,22 @@ int PE_current_console( PE_Video * info ) void PE_display_icon( __unused unsigned int flags, __unused const char * name ) { - if ( default_noroot_data ) + if ( norootIcon_lzss && norootClut_lzss ) { + uint32_t width = norootIcon_lzss->width; + uint32_t height = norootIcon_lzss->height; + uint32_t x = ((PE_state.video.v_width - width) / 2); + uint32_t y = ((PE_state.video.v_height - height) / 2) + norootIcon_lzss->y_offset_from_center; + + vc_display_lzss_icon(x, y, width, height, + &norootIcon_lzss->data[0], + norootIcon_lzss->data_size, + norootClut_lzss); + } + else if ( default_noroot_data ) { vc_display_icon( &default_noroot, default_noroot_data ); + } else { + printf("ERROR: No data found for noroot icon!\n"); + } } boolean_t diff --git a/pexpert/i386/pe_kprintf.c b/pexpert/i386/pe_kprintf.c index 6533908eb..68d5fc2eb 100644 --- a/pexpert/i386/pe_kprintf.c +++ b/pexpert/i386/pe_kprintf.c @@ -36,6 +36,7 @@ #include #include #include +#include /* Globals */ void (*PE_kputc)(char c); @@ -105,10 +106,13 @@ void kprintf(const char *fmt, ...) 
boolean_t state; if (!disable_serial_output) { - + boolean_t early = FALSE; + if (rdmsr64(MSR_IA32_GS_BASE) == 0) { + early = TRUE; + } /* If PE_kputc has not yet been initialized, don't * take any locks, just dump to serial */ - if (!PE_kputc) { + if (!PE_kputc || early) { va_start(listp, fmt); _doprnt(fmt, &listp, pal_serial_putc, 16); va_end(listp); diff --git a/pexpert/pexpert/i386/boot.h b/pexpert/pexpert/i386/boot.h index 18e65d406..f911e64bc 100644 --- a/pexpert/pexpert/i386/boot.h +++ b/pexpert/pexpert/i386/boot.h @@ -96,6 +96,17 @@ typedef struct Boot_Video Boot_Video; #define GRAPHICS_MODE 1 #define FB_TEXT_MODE 2 +/* Struct describing an image passed in by the booter */ +struct boot_icon_element { + unsigned int width; + unsigned int height; + int y_offset_from_center; + unsigned int data_size; + unsigned int __reserved1[4]; + unsigned char data[0]; +}; +typedef struct boot_icon_element boot_icon_element; + /* Boot argument structure - passed into Mach kernel at boot time. * "Revision" can be incremented for compatible changes */ @@ -149,7 +160,10 @@ typedef struct boot_args { uint64_t bootMemSize; uint64_t PhysicalMemorySize; uint64_t FSBFrequency; - uint32_t __reserved4[734]; + uint64_t pciConfigSpaceBaseAddress; + uint32_t pciConfigSpaceStartBusNumber; + uint32_t pciConfigSpaceEndBusNumber; + uint32_t __reserved4[730]; } boot_args; -- 2.45.2
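As a closing illustration of the boot_icon_element layout added to boot.h above, here is a stand-alone sketch, not xnu code, of the placement arithmetic PE_display_icon performs with those fields: the icon is centered on the panel and then shifted vertically by y_offset_from_center. The panel size and field values below are made-up examples, and the struct is reproduced only for the sketch.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the boot_icon_element added in pexpert/pexpert/i386/boot.h. */
struct boot_icon_element {
    unsigned int  width;
    unsigned int  height;
    int           y_offset_from_center;
    unsigned int  data_size;
    unsigned int  __reserved1[4];
    unsigned char data[];              /* LZSS-compressed pixel data follows */
};

/* Compute the top-left corner used when drawing the icon. */
static void
icon_origin(const struct boot_icon_element *icon,
            uint32_t screen_w, uint32_t screen_h,
            uint32_t *x, uint32_t *y)
{
    *x = (screen_w - icon->width)  / 2;
    *y = (screen_h - icon->height) / 2 + icon->y_offset_from_center;
}

int main(void)
{
    /* Hypothetical 128x128 icon, nudged 60 pixels above center. */
    struct boot_icon_element icon = {
        .width = 128, .height = 128, .y_offset_from_center = -60
    };
    uint32_t x, y;

    icon_origin(&icon, 1440, 900, &x, &y);     /* hypothetical 1440x900 panel */
    printf("icon at (%u, %u)\n", x, y);        /* -> (656, 326) */
    return 0;
}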