From 3a60a9f5b85abb8c2cf24e1926c5c7b3f608a5e2 Mon Sep 17 00:00:00 2001 From: Apple Date: Tue, 25 Oct 2005 00:01:58 +0000 Subject: [PATCH] xnu-792.6.22.tar.gz --- bsd/crypto/aes/aescrypt.c | 8 +- bsd/hfs/hfs.h | 1 + bsd/hfs/hfs_btreeio.c | 59 +- bsd/hfs/hfs_catalog.c | 322 +-- bsd/hfs/hfs_catalog.h | 3 +- bsd/hfs/hfs_cnode.c | 14 +- bsd/hfs/hfs_endian.c | 709 +++++-- bsd/hfs/hfs_endian.h | 28 +- bsd/hfs/hfs_format.h | 5 +- bsd/hfs/hfs_readwrite.c | 44 +- bsd/hfs/hfs_vfsops.c | 8 +- bsd/hfs/hfs_vfsutils.c | 2 +- bsd/hfs/hfs_vnops.c | 17 +- bsd/hfs/hfscommon/BTree/BTree.c | 29 +- bsd/hfs/hfscommon/BTree/BTreeNodeOps.c | 148 +- bsd/hfs/hfscommon/BTree/BTreeScanner.c | 24 +- bsd/hfs/hfscommon/headers/BTreesInternal.h | 1 + bsd/hfs/hfscommon/headers/BTreesPrivate.h | 3 - bsd/kern/kdebug.c | 22 +- bsd/kern/kern_control.c | 2 +- bsd/kern/kern_core.c | 2 +- bsd/kern/kern_descrip.c | 21 + bsd/kern/kern_event.c | 14 +- bsd/kern/kern_symfile.c | 228 +++ bsd/kern/kpi_mbuf.c | 16 +- bsd/kern/kpi_socketfilter.c | 74 +- bsd/kern/sysctl_init.c | 2 + bsd/kern/sysv_sem.c | 190 +- bsd/kern/uipc_mbuf.c | 20 +- bsd/kern/uipc_mbuf2.c | 2 +- bsd/kern/uipc_socket.c | 132 +- bsd/kern/uipc_usrreq.c | 5 + bsd/net/dlil.c | 69 +- bsd/net/if.c | 12 +- bsd/net/if_mib.c | 1 + bsd/net/kext_net.h | 4 +- bsd/netat/atp_write.c | 41 +- bsd/netinet/ip_divert.c | 5 + bsd/netinet/ip_fw2.c | 5 - bsd/netinet/ip_input.c | 20 +- bsd/netinet/ip_output.c | 8 +- bsd/netinet/raw_ip.c | 7 +- bsd/netinet/tcp_input.c | 22 +- bsd/netinet/tcp_subr.c | 5 + bsd/netinet/tcp_usrreq.c | 12 +- bsd/netinet/udp_usrreq.c | 5 + bsd/netkey/key.c | 11 +- bsd/nfs/nfs.h | 2 + bsd/nfs/nfs_bio.c | 2 +- bsd/nfs/nfs_serv.c | 8 + bsd/nfs/nfs_subs.c | 29 + bsd/nfs/nfs_vnops.c | 3 + bsd/sys/fcntl.h | 1 + bsd/sys/socketvar.h | 2 +- bsd/sys/vnode.h | 1 + bsd/vfs/vfs_attrlist.c | 13 +- bsd/vfs/vfs_subr.c | 14 +- config/MasterVersion | 2 +- config/System6.0.ppc.exports | 5 + config/Unsupported.ppc.exports | 6 + iokit/IOKit/IOHibernatePrivate.h | 335 ++++ iokit/IOKit/Makefile | 4 +- iokit/Kernel/IODeviceTreeSupport.cpp | 119 +- iokit/Kernel/IOHibernateIO.cpp | 2071 ++++++++++++++++++++ iokit/Kernel/IOHibernateInternal.h | 93 + iokit/Kernel/IOHibernateRestoreKernel.c | 501 +++++ iokit/Kernel/IOPMrootDomain.cpp | 92 +- iokit/Kernel/IOPlatformExpert.cpp | 20 +- iokit/Kernel/IOServicePM.cpp | 17 +- iokit/Kernel/WKdm.h | 227 +++ iokit/Kernel/WKdmCompress.c | 328 ++++ iokit/Kernel/WKdmDecompress.c | 283 +++ iokit/conf/Makefile.template | 5 + iokit/conf/files | 5 + libkern/libkern/Makefile | 5 +- libkern/libkern/OSCrossEndian.h | 86 + osfmk/conf/Makefile.i386 | 3 +- osfmk/conf/Makefile.ppc | 2 +- osfmk/conf/files | 1 + osfmk/conf/files.i386 | 2 + osfmk/conf/files.ppc | 4 + osfmk/device/iokit_rpc.c | 2 +- osfmk/i386/acpi.c | 38 +- osfmk/i386/cpu.c | 8 +- osfmk/i386/cpu_data.h | 2 +- osfmk/i386/hibernate_i386.c | 182 ++ osfmk/i386/hibernate_restore.s | 233 +++ osfmk/i386/i386_vm_init.c | 2 + osfmk/i386/start.s | 5 + osfmk/ipc/ipc_kmsg.c | 4 +- osfmk/kern/hibernate.c | 446 +++++ osfmk/kern/machine.c | 5 + osfmk/kern/sched_prim.c | 18 +- osfmk/mach/ppc/syscall_sw.h | 4 +- osfmk/ppc/Firmware.s | 3 +- osfmk/ppc/Makefile | 1 + osfmk/ppc/PPCcalls.h | 4 +- osfmk/ppc/aligned_data.s | 5 + osfmk/ppc/asm.h | 10 +- osfmk/ppc/cpu.c | 59 +- osfmk/ppc/db_low_trace.c | 2 +- osfmk/ppc/exception.h | 50 +- osfmk/ppc/genassym.c | 8 +- osfmk/ppc/hibernate_ppc.c | 192 ++ osfmk/ppc/hibernate_restore.s | 186 ++ osfmk/ppc/hw_exception.s | 98 +- osfmk/ppc/hw_lock.s | 10 +- osfmk/ppc/hw_vm.s | 116 +- 
osfmk/ppc/interrupt.c | 18 +- osfmk/ppc/io_map.c | 4 +- osfmk/ppc/lowglobals.h | 4 +- osfmk/ppc/lowmem_vectors.s | 152 +- osfmk/ppc/machine_cpu.h | 4 +- osfmk/ppc/machine_routines.c | 93 +- osfmk/ppc/machine_routines_asm.s | 76 +- osfmk/ppc/mappings.c | 35 +- osfmk/ppc/mappings.h | 4 +- osfmk/ppc/misc_asm.s | 8 +- osfmk/ppc/model_dep.c | 4 + osfmk/ppc/pmap.c | 90 +- osfmk/ppc/pmap.h | 5 +- osfmk/ppc/pms.c | 682 +++++++ osfmk/ppc/pms.h | 167 ++ osfmk/ppc/pmsCPU.c | 233 +++ osfmk/ppc/ppc_init.c | 48 +- osfmk/ppc/ppc_vm_init.c | 6 +- osfmk/ppc/rtclock.c | 215 +- osfmk/ppc/rtclock.h | 53 + osfmk/ppc/savearea.h | 2 +- osfmk/ppc/serial_io.c | 77 +- osfmk/ppc/skiplists.s | 238 +-- osfmk/ppc/start.s | 7 + osfmk/ppc/vmachmon_asm.s | 4 +- osfmk/vm/vm_fault.c | 8 +- osfmk/vm/vm_pageout.c | 2 +- pexpert/ppc/pe_init.c | 12 +- 136 files changed, 8970 insertions(+), 1647 deletions(-) create mode 100644 iokit/IOKit/IOHibernatePrivate.h create mode 100644 iokit/Kernel/IOHibernateIO.cpp create mode 100644 iokit/Kernel/IOHibernateInternal.h create mode 100644 iokit/Kernel/IOHibernateRestoreKernel.c create mode 100644 iokit/Kernel/WKdm.h create mode 100644 iokit/Kernel/WKdmCompress.c create mode 100644 iokit/Kernel/WKdmDecompress.c create mode 100644 libkern/libkern/OSCrossEndian.h create mode 100644 osfmk/i386/hibernate_i386.c create mode 100644 osfmk/i386/hibernate_restore.s create mode 100644 osfmk/kern/hibernate.c create mode 100644 osfmk/ppc/hibernate_ppc.c create mode 100644 osfmk/ppc/hibernate_restore.s create mode 100644 osfmk/ppc/pms.c create mode 100644 osfmk/ppc/pms.h create mode 100644 osfmk/ppc/pmsCPU.c create mode 100644 osfmk/ppc/rtclock.h diff --git a/bsd/crypto/aes/aescrypt.c b/bsd/crypto/aes/aescrypt.c index 141cd3fbf..f23e9131c 100644 --- a/bsd/crypto/aes/aescrypt.c +++ b/bsd/crypto/aes/aescrypt.c @@ -123,7 +123,8 @@ extern "C" aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, unsigned char *out, const aes_encrypt_ctx cx[1]) { aes_32t locals(b0, b1); - const aes_32t *kp = cx->ks; + const aes_32t *kp; + const aes_32t *kptr = cx->ks; #if defined(ENC_ROUND_CACHE_TABLES) dtables(t_fn); #endif @@ -145,6 +146,7 @@ aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, un for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk) { + kp = kptr; #if 0 // Read the plaintext into b1 state_in(b1, in); @@ -289,7 +291,8 @@ aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, un aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk, unsigned char *out, const aes_decrypt_ctx cx[1]) { aes_32t locals(b0, b1); - const aes_32t *kp = cx->ks + cx->rn * N_COLS; + const aes_32t *kptr = cx->ks + cx->rn * N_COLS; + const aes_32t *kp; #if defined(DEC_ROUND_CACHE_TABLES) dtables(t_in); #endif @@ -317,6 +320,7 @@ aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, un for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk) { + kp = kptr; // Do the xor part of state_in, where b1 is the previous block's ciphertext. 
key_in(b0, b1, kp); diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h index e9b96c239..e238dbb08 100644 --- a/bsd/hfs/hfs.h +++ b/bsd/hfs/hfs.h @@ -247,6 +247,7 @@ typedef struct hfsmount { lck_mtx_t hfs_mutex; /* protects access to hfsmount data */ void *hfs_freezing_proc; /* who froze the fs */ + lck_rw_t hfs_insync; /* protects sync/freeze interaction */ } hfsmount_t; typedef hfsmount_t ExtendedVCB; diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c index 688983419..503528553 100644 --- a/bsd/hfs/hfs_btreeio.c +++ b/bsd/hfs/hfs_btreeio.c @@ -85,36 +85,57 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option if (retval == E_NONE) { block->blockHeader = bp; block->buffer = (char *)buf_dataptr(bp); + block->blockNum = buf_lblkno(bp); block->blockReadFromDisk = (buf_fromcache(bp) == 0); /* not found in cache ==> came from disk */ // XXXdbg block->isModified = 0; -#if BYTE_ORDER == LITTLE_ENDIAN - /* Endian swap B-Tree node (only if it's a valid block) */ + /* Check and endian swap B-Tree node (only if it's a valid block) */ if (!(options & kGetEmptyBlock)) { /* This happens when we first open the b-tree, we might not have all the node data on hand */ if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) && (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) && (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) { - /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */ - SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3); - - /* The node needs swapping */ + /* + * Don't swap the node descriptor, record offsets, or other records. + * This record will be invalidated and re-read with the correct node + * size once the B-tree control block is set up with the node size + * from the header record. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly); + + } else if (block->blockReadFromDisk) { + /* + * The node was just read from disk, so always swap/check it. + * This is necessary on big endian since the test below won't trigger. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) { - SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 0); -#if 0 - /* The node is not already in native byte order, hence corrupt */ - } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) != 0x000e) { - panic ("%s Corrupt B-Tree node detected!\n", "GetBTreeBlock:"); -#endif + /* + * The node was left in the cache in non-native order, so swap it. + * This only happens on little endian, after the node is written + * back to disk. + */ + retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost); } + + /* + * If we got an error, then the node is only partially swapped. + * We mark the buffer invalid so that the next attempt to get the + * node will read it and attempt to swap again, and will notice + * the error again. If we didn't do this, the next attempt to get + * the node might use the partially swapped node as-is. 
+ */ + if (retval) + buf_markinvalid(bp); } -#endif - } else { + } + + if (retval) { if (bp) - buf_brelse(bp); + buf_brelse(bp); block->blockHeader = NULL; block->buffer = NULL; } @@ -146,20 +167,22 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr) static int btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp) { -#if BYTE_ORDER == LITTLE_ENDIAN + int retval; struct vnode *vp = buf_vnode(bp); BlockDescriptor block; /* Prepare the block pointer */ block.blockHeader = bp; block.buffer = (char *)buf_dataptr(bp); + block.blockNum = buf_lblkno(bp); /* not found in cache ==> came from disk */ block.blockReadFromDisk = (buf_fromcache(bp) == 0); block.blockSize = buf_count(bp); // XXXdbg have to swap the data before it goes in the journal - SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1); -#endif + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + if (retval) + panic("btree_journal_modify_block_end: about to write corrupt node!\n"); return journal_modify_block_end(hfsmp->jnl, bp); } diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c index 1b05373d5..d21b4c4e0 100644 --- a/bsd/hfs/hfs_catalog.c +++ b/bsd/hfs/hfs_catalog.c @@ -1784,7 +1784,12 @@ struct packdirentry_state { linkinfo_t * cbs_linkinfo; struct cat_desc * cbs_desc; // struct dirent * cbs_stdentry; + // followign fields are only used for NFS readdir, which uses the next file id as the seek offset of each entry struct direntry * cbs_direntry; + struct direntry * cbs_prevdirentry; + u_int32_t cbs_previlinkref; + Boolean cbs_hasprevdirentry; + Boolean cbs_eof; }; static int @@ -1798,7 +1803,8 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, struct dirent catent; struct direntry * entry = NULL; time_t itime; - u_long ilinkref = 0; + u_int32_t ilinkref = 0; + u_int32_t curlinkref = 0; cnid_t cnid; int hide = 0; u_int8_t type; @@ -1809,6 +1815,7 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, size_t maxnamelen; size_t uiosize = 0; caddr_t uioaddr; + Boolean stop_after_pack = false; hfsmp = state->cbs_hfsmp; @@ -1819,8 +1826,18 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, /* We're done when parent directory changes */ if (state->cbs_parentID != curID) { - state->cbs_result = ENOENT; - return (0); /* stop */ + if (state->cbs_extended) { + if (state->cbs_hasprevdirentry) { /* the last record haven't been returned yet, so we want to stop after + * packing the last item */ + stop_after_pack = true; + } else { + state->cbs_result = ENOENT; + return (0); /* stop */ + } + } else { + state->cbs_result = ENOENT; + return (0); /* stop */ + } } if (state->cbs_extended) { @@ -1832,95 +1849,93 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, maxnamelen = NAME_MAX; } - if (!(hfsmp->hfs_flags & HFS_STANDARD)) { - switch(crp->recordType) { - case kHFSPlusFolderRecord: - type = DT_DIR; - cnid = crp->hfsPlusFolder.folderID; - /* Hide our private meta data directory */ - if ((curID == kHFSRootFolderID) && - (cnid == hfsmp->hfs_privdir_desc.cd_cnid)) { - hide = 1; + if (state->cbs_extended && stop_after_pack) { + cnid = INT_MAX; /* the last item returns a non-zero invalid cookie */ + } else { + if (!(hfsmp->hfs_flags & HFS_STANDARD)) { + switch(crp->recordType) { + case kHFSPlusFolderRecord: + type = DT_DIR; + cnid = crp->hfsPlusFolder.folderID; + /* Hide our private meta data directory */ + if ((curID == kHFSRootFolderID) && + (cnid == hfsmp->hfs_privdir_desc.cd_cnid)) { + hide = 1; + } + + break; + case 
kHFSPlusFileRecord: + itime = to_bsd_time(crp->hfsPlusFile.createDate); + /* + * When a hardlink link is encountered save its link ref. + */ + if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && + (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) && + ((itime == (time_t)hfsmp->hfs_itime) || + (itime == (time_t)hfsmp->hfs_metadata_createdate))) { + ilinkref = crp->hfsPlusFile.bsdInfo.special.iNodeNum; + } + type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode); + cnid = crp->hfsPlusFile.fileID; + /* Hide the journal files */ + if ((curID == kHFSRootFolderID) && + (hfsmp->jnl) && + ((cnid == hfsmp->hfs_jnlfileid) || + (cnid == hfsmp->hfs_jnlinfoblkid))) { + hide = 1; + } + break; + default: + return (0); /* stop */ + }; + + cnp = (CatalogName*) &ckp->hfsPlus.nodeName; + result = utf8_encodestr(cnp->ustr.unicode, cnp->ustr.length * sizeof(UniChar), + nameptr, &namelen, maxnamelen + 1, ':', 0); + if (result == ENAMETOOLONG) { + result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar), + cnp->ustr.unicode, maxnamelen + 1, + (ByteCount*)&namelen, nameptr, + cnid); + is_mangled = 1; } + } else { /* hfs */ + switch(crp->recordType) { + case kHFSFolderRecord: + type = DT_DIR; + cnid = crp->hfsFolder.folderID; + break; + case kHFSFileRecord: + type = DT_REG; + cnid = crp->hfsFile.fileID; + break; + default: + return (0); /* stop */ + }; - break; - case kHFSPlusFileRecord: - itime = to_bsd_time(crp->hfsPlusFile.createDate); + cnp = (CatalogName*) ckp->hfs.nodeName; + result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen + 1, + (ByteCount *)&namelen, nameptr); /* - * When a hardlink link is encountered save its link ref. + * When an HFS name cannot be encoded with the current + * volume encoding we use MacRoman as a fallback. */ - if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) && - (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) && - ((itime == (time_t)hfsmp->hfs_itime) || - (itime == (time_t)hfsmp->hfs_metadata_createdate))) { - ilinkref = crp->hfsPlusFile.bsdInfo.special.iNodeNum; - } - type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode); - cnid = crp->hfsPlusFile.fileID; - /* Hide the journal files */ - if ((curID == kHFSRootFolderID) && - (hfsmp->jnl) && - ((cnid == hfsmp->hfs_jnlfileid) || - (cnid == hfsmp->hfs_jnlinfoblkid))) { - hide = 1; - } - break; - default: - return (0); /* stop */ - }; - - cnp = (CatalogName*) &ckp->hfsPlus.nodeName; - result = utf8_encodestr(cnp->ustr.unicode, cnp->ustr.length * sizeof(UniChar), - nameptr, &namelen, maxnamelen + 1, ':', 0); - if (result == ENAMETOOLONG) { - result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar), - cnp->ustr.unicode, maxnamelen + 1, - (ByteCount*)&namelen, nameptr, - cnid); - is_mangled = 1; + if (result) + result = mac_roman_to_utf8(cnp->pstr, maxnamelen + 1, + (ByteCount *)&namelen, nameptr); } - } else { /* hfs */ - switch(crp->recordType) { - case kHFSFolderRecord: - type = DT_DIR; - cnid = crp->hfsFolder.folderID; - break; - case kHFSFileRecord: - type = DT_REG; - cnid = crp->hfsFile.fileID; - break; - default: - return (0); /* stop */ - }; - - cnp = (CatalogName*) ckp->hfs.nodeName; - result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen + 1, - (ByteCount *)&namelen, nameptr); - /* - * When an HFS name cannot be encoded with the current - * volume encoding we use MacRoman as a fallback. 
- */ - if (result) - result = mac_roman_to_utf8(cnp->pstr, maxnamelen + 1, - (ByteCount *)&namelen, nameptr); } if (state->cbs_extended) { - entry->d_type = type; - entry->d_namlen = namelen; - entry->d_reclen = uiosize = EXT_DIRENT_LEN(namelen); - if (hide) - entry->d_fileno = 0; /* file number = 0 means skip entry */ - else - entry->d_fileno = cnid; - /* * The index is 1 relative and includes "." and ".." * - * Also stuff the cnid in the upper 32 bits of the cookie. + * Also stuff the cnid in the upper 32 bits of the cookie. The cookie is stored to the previous entry, which + * will be packed and copied this time */ - entry->d_seekoff = (state->cbs_index + 3) | ((u_int64_t)cnid << 32); - uioaddr = (caddr_t) entry; + state->cbs_prevdirentry->d_seekoff = (state->cbs_index + 3) | ((u_int64_t)cnid << 32); + uiosize = state->cbs_prevdirentry->d_reclen; + uioaddr = (caddr_t) state->cbs_prevdirentry; } else { catent.d_type = type; catent.d_namlen = namelen; @@ -1941,58 +1956,89 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, return (0); /* stop */ } - state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio); - if (state->cbs_result == 0) { - ++state->cbs_index; + if (!state->cbs_extended || state->cbs_hasprevdirentry) { + state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio); + if (state->cbs_result == 0) { + ++state->cbs_index; - /* Remember previous entry */ - state->cbs_desc->cd_cnid = cnid; - if (type == DT_DIR) { - state->cbs_desc->cd_flags |= CD_ISDIR; - } else { - state->cbs_desc->cd_flags &= ~CD_ISDIR; - } - if (state->cbs_desc->cd_nameptr != NULL) { - vfs_removename(state->cbs_desc->cd_nameptr); - } + /* Remember previous entry */ + state->cbs_desc->cd_cnid = cnid; + if (type == DT_DIR) { + state->cbs_desc->cd_flags |= CD_ISDIR; + } else { + state->cbs_desc->cd_flags &= ~CD_ISDIR; + } + if (state->cbs_desc->cd_nameptr != NULL) { + vfs_removename(state->cbs_desc->cd_nameptr); + } #if 0 - state->cbs_desc->cd_encoding = xxxx; + state->cbs_desc->cd_encoding = xxxx; #endif - if (!is_mangled) { - state->cbs_desc->cd_namelen = namelen; - state->cbs_desc->cd_nameptr = vfs_addname(nameptr, namelen, 0, 0); - } else { - /* Store unmangled name for the directory hint else it will - * restart readdir at the last location again - */ - char *new_nameptr; - size_t bufsize; + if (!is_mangled) { + state->cbs_desc->cd_namelen = namelen; + state->cbs_desc->cd_nameptr = vfs_addname(nameptr, namelen, 0, 0); + } else { + /* Store unmangled name for the directory hint else it will + * restart readdir at the last location again + */ + char *new_nameptr; + size_t bufsize; + size_t tmp_namelen = 0; - cnp = (CatalogName *)&ckp->hfsPlus.nodeName; - bufsize = 1 + utf8_encodelen(cnp->ustr.unicode, - cnp->ustr.length * sizeof(UniChar), - ':', 0); - MALLOC(new_nameptr, char *, bufsize, M_TEMP, M_WAITOK); - result = utf8_encodestr(cnp->ustr.unicode, - cnp->ustr.length * sizeof(UniChar), - new_nameptr, &namelen, - bufsize, ':', 0); + cnp = (CatalogName *)&ckp->hfsPlus.nodeName; + bufsize = 1 + utf8_encodelen(cnp->ustr.unicode, + cnp->ustr.length * sizeof(UniChar), + ':', 0); + MALLOC(new_nameptr, char *, bufsize, M_TEMP, M_WAITOK); + result = utf8_encodestr(cnp->ustr.unicode, + cnp->ustr.length * sizeof(UniChar), + new_nameptr, &tmp_namelen, + bufsize, ':', 0); - state->cbs_desc->cd_namelen = namelen; - state->cbs_desc->cd_nameptr = vfs_addname(new_nameptr, namelen, 0, 0); + state->cbs_desc->cd_namelen = tmp_namelen; + state->cbs_desc->cd_nameptr = vfs_addname(new_nameptr, tmp_namelen, 
0, 0); - FREE(new_nameptr, M_TEMP); - } + FREE(new_nameptr, M_TEMP); + } + } + if (state->cbs_hasprevdirentry) { + curlinkref = ilinkref; /* save current */ + ilinkref = state->cbs_previlinkref; /* use previous */ + } + /* + * Record any hard links for post processing. + */ + if ((ilinkref != 0) && + (state->cbs_result == 0) && + (state->cbs_nlinks < state->cbs_maxlinks)) { + state->cbs_linkinfo[state->cbs_nlinks].dirent_addr = uiobase; + state->cbs_linkinfo[state->cbs_nlinks].link_ref = ilinkref; + state->cbs_nlinks++; + } + if (state->cbs_hasprevdirentry) { + ilinkref = curlinkref; /* restore current */ + } } - /* - * Record any hard links for post processing. - */ - if ((ilinkref != 0) && - (state->cbs_result == 0) && - (state->cbs_nlinks < state->cbs_maxlinks)) { - state->cbs_linkinfo[state->cbs_nlinks].dirent_addr = uiobase; - state->cbs_linkinfo[state->cbs_nlinks].link_ref = ilinkref; - state->cbs_nlinks++; + + if (state->cbs_extended) { /* fill the direntry to be used the next time */ + if (stop_after_pack) { + state->cbs_eof = true; + return (0); /* stop */ + } + entry->d_type = type; + entry->d_namlen = namelen; + entry->d_reclen = EXT_DIRENT_LEN(namelen); + if (hide) + entry->d_fileno = 0; /* file number = 0 means skip entry */ + else + entry->d_fileno = cnid; + /* swap the current and previous entry */ + struct direntry * tmp; + tmp = state->cbs_direntry; + state->cbs_direntry = state->cbs_prevdirentry; + state->cbs_prevdirentry = tmp; + state->cbs_hasprevdirentry = true; + state->cbs_previlinkref = ilinkref; } /* Continue iteration if there's room */ @@ -2007,7 +2053,7 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp, __private_extern__ int cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint, - uio_t uio, int extended, int * items) + uio_t uio, int extended, int * items, int * eofflag) { FCB* fcb; BTreeIterator * iterator; @@ -2022,16 +2068,20 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint fcb = GetFileControlBlock(hfsmp->hfs_catalog_vp); - /* Get a buffer for collecting link info and for a btree iterator */ + /* + * Get a buffer for link info array, btree iterator and a direntry: + */ maxlinks = MIN(entrycnt, uio_resid(uio) / SMALL_DIRENTRY_SIZE); bufsize = (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator); if (extended) { - bufsize += sizeof(struct direntry); + bufsize += 2*sizeof(struct direntry); } MALLOC(buffer, void *, bufsize, M_TEMP, M_WAITOK); bzero(buffer, bufsize); state.cbs_extended = extended; + state.cbs_hasprevdirentry = false; + state.cbs_previlinkref = 0; state.cbs_nlinks = 0; state.cbs_maxlinks = maxlinks; state.cbs_linkinfo = (linkinfo_t *) buffer; @@ -2041,7 +2091,9 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint have_key = 0; index = dirhint->dh_index + 1; if (extended) { - state.cbs_direntry = (struct direntry *)((char *)buffer + sizeof(BTreeIterator)); + state.cbs_direntry = (struct direntry *)((char *)iterator + sizeof(BTreeIterator)); + state.cbs_prevdirentry = state.cbs_direntry + 1; + state.cbs_eof = false; } /* * Attempt to build a key from cached filename @@ -2100,15 +2152,25 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint state.cbs_result = 0; state.cbs_parentID = dirhint->dh_desc.cd_parentcnid; + enum BTreeIterationOperations op; + if (extended && index != 0 && have_key) + op = kBTreeCurrentRecord; + else + op = kBTreeNextRecord; + /* * Process as many entries as possible starting at 
iterator->key. */ - result = BTIterateRecords(fcb, kBTreeNextRecord, iterator, + result = BTIterateRecords(fcb, op, iterator, (IterateCallBackProcPtr)cat_packdirentry, &state); /* Note that state.cbs_index is still valid on errors */ *items = state.cbs_index - index; index = state.cbs_index; + + if (state.cbs_eof) { + *eofflag = 1; + } /* Finish updating the catalog iterator. */ dirhint->dh_desc.cd_hint = iterator->hint.nodeNum; diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h index 63c2fe994..2f91eae90 100644 --- a/bsd/hfs/hfs_catalog.h +++ b/bsd/hfs/hfs_catalog.h @@ -269,7 +269,8 @@ extern int cat_getdirentries( directoryhint_t *dirhint, uio_t uio, int extended, - int * items); + int * items, + int * eofflag); extern int cat_insertfilethread ( struct hfsmount *hfsmp, diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c index 8351989ed..132b17cc2 100644 --- a/bsd/hfs/hfs_cnode.c +++ b/bsd/hfs/hfs_cnode.c @@ -85,9 +85,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) v_type = vnode_vtype(vp); cp = VTOC(vp); - if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp)) { + if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) || + (hfsmp->hfs_freezing_proc == p)) { return (0); } + /* * Ignore nodes related to stale file handles. */ @@ -142,11 +144,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap) // them in the catalog entry and then double // free them later. // - if (hfs_start_transaction(hfsmp) != 0) { - error = EINVAL; - goto out; - } - started_tr = 1; +// if (hfs_start_transaction(hfsmp) != 0) { +// error = EINVAL; +// goto out; +// } +// started_tr = 1; /* * Since we're already inside a transaction, diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c index 0341f15db..304f27e83 100644 --- a/bsd/hfs/hfs_endian.c +++ b/bsd/hfs/hfs_endian.c @@ -31,19 +31,27 @@ #include "hfs_endian.h" #include "hfs_dbg.h" +#include "hfscommon/headers/BTreesPrivate.h" #undef ENDIAN_DEBUG -/* Private swapping routines */ -int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, int unswap); -int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, int unswap); +/* + * Internal swapping routines + * + * These routines handle swapping the records of leaf and index nodes. The + * layout of the keys and records varies depending on the kind of B-tree + * (determined by fileID). + * + * The direction parameter must be kSwapBTNodeBigToHost or kSwapBTNodeHostToBig. + * The kSwapBTNodeHeaderRecordOnly "direction" is not valid for these routines. + */ +static int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); +static int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction); /* * hfs_swap_HFSPlusForkData - * - * There's still a few spots where we still need to swap the fork data. 
*/ -void +static void hfs_swap_HFSPlusForkData ( HFSPlusForkData *src ) @@ -70,79 +78,142 @@ hfs_swap_HFSPlusForkData ( int hfs_swap_BTNode ( BlockDescriptor *src, - int isHFSPlus, - HFSCatalogNodeID fileID, - int unswap + vnode_t vp, + enum HFSBTSwapDirection direction ) { BTNodeDescriptor *srcDesc = src->buffer; UInt16 *srcOffs = NULL; - + BTreeControlBlockPtr btcb = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr; UInt32 i; int error = 0; - #ifdef ENDIAN_DEBUG - if (unswap == 0) { - printf ("BE -> LE Swap\n"); - } else if (unswap == 1) { - printf ("LE -> BE Swap\n"); - } else if (unswap == 3) { + if (direction == kSwapBTNodeBigToHost) { + printf ("BE -> Native Swap\n"); + } else if (direction == kSwapBTNodeHostToBig) { + printf ("Native -> BE Swap\n"); + } else if (direction == kSwapBTNodeHeaderRecordOnly) { printf ("Not swapping descriptors\n"); } else { - panic ("%s This is impossible", "hfs_swap_BTNode:"); + panic ("hfs_swap_BTNode: This is impossible"); } #endif - /* If we are doing a swap */ - if (unswap == 0) { - /* Swap the node descriptor */ + /* + * If we are doing a swap from on-disk to in-memory, then swap the node + * descriptor and record offsets before we need to use them. + */ + if (direction == kSwapBTNodeBigToHost) { srcDesc->fLink = SWAP_BE32 (srcDesc->fLink); srcDesc->bLink = SWAP_BE32 (srcDesc->bLink); - /* Don't swap srcDesc->kind */ - /* Don't swap srcDesc->height */ + /* + * When first opening a BTree, we have to read the header node before the + * control block is initialized. In this case, totalNodes will be zero, + * so skip the bounds checking. + */ + if (btcb->totalNodes != 0) { + if (srcDesc->fLink >= btcb->totalNodes) { + printf("hfs_swap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink); + error = fsBTInvalidHeaderErr; + goto fail; + } + if (srcDesc->bLink >= btcb->totalNodes) { + printf("hfs_swap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink); + error = fsBTInvalidHeaderErr; + goto fail; + } + } + + /* + * Check srcDesc->kind. Don't swap it because it's only one byte. + */ + if (srcDesc->kind < kBTLeafNode || srcDesc->kind > kBTMapNode) { + printf("hfs_swap_BTNode: invalid node kind (%d)\n", srcDesc->kind); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Check srcDesc->height. Don't swap it because it's only one byte. + */ + if (srcDesc->height > btcb->treeDepth) { + printf("hfs_swap_BTNode: invalid node height (%d)\n", srcDesc->height); + error = fsBTInvalidHeaderErr; + goto fail; + } + /* Don't swap srcDesc->reserved */ srcDesc->numRecords = SWAP_BE16 (srcDesc->numRecords); - /* Swap the node offsets (including the free space one!) */ + /* + * Swap the node offsets (including the free space one!). + */ srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - ((srcDesc->numRecords + 1) * sizeof (UInt16)))); - /* Sanity check */ - if ((char *)srcOffs > ((char *)src->buffer + src->blockSize)) { - panic ("%s Too many records in the B-Tree node", "hfs_swap_BTNode:"); + /* + * Sanity check that the record offsets are within the node itself. + */ + if ((char *)srcOffs > ((char *)src->buffer + src->blockSize) || + (char *)srcOffs < ((char *)src->buffer + sizeof(BTNodeDescriptor))) { + printf("hfs_swap_BTNode: invalid record count (0x%04X)\n", srcDesc->numRecords); + error = fsBTInvalidHeaderErr; + goto fail; } - for (i = 0; i < srcDesc->numRecords + 1; i++) { + /* + * Swap and sanity check each of the record offsets. 
+ */ + for (i = 0; i <= srcDesc->numRecords; i++) { srcOffs[i] = SWAP_BE16 (srcOffs[i]); - /* Sanity check */ - if (srcOffs[i] >= src->blockSize) { - panic ("%s B-Tree node offset out of range", "hfs_swap_BTNode:"); + /* + * Sanity check: must be even, and within the node itself. + * + * We may be called to swap an unused node, which contains all zeroes. + * This is why we allow the record offset to be zero. + */ + if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) { + printf("hfs_swap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Make sure the offsets are strictly increasing. Note that we're looping over + * them backwards, hence the order in the comparison. + */ + if ((i != 0) && (srcOffs[i] >= srcOffs[i-1])) { + printf("hfs_swap_BTNode: offsets %d and %d out of order (0x%04X, 0x%04X)\n", + srcDesc->numRecords-i-1, srcDesc->numRecords-i, srcOffs[i], srcOffs[i-1]); + error = fsBTInvalidHeaderErr; + goto fail; } } } - /* Swap the records (ordered by frequency of access) */ - /* Swap a B-Tree internal node */ + /* + * Swap the records (ordered by frequency of access) + */ if ((srcDesc->kind == kBTIndexNode) || (srcDesc-> kind == kBTLeafNode)) { - if (isHFSPlus) { - error = hfs_swap_HFSPlusBTInternalNode (src, fileID, unswap); + if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) { + error = hfs_swap_HFSPlusBTInternalNode (src, VTOC(vp)->c_fileid, direction); } else { - error = hfs_swap_HFSBTInternalNode (src, fileID, unswap); + error = hfs_swap_HFSBTInternalNode (src, VTOC(vp)->c_fileid, direction); } - /* Swap a B-Tree map node */ + if (error) goto fail; + } else if (srcDesc-> kind == kBTMapNode) { /* Don't swap the bitmaps, they'll be done in the bitmap routines */ - /* Swap a B-Tree header node */ } else if (srcDesc-> kind == kBTHeaderNode) { - /* The header's offset is hard-wired because we cannot trust the offset pointers */ - BTHeaderRec *srcHead = (BTHeaderRec *)((char *)src->buffer + 14); + /* The header's offset is hard-wired because we cannot trust the offset pointers. */ + BTHeaderRec *srcHead = (BTHeaderRec *)((char *)src->buffer + sizeof(BTNodeDescriptor)); srcHead->treeDepth = SWAP_BE16 (srcHead->treeDepth); @@ -161,34 +232,93 @@ hfs_swap_BTNode ( srcHead->attributes = SWAP_BE32 (srcHead->attributes); /* Don't swap srcHead->reserved1 */ - /* Don't swap srcHead->btreeType */ + /* Don't swap srcHead->btreeType; it's only one byte */ /* Don't swap srcHead->reserved2 */ /* Don't swap srcHead->reserved3 */ /* Don't swap bitmap */ } - /* If we are doing an unswap */ - if (unswap == 1) { - /* Swap the node descriptor */ + /* + * If we are doing a swap from in-memory to on-disk, then swap the node + * descriptor and record offsets after we're done using them. + */ + if (direction == kSwapBTNodeHostToBig) { + /* + * Sanity check and swap the forkward and backward links. + */ + if (srcDesc->fLink >= btcb->totalNodes) { + printf("hfs_UNswap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink); + error = fsBTInvalidHeaderErr; + goto fail; + } + if (srcDesc->bLink >= btcb->totalNodes) { + printf("hfs_UNswap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink); + error = fsBTInvalidHeaderErr; + goto fail; + } srcDesc->fLink = SWAP_BE32 (srcDesc->fLink); srcDesc->bLink = SWAP_BE32 (srcDesc->bLink); - /* Don't swap srcDesc->kind */ - /* Don't swap srcDesc->height */ + /* + * Check srcDesc->kind. 
Don't swap it because it's only one byte. + */ + if (srcDesc->kind < kBTLeafNode || srcDesc->kind > kBTMapNode) { + printf("hfs_UNswap_BTNode: invalid node kind (%d)\n", srcDesc->kind); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Check srcDesc->height. Don't swap it because it's only one byte. + */ + if (srcDesc->height > btcb->treeDepth) { + printf("hfs_UNswap_BTNode: invalid node height (%d)\n", srcDesc->height); + error = fsBTInvalidHeaderErr; + goto fail; + } + /* Don't swap srcDesc->reserved */ - /* Swap the node offsets (including the free space one!) */ + /* + * Swap the node offsets (including the free space one!). + */ srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - ((srcDesc->numRecords + 1) * sizeof (UInt16)))); - /* Sanity check */ - if ((char *)srcOffs > ((char *)src->buffer + src->blockSize)) { - panic ("%s Too many records in the B-Tree node", "hfs_swap_BTNode:"); + /* + * Sanity check that the record offsets are within the node itself. + */ + if ((char *)srcOffs > ((char *)src->buffer + src->blockSize) || + (char *)srcOffs < ((char *)src->buffer + sizeof(BTNodeDescriptor))) { + printf("hfs_UNswap_BTNode: invalid record count (0x%04X)\n", srcDesc->numRecords); + error = fsBTInvalidHeaderErr; + goto fail; } - for (i = 0; i < srcDesc->numRecords + 1; i++) { - /* Sanity check */ - if (srcOffs[i] >= src->blockSize) { - panic ("%s B-Tree node offset out of range", "hfs_swap_BTNode:"); + /* + * Swap and sanity check each of the record offsets. + */ + for (i = 0; i <= srcDesc->numRecords; i++) { + /* + * Sanity check: must be even, and within the node itself. + * + * We may be called to swap an unused node, which contains all zeroes. + * This is why we allow the record offset to be zero. + */ + if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) { + printf("hfs_UNswap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + error = fsBTInvalidHeaderErr; + goto fail; + } + + /* + * Make sure the offsets are strictly increasing. Note that we're looping over + * them backwards, hence the order in the comparison. + */ + if ((i < srcDesc->numRecords) && (srcOffs[i+1] >= srcOffs[i])) { + printf("hfs_UNswap_BTNode: offsets %d and %d out of order (0x%04X, 0x%04X)\n", + srcDesc->numRecords-i-2, srcDesc->numRecords-i-1, srcOffs[i+1], srcOffs[i]); + error = fsBTInvalidHeaderErr; + goto fail; } srcOffs[i] = SWAP_BE16 (srcOffs[i]); @@ -196,86 +326,182 @@ hfs_swap_BTNode ( srcDesc->numRecords = SWAP_BE16 (srcDesc->numRecords); } - + +fail: + if (error) { + /* + * Log some useful information about where the corrupt node is. 
+ */ + printf("node=%lld fileID=%u volume=%s device=%s\n", src->blockNum, VTOC(vp)->c_fileid, + VTOVCB(vp)->vcbVN, vfs_statfs(vnode_mount(vp))->f_mntfromname); + VTOVCB(vp)->vcbFlags |= kHFS_DamagedVolume; + } + return (error); } -int +static int hfs_swap_HFSPlusBTInternalNode ( BlockDescriptor *src, HFSCatalogNodeID fileID, - int unswap + enum HFSBTSwapDirection direction ) { BTNodeDescriptor *srcDesc = src->buffer; UInt16 *srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - (srcDesc->numRecords * sizeof (UInt16)))); - + char *nextRecord; /* Points to start of record following current one */ UInt32 i; UInt32 j; if (fileID == kHFSExtentsFileID) { HFSPlusExtentKey *srcKey; HFSPlusExtentDescriptor *srcRec; + size_t recordSize; /* Size of the data part of the record, or node number for index nodes */ + if (srcDesc->kind == kBTIndexNode) + recordSize = sizeof(UInt32); + else + recordSize = sizeof(HFSPlusExtentDescriptor); + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ srcKey = (HFSPlusExtentKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; - if (!unswap) srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); - srcRec = (HFSPlusExtentDescriptor *)((char *)srcKey + srcKey->keyLength + 2); - if (unswap) srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + /* + * Make sure the key and data are within the buffer. Since both key + * and data are fixed size, this is relatively easy. Note that this + * relies on the keyLength being a constant; we verify the keyLength + * below. + */ + if ((char *)srcKey + sizeof(HFSPlusExtentKey) + recordSize > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + return fsBTInvalidNodeErr; + } + + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) { + printf("hfs_swap_HFSPlusBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + return fsBTInvalidNodeErr; + } + srcRec = (HFSPlusExtentDescriptor *)((char *)srcKey + srcKey->keyLength + sizeof(srcKey->keyLength)); + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); - /* Don't swap srcKey->forkType */ + /* Don't swap srcKey->forkType; it's only one byte */ /* Don't swap srcKey->pad */ srcKey->fileID = SWAP_BE32 (srcKey->fileID); srcKey->startBlock = SWAP_BE32 (srcKey->startBlock); - /* Stop if this is just an index node */ if (srcDesc->kind == kBTIndexNode) { + /* For index nodes, the record data is just a child node number. 
*/ *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec)); - continue; - } - - /* Swap the extent data */ - - /* Swap each extent */ - for (j = 0; j < kHFSPlusExtentDensity; j++) { - srcRec[j].startBlock = SWAP_BE32 (srcRec[j].startBlock); - srcRec[j].blockCount = SWAP_BE32 (srcRec[j].blockCount); + } else { + /* Swap the extent data */ + for (j = 0; j < kHFSPlusExtentDensity; j++) { + srcRec[j].startBlock = SWAP_BE32 (srcRec[j].startBlock); + srcRec[j].blockCount = SWAP_BE32 (srcRec[j].blockCount); + } } } } else if (fileID == kHFSCatalogFileID) { HFSPlusCatalogKey *srcKey; SInt16 *srcPtr; - + u_int16_t keyLength; + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ srcKey = (HFSPlusCatalogKey *)((char *)src->buffer + srcOffs[i]); - if (!unswap) srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); - srcPtr = (SInt16 *)((char *)srcKey + srcKey->keyLength + 2); - if (unswap) srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* + * Make sure we can safely dereference the keyLength and parentID fields. */ + if ((char *)srcKey + offsetof(HFSPlusCatalogKey, nodeName.unicode[0]) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + return fsBTInvalidNodeErr; + } + + /* + * Swap and sanity check the key length + */ + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); + keyLength = srcKey->keyLength; /* Put it in a local (native order) because we use it several times */ + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16 (keyLength); + /* Sanity check the key length */ + if (keyLength < kHFSPlusCatalogKeyMinimumLength || keyLength > kHFSPlusCatalogKeyMaximumLength) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, keyLength); + return fsBTInvalidNodeErr; + } + + /* + * Make sure that we can safely dereference the record's type field or + * an index node's child node number. 
+ */ + srcPtr = (SInt16 *)((char *)srcKey + keyLength + sizeof(srcKey->keyLength)); + if ((char *)srcPtr + sizeof(UInt32) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } + srcKey->parentID = SWAP_BE32 (srcKey->parentID); - if (!unswap) srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length); + /* + * Swap and sanity check the key's node name + */ + if (direction == kSwapBTNodeBigToHost) + srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length); + /* Make sure name length is consistent with key length */ + if (keyLength < sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) + + srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0])) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog record #%d keyLength=%d expected=%d\n", + srcDesc->numRecords-i, keyLength, sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) + + srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0])); + return fsBTInvalidNodeErr; + } for (j = 0; j < srcKey->nodeName.length; j++) { srcKey->nodeName.unicode[j] = SWAP_BE16 (srcKey->nodeName.unicode[j]); } - if (unswap) srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length); + if (direction == kSwapBTNodeHostToBig) + srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length); - /* Stop if this is just an index node */ + /* + * For index nodes, the record data is just the child's node number. + * Skip over swapping the various types of catalog record. + */ if (srcDesc->kind == kBTIndexNode) { *((UInt32 *)srcPtr) = SWAP_BE32 (*((UInt32 *)srcPtr)); continue; } - /* Swap the recordType field, if unswapping, leave to later */ - if (!unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]); + /* Make sure the recordType is in native order before using it. 
*/ + if (direction == kSwapBTNodeBigToHost) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); if (srcPtr[0] == kHFSPlusFolderRecord) { HFSPlusCatalogFolder *srcRec = (HFSPlusCatalogFolder *)srcPtr; - + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } + srcRec->flags = SWAP_BE16 (srcRec->flags); srcRec->valence = SWAP_BE32 (srcRec->valence); srcRec->folderID = SWAP_BE32 (srcRec->folderID); @@ -288,8 +514,8 @@ hfs_swap_HFSPlusBTInternalNode ( srcRec->bsdInfo.ownerID = SWAP_BE32 (srcRec->bsdInfo.ownerID); srcRec->bsdInfo.groupID = SWAP_BE32 (srcRec->bsdInfo.groupID); - /* Don't swap srcRec->bsdInfo.adminFlags */ - /* Don't swap srcRec->bsdInfo.ownerFlags */ + /* Don't swap srcRec->bsdInfo.adminFlags; it's only one byte */ + /* Don't swap srcRec->bsdInfo.ownerFlags; it's only one byte */ srcRec->bsdInfo.fileMode = SWAP_BE16 (srcRec->bsdInfo.fileMode); srcRec->bsdInfo.special.iNodeNum = SWAP_BE32 (srcRec->bsdInfo.special.iNodeNum); @@ -302,6 +528,10 @@ hfs_swap_HFSPlusBTInternalNode ( } else if (srcPtr[0] == kHFSPlusFileRecord) { HFSPlusCatalogFile *srcRec = (HFSPlusCatalogFile *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } srcRec->flags = SWAP_BE16 (srcRec->flags); @@ -316,8 +546,8 @@ hfs_swap_HFSPlusBTInternalNode ( srcRec->bsdInfo.ownerID = SWAP_BE32 (srcRec->bsdInfo.ownerID); srcRec->bsdInfo.groupID = SWAP_BE32 (srcRec->bsdInfo.groupID); - /* Don't swap srcRec->bsdInfo.adminFlags */ - /* Don't swap srcRec->bsdInfo.ownerFlags */ + /* Don't swap srcRec->bsdInfo.adminFlags; it's only one byte */ + /* Don't swap srcRec->bsdInfo.ownerFlags; it's only one byte */ srcRec->bsdInfo.fileMode = SWAP_BE16 (srcRec->bsdInfo.fileMode); srcRec->bsdInfo.special.iNodeNum = SWAP_BE32 (srcRec->bsdInfo.special.iNodeNum); @@ -335,65 +565,164 @@ hfs_swap_HFSPlusBTInternalNode ( } else if ((srcPtr[0] == kHFSPlusFolderThreadRecord) || (srcPtr[0] == kHFSPlusFileThreadRecord)) { + /* + * Make sure there is room for parentID and name length. + */ HFSPlusCatalogThread *srcRec = (HFSPlusCatalogThread *)srcPtr; - + if ((char *) &srcRec->nodeName.unicode[0] > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } + /* Don't swap srcRec->reserved */ srcRec->parentID = SWAP_BE32 (srcRec->parentID); - if (!unswap) srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length); + if (direction == kSwapBTNodeBigToHost) + srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length); + + /* + * Make sure there is room for the name in the buffer. + * Then swap the characters of the name itself. 
+ */ + if ((char *) &srcRec->nodeName.unicode[srcRec->nodeName.length] > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } for (j = 0; j < srcRec->nodeName.length; j++) { srcRec->nodeName.unicode[j] = SWAP_BE16 (srcRec->nodeName.unicode[j]); } - if (unswap) srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length); + + if (direction == kSwapBTNodeHostToBig) + srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length); } else { - panic ("%s unrecognized catalog record type", "hfs_swap_BTNode:"); + printf("hfs_swap_HFSPlusBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; } - /* If unswapping, we can safely unswap type field now */ - if (unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]); + /* We can swap the record type now that we're done using it. */ + if (direction == kSwapBTNodeHostToBig) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); } } else if (fileID == kHFSAttributesFileID) { HFSPlusAttrKey *srcKey; HFSPlusAttrRecord *srcRec; - + u_int16_t keyLength; + u_int32_t attrSize = 0; + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ srcKey = (HFSPlusAttrKey *)((char *)src->buffer + srcOffs[i]); + + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* Make sure there is room in the buffer for a minimal key */ + if ((char *) &srcKey->attrName[1] > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + return fsBTInvalidNodeErr; + } - if (!unswap) srcKey->keyLength = SWAP_BE16(srcKey->keyLength); - srcRec = (HFSPlusAttrRecord *)((char *)srcKey + srcKey->keyLength + 2); - if (unswap) srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + /* Swap the key length field */ + if (direction == kSwapBTNodeBigToHost) + srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + keyLength = srcKey->keyLength; /* Keep a copy in native order */ + if (direction == kSwapBTNodeHostToBig) + srcKey->keyLength = SWAP_BE16(srcKey->keyLength); + + /* + * Make sure that we can safely dereference the record's type field or + * an index node's child node number. 
+ */ + srcRec = (HFSPlusAttrRecord *)((char *)srcKey + keyLength + sizeof(srcKey->keyLength)); + if ((char *)srcRec + sizeof(u_int32_t) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d too big (%d)\n", srcDesc->numRecords-i-1, keyLength); + return fsBTInvalidNodeErr; + } srcKey->fileID = SWAP_BE32(srcKey->fileID); srcKey->startBlock = SWAP_BE32(srcKey->startBlock); - - if (!unswap) srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + + /* + * Swap and check the attribute name + */ + if (direction == kSwapBTNodeBigToHost) + srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + /* Sanity check the attribute name length */ + if (srcKey->attrNameLen > kHFSMaxAttrNameLen || keyLength < (kHFSPlusAttrKeyMinimumLength + sizeof(u_int16_t)*srcKey->attrNameLen)) { + printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d keyLength=%d attrNameLen=%d\n", srcDesc->numRecords-i-1, keyLength, srcKey->attrNameLen); + return fsBTInvalidNodeErr; + } for (j = 0; j < srcKey->attrNameLen; j++) srcKey->attrName[j] = SWAP_BE16(srcKey->attrName[j]); - if (unswap) srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); + if (direction == kSwapBTNodeHostToBig) + srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen); - /* If this is an index node, just swap the child node number */ + /* + * For index nodes, the record data is just the child's node number. + * Skip over swapping the various types of attribute record. + */ if (srcDesc->kind == kBTIndexNode) { *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec)); continue; } - /* Swap the data record */ - if (!unswap) srcRec->recordType = SWAP_BE32(srcRec->recordType); + /* Swap the record data */ + if (direction == kSwapBTNodeBigToHost) + srcRec->recordType = SWAP_BE32(srcRec->recordType); switch (srcRec->recordType) { case kHFSPlusAttrInlineData: + /* Is there room for the inline data header? */ + if ((char *) &srcRec->attrData.attrData[0] > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } + /* We're not swapping the reserved fields */ + + /* Swap the attribute size */ + if (direction == kSwapBTNodeHostToBig) + attrSize = srcRec->attrData.attrSize; srcRec->attrData.attrSize = SWAP_BE32(srcRec->attrData.attrSize); - /* Not swapping the attrData */ + if (direction == kSwapBTNodeBigToHost) + attrSize = srcRec->attrData.attrSize; + + /* Is there room for the inline attribute data? */ + if ((char *) &srcRec->attrData.attrData[attrSize] > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big (attrSize=%u)\n", srcDesc->numRecords-i-1, attrSize); + return fsBTInvalidNodeErr; + } + + /* Not swapping the attribute data itself */ break; + case kHFSPlusAttrForkData: + /* Is there room for the fork data record? */ + if ((char *)srcRec + sizeof(HFSPlusAttrForkData) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: attr fork data #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } + /* We're not swapping the reserved field */ + hfs_swap_HFSPlusForkData(&srcRec->forkData.theFork); break; + case kHFSPlusAttrExtents: + /* Is there room for an extent record? 
*/ + if ((char *)srcRec + sizeof(HFSPlusAttrExtents) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: attr extents #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } + /* We're not swapping the reserved field */ + for (j = 0; j < kHFSPlusExtentDensity; j++) { srcRec->overflowExtents.extents[j].startBlock = SWAP_BE32(srcRec->overflowExtents.extents[j].startBlock); @@ -402,19 +731,40 @@ hfs_swap_HFSPlusBTInternalNode ( } break; } - if (unswap) srcRec->recordType = SWAP_BE32(srcRec->recordType); + if (direction == kSwapBTNodeHostToBig) + srcRec->recordType = SWAP_BE32(srcRec->recordType); } } else if (fileID > kHFSFirstUserCatalogNodeID) { + /* The only B-tree with a non-system CNID that we use is the hotfile B-tree */ HotFileKey *srcKey; UInt32 *srcRec; for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ srcKey = (HotFileKey *)((char *)src->buffer + srcOffs[i]); - if (!unswap) + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* Make sure there is room for the key (HotFileKey) and data (UInt32) */ + if ((char *)srcKey + sizeof(HotFileKey) + sizeof(UInt32) > nextRecord) { + printf("hfs_swap_HFSPlusBTInternalNode: hotfile #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + return fsBTInvalidNodeErr; + } + + /* Swap and sanity check the key length field */ + if (direction == kSwapBTNodeBigToHost) srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); - srcRec = (u_int32_t *)((char *)srcKey + srcKey->keyLength + 2); - if (unswap) + if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) { + printf("hfs_swap_HFSPlusBTInternalNode: hotfile #%d incorrect keyLength %d\n", srcDesc->numRecords-i-1, srcKey->keyLength); + return fsBTInvalidNodeErr; + } + srcRec = (u_int32_t *)((char *)srcKey + srcKey->keyLength + sizeof(srcKey->keyLength)); + if (direction == kSwapBTNodeHostToBig) srcKey->keyLength = SWAP_BE16 (srcKey->keyLength); /* Don't swap srcKey->forkType */ @@ -426,22 +776,23 @@ hfs_swap_HFSPlusBTInternalNode ( *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec)); } } else { - panic ("%s unrecognized B-Tree type", "hfs_swap_BTNode:"); + panic ("hfs_swap_HFSPlusBTInternalNode: fileID %u is not a system B-tree\n", fileID); } return (0); } -int +static int hfs_swap_HFSBTInternalNode ( BlockDescriptor *src, HFSCatalogNodeID fileID, - int unswap + enum HFSBTSwapDirection direction ) { BTNodeDescriptor *srcDesc = src->buffer; UInt16 *srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - (srcDesc->numRecords * sizeof (UInt16)))); + char *nextRecord; /* Points to start of record following current one */ UInt32 i; UInt32 j; @@ -449,12 +800,42 @@ hfs_swap_HFSBTInternalNode ( if (fileID == kHFSExtentsFileID) { HFSExtentKey *srcKey; HFSExtentDescriptor *srcRec; + size_t recordSize; /* Size of the data part of the record, or node number for index nodes */ + if (srcDesc->kind == kBTIndexNode) + recordSize = sizeof(UInt32); + else + recordSize = sizeof(HFSExtentDescriptor); + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ srcKey = (HFSExtentKey *)((char *)src->buffer + srcOffs[i]); - /* Don't swap srcKey->keyLength */ - /* Don't swap srcKey->forkType */ + /* + * Point to start of next (larger offset) record. 
We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* + * Make sure the key and data are within the buffer. Since both key + * and data are fixed size, this is relatively easy. Note that this + * relies on the keyLength being a constant; we verify the keyLength + * below. + */ + if ((char *)srcKey + sizeof(HFSExtentKey) + recordSize > nextRecord) { + printf("hfs_swap_HFSBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]); + return fsBTInvalidNodeErr; + } + + /* Don't swap srcKey->keyLength (it's only one byte), but do sanity check it */ + if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) { + printf("hfs_swap_HFSBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength); + return fsBTInvalidNodeErr; + } + + /* Don't swap srcKey->forkType; it's only one byte */ srcKey->fileID = SWAP_BE32 (srcKey->fileID); srcKey->startBlock = SWAP_BE16 (srcKey->startBlock); @@ -462,47 +843,99 @@ hfs_swap_HFSBTInternalNode ( /* Point to record data (round up to even byte boundary) */ srcRec = (HFSExtentDescriptor *)((char *)srcKey + ((srcKey->keyLength + 2) & ~1)); - /* Stop if this is just an index node */ if (srcDesc->kind == kBTIndexNode) { + /* For index nodes, the record data is just a child node number. */ *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec)); - continue; - } - - /* Swap each extent */ - for (j = 0; j < kHFSExtentDensity; j++) { - srcRec[j].startBlock = SWAP_BE16 (srcRec[j].startBlock); - srcRec[j].blockCount = SWAP_BE16 (srcRec[j].blockCount); + } else { + /* Swap the extent data */ + for (j = 0; j < kHFSExtentDensity; j++) { + srcRec[j].startBlock = SWAP_BE16 (srcRec[j].startBlock); + srcRec[j].blockCount = SWAP_BE16 (srcRec[j].blockCount); + } } } } else if (fileID == kHFSCatalogFileID) { HFSCatalogKey *srcKey; SInt16 *srcPtr; - + unsigned expectedKeyLength; + for (i = 0; i < srcDesc->numRecords; i++) { + /* Point to the start of the record we're currently checking. */ srcKey = (HFSCatalogKey *)((char *)src->buffer + srcOffs[i]); - /* Don't swap srcKey->keyLength */ + /* + * Point to start of next (larger offset) record. We'll use this + * to be sure the current record doesn't overflow into the next + * record. + */ + nextRecord = (char *)src->buffer + srcOffs[i-1]; + + /* + * Make sure we can safely dereference the keyLength and parentID fields. + * The value 8 below is 1 bytes for keyLength + 1 byte reserved + 4 bytes + * for parentID + 1 byte for nodeName's length + 1 byte to round up the + * record start to an even offset, which forms a minimal key. 
+			 */
+			if ((char *)srcKey + 8 > nextRecord) {
+				printf("hfs_swap_HFSBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+				return fsBTInvalidNodeErr;
+			}
+
+			/* Don't swap srcKey->keyLength (it's only one byte), but do sanity check it */
+			if (srcKey->keyLength < kHFSCatalogKeyMinimumLength || srcKey->keyLength > kHFSCatalogKeyMaximumLength) {
+				printf("hfs_swap_HFSBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength);
+				return fsBTInvalidNodeErr;
+			}
+
			/* Don't swap srcKey->reserved */

			srcKey->parentID = SWAP_BE32 (srcKey->parentID);

			/* Don't swap srcKey->nodeName */
+
+			/* Make sure the keyLength is big enough for the key's content */
+			if (srcDesc->kind == kBTIndexNode)
+				expectedKeyLength = sizeof(*srcKey) - sizeof(srcKey->keyLength);
+			else
+				expectedKeyLength = srcKey->nodeName[0] + kHFSCatalogKeyMinimumLength;
+			if (srcKey->keyLength < expectedKeyLength) {
+				printf("hfs_swap_HFSBTInternalNode: catalog record #%d keyLength=%u expected=%u\n",
+					srcDesc->numRecords-i, srcKey->keyLength, expectedKeyLength);
+				return fsBTInvalidNodeErr;
+			}

			/* Point to record data (round up to even byte boundary) */
			srcPtr = (SInt16 *)((char *)srcKey + ((srcKey->keyLength + 2) & ~1));

-			/* Stop if this is just an index node */
+			/*
+			 * Make sure that we can safely dereference the record's type field or
+			 * an index node's child node number.
+			 */
+			if ((char *)srcPtr + sizeof(UInt32) > nextRecord) {
+				printf("hfs_swap_HFSBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1);
+				return fsBTInvalidNodeErr;
+			}
+
+			/*
+			 * For index nodes, the record data is just the child's node number.
+			 * Skip over swapping the various types of catalog record.
+			 */
			if (srcDesc->kind == kBTIndexNode) {
				*((UInt32 *)srcPtr) = SWAP_BE32 (*((UInt32 *)srcPtr));
				continue;
			}

-			/* Swap the recordType field, if unswapping, leave to later */
-			if (!unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]);
+			/* Make sure the recordType is in native order before using it.
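+			 * (On a read from disk the swap happens right here, before the type
+			 * is examined; on a write the type is still native, and is swapped
+			 * only after the record has been processed, at the end of the loop.)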
*/ + if (direction == kSwapBTNodeBigToHost) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); if (srcPtr[0] == kHFSFolderRecord) { HFSCatalogFolder *srcRec = (HFSCatalogFolder *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + printf("hfs_swap_HFSBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } srcRec->flags = SWAP_BE16 (srcRec->flags); srcRec->valence = SWAP_BE16 (srcRec->valence); @@ -518,6 +951,10 @@ hfs_swap_HFSBTInternalNode ( } else if (srcPtr[0] == kHFSFileRecord) { HFSCatalogFile *srcRec = (HFSCatalogFile *)srcPtr; + if ((char *)srcRec + sizeof(*srcRec) > nextRecord) { + printf("hfs_swap_HFSBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } srcRec->flags = srcRec->flags; srcRec->fileType = srcRec->fileType; @@ -552,25 +989,37 @@ hfs_swap_HFSBTInternalNode ( } else if ((srcPtr[0] == kHFSFolderThreadRecord) || (srcPtr[0] == kHFSFileThreadRecord)) { - HFSCatalogThread *srcRec = (HFSCatalogThread *)srcPtr; + + /* Make sure there is room for parentID and name length */ + if ((char *) &srcRec->nodeName[1] > nextRecord) { + printf("hfs_swap_HFSBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } /* Don't swap srcRec->reserved array */ srcRec->parentID = SWAP_BE32 (srcRec->parentID); /* Don't swap srcRec->nodeName */ - + + /* Make sure there is room for the name in the buffer */ + if ((char *) &srcRec->nodeName[srcRec->nodeName[0]] > nextRecord) { + printf("hfs_swap_HFSBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; + } } else { - panic ("%s unrecognized catalog record type", "hfs_swap_BTNode:"); + printf("hfs_swap_HFSBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1); + return fsBTInvalidNodeErr; } - /* If unswapping, we can safely swap type now */ - if (unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]); + /* We can swap the record type now that we're done using it */ + if (direction == kSwapBTNodeHostToBig) + srcPtr[0] = SWAP_BE16 (srcPtr[0]); } } else { - panic ("%s unrecognized B-Tree type", "hfs_swap_BTNode:"); + panic ("hfs_swap_HFSBTInternalNode: fileID %u is not a system B-tree\n", fileID); } return (0); diff --git a/bsd/hfs/hfs_endian.h b/bsd/hfs/hfs_endian.h index 06801dc69..330839d29 100644 --- a/bsd/hfs/hfs_endian.h +++ b/bsd/hfs/hfs_endian.h @@ -48,7 +48,6 @@ /* HFS is always big endian, no swapping needed */ #define SWAP_HFS_PLUS_FORK_DATA(__a) - #define SWAP_BT_NODE(__a, __b, __c, __d) /************************/ /* LITTLE ENDIAN Macros */ @@ -61,7 +60,6 @@ #define SWAP_BE64(__a) NXSwapBigLongLongToHost (__a) #define SWAP_HFS_PLUS_FORK_DATA(__a) hfs_swap_HFSPlusForkData ((__a)) - #define SWAP_BT_NODE(__a, __b, __c, __d) hfs_swap_BTNode ((__a), (__b), (__c), (__d)) #else #warning Unknown byte order @@ -72,8 +70,30 @@ extern "C" { #endif -void hfs_swap_HFSPlusForkData (HFSPlusForkData *src); -int hfs_swap_BTNode (BlockDescriptor *src, int isHFSPlus, HFSCatalogNodeID fileID, int unswap); +/* + * Constants for the "unswap" argument to hfs_swap_BTNode: + */ +enum HFSBTSwapDirection { + kSwapBTNodeBigToHost = 0, + kSwapBTNodeHostToBig = 1, + + /* + * kSwapBTNodeHeaderRecordOnly is used to swap just the header record + * of a header node from big endian (on disk) to host endian (in memory). + * It does not swap the node descriptor (forward/backward links, record + * count, etc.). 
It assumes the header record is at offset 0x000E. + * + * Since HFS Plus doesn't have fixed B-tree node sizes, we have to read + * the header record to determine the actual node size for that tree + * before we can set up the B-tree control block. We read it initially + * as 512 bytes, then re-read it once we know the correct node size. Since + * we may not have read the entire header node the first time, we can't + * swap the record offsets, other records, or do most sanity checks. + */ + kSwapBTNodeHeaderRecordOnly = 3 +}; + +int hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction); #ifdef __cplusplus } diff --git a/bsd/hfs/hfs_format.h b/bsd/hfs/hfs_format.h index 001206d45..a285f0da8 100644 --- a/bsd/hfs/hfs_format.h +++ b/bsd/hfs/hfs_format.h @@ -458,18 +458,19 @@ union HFSPlusAttrRecord { typedef union HFSPlusAttrRecord HFSPlusAttrRecord; /* Attribute key */ +enum { kHFSMaxAttrNameLen = 127 }; struct HFSPlusAttrKey { u_int16_t keyLength; /* key length (in bytes) */ u_int16_t pad; /* set to zero */ u_int32_t fileID; /* file associated with attribute */ u_int32_t startBlock; /* first attribue allocation block number for extents */ u_int16_t attrNameLen; /* number of unicode characters */ - u_int16_t attrName[127]; /* attribute name (Unicode) */ + u_int16_t attrName[kHFSMaxAttrNameLen]; /* attribute name (Unicode) */ }; typedef struct HFSPlusAttrKey HFSPlusAttrKey; #define kHFSPlusAttrKeyMaximumLength (sizeof(HFSPlusAttrKey) - sizeof(u_int16_t)) -#define kHFSPlusAttrKeyMinimumLength (kHFSPlusAttrKeyMaximumLength - (127 * sizeof(u_int16_t))) +#define kHFSPlusAttrKeyMinimumLength (kHFSPlusAttrKeyMaximumLength - kHFSMaxAttrNameLen*sizeof(u_int16_t)) #endif /* __APPLE_API_UNSTABLE */ diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c index 46f8e54e5..76cd198d3 100644 --- a/bsd/hfs/hfs_readwrite.c +++ b/bsd/hfs/hfs_readwrite.c @@ -990,6 +990,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { if (!(hfsmp->jnl)) return (ENOTSUP); + + lck_rw_lock_exclusive(&hfsmp->hfs_insync); task = current_task(); task_working_set_disable(task); @@ -1001,9 +1003,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); hfs_global_exclusive_lock_acquire(hfsmp); journal_flush(hfsmp->jnl); + // don't need to iterate on all vnodes, we just need to // wait for writes to the system files and the device vnode - // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL); if (HFSTOVCB(hfsmp)->extentsRefNum) vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze"); if (HFSTOVCB(hfsmp)->catalogRefNum) @@ -1026,7 +1028,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { // if we're not the one who froze the fs then we // can't thaw it. if (hfsmp->hfs_freezing_proc != current_proc()) { - return EINVAL; + return EPERM; } // NOTE: if you add code here, also go check the @@ -1034,6 +1036,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { // hfsmp->hfs_freezing_proc = NULL; hfs_global_exclusive_lock_release(hfsmp); + lck_rw_unlock_exclusive(&hfsmp->hfs_insync); return (0); } @@ -1262,13 +1265,18 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* { case HFS_SETACLSTATE: { int state; - if (!is_suser()) { - return (EPERM); - } if (ap->a_data == NULL) { return (EINVAL); } + + vfsp = vfs_statfs(HFSTOVFS(hfsmp)); state = *(int *)ap->a_data; + + // super-user can enable or disable acl's on a volume. 
+		// the volume owner can only enable acl's
+		if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
+			return (EPERM);
+		}
		if (state == 0 || state == 1)
			return hfs_setextendedsecurity(hfsmp, state);
		else
@@ -1605,6 +1613,11 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
	int started_tr = 0;
	int tooklock = 0;

+	/* Do not allow blockmap operation on a directory */
+	if (vnode_isdir(vp)) {
+		return (ENOTSUP);
+	}
+
	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
@@ -2106,6 +2119,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	off_t filebytes;
	u_long fileblocks;
	int blksize, error = 0;
+	struct cnode *cp = VTOC(vp);

	if (vnode_isdir(vp))
		return (EISDIR);	/* cannot truncate an HFS directory! */
@@ -2125,6 +2139,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
		} else {
			filebytes = length;
		}
+		cp->c_flag |= C_FORCEUPDATE;
		error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
		if (error)
			break;
@@ -2136,6 +2151,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
		} else {
			filebytes = length;
		}
+		cp->c_flag |= C_FORCEUPDATE;
		error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
		if (error)
			break;
@@ -2516,7 +2532,6 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
-#if BYTE_ORDER == LITTLE_ENDIAN
	BlockDescriptor block;

	/* Trap B-Tree writes */
@@ -2524,22 +2539,29 @@
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {

-		/* Swap if the B-Tree node is in native byte order */
+		/*
+		 * Swap and validate the node if it is in native byte order.
+		 * This is always true on big endian, so we always validate
+		 * before writing here. On little endian, the node typically has
+		 * been swapped and validated when it was written to the journal,
+		 * so we won't do anything here.
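+		 * (The 0x000e test below reads the last entry of the node's offset
+		 * table, which holds the offset of record 0; in a valid big endian
+		 * node that offset is always 0x000E, the size of the BTNodeDescriptor.)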
+ */ if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) { /* Prepare the block pointer */ block.blockHeader = bp; block.buffer = (char *)buf_dataptr(bp); + block.blockNum = buf_lblkno(bp); /* not found in cache ==> came from disk */ block.blockReadFromDisk = (buf_fromcache(bp) == 0); block.blockSize = buf_count(bp); /* Endian un-swap B-Tree node */ - SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1); + retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig); + if (retval) + panic("hfs_vnop_bwrite: about to write corrupt node!\n"); } - - /* We don't check to make sure that it's 0x0e00 because it could be all zeros */ } -#endif + /* This buffer shouldn't be locked anymore but if it is clear it */ if ((buf_flags(bp) & B_LOCKED)) { // XXXdbg diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c index f6569bf71..7ebe8aff7 100644 --- a/bsd/hfs/hfs_vfsops.c +++ b/bsd/hfs/hfs_vfsops.c @@ -860,6 +860,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr); lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr); lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr); + lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr); vfs_setfsprivate(mp, hfsmp); hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */ @@ -1655,6 +1656,10 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) if (hfsmp->hfs_flags & HFS_READ_ONLY) return (EROFS); + /* skip over frozen volumes */ + if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync)) + return 0; + args.cred = vfs_context_proc(context); args.waitfor = waitfor; args.p = p; @@ -1734,7 +1739,8 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context) if (hfsmp->jnl) { journal_flush(hfsmp->jnl); } - + + lck_rw_unlock_shared(&hfsmp->hfs_insync); return (allerror); } diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c index 3dfe383b6..4eed699c9 100644 --- a/bsd/hfs/hfs_vfsutils.c +++ b/bsd/hfs/hfs_vfsutils.c @@ -1492,7 +1492,7 @@ short MacToVFSError(OSErr err) return EOVERFLOW; case btBadNode: /* -32731 */ - return EBADF; + return EIO; case memFullErr: /* -108 */ return ENOMEM; /* +12 */ diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c index 5c0cc83ed..59c278f10 100644 --- a/bsd/hfs/hfs_vnops.c +++ b/bsd/hfs/hfs_vnops.c @@ -43,6 +43,7 @@ #include #include +#include #include "hfs.h" #include "hfs_catalog.h" @@ -65,6 +66,9 @@ /* Global vfs data structures for hfs */ +/* Always F_FULLFSYNC? 
1=yes,0=no (default due to "various" reasons is 'no') */
+int always_do_fullfsync = 0;
+SYSCTL_INT (_kern, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");

extern unsigned long strtoul(const char *, char **, int);
@@ -236,6 +240,7 @@ hfs_vnop_close(ap)
	if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) {
		hfsmp->hfs_freezing_proc = NULL;
		hfs_global_exclusive_lock_release(hfsmp);
+		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
	}

	busy = vnode_isinuse(vp, 1);
@@ -962,6 +967,7 @@ hfs_vnop_exchange(ap)
	from_cp->c_uid = to_cp->c_uid;
	from_cp->c_flags = to_cp->c_flags;
	from_cp->c_mode = to_cp->c_mode;
+	from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags;
	bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32);

	bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc));
@@ -975,6 +981,7 @@ hfs_vnop_exchange(ap)
	to_cp->c_uid = tempattr.ca_uid;
	to_cp->c_flags = tempattr.ca_flags;
	to_cp->c_mode = tempattr.ca_mode;
+	to_cp->c_attr.ca_recflags = tempattr.ca_recflags;
	bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32);

	/* Rehash the cnodes using their new file IDs */
@@ -1137,7 +1144,7 @@ metasync:
		cp->c_touch_acctime = FALSE;
		cp->c_touch_chgtime = FALSE;
		cp->c_touch_modtime = FALSE;
-	} else /* User file */ {
+	} else if ( !(vp->v_flag & VSWAP) ) /* User file */ {
		retval = hfs_update(vp, wait);

		/* When MNT_WAIT is requested push out any delayed meta data */
@@ -1150,7 +1157,7 @@ metasync:
		// fsync() and if so push out any pending transactions
		// that this file might is a part of (and get them on
		// stable storage).
-		if (fullsync) {
+		if (fullsync || always_do_fullfsync) {
			if (hfsmp->jnl) {
				journal_flush(hfsmp->jnl);
			} else {
@@ -2441,6 +2448,10 @@ hfs_vnop_readdir(ap)
	if (nfs_cookies) {
		cnid_hint = (cnid_t)(uio_offset(uio) >> 32);
		uio_setoffset(uio, uio_offset(uio) & 0x00000000ffffffffLL);
+		if (cnid_hint == INT_MAX) { /* searching past the last item */
+			eofflag = 1;
+			goto out;
+		}
	}
	/*
	 * Synthesize entries for "." and ".."
@@ -2565,7 +2576,7 @@ hfs_vnop_readdir(ap)
	}

	/* Pack the buffer with dirent entries. */
-	error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items);
+	error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items, &eofflag);

	hfs_systemfile_unlock(hfsmp, lockflags);
diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c
index f11af332a..9920c8742 100644
--- a/bsd/hfs/hfscommon/BTree/BTree.c
+++ b/bsd/hfs/hfscommon/BTree/BTree.c
@@ -315,15 +312,12 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
		}
	}

-	// if nodeSize Matches then we don't need to release, just CheckNode
-	if ( btreePtr->nodeSize == nodeRec.blockSize )
-	{
-		err = CheckNode (btreePtr, nodeRec.buffer);
-		if (err)
-			VTOVCB(btreePtr->fileRefNum)->vcbFlags |= kHFS_DamagedVolume;
-		M_ExitOnError (err);
-	}
-	else
+	/*
+	 * If the actual node size is different than the amount we read,
+	 * then release and trash this block, and re-read with the correct
+	 * node size.
+	 */
+	if ( btreePtr->nodeSize != nodeRec.blockSize )
	{
		err = SetBTreeBlockSize (btreePtr->fileRefNum, btreePtr->nodeSize, 32);
		M_ExitOnError (err);
@@ -336,7 +333,7 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
		++btreePtr->numReleaseNodes;
		M_ExitOnError (err);

-		err = GetNode (btreePtr, kHeaderNodeNum, &nodeRec );	// calls CheckNode...
+ err = GetNode (btreePtr, kHeaderNodeNum, &nodeRec ); M_ExitOnError (err); } @@ -1286,15 +1283,19 @@ OSStatus BTInsertRecord (FCB *filePtr, goto ErrorExit; } - err = UpdateNode (btreePtr, &nodeRec, 0, kLockTransaction); - M_ExitOnError (err); - - // update BTreeControlBlock + /* + * Update the B-tree control block. Do this before + * calling UpdateNode since it will compare the node's + * height with treeDepth. + */ btreePtr->treeDepth = 1; btreePtr->rootNode = insertNodeNum; btreePtr->firstLeafNode = insertNodeNum; btreePtr->lastLeafNode = insertNodeNum; + err = UpdateNode (btreePtr, &nodeRec, 0, kLockTransaction); + M_ExitOnError (err); + M_BTreeHeaderDirty (btreePtr); goto Success; diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c index fe9b141df..590dfecc5 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c +++ b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c @@ -111,7 +111,6 @@ // ReleaseNode - Call FS Agent to release node obtained by GetNode. // UpdateNode - Mark a node as dirty and call FS Agent to release it. // -// CheckNode - Checks the validity of a node. // ClearNode - Clear a node to all zeroes. // // InsertRecord - Inserts a record into a BTree node. @@ -215,58 +214,6 @@ OSStatus GetNode (BTreeControlBlockPtr btreePtr, goto ErrorExit; } ++btreePtr->numGetNodes; - - // - // Optimization - // Only call CheckNode if the node came from disk. - // If it was in the cache, we'll assume its already a valid node. - // - - if ( nodePtr->blockReadFromDisk ) // if we read it from disk then check it - { - err = CheckNode (btreePtr, nodePtr->buffer); - - if (err != noErr) - { - - VTOVCB(btreePtr->fileRefNum)->vcbFlags |= kHFS_DamagedVolume; - - #if HFS_DIAGNOSTIC - if (((NodeDescPtr)nodePtr->buffer)->numRecords != 0) - PrintNode(nodePtr->buffer, btreePtr->nodeSize, nodeNum); - #endif - - if (DEBUG_BUILD) - { - // With the removal of bounds checking in IsItAHint(), it's possible that - // GetNode() will be called to fetch a clear (all zeroes) node. We want - // CheckNode() to fail in this case (it does), however we don't want to assert - // this case because it is not really an "error". Returning an error from GetNode() - // in this case will cause the hint checking code to ignore the hint and revert to - // the full search mode. - - { - UInt32 *cur; - UInt32 *lastPlusOne; - - cur = nodePtr->buffer; - lastPlusOne = (UInt32 *) ((UInt8 *) cur + btreePtr->nodeSize); - - while( cur < lastPlusOne ) - { - if( *cur++ != 0 ) - { - Panic ("\pGetNode: CheckNode returned error."); - break; - } - } - } - } - - (void) TrashNode (btreePtr, nodePtr); // ignore error - goto ErrorExit; - } - } return noErr; @@ -427,9 +374,6 @@ Routine: UpdateNode - Mark a node as dirty and call FS Agent to release it. Function: Marks a BTree node dirty and informs the FS Agent that it may be released. - //€€ have another routine that clears & writes a node, so we can call - CheckNode from this routine. - Input: btreePtr - pointer to BTree control block nodeNum - number of node to release transactionID - ID of transaction this node update is a part of @@ -450,14 +394,8 @@ OSStatus UpdateNode (BTreeControlBlockPtr btreePtr, err = noErr; - if (nodePtr->buffer != nil) //€€ why call UpdateNode if nil ?!? + if (nodePtr->buffer != nil) // Why call UpdateNode if nil ?!? 
{ - if (DEBUG_BUILD) - { - if ( btreePtr->attributes & kBTVariableIndexKeysMask ) - (void) CheckNode (btreePtr, nodePtr->buffer); - } - releaseNodeProc = btreePtr->releaseBlockProc; err = releaseNodeProc (btreePtr->fileRefNum, nodePtr, @@ -478,90 +416,6 @@ ErrorExit: -/*------------------------------------------------------------------------------- - -Routine: CheckNode - Checks the validity of a node. - -Function: Checks the validity of a node by verifying that the fLink and bLink fields - are within the forks EOF. The node type must be one of the four known - types. The node height must be less than or equal to the tree height. The - node must not have more than the maximum number of records, and the record - offsets must make sense. - -Input: btreePtr - pointer to BTree control block - node - pointer to node to check - -Result: noErr - success - fsBTInvalidNodeErr - failure --------------------------------------------------------------------------------*/ - -OSStatus CheckNode (BTreeControlBlockPtr btreePtr, NodeDescPtr node ) -{ - SInt32 index; - SInt32 maxRecords; - UInt32 maxNode; - UInt16 nodeSize; - UInt16 offset; - UInt16 prevOffset; - - nodeSize = btreePtr->nodeSize; - - ///////////////////// are fLink and bLink within EOF //////////////////////// - - maxNode = (GetFileControlBlock(btreePtr->fileRefNum)->fcbEOF / nodeSize) - 1; - - if ( (node->fLink > maxNode) || (node->bLink > maxNode) ) - return fsBTInvalidNodeErr; - - /////////////// check node type (leaf, index, header, map) ////////////////// - - if ( (node->kind < kBTLeafNode) || (node->kind > kBTMapNode) ) - return fsBTInvalidNodeErr; - - ///////////////////// is node height > tree depth? ////////////////////////// - - if ( node->height > btreePtr->treeDepth ) - return fsBTInvalidNodeErr; - - //////////////////////// check number of records //////////////////////////// - - //XXX can we calculate a more accurate minimum record size? - maxRecords = ( nodeSize - sizeof (BTNodeDescriptor) ) >> 3; - - if (node->numRecords == 0 || node->numRecords > maxRecords) - return fsBTInvalidNodeErr; - - ////////////////////////// check record offsets ///////////////////////////// - - index = node->numRecords; /* start index at free space */ - prevOffset = nodeSize - (index << 1); /* use 2 bytes past end of free space */ - - do { - offset = GetRecordOffset (btreePtr, node, index); - - if (offset & 1) // offset is odd - return fsBTInvalidNodeErr; - - if (offset >= prevOffset) // offset >= previous offset - return fsBTInvalidNodeErr; - - /* reject keys that overflow record slot */ - if ((node->kind == kBTLeafNode) && - (index < node->numRecords) && /* ignore free space record */ - (CalcKeySize(btreePtr, (KeyPtr) ((Ptr)node + offset)) > (prevOffset - offset))) { - return fsBTInvalidNodeErr; - } - - prevOffset = offset; - } while ( --index >= 0 ); - - if (offset < sizeof (BTNodeDescriptor) ) // first offset < minimum ? 
- return fsBTInvalidNodeErr; - - return noErr; -} - - #if HFS_DIAGNOSTIC static void PrintNode(const NodeDescPtr node, UInt16 nodeSize, UInt32 nodeNumber) { diff --git a/bsd/hfs/hfscommon/BTree/BTreeScanner.c b/bsd/hfs/hfscommon/BTree/BTreeScanner.c index 66521dbbd..1139f6415 100644 --- a/bsd/hfs/hfscommon/BTree/BTreeScanner.c +++ b/bsd/hfs/hfscommon/BTree/BTreeScanner.c @@ -140,7 +140,9 @@ int BTScanNextRecord( BTScanState * scanState, static int FindNextLeafNode( BTScanState *scanState, Boolean avoidIO ) { - int err; + int err; + BlockDescriptor block; + FileReference fref; err = noErr; // Assume everything will be OK @@ -180,29 +182,23 @@ static int FindNextLeafNode( BTScanState *scanState, Boolean avoidIO ) (u_int8_t *) scanState->currentNodePtr += scanState->btcb->nodeSize; } -#if BYTE_ORDER == LITTLE_ENDIAN - { - BlockDescriptor block; - FileReference fref; - /* Fake a BlockDescriptor */ + block.blockHeader = NULL; /* No buffer cache buffer */ block.buffer = scanState->currentNodePtr; + block.blockNum = scanState->nodeNum; block.blockSize = scanState->btcb->nodeSize; block.blockReadFromDisk = 1; block.isModified = 0; fref = scanState->btcb->fileRefNum; - SWAP_BT_NODE(&block, ISHFSPLUS(VTOVCB(fref)), VTOC(fref)->c_fileid, 0); - } -#endif - - // Make sure this is a valid node - if ( CheckNode( scanState->btcb, scanState->currentNodePtr ) != noErr ) - { + /* This node was read from disk, so it must be swapped/checked. */ + err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost); + if ( err != noErr ) { + printf("FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum); continue; } - + if ( scanState->currentNodePtr->kind == kBTLeafNode ) break; } diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h index 0cce7eb23..5747e31aa 100644 --- a/bsd/hfs/hfscommon/headers/BTreesInternal.h +++ b/bsd/hfs/hfscommon/headers/BTreesInternal.h @@ -113,6 +113,7 @@ enum { struct BlockDescriptor{ void *buffer; void *blockHeader; + daddr64_t blockNum; /* logical block number (used by hfs_swap_BTNode) */ ByteCount blockSize; Boolean blockReadFromDisk; Byte isModified; // XXXdbg - for journaling diff --git a/bsd/hfs/hfscommon/headers/BTreesPrivate.h b/bsd/hfs/hfscommon/headers/BTreesPrivate.h index 35c5830a5..852942dd0 100644 --- a/bsd/hfs/hfscommon/headers/BTreesPrivate.h +++ b/bsd/hfs/hfscommon/headers/BTreesPrivate.h @@ -402,9 +402,6 @@ OSStatus GetMapNode (BTreeControlBlockPtr btreePtr, //// Node Buffer Operations -OSStatus CheckNode (BTreeControlBlockPtr btreePtr, - NodeDescPtr node ); - void ClearNode (BTreeControlBlockPtr btreePtr, NodeDescPtr node ); diff --git a/bsd/kern/kdebug.c b/bsd/kern/kdebug.c index 381b74fe2..c72e598fb 100644 --- a/bsd/kern/kdebug.c +++ b/bsd/kern/kdebug.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @Apple_LICENSE_HEADER_START@ * @@ -95,10 +95,6 @@ pid_t global_state_pid = -1; /* Used to control exclusive use of kd_buffer #define DBG_FUNC_MASK 0xfffffffc -#ifdef ppc -extern natural_t rtclock_decrementer_min; -#endif /* ppc */ - /* task to string structure */ struct tts { @@ -774,7 +770,7 @@ kdbg_setpidex(kd_regtype *kdr) return(ret); } -/* This is for setting a minimum decrementer value */ +/* This is for setting a maximum decrementer value */ kdbg_setrtcdec(kd_regtype *kdr) { int ret=0; @@ -783,13 +779,17 @@ kdbg_setrtcdec(kd_regtype *kdr) decval = (natural_t)kdr->value1; if (decval && decval < KDBG_MINRTCDEC) - ret = EINVAL; + ret = EINVAL; #ifdef ppc - else - rtclock_decrementer_min = decval; + else { + + extern uint32_t maxDec; + + maxDec = decval ? decval : 0x7FFFFFFF; /* Set or reset the max decrementer */ + } #else - else - ret = ENOTSUP; + else + ret = ENOTSUP; #endif /* ppc */ return(ret); diff --git a/bsd/kern/kern_control.c b/bsd/kern/kern_control.c index fa4e4c216..95c9cbe2e 100644 --- a/bsd/kern/kern_control.c +++ b/bsd/kern/kern_control.c @@ -772,7 +772,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref) userkctl->ctl_sendsize = CTL_SENDSIZE; kctl->sendbufsize = userkctl->ctl_sendsize; - if (kctl->recvbufsize == 0) + if (userkctl->ctl_recvsize == 0) userkctl->ctl_recvsize = CTL_RECVSIZE; kctl->recvbufsize = userkctl->ctl_recvsize; diff --git a/bsd/kern/kern_core.c b/bsd/kern/kern_core.c index d17444fd6..105181c1a 100644 --- a/bsd/kern/kern_core.c +++ b/bsd/kern/kern_core.c @@ -222,7 +222,7 @@ coredump(struct proc *p) context.vc_proc = p; context.vc_ucred = cred; - if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, 0, &vp, &context))) + if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, &context))) return (error); VATTR_INIT(&va); diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 331e24761..1ec789877 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -701,6 +701,27 @@ fcntl(p, uap, retval) } goto outdrop; + case F_GLOBAL_NOCACHE: + if (fp->f_type != DTYPE_VNODE) { + error = EBADF; + goto out; + } + vp = (struct vnode *)fp->f_data; + proc_fdunlock(p); + + if ( (error = vnode_getwithref(vp)) == 0 ) { + + *retval = vnode_isnocache(vp); + + if (uap->arg) + vnode_setnocache(vp); + else + vnode_clearnocache(vp); + + (void)vnode_put(vp); + } + goto outdrop; + case F_RDADVISE: { struct radvisory ra_struct; diff --git a/bsd/kern/kern_event.c b/bsd/kern/kern_event.c index 1bf948822..d9392549b 100644 --- a/bsd/kern/kern_event.c +++ b/bsd/kern/kern_event.c @@ -1055,21 +1055,21 @@ kevent(struct proc *p, struct kevent_args *uap, register_t *retval) /* register all the change requests the user provided... 
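 * (an error from an individual kevent_register() call is reported back
 * through the output event list while room remains; see the loop below)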
*/ noutputs = 0; - while (nchanges > 0) { + while (nchanges > 0 && error == 0) { error = kevent_copyin(&changelist, &kev, p); if (error) break; kev.flags &= ~EV_SYSFLAGS; error = kevent_register(kq, &kev, p); - if (error) { - if (nevents == 0) - break; + if (error && nevents > 0) { kev.flags = EV_ERROR; kev.data = error; - (void) kevent_copyout(&kev, &ueventlist, p); - nevents--; - noutputs++; + error = kevent_copyout(&kev, &ueventlist, p); + if (error == 0) { + nevents--; + noutputs++; + } } nchanges--; } diff --git a/bsd/kern/kern_symfile.c b/bsd/kern/kern_symfile.c index ed56bd1cd..4dc099a0d 100644 --- a/bsd/kern/kern_symfile.c +++ b/bsd/kern/kern_symfile.c @@ -71,6 +71,7 @@ #include #include #include +#include extern unsigned char rootdevice[]; extern struct mach_header _mh_execute_header; @@ -341,3 +342,230 @@ int get_kernel_symfile(struct proc *p, char **symfile) return error_code; } +struct kern_direct_file_io_ref_t +{ + struct vfs_context context; + struct vnode *vp; +}; + + +static int file_ioctl(void * p1, void * p2, int theIoctl, caddr_t result) +{ + dev_t device = (dev_t) p1; + + return ((*bdevsw[major(device)].d_ioctl) + (device, theIoctl, result, S_IFBLK, p2)); +} + +static int device_ioctl(void * p1, __unused void * p2, int theIoctl, caddr_t result) +{ + return (VNOP_IOCTL(p1, theIoctl, result, 0, p2)); +} + +struct kern_direct_file_io_ref_t * +kern_open_file_for_direct_io(const char * name, + kern_get_file_extents_callback_t callback, + void * callback_ref, + dev_t * device_result, + uint64_t * partitionbase_result, + uint64_t * maxiocount_result) +{ + struct kern_direct_file_io_ref_t * ref; + + struct proc *p; + struct ucred *cred; + struct vnode_attr va; + int error; + off_t f_offset; + uint32_t blksize; + uint64_t size; + dev_t device; + off_t maxiocount, count; + + int (*do_ioctl)(void * p1, void * p2, int theIoctl, caddr_t result); + void * p1; + void * p2; + + error = EFAULT; + + ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t)); + if (!ref) + { + error = EFAULT; + goto out; + } + + ref->vp = NULL; + p = current_proc(); // kernproc; + cred = p->p_ucred; + ref->context.vc_proc = p; + ref->context.vc_ucred = cred; + + if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, &ref->context))) + goto out; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_rdev); + VATTR_WANTED(&va, va_fsid); + VATTR_WANTED(&va, va_data_size); + VATTR_WANTED(&va, va_nlink); + error = EFAULT; + if (vnode_getattr(ref->vp, &va, &ref->context)) + goto out; + + kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev)); + kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid)); + kprintf("vp size %qd\n", va.va_data_size); + + if (ref->vp->v_type == VREG) + { + /* Don't dump files with links. */ + if (va.va_nlink != 1) + goto out; + + device = va.va_fsid; + p1 = (void *) device; + p2 = p; + do_ioctl = &file_ioctl; + } + else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) + { + /* Partition. */ + device = va.va_rdev; + + p1 = ref->vp; + p2 = &ref->context; + do_ioctl = &device_ioctl; + } + else + { + /* Don't dump to non-regular files. 
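+		 * Only regular files (VREG) and block or character devices
+		 * (VBLK, VCHR) are handled above; anything else is rejected.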
*/ + error = EFAULT; + goto out; + } + + // get partition base + + error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); + if (error) + goto out; + + // get block size & constraints + + error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize); + if (error) + goto out; + + maxiocount = 1*1024*1024*1024; + + error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count); + if (error) + count = 0; + count *= blksize; + if (count && (count < maxiocount)) + maxiocount = count; + + error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count); + if (error) + count = 0; + count *= blksize; + if (count && (count < maxiocount)) + maxiocount = count; + + error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count); + if (error) + count = 0; + if (count && (count < maxiocount)) + maxiocount = count; + + error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count); + if (error) + count = 0; + if (count && (count < maxiocount)) + maxiocount = count; + + error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count); + if (error) + count = 0; + if (count && (count < maxiocount)) + maxiocount = count; + + error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count); + if (error) + count = 0; + if (count && (count < maxiocount)) + maxiocount = count; + + kprintf("max io 0x%qx bytes\n", maxiocount); + if (maxiocount_result) + *maxiocount_result = maxiocount; + + // generate the block list + + error = 0; + if (ref->vp->v_type == VREG) + { + f_offset = 0; + while(f_offset < (off_t) va.va_data_size) + { + size_t io_size = 1*1024*1024*1024; + daddr64_t blkno; + + error = VNOP_BLOCKMAP(ref->vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL, 0, NULL); + if (error) + goto out; + callback(callback_ref, ((uint64_t) blkno) * blksize, (uint64_t) io_size); + f_offset += io_size; + } + callback(callback_ref, 0ULL, 0ULL); + } + else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) + { + error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &size); + if (error) + goto out; + size *= blksize; + callback(callback_ref, 0ULL, size); + callback(callback_ref, size, 0ULL); + } + + if (device_result) + *device_result = device; + +out: + kprintf("kern_open_file_for_direct_io(%d)\n", error); + + if (error && ref) { + if (ref->vp) + vnode_close(ref->vp, FWRITE, &ref->context); + + kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); + } + + return(ref); +} + +int +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len) +{ + return (vn_rdwr(UIO_WRITE, ref->vp, + addr, len, offset, + UIO_SYSSPACE32, IO_SYNC|IO_NODELOCKED|IO_UNIT, + ref->context.vc_ucred, (int *) 0, ref->context.vc_proc)); +} + +void +kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref) +{ + kprintf("kern_close_file_for_direct_io\n"); + + if (ref) { + int error; + + if (ref->vp) { + error = vnode_close(ref->vp, FWRITE, &ref->context); + kprintf("vnode_close(%d)\n", error); + } + kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); + } +} diff --git a/bsd/kern/kpi_mbuf.c b/bsd/kern/kpi_mbuf.c index 15c290aab..c4b070b42 100644 --- a/bsd/kern/kpi_mbuf.c +++ b/bsd/kern/kpi_mbuf.c @@ -462,12 +462,26 @@ extern void in_delayed_cksum_offset(struct mbuf *m, int ip_offset); void mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, size_t protocol_offset) { - if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP)) == 0) + if ((mbuf->m_pkthdr.csum_flags & + (CSUM_DELAY_DATA | CSUM_DELAY_IP | 
CSUM_TCP_SUM16)) == 0)
		return;

	/* Generate the packet in software, client needs it */
	switch (protocol_family) {
		case PF_INET:
+			if (mbuf->m_pkthdr.csum_flags & CSUM_TCP_SUM16) {
+				/*
+				 * If you're wondering where this lovely code comes
+				 * from, we're trying to undo what happens in ip_output.
+				 * Look for CSUM_TCP_SUM16 in ip_output.
+				 */
+				u_int16_t	first, second;
+				mbuf->m_pkthdr.csum_flags &= ~CSUM_TCP_SUM16;
+				mbuf->m_pkthdr.csum_flags |= CSUM_TCP;
+				first = mbuf->m_pkthdr.csum_data >> 16;
+				second = mbuf->m_pkthdr.csum_data & 0xffff;
+				mbuf->m_pkthdr.csum_data = first - second;
+			}
			if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
				in_delayed_cksum_offset(mbuf, protocol_offset);
			}
diff --git a/bsd/kern/kpi_socketfilter.c b/bsd/kern/kpi_socketfilter.c
index 68ea45cc4..86b3e9c60 100644
--- a/bsd/kern/kpi_socketfilter.c
+++ b/bsd/kern/kpi_socketfilter.c
@@ -33,6 +33,8 @@
static struct socket_filter_list	sock_filter_head;
static lck_mtx_t	*sock_filter_lock = 0;

+static void	sflt_detach_private(struct socket_filter_entry *entry, int unregistering);
+
__private_extern__ void
sflt_init(void)
{
@@ -82,6 +84,7 @@ sflt_termsock(
		filter_next = filter->sfe_next_onsocket;
		sflt_detach_private(filter, 0);
	}
+	so->so_filt = NULL;
}

__private_extern__ void
@@ -103,7 +106,7 @@ sflt_unuse(
		for (filter = so->so_filt; filter; filter = next_filter) {
			next_filter = filter->sfe_next_onsocket;

-			if (filter->sfe_flags & SFEF_DETACHING) {
+			if (filter->sfe_flags & SFEF_DETACHUSEZERO) {
				sflt_detach_private(filter, 0);
			}
		}
@@ -219,6 +222,7 @@ sflt_attach_private(
	entry->sfe_filter = filter;
	entry->sfe_socket = so;
	entry->sfe_cookie = NULL;
+	entry->sfe_flags = 0;
	if (entry->sfe_filter->sf_filter.sf_attach) {
		filter->sf_usecount++;
@@ -247,9 +251,6 @@ sflt_attach_private(
	entry->sfe_next_onfilter = filter->sf_entry_head;
	filter->sf_entry_head = entry;

-	/* Increment the socket's usecount */
-	so->so_usecount++;
-
	/* Incremenet the parent filter's usecount */
	filter->sf_usecount++;
}
@@ -270,17 +271,17 @@
 * list and the socket lock is not held.
 */

-__private_extern__ void
+static void
sflt_detach_private(
	struct socket_filter_entry *entry,
-	int filter_detached)
+	int unregistering)
{
	struct socket *so = entry->sfe_socket;
	struct socket_filter_entry **next_ptr;
	int detached = 0;
	int found = 0;

-	if (filter_detached) {
+	if (unregistering) {
		socket_lock(entry->sfe_socket, 0);
	}

@@ -290,7 +291,16 @@ sflt_detach_private(
	 * same time from attempting to remove the same entry.
	 */
	lck_mtx_lock(sock_filter_lock);
-	if (!filter_detached) {
+	if (!unregistering) {
+		if ((entry->sfe_flags & SFEF_UNREGISTERING) != 0) {
+			/*
+			 * Another thread is unregistering the filter, we need to
+			 * avoid detaching the filter here so the socket won't go
+			 * away.
+			 */
+			lck_mtx_unlock(sock_filter_lock);
+			return;
+		}
		for (next_ptr = &entry->sfe_filter->sf_entry_head; *next_ptr;
			 next_ptr = &((*next_ptr)->sfe_next_onfilter)) {
			if (*next_ptr == entry) {
@@ -299,24 +309,30 @@
				break;
			}
		}
+
+		if (!found && (entry->sfe_flags & SFEF_DETACHUSEZERO) == 0) {
+			lck_mtx_unlock(sock_filter_lock);
+			return;
+		}
	}
-
-	if (!filter_detached && !found && (entry->sfe_flags & SFEF_DETACHING) == 0) {
-		lck_mtx_unlock(sock_filter_lock);
-		return;
+	else {
+		/*
+		 * Clear the removing flag. We will perform the detach here or
+		 * request a delayed detach.
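+		 * (The detach is delayed while the socket's filter use count is
+		 * non-zero; in that case SFEF_DETACHUSEZERO is set below and the
+		 * entry is finally detached from sflt_unuse().)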
+ */ + entry->sfe_flags &= ~SFEF_UNREGISTERING; } if (entry->sfe_socket->so_filteruse != 0) { + entry->sfe_flags |= SFEF_DETACHUSEZERO; lck_mtx_unlock(sock_filter_lock); - entry->sfe_flags |= SFEF_DETACHING; return; } - - /* - * Check if we are removing the last attached filter and - * the parent filter is being unregistered. - */ - if (entry->sfe_socket->so_filteruse == 0) { + else { + /* + * Check if we are removing the last attached filter and + * the parent filter is being unregistered. + */ entry->sfe_filter->sf_usecount--; if ((entry->sfe_filter->sf_usecount == 0) && (entry->sfe_filter->sf_flags & SFF_DETACHING) != 0) @@ -340,14 +356,10 @@ sflt_detach_private( entry->sfe_filter->sf_filter.sf_unregistered(entry->sfe_filter->sf_filter.sf_handle); FREE(entry->sfe_filter, M_IFADDR); } - - if (filter_detached) { + + if (unregistering) socket_unlock(entry->sfe_socket, 1); - } - else { - // We need some better way to decrement the usecount - so->so_usecount--; - } + FREE(entry, M_IFADDR); } @@ -385,6 +397,7 @@ sflt_detach( sflt_detach_private(filter, 0); } else { + socket->so_filt = NULL; result = ENOENT; } @@ -453,6 +466,7 @@ sflt_unregister( { struct socket_filter *filter; struct socket_filter_entry *entry_head = NULL; + struct socket_filter_entry *next_entry = NULL; /* Find the entry and remove it from the global and protosw lists */ lck_mtx_lock(sock_filter_lock); @@ -469,6 +483,13 @@ sflt_unregister( entry_head = filter->sf_entry_head; filter->sf_entry_head = NULL; filter->sf_flags |= SFF_DETACHING; + + for (next_entry = entry_head; next_entry; + next_entry = next_entry->sfe_next_onfilter) { + socket_lock(next_entry->sfe_socket, 1); + next_entry->sfe_flags |= SFEF_UNREGISTERING; + socket_unlock(next_entry->sfe_socket, 0); /* Radar 4201550: prevents the socket from being deleted while being unregistered */ + } } lck_mtx_unlock(sock_filter_lock); @@ -482,7 +503,6 @@ sflt_unregister( filter->sf_filter.sf_unregistered(filter->sf_filter.sf_handle); } else { while (entry_head) { - struct socket_filter_entry *next_entry; next_entry = entry_head->sfe_next_onfilter; sflt_detach_private(entry_head, 1); entry_head = next_entry; diff --git a/bsd/kern/sysctl_init.c b/bsd/kern/sysctl_init.c index e50013d38..42ac8142c 100644 --- a/bsd/kern/sysctl_init.c +++ b/bsd/kern/sysctl_init.c @@ -113,6 +113,7 @@ extern struct sysctl_oid sysctl__kern_posix; extern struct sysctl_oid sysctl__kern_posix_sem; extern struct sysctl_oid sysctl__kern_posix_sem_max; extern struct sysctl_oid sysctl__kern_sugid_scripts; +extern struct sysctl_oid sysctl__kern_always_do_fullfsync; extern struct sysctl_oid sysctl__net_inet_icmp_icmplim; extern struct sysctl_oid sysctl__net_inet_icmp_maskrepl; extern struct sysctl_oid sysctl__net_inet_icmp_timestamp; @@ -523,6 +524,7 @@ struct sysctl_oid *newsysctl_list[] = ,&sysctl__kern_ipc_maxsockets ,&sysctl__kern_sugid_scripts + ,&sysctl__kern_always_do_fullfsync ,&sysctl__hw_machine ,&sysctl__hw_model diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index 8f7b26537..24bc5cfad 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -248,12 +248,6 @@ grow_semu_array(int newSize) { register int i; register struct sem_undo *newSemu; - static boolean_t grow_semu_array_in_progress = FALSE; - - while (grow_semu_array_in_progress) { - msleep(&grow_semu_array_in_progress, &sysv_sem_subsys_mutex, - PPAUSE, "grow_semu_array", NULL); - } if (newSize <= seminfo.semmnu) return 1; @@ -271,13 +265,8 @@ grow_semu_array(int newSize) #ifdef SEM_DEBUG printf("growing semu[] from %d to %d\n", 
seminfo.semmnu, newSize); #endif - grow_semu_array_in_progress = TRUE; - SYSV_SEM_SUBSYS_UNLOCK(); - MALLOC(newSemu, struct sem_undo*, sizeof(struct sem_undo) * newSize, - M_SYSVSEM, M_WAITOK); - SYSV_SEM_SUBSYS_LOCK(); - grow_semu_array_in_progress = FALSE; - wakeup((caddr_t) &grow_semu_array_in_progress); + MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize, + M_SYSVSEM, M_WAITOK | M_ZERO); if (NULL == newSemu) { #ifdef SEM_DEBUG @@ -286,15 +275,16 @@ grow_semu_array(int newSize) return 0; } - /* Initialize our structure. */ + /* copy the old data to the new array */ for (i = 0; i < seminfo.semmnu; i++) { newSemu[i] = semu[i]; } - for (i = seminfo.semmnu; i < newSize; i++) - { - newSemu[i].un_proc = NULL; - } + /* + * The new elements (from newSemu[i] to newSemu[newSize-1]) have their + * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above, + * so they're already marked as "not in use". + */ /* Clean up the old array */ if (semu) @@ -336,8 +326,9 @@ grow_sema_array(int newSize) #ifdef SEM_DEBUG printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize); #endif - MALLOC(newSema, struct user_semid_ds *, sizeof(struct user_semid_ds) * newSize, - M_SYSVSEM, M_WAITOK); + MALLOC(newSema, struct user_semid_ds *, + sizeof (struct user_semid_ds) * newSize, + M_SYSVSEM, M_WAITOK | M_ZERO); if (NULL == newSema) { #ifdef SEM_DEBUG @@ -346,7 +337,7 @@ grow_sema_array(int newSize) return 0; } - /* Initialize our new ids, and copy over the old ones */ + /* copy over the old ids */ for (i = 0; i < seminfo.semmni; i++) { newSema[i] = sema[i]; @@ -361,12 +352,11 @@ grow_sema_array(int newSize) if (sema[i].sem_perm.mode & SEM_ALLOC) wakeup((caddr_t)&sema[i]); } - - for (i = seminfo.semmni; i < newSize; i++) - { - newSema[i].sem_base = NULL; - newSema[i].sem_perm.mode = 0; - } + /* + * The new elements (from newSema[i] to newSema[newSize-1]) have their + * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO + * flag to MALLOC() above, so they're already marked as "not in use". + */ /* Clean up the old array */ if (sema) @@ -410,8 +400,8 @@ grow_sem_pool(int new_pool_size) #ifdef SEM_DEBUG printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size); #endif - MALLOC(new_sem_pool, struct sem *, sizeof(struct sem) * new_pool_size, - M_SYSVSEM, M_WAITOK); + MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size, + M_SYSVSEM, M_WAITOK | M_ZERO); if (NULL == new_sem_pool) { #ifdef SEM_DEBUG printf("allocation failed. no changes made.\n"); @@ -535,8 +525,9 @@ semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, register struct undo *sueptr, **suepptr, *new_sueptr; int i; - /* Look for and remember the sem_undo if the caller doesn't provide - it */ + /* + * Look for and remember the sem_undo if the caller doesn't provide it + */ suptr = *supptr; if (suptr == NULL) { @@ -562,7 +553,6 @@ semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid, * 0). */ new_sueptr = NULL; -lookup: for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent; i < suptr->un_cnt; i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) { @@ -578,61 +568,36 @@ lookup: FREE(sueptr, M_SYSVSEM); sueptr = NULL; } - if (new_sueptr != NULL) { - /* - * We lost the race: free the "undo" entry we allocated - * and use the one that won. 
- */ - FREE(new_sueptr, M_SYSVSEM); - new_sueptr = NULL; - } - return(0); + return 0; } /* Didn't find the right entry - create it */ if (adjval == 0) { - if (new_sueptr != NULL) { - FREE(new_sueptr, M_SYSVSEM); - new_sueptr = NULL; - } - return(0); + /* no adjustment: no need for a new entry */ + return 0; } - if (new_sueptr != NULL) { - /* - * Use the new "undo" entry we allocated in the previous pass - */ - new_sueptr->une_next = suptr->un_ent; - suptr->un_ent = new_sueptr; - suptr->un_cnt++; - new_sueptr->une_adjval = adjval; - new_sueptr->une_id = semid; - new_sueptr->une_num = semnum; - return 0; + if (suptr->un_cnt == limitseminfo.semume) { + /* reached the limit number of semaphore undo entries */ + return EINVAL; } - if (suptr->un_cnt != limitseminfo.semume) { - SYSV_SEM_SUBSYS_UNLOCK(); - /* - * Unlocking opens the door to race conditions. Someone else - * could be trying to allocate the same thing at this point, - * so we'll have to check if we lost the race. - */ - MALLOC(new_sueptr, struct undo *, sizeof (struct undo), - M_SYSVSEM, M_WAITOK); - SYSV_SEM_SUBSYS_LOCK(); - if (new_sueptr == NULL) { - return ENOMEM; - } - /* - * There might be other threads doing the same thing for this - * process, so check again if an "undo" entry exists for that - * semaphore. - */ - goto lookup; - } else - return(EINVAL); - return(0); + /* allocate a new semaphore undo entry */ + MALLOC(new_sueptr, struct undo *, sizeof (struct undo), + M_SYSVSEM, M_WAITOK); + if (new_sueptr == NULL) { + return ENOMEM; + } + + /* fill in the new semaphore undo entry */ + new_sueptr->une_next = suptr->un_ent; + suptr->un_ent = new_sueptr; + suptr->un_cnt++; + new_sueptr->une_adjval = adjval; + new_sueptr->une_id = semid; + new_sueptr->une_num = semnum; + + return 0; } /* Assumes we already hold the subsystem lock. 
@@ -742,8 +707,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M))) goto semctlout; - SYSV_SEM_SUBSYS_UNLOCK(); - if (IS_64BIT_PROCESS(p)) { eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds)); } else { @@ -752,10 +715,9 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf); } - if (eval != 0) - return(eval); - - SYSV_SEM_SUBSYS_LOCK(); + if (eval != 0) { + goto semctlout; + } semaptr->sem_perm.uid = sbuf.sem_perm.uid; semaptr->sem_perm.gid = sbuf.sem_perm.gid; @@ -768,7 +730,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) goto semctlout; bcopy(semaptr, &uds, sizeof(struct user_semid_ds)); - SYSV_SEM_SUBSYS_UNLOCK(); if (IS_64BIT_PROCESS(p)) { eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds)); } else { @@ -776,7 +737,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval) semid_ds_64to32(&uds, &semid_ds32); eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds)); } - SYSV_SEM_SUBSYS_LOCK(); break; case GETNCNT: @@ -904,7 +864,7 @@ semget(__unused struct proc *p, struct semget_args *uap, register_t *retval) #endif - SYSV_SEM_SUBSYS_LOCK(); + SYSV_SEM_SUBSYS_LOCK(); if (key != IPC_PRIVATE) { @@ -1190,42 +1150,44 @@ semop(struct proc *p, struct semop_args *uap, register_t *retval) #ifdef SEM_DEBUG printf("semop: good morning (eval=%d)!\n", eval); #endif - /* we need the lock here due to mods on semptr */ if (eval != 0) { - if (sopptr->sem_op == 0) - semptr->semzcnt--; - else - semptr->semncnt--; - eval = EINTR; - goto semopout; } + /* + * IMPORTANT: while we were asleep, the semaphore array might + * have been reallocated somewhere else (see grow_sema_array()). + * When we wake up, we have to re-lookup the semaphore + * structures and re-validate them. + */ + suptr = NULL; /* sem_undo may have been reallocated */ semaptr = &sema[semid]; /* sema may have been reallocated */ - -#ifdef SEM_DEBUG - printf("semop: good morning!\n"); -#endif - /* * Make sure that the semaphore still exists */ if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || - semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { - /* The man page says to return EIDRM. */ - /* Unfortunately, BSD doesn't define that code! */ - if (sopptr->sem_op == 0) - semptr->semzcnt--; - else - semptr->semncnt--; + semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid) || + sopptr->sem_num >= semaptr->sem_nsems) { + if (eval == EINTR) { + /* + * EINTR takes precedence over the fact that + * the semaphore disappeared while we were + * sleeping... + */ + } else { + /* + * The man page says to return EIDRM. + * Unfortunately, BSD doesn't define that code! 
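+				 * (Hence the #ifdef just below: return EIDRM where it
+				 * exists, and fall back to EINVAL where it doesn't.)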
+ */ #ifdef EIDRM - eval = EIDRM; + eval = EIDRM; #else - eval = EINVAL; + eval = EINVAL; #endif - goto semopout; + } + goto semopout; } /* @@ -1239,6 +1201,10 @@ semop(struct proc *p, struct semop_args *uap, register_t *retval) semptr->semzcnt--; else semptr->semncnt--; + + if (eval != 0) { /* EINTR */ + goto semopout; + } } done: @@ -1553,9 +1519,7 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, error = EINVAL; break; } - SYSV_SEM_SUBSYS_UNLOCK(); error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); - SYSV_SEM_SUBSYS_LOCK(); break; case IPCS_SEM_ITER: /* Iterate over existing segments */ @@ -1588,14 +1552,12 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1, semid_ds_64to32(semid_dsp, &semid_ds32); semid_dsp = &semid_ds32; } - SYSV_SEM_SUBSYS_UNLOCK(); error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen); if (!error) { /* update cursor */ ipcs.u64.ipcs_cursor = cursor + 1; error = SYSCTL_OUT(req, &ipcs, ipcs_sz); } - SYSV_SEM_SUBSYS_LOCK(); break; default: diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 84100312a..ec648d675 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -812,8 +812,7 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from) * "from" must have M_PKTHDR set, and "to" must be empty. * In particular, this does a deep copy of the packet tags. */ -#ifndef __APPLE__ -int +static int m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) { to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT); @@ -823,7 +822,6 @@ m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) SLIST_INIT(&to->m_pkthdr.tags); return (m_tag_copy_chain(to, from, how)); } -#endif /* * return a list of mbuf hdrs that point to clusters... @@ -2146,17 +2144,8 @@ m_dup(struct mbuf *m, int how) { if ((n = m_gethdr(how, m->m_type)) == NULL) return(NULL); n->m_len = m->m_len; - n->m_flags |= (m->m_flags & M_COPYFLAGS); - n->m_pkthdr.len = m->m_pkthdr.len; - n->m_pkthdr.rcvif = m->m_pkthdr.rcvif; - n->m_pkthdr.header = NULL; - n->m_pkthdr.csum_flags = 0; - n->m_pkthdr.csum_data = 0; - n->m_pkthdr.aux = NULL; - n->m_pkthdr.vlan_tag = 0; - n->m_pkthdr.socket_id = 0; - SLIST_INIT(&n->m_pkthdr.tags); - bcopy(m->m_data, n->m_data, m->m_pkthdr.len); + m_dup_pkthdr(n, m, how); + bcopy(m->m_data, n->m_data, m->m_len); return(n); } } else if (m->m_len <= MLEN) @@ -2187,8 +2176,7 @@ m_dup(struct mbuf *m, int how) *np = n; if (copyhdr) { /* Don't use M_COPY_PKTHDR: preserve m_data */ - n->m_pkthdr = m->m_pkthdr; - n->m_flags |= (m->m_flags & M_COPYFLAGS); + m_dup_pkthdr(n, m, how); copyhdr = 0; if ((n->m_flags & M_EXT) == 0) n->m_data = n->m_pktdat; diff --git a/bsd/kern/uipc_mbuf2.c b/bsd/kern/uipc_mbuf2.c index a8c8652b2..3dd9060de 100644 --- a/bsd/kern/uipc_mbuf2.c +++ b/bsd/kern/uipc_mbuf2.c @@ -552,7 +552,7 @@ m_tag_copy(struct m_tag *t, int how) struct m_tag *p; KASSERT(t, ("m_tag_copy: null tag")); - p = m_tag_alloc(t->m_tag_type, t->m_tag_id, t->m_tag_len, how); + p = m_tag_alloc(t->m_tag_id, t->m_tag_type, t->m_tag_len, how); if (p == NULL) return (NULL); bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index b0068b4c8..29380a399 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -642,6 +642,9 @@ sofreelastref(so, dealloc) /*### Assume socket is locked */ + /* Remove any filters - may be called more than once */ + sflt_termsock(so); + if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 
0)) { #ifdef __APPLE__ selthreadclear(&so->so_snd.sb_sel); @@ -1029,13 +1032,28 @@ sosendcheck( { int error = 0; long space; + int assumelock = 0; restart: if (*sblocked == 0) { - error = sblock(&so->so_snd, SBLOCKWAIT(flags)); - if (error) - return error; - *sblocked = 1; + if ((so->so_snd.sb_flags & SB_LOCK) != 0 && + so->so_send_filt_thread != 0 && + so->so_send_filt_thread == current_thread()) { + /* + * We're being called recursively from a filter, + * allow this to continue. Radar 4150520. + * Don't set sblocked because we don't want + * to perform an unlock later. + */ + assumelock = 1; + } + else { + error = sblock(&so->so_snd, SBLOCKWAIT(flags)); + if (error) { + return error; + } + *sblocked = 1; + } } if (so->so_state & SS_CANTSENDMORE) @@ -1070,8 +1088,9 @@ restart: return EMSGSIZE; if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { - if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) + if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) { return EWOULDBLOCK; + } sbunlock(&so->so_snd, 1); error = sbwait(&so->so_snd); if (error) { @@ -1164,12 +1183,7 @@ sosend(so, addr, uio, top, control, flags) do { error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked); if (error) { - if (sblocked) - goto release; - else { - socket_unlock(so, 1); - goto out; - } + goto release; } mp = ⊤ space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0); @@ -1237,12 +1251,7 @@ sosend(so, addr, uio, top, control, flags) if (freelist == NULL) { error = ENOBUFS; socket_lock(so, 0); - if (sblocked) { - goto release; - } else { - socket_unlock(so, 1); - goto out; - } + goto release; } /* * For datagram protocols, leave room @@ -1294,25 +1303,28 @@ sosend(so, addr, uio, top, control, flags) } if (flags & (MSG_HOLD|MSG_SEND)) - { /* Enqueue for later, go away if HOLD */ - register struct mbuf *mb1; - if (so->so_temp && (flags & MSG_FLUSH)) - { m_freem(so->so_temp); - so->so_temp = NULL; - } - if (so->so_temp) - so->so_tail->m_next = top; - else - so->so_temp = top; - mb1 = top; - while (mb1->m_next) - mb1 = mb1->m_next; - so->so_tail = mb1; - if (flags&MSG_HOLD) - { top = NULL; - goto release; - } - top = so->so_temp; + { + /* Enqueue for later, go away if HOLD */ + register struct mbuf *mb1; + if (so->so_temp && (flags & MSG_FLUSH)) + { + m_freem(so->so_temp); + so->so_temp = NULL; + } + if (so->so_temp) + so->so_tail->m_next = top; + else + so->so_temp = top; + mb1 = top; + while (mb1->m_next) + mb1 = mb1->m_next; + so->so_tail = mb1; + if (flags & MSG_HOLD) + { + top = NULL; + goto release; + } + top = so->so_temp; } if (dontroute) so->so_options |= SO_DONTROUTE; @@ -1345,12 +1357,7 @@ sosend(so, addr, uio, top, control, flags) int so_flags = 0; if (filtered == 0) { filtered = 1; - /* - * We don't let sbunlock unlock the socket because - * we don't want it to decrement the usecount. - */ - sbunlock(&so->so_snd, 1); - sblocked = 0; + so->so_send_filt_thread = current_thread(); socket_unlock(so, 0); so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0; } @@ -1365,33 +1372,16 @@ sosend(so, addr, uio, top, control, flags) * The socket is unlocked as is the socket buffer. */ socket_lock(so, 0); - if (error == EJUSTRETURN) { - error = 0; - clen = 0; - control = 0; - top = 0; - socket_unlock(so, 1); - goto out; - } - else if (error) { - socket_unlock(so, 1); - goto out; - } - - - /* Verify our state again, this will lock the socket buffer */ - error = sosendcheck(so, addr, top->m_pkthdr.len, - control ? 
control->m_pkthdr.len : 0, - atomic, flags, &sblocked); + so->so_send_filt_thread = 0; if (error) { - if (sblocked) { - /* sbunlock at release will unlock the socket */ - goto release; - } - else { - socket_unlock(so, 1); - goto out; + if (error == EJUSTRETURN) { + error = 0; + clen = 0; + control = 0; + top = 0; } + + goto release; } } } @@ -1423,7 +1413,10 @@ sosend(so, addr, uio, top, control, flags) } while (resid); release: - sbunlock(&so->so_snd, 0); /* will unlock socket */ + if (sblocked) + sbunlock(&so->so_snd, 0); /* will unlock socket */ + else + socket_unlock(so, 1); out: if (top) m_freem(top); @@ -2930,9 +2923,6 @@ sofree(so) mutex_held = so->so_proto->pr_domain->dom_mtx; lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); - /* Remove the filters */ - sflt_termsock(so); - sofreelastref(so, 0); } diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index 60eb7747e..78487fdcf 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -905,6 +905,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS gencnt = unp_gencnt; n = unp_count; + bzero(&xug, sizeof(xug)); xug.xug_len = sizeof xug; xug.xug_count = n; xug.xug_gen = gencnt; @@ -941,6 +942,8 @@ unp_pcblist SYSCTL_HANDLER_ARGS unp = unp_list[i]; if (unp->unp_gencnt <= gencnt) { struct xunpcb xu; + + bzero(&xu, sizeof(xu)); xu.xu_len = sizeof xu; xu.xu_unpp = (struct unpcb_compat *)unp; /* @@ -967,6 +970,8 @@ unp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ + bzero(&xug, sizeof(xug)); + xug.xug_len = sizeof xug; xug.xug_gen = unp_gencnt; xug.xug_sogen = so_gencnt; xug.xug_count = unp_count; diff --git a/bsd/net/dlil.c b/bsd/net/dlil.c index 5f766f6b7..1118b6142 100644 --- a/bsd/net/dlil.c +++ b/bsd/net/dlil.c @@ -78,8 +78,6 @@ #define DLIL_PRINTF kprintf #endif -//#define DLIL_ALWAYS_DELAY_DETACH 1 - enum { kProtoKPI_DLIL = 0, kProtoKPI_v1 = 1 @@ -632,34 +630,59 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached) { int retval = 0; - - /* Take the write lock */ -#if DLIL_ALWAYS_DELAY_DETACH - retval = EDEADLK; -#else - if (detached == 0 && (retval = dlil_write_begin()) != 0) -#endif - { + if (detached == 0) { + ifnet_t ifp = NULL; + interface_filter_t entry = NULL; + + /* Take the write lock */ + retval = dlil_write_begin(); + if (retval != 0 && retval != EDEADLK) + return retval; + + /* + * At this point either we have the write lock (retval == 0) + * or we couldn't get it (retval == EDEADLK) because someone + * else up the stack is holding the read lock. It is safe to + * read, either the read or write is held. Verify the filter + * parameter before proceeding. 
+ */ + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) { + if (entry == filter) + break; + } + if (entry == filter) + break; + } + ifnet_head_done(); + + if (entry != filter) { + /* filter parameter is not a valid filter ref */ + if (retval == 0) { + dlil_write_end(); + } + return EINVAL; + } + if (retval == EDEADLK) { /* Perform a delayed detach */ filter->filt_detaching = 1; dlil_detach_waiting = 1; wakeup(&dlil_detach_waiting); - retval = 0; + return 0; } - return retval; - } - - if (detached == 0) - TAILQ_REMOVE(&filter->filt_ifp->if_flt_head, filter, filt_next); - - /* release the write lock */ - if (detached == 0) + + /* Remove the filter from the list */ + TAILQ_REMOVE(&ifp->if_flt_head, filter, filt_next); dlil_write_end(); + } + /* Call the detached function if there is one */ if (filter->filt_detached) filter->filt_detached(filter->filt_cookie, filter->filt_ifp); + /* Free the filter */ FREE(filter, M_NKE); return retval; @@ -668,6 +691,8 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached) void dlil_detach_filter(interface_filter_t filter) { + if (filter == NULL) + return; dlil_detach_filter_internal(filter, 0); } @@ -972,6 +997,7 @@ dlil_event(struct ifnet *ifp, struct kern_event_msg *event) return result; } +int dlil_output_list( struct ifnet* ifp, u_long proto_family, @@ -1964,12 +1990,7 @@ dlil_detach_protocol(struct ifnet *ifp, u_long proto_family) int use_reached_zero = 0; -#if DLIL_ALWAYS_DELAY_DETACH - { - retval = EDEADLK; -#else if ((retval = dlil_write_begin()) != 0) { -#endif if (retval == EDEADLK) { retval = 0; dlil_read_begin(); diff --git a/bsd/net/if.c b/bsd/net/if.c index 3f6d1157d..d5e1a5eaf 100644 --- a/bsd/net/if.c +++ b/bsd/net/if.c @@ -1454,6 +1454,12 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space) int error = 0; size_t space; + /* + * Zero the ifr buffer to make sure we don't + * disclose the contents of the stack. 
+ */ + bzero(&ifr, sizeof(struct ifreq)); + space = *ret_space; ifnet_head_lock_shared(); for (ifp = ifnet_head.tqh_first; space > sizeof(ifr) && ifp; ifp = ifp->if_link.tqe_next) { @@ -1932,10 +1938,10 @@ if_rtdel( */ void if_rtproto_del(struct ifnet *ifp, int protocol) { - - struct radix_node_head *rnh; + struct radix_node_head *rnh; - if ((protocol <= AF_MAX) && ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) { + if ((protocol <= AF_MAX) && (protocol >= 0) && + ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) { lck_mtx_lock(rt_mtx); (void) rnh->rnh_walktree(rnh, if_rtdel, ifp); lck_mtx_unlock(rt_mtx); diff --git a/bsd/net/if_mib.c b/bsd/net/if_mib.c index faa462f44..d78dabb5b 100644 --- a/bsd/net/if_mib.c +++ b/bsd/net/if_mib.c @@ -117,6 +117,7 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req) case IFDATA_GENERAL: + bzero(&ifmd, sizeof(ifmd)); snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s%d", ifp->if_name, ifp->if_unit); diff --git a/bsd/net/kext_net.h b/bsd/net/kext_net.h index 33d783bb2..e56930a55 100644 --- a/bsd/net/kext_net.h +++ b/bsd/net/kext_net.h @@ -42,7 +42,8 @@ struct socket_filter; -#define SFEF_DETACHING 0x1 +#define SFEF_DETACHUSEZERO 0x1 // Detach when use reaches zero +#define SFEF_UNREGISTERING 0x2 // Remove due to unregister struct socket_filter_entry { struct socket_filter_entry *sfe_next_onsocket; @@ -80,7 +81,6 @@ void sflt_notify(struct socket *so, sflt_event_t event, void *param); int sflt_data_in(struct socket *so, const struct sockaddr *from, mbuf_t *data, mbuf_t *control, sflt_data_flag_t flags, int *filtered); int sflt_attach_private(struct socket *so, struct socket_filter *filter, sflt_handle handle, int locked); -void sflt_detach_private(struct socket_filter_entry *entry, int filter_detached); #endif /* BSD_KERNEL_PRIVATE */ diff --git a/bsd/netat/atp_write.c b/bsd/netat/atp_write.c index 865962ef6..d84ecb6ff 100644 --- a/bsd/netat/atp_write.c +++ b/bsd/netat/atp_write.c @@ -65,7 +65,7 @@ static int loop_cnt; /* for debugging loops */ } \ } -static void atp_pack_bdsp(struct atp_trans *, struct atpBDS *); +static int atp_pack_bdsp(struct atp_trans *, struct atpBDS *); static int atp_unpack_bdsp(struct atp_state *, gbuf_t *, struct atp_rcb *, int, int); void atp_trp_clock(), asp_clock(), asp_clock_locked(), atp_trp_clock_locked();; @@ -604,7 +604,7 @@ nothing_to_send: } /* atp_send_replies */ -static void +static int atp_pack_bdsp(trp, bdsp) register struct atp_trans *trp; register struct atpBDS *bdsp; @@ -612,12 +612,13 @@ atp_pack_bdsp(trp, bdsp) register gbuf_t *m = NULL; register int i, datsize = 0; struct atpBDS *bdsbase = bdsp; + int error = 0; dPrintf(D_M_ATP, D_L_INFO, ("atp_pack_bdsp: socket=%d\n", trp->tr_queue->atp_socket_no)); for (i = 0; i < ATP_TRESP_MAX; i++, bdsp++) { - short bufsize = UAS_VALUE(bdsp->bdsBuffSz); + unsigned short bufsize = UAS_VALUE(bdsp->bdsBuffSz); long bufaddr = UAL_VALUE(bdsp->bdsBuffAddr); if ((m = trp->tr_rcv[i]) == NULL) @@ -639,13 +640,15 @@ atp_pack_bdsp(trp, bdsp) register char *buf = (char *)bufaddr; while (m) { - short len = (short)(gbuf_len(m)); + unsigned short len = (unsigned short)(gbuf_len(m)); if (len) { if (len > bufsize) len = bufsize; - copyout((caddr_t)gbuf_rptr(m), + if ((error = copyout((caddr_t)gbuf_rptr(m), CAST_USER_ADDR_T(&buf[tmp]), - len); + len)) != 0) { + return error; + } bufsize -= len; tmp += len; } @@ -664,6 +667,8 @@ atp_pack_bdsp(trp, bdsp) dPrintf(D_M_ATP, D_L_INFO, (" : size=%d\n", datsize)); + + return 0; } /* atp_pack_bdsp */ @@ -1635,12 
+1640,20 @@ _ATPsndreq(fd, buf, len, nowait, err, proc) /* * copy out the recv data */ - atp_pack_bdsp(trp, (struct atpBDS *)bds); + if ((*err = atp_pack_bdsp(trp, (struct atpBDS *)bds)) != 0) { + atp_free(trp); + file_drop(fd); + return -1; + } /* * copyout the result info */ - copyout((caddr_t)bds, CAST_USER_ADDR_T(buf), atpBDSsize); + if ((*err = copyout((caddr_t)bds, CAST_USER_ADDR_T(buf), atpBDSsize)) != 0) { + atp_free(trp); + file_drop(fd); + return -1; + } atp_free(trp); file_drop(fd); @@ -1885,13 +1898,21 @@ _ATPgetrsp(fd, bdsp, err, proc) ATENABLE(s, atp->atp_lock); if ((*err = copyin(CAST_USER_ADDR_T(bdsp), (caddr_t)bds, sizeof(bds))) != 0) { + atp_free(trp); + file_drop(fd); + return -1; + } + if ((*err = atp_pack_bdsp(trp, (struct atpBDS *)bds)) != 0) { + atp_free(trp); file_drop(fd); return -1; } - atp_pack_bdsp(trp, (struct atpBDS *)bds); tid = (int)trp->tr_tid; atp_free(trp); - copyout((caddr_t)bds, CAST_USER_ADDR_T(bdsp), sizeof(bds)); + if ((*err = copyout((caddr_t)bds, CAST_USER_ADDR_T(bdsp), sizeof(bds))) != 0) { + file_drop(fd); + return -1; + } file_drop(fd); return tid; diff --git a/bsd/netinet/ip_divert.c b/bsd/netinet/ip_divert.c index e92d7e6a3..5d4ffb3f2 100644 --- a/bsd/netinet/ip_divert.c +++ b/bsd/netinet/ip_divert.c @@ -567,6 +567,7 @@ div_pcblist SYSCTL_HANDLER_ARGS gencnt = divcbinfo.ipi_gencnt; n = divcbinfo.ipi_count; + bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; @@ -599,6 +600,8 @@ div_pcblist SYSCTL_HANDLER_ARGS inp = inp_list[i]; if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; + + bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ inpcb_to_compat(inp, &xi.xi_inp); @@ -615,6 +618,8 @@ div_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. 
*/ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; xig.xig_gen = divcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = divcbinfo.ipi_count; diff --git a/bsd/netinet/ip_fw2.c b/bsd/netinet/ip_fw2.c index 5f45949f1..b3a716cec 100644 --- a/bsd/netinet/ip_fw2.c +++ b/bsd/netinet/ip_fw2.c @@ -238,7 +238,6 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, #endif /* SYSCTL_NODE */ -extern lck_mtx_t *ip_mutex; static ip_fw_chk_t ipfw_chk; /* firewall lock */ @@ -1305,18 +1304,14 @@ send_reject(struct ip_fw_args *args, int code, int offset, int ip_len) ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } - lck_mtx_unlock(ip_mutex); icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); - lck_mtx_lock(ip_mutex); } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) { struct tcphdr *const tcp = L3HDR(struct tcphdr, mtod(args->m, struct ip *)); if ( (tcp->th_flags & TH_RST) == 0) { - lck_mtx_unlock(ip_mutex); send_pkt(&(args->f_id), ntohl(tcp->th_seq), ntohl(tcp->th_ack), tcp->th_flags | TH_RST); - lck_mtx_lock(ip_mutex); } m_freem(args->m); } else diff --git a/bsd/netinet/ip_input.c b/bsd/netinet/ip_input.c index 89f607747..96ad70ed7 100644 --- a/bsd/netinet/ip_input.c +++ b/bsd/netinet/ip_input.c @@ -685,8 +685,10 @@ iphack: if (fr_checkp) { struct mbuf *m1 = m; - if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) + if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) { + lck_mtx_unlock(ip_mutex); return; + } ip = mtod(m = m1, struct ip *); } if (fw_enable && IPFW_LOADED) { @@ -700,22 +702,24 @@ iphack: #endif /* IPFIREWALL_FORWARD */ args.m = m; + lck_mtx_unlock(ip_mutex); + i = ip_fw_chk_ptr(&args); m = args.m; if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */ if (m) - m_freem(m); - lck_mtx_unlock(ip_mutex); + m_freem(m); return; } ip = mtod(m, struct ip *); /* just in case m changed */ - if (i == 0 && args.next_hop == NULL) /* common case */ + if (i == 0 && args.next_hop == NULL) { /* common case */ + lck_mtx_lock(ip_mutex); goto pass; + } #if DUMMYNET if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) { /* Send packet to the appropriate pipe */ - lck_mtx_unlock(ip_mutex); ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args); return; } @@ -723,19 +727,21 @@ iphack: #if IPDIVERT if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) { /* Divert or tee packet */ + lck_mtx_lock(ip_mutex); div_info = i; goto ours; } #endif #if IPFIREWALL_FORWARD - if (i == 0 && args.next_hop != NULL) + if (i == 0 && args.next_hop != NULL) { + lck_mtx_lock(ip_mutex); goto pass; + } #endif /* * if we get here, the packet must be dropped */ m_freem(m); - lck_mtx_unlock(ip_mutex); return; } pass: diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index 9fd7a09a1..259e5dd54 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -850,6 +850,7 @@ skip_ipsec: args.m = m; args.next_hop = dst; args.oif = ifp; + lck_mtx_unlock(ip_mutex); off = ip_fw_chk_ptr(&args); m = args.m; dst = args.next_hop; @@ -873,12 +874,13 @@ skip_ipsec: if (m) m_freem(m); error = EACCES ; - lck_mtx_unlock(ip_mutex); goto done ; } ip = mtod(m, struct ip *); - if (off == 0 && dst == old) /* common case */ + if (off == 0 && dst == old) {/* common case */ + lck_mtx_lock(ip_mutex); goto pass ; + } #if DUMMYNET if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) { /* @@ -894,12 +896,12 @@ skip_ipsec: args.dst = dst; args.flags = flags; - lck_mtx_unlock(ip_mutex); error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT, &args); goto done; } #endif /* DUMMYNET */ + 
lck_mtx_lock(ip_mutex); #if IPDIVERT if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) { struct mbuf *clone = NULL; diff --git a/bsd/netinet/raw_ip.c b/bsd/netinet/raw_ip.c index f361be892..5332bf708 100644 --- a/bsd/netinet/raw_ip.c +++ b/bsd/netinet/raw_ip.c @@ -794,7 +794,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS */ gencnt = ripcbinfo.ipi_gencnt; n = ripcbinfo.ipi_count; - + + bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; @@ -830,6 +831,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS inp = inp_list[i]; if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; + + bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ inpcb_to_compat(inp, &xi.xi_inp); @@ -846,6 +849,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; xig.xig_gen = ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = ripcbinfo.ipi_count; diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index b931b642d..e85e6aed5 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -3420,6 +3420,7 @@ tcpdropdropablreq(struct socket *head) static unsigned int cur_cnt, old_cnt; struct timeval tv; struct inpcb *inp = NULL; + struct tcpcb *tp; microtime(&tv); if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { @@ -3459,16 +3460,29 @@ tcpdropdropablreq(struct socket *head) tcp_unlock(so, 1, 0); return 0; } - sototcpcb(so)->t_flags |= TF_LQ_OVERFLOW; head->so_incqlen--; head->so_qlen--; - so->so_head = NULL; TAILQ_REMOVE(&head->so_incomp, so, so_list); + tcp_unlock(head, 0, 0); + + so->so_head = NULL; so->so_usecount--; /* No more held by so_head */ - tcp_drop(sototcpcb(so), ETIMEDOUT); - + /* + * We do not want to lose track of the PCB right away in case we receive + * more segments from the peer + */ + tp = sototcpcb(so); + tp->t_flags |= TF_LQ_OVERFLOW; + tp->t_state = TCPS_CLOSED; + (void) tcp_output(tp); + tcpstat.tcps_drops++; + soisdisconnected(so); + tcp_canceltimers(tp); + add_to_time_wait(tp); + tcp_unlock(so, 1, 0); + tcp_lock(head, 0, 0); return 1; diff --git a/bsd/netinet/tcp_subr.c b/bsd/netinet/tcp_subr.c index 0d8a16867..0e72a2325 100644 --- a/bsd/netinet/tcp_subr.c +++ b/bsd/netinet/tcp_subr.c @@ -1058,6 +1058,7 @@ tcp_pcblist SYSCTL_HANDLER_ARGS gencnt = tcbinfo.ipi_gencnt; n = tcbinfo.ipi_count; + bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; @@ -1098,6 +1099,8 @@ tcp_pcblist SYSCTL_HANDLER_ARGS if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xtcpcb xt; caddr_t inp_ppcb; + + bzero(&xt, sizeof(xt)); xt.xt_len = sizeof xt; /* XXX should avoid extra copy */ inpcb_to_compat(inp, &xt.xt_inp); @@ -1120,6 +1123,8 @@ tcp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. 
*/ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; xig.xig_gen = tcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = tcbinfo.ipi_count; diff --git a/bsd/netinet/tcp_usrreq.c b/bsd/netinet/tcp_usrreq.c index d29331e28..2a5ab6988 100644 --- a/bsd/netinet/tcp_usrreq.c +++ b/bsd/netinet/tcp_usrreq.c @@ -754,9 +754,11 @@ tcp_connect(tp, nam, p) tcp_lock(inp->inp_socket, 0, 0); if (oinp) { - tcp_lock(oinp->inp_socket, 1, 0); + if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */ + tcp_lock(oinp->inp_socket, 1, 0); if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) { - tcp_unlock(oinp->inp_socket, 1, 0); + if (oinp != inp) + tcp_unlock(oinp->inp_socket, 1, 0); goto skip_oinp; } @@ -767,10 +769,12 @@ tcp_connect(tp, nam, p) otp = tcp_close(otp); else { printf("tcp_connect: inp=%x err=EADDRINUSE\n", inp); - tcp_unlock(oinp->inp_socket, 1, 0); + if (oinp != inp) + tcp_unlock(oinp->inp_socket, 1, 0); return EADDRINUSE; } - tcp_unlock(oinp->inp_socket, 1, 0); + if (oinp != inp) + tcp_unlock(oinp->inp_socket, 1, 0); } skip_oinp: if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr->sin_addr.s_addr : diff --git a/bsd/netinet/udp_usrreq.c b/bsd/netinet/udp_usrreq.c index 4eafd6c8f..3ecfaee90 100644 --- a/bsd/netinet/udp_usrreq.c +++ b/bsd/netinet/udp_usrreq.c @@ -809,6 +809,7 @@ udp_pcblist SYSCTL_HANDLER_ARGS gencnt = udbinfo.ipi_gencnt; n = udbinfo.ipi_count; + bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = n; xig.xig_gen = gencnt; @@ -844,6 +845,8 @@ udp_pcblist SYSCTL_HANDLER_ARGS inp = inp_list[i]; if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) { struct xinpcb xi; + + bzero(&xi, sizeof(xi)); xi.xi_len = sizeof xi; /* XXX should avoid extra copy */ inpcb_to_compat(inp, &xi.xi_inp); @@ -860,6 +863,8 @@ udp_pcblist SYSCTL_HANDLER_ARGS * while we were processing this request, and it * might be necessary to retry. */ + bzero(&xig, sizeof(xig)); + xig.xig_len = sizeof xig; xig.xig_gen = udbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = udbinfo.ipi_count; diff --git a/bsd/netkey/key.c b/bsd/netkey/key.c index 9576d7afe..2c635da4c 100644 --- a/bsd/netkey/key.c +++ b/bsd/netkey/key.c @@ -2850,6 +2850,8 @@ key_newsav(m, mhp, sah, errp) if (mhp->msg->sadb_msg_type != SADB_GETSPI) { *errp = key_setsaval(newsav, m, mhp); if (*errp) { + if (newsav->spihash.le_prev || newsav->spihash.le_next) + LIST_REMOVE(newsav, spihash); KFREE(newsav); return NULL; } @@ -3094,6 +3096,7 @@ key_setsaval(sav, m, mhp) sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid message size.\n")); error = EINVAL; goto fail; } @@ -3109,6 +3112,7 @@ key_setsaval(sav, m, mhp) if ((sav->flags & SADB_X_EXT_NATT) != 0) { if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa_2) || ((struct sadb_sa_2*)(sa0))->sadb_sa_natt_port == 0) { + ipseclog((LOG_DEBUG, "key_setsaval: natt port not set.\n")); error = EINVAL; goto fail; } @@ -3136,6 +3140,7 @@ key_setsaval(sav, m, mhp) error = 0; if (len < sizeof(*key0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid auth key ext len. len = %d\n", len)); error = EINVAL; goto fail; } @@ -3174,6 +3179,7 @@ key_setsaval(sav, m, mhp) error = 0; if (len < sizeof(*key0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid encryption key ext len. 
len = %d\n", len)); error = EINVAL; goto fail; } @@ -3181,6 +3187,7 @@ key_setsaval(sav, m, mhp) case SADB_SATYPE_ESP: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && sav->alg_enc != SADB_EALG_NULL) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid ESP algorithm.\n")); error = EINVAL; break; } @@ -3202,7 +3209,7 @@ key_setsaval(sav, m, mhp) break; } if (error) { - ipseclog((LOG_DEBUG, "key_setsatval: invalid key_enc value.\n")); + ipseclog((LOG_DEBUG, "key_setsaval: invalid key_enc value.\n")); goto fail; } } @@ -3268,6 +3275,7 @@ key_setsaval(sav, m, mhp) lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; if (lft0 != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid hard lifetime ext len.\n")); error = EINVAL; goto fail; } @@ -3284,6 +3292,7 @@ key_setsaval(sav, m, mhp) lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; if (lft0 != NULL) { if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { + ipseclog((LOG_DEBUG, "key_setsaval: invalid soft lifetime ext len.\n")); error = EINVAL; goto fail; } diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index b955a56e8..673240a29 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -457,6 +457,7 @@ struct user_nfs_export_args { #define NXA_DELETE 0x0001 /* delete the specified export(s) */ #define NXA_ADD 0x0002 /* add the specified export(s) */ #define NXA_REPLACE 0x0003 /* delete and add the specified export(s) */ +#define NXA_DELETE_ALL 0x0004 /* delete all exports */ /* export option flags */ #define NX_READONLY 0x0001 /* exported read-only */ @@ -464,6 +465,7 @@ struct user_nfs_export_args { #define NX_MAPROOT 0x0004 /* map root access to anon credential */ #define NX_MAPALL 0x0008 /* map all access to anon credential */ #define NX_KERB 0x0010 /* exported with Kerberos uid mapping */ +#define NX_32BITCLIENTS 0x0020 /* restrict directory cookies to 32 bits */ #ifdef KERNEL struct nfs_exportfs; diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index 966cf72f5..b4a0836a2 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -974,7 +974,7 @@ nfs_buf_release(struct nfsbuf *bp, int freeup) bp->nb_data = NULL; } if (bp->nb_flags & (NB_ERROR | NB_INVAL | NB_NOCACHE)) { - if (bp->nb_flags & (NB_READ | NB_INVAL)) + if (bp->nb_flags & (NB_READ | NB_INVAL | NB_NOCACHE)) upl_flags = UPL_ABORT_DUMP_PAGES; else upl_flags = 0; diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index 1d11d4243..f6fc25fd4 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -3486,6 +3486,8 @@ nfsrv_readdir(nfsd, slp, procp, mrq) } context.vc_proc = procp; context.vc_ucred = nfsd->nd_cr; + if (!v3 || (nxo->nxo_flags & NX_32BITCLIENTS)) + vnopflag |= VNODE_READDIR_SEEKOFF32; if (v3) { nfsm_srv_vattr_init(&at, v3); error = getret = vnode_getattr(vp, &at, &context); @@ -3655,6 +3657,8 @@ again: /* Finish off the record with the cookie */ nfsm_clget; if (v3) { + if (vnopflag & VNODE_READDIR_SEEKOFF32) + dp->d_seekoff &= 0x00000000ffffffffULL; txdr_hyper(&dp->d_seekoff, &tquad); *tl = tquad.nfsuquad[0]; bp += NFSX_UNSIGNED; @@ -3762,6 +3766,8 @@ nfsrv_readdirplus(nfsd, slp, procp, mrq) } context.vc_proc = procp; context.vc_ucred = nfsd->nd_cr; + if (nxo->nxo_flags & NX_32BITCLIENTS) + vnopflag |= VNODE_READDIR_SEEKOFF32; nfsm_srv_vattr_init(&at, 1); error = getret = vnode_getattr(vp, &at, &context); if (!error && toff && verf && verf != at.va_filerev) @@ -3932,6 +3938,8 @@ again: fl.fl_fhsize = txdr_unsigned(nfhp->nfh_len); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; + if 
(vnopflag & VNODE_READDIR_SEEKOFF32) + dp->d_seekoff &= 0x00000000ffffffffULL; txdr_hyper(&dp->d_seekoff, &fl.fl_off); nfsm_clget; diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index d0c970018..556db3712 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -2298,6 +2298,35 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx) char path[MAXPATHLEN]; int expisroot; + if (unxa->nxa_flags & NXA_DELETE_ALL) { + /* delete all exports on all file systems */ + lck_rw_lock_exclusive(&nfs_export_rwlock); + while ((nxfs = LIST_FIRST(&nfs_exports))) { + mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path); + if (mp) + mp->mnt_flag &= ~MNT_EXPORTED; + /* delete all exports on this file system */ + while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) { + LIST_REMOVE(nx, nx_next); + LIST_REMOVE(nx, nx_hash); + /* delete all netopts for this export */ + nfsrv_free_addrlist(nx); + nx->nx_flags &= ~NX_DEFAULTEXPORT; + if (nx->nx_defopt.nxo_cred) { + kauth_cred_rele(nx->nx_defopt.nxo_cred); + nx->nx_defopt.nxo_cred = NULL; + } + FREE(nx->nx_path, M_TEMP); + FREE(nx, M_TEMP); + } + LIST_REMOVE(nxfs, nxfs_next); + FREE(nxfs->nxfs_path, M_TEMP); + FREE(nxfs, M_TEMP); + } + lck_rw_done(&nfs_export_rwlock); + return (0); + } + error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen); if (error) return (error); diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index c858df061..b5d0b9c6f 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -4148,6 +4148,9 @@ again: if ((waitfor == MNT_WAIT) && !LIST_EMPTY(&np->n_dirtyblkhd)) { goto again; } + /* if we have no dirty blocks, we can clear the modified flag */ + if (LIST_EMPTY(&np->n_dirtyblkhd)) + np->n_flag &= ~NMODIFIED; FSDBG(526, np->n_flag, np->n_error, 0, 0); if (!ignore_writeerr && (np->n_flag & NWRITEERR)) { diff --git a/bsd/sys/fcntl.h b/bsd/sys/fcntl.h index 0519d9522..f5f7ba1ee 100644 --- a/bsd/sys/fcntl.h +++ b/bsd/sys/fcntl.h @@ -213,6 +213,7 @@ typedef __darwin_pid_t pid_t; #define F_PATHPKG_CHECK 52 /* find which component (if any) is a package */ #define F_FREEZE_FS 53 /* "freeze" all fs operations */ #define F_THAW_FS 54 /* "thaw" all fs operations */ +#define F_GLOBAL_NOCACHE 55 /* turn data caching off/on (globally) for this file */ // FS-specific fcntl()'s numbers begin at 0x00010000 and go up #define FCNTL_FS_SPECIFIC_BASE 0x00010000 diff --git a/bsd/sys/socketvar.h b/bsd/sys/socketvar.h index f069bf4ac..a8db652c3 100644 --- a/bsd/sys/socketvar.h +++ b/bsd/sys/socketvar.h @@ -206,7 +206,7 @@ struct socket { u_int32_t so_filteruse; /* usecount for the socket filters */ void *reserved3; /* Temporarily in use/debug: last socket lock LR */ void *reserved4; /* Temporarily in use/debug: last socket unlock LR */ - + thread_t so_send_filt_thread; #endif }; #endif /* KERNEL_PRIVATE */ diff --git a/bsd/sys/vnode.h b/bsd/sys/vnode.h index 9bac1aec0..125d020d2 100644 --- a/bsd/sys/vnode.h +++ b/bsd/sys/vnode.h @@ -415,6 +415,7 @@ extern int vttoif_tab[]; /* VNOP_READDIR flags: */ #define VNODE_READDIR_EXTENDED 0x0001 /* use extended directory entries */ #define VNODE_READDIR_REQSEEKOFF 0x0002 /* requires seek offset (cookies) */ +#define VNODE_READDIR_SEEKOFF32 0x0004 /* seek offset values should fit in 32 bits */ #define NULLVP ((struct vnode *)NULL) diff --git a/bsd/vfs/vfs_attrlist.c b/bsd/vfs/vfs_attrlist.c index 7716e41e2..96c731a06 100644 --- a/bsd/vfs/vfs_attrlist.c +++ b/bsd/vfs/vfs_attrlist.c @@ -970,10 +970,12 @@ getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *r /* 
* Allocate a target buffer for attribute results. - * Note that since we won't ever copy out more than the caller requested, - * we never need to allocate more than they offer. + * + * Note that we won't ever copy out more than the caller requested, even though + * we might have to allocate more than they offer so that the diagnostic checks + * don't result in a panic if the caller's buffer is too small. */ - ab.allocated = imin(uap->bufferSize, fixedsize + varsize); + ab.allocated = fixedsize + varsize; if (ab.allocated > ATTR_MAX_BUFFER) { error = ENOMEM; VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER); @@ -991,7 +993,7 @@ getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *r */ ab.fixedcursor = ab.base + sizeof(uint32_t); ab.varcursor = ab.base + fixedsize; - ab.needed = fixedsize + varsize; + ab.needed = ab.allocated; /* common attributes **************************************************/ if (al.commonattr & ATTR_CMN_NAME) @@ -1248,7 +1250,8 @@ getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *r */ *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed); - error = copyout(ab.base, uap->attributeBuffer, ab.allocated); + /* Only actually copy out as much as the user buffer can hold */ + error = copyout(ab.base, uap->attributeBuffer, imin(uap->bufferSize, ab.allocated)); out: if (va.va_name) diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index a01ac6c45..fd2250cc7 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -1023,7 +1023,7 @@ insmntque(vnode_t vp, mount_t mp) /* * Delete from old mount point vnode list, if on one. */ - if ( (lmp = vp->v_mount) != NULL) { + if ( (lmp = vp->v_mount) != NULL && lmp != dead_mountp) { if ((vp->v_lflag & VNAMED_MOUNT) == 0) panic("insmntque: vp not in mount vnode list"); vp->v_lflag &= ~VNAMED_MOUNT; @@ -1619,10 +1619,8 @@ loop: vnode_unlock(vp); } else { vclean(vp, 0, p); - vp->v_mount = 0; /*override any dead_mountp */ vp->v_lflag &= ~VL_DEAD; vp->v_op = spec_vnodeop_p; - insmntque(vp, (struct mount *)0); vnode_unlock(vp); } mount_lock(mp); @@ -3915,6 +3913,7 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_ struct vfs_context context2; vfs_context_t ctx = context; u_long ndflags = 0; + int lflags = flags; if (context == NULL) { /* XXX technically an error */ context2.vc_proc = current_proc(); @@ -3922,14 +3921,17 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_ ctx = &context2; } - if (flags & VNODE_LOOKUP_NOFOLLOW) + if (fmode & O_NOFOLLOW) + lflags |= VNODE_LOOKUP_NOFOLLOW; + + if (lflags & VNODE_LOOKUP_NOFOLLOW) ndflags = NOFOLLOW; else ndflags = FOLLOW; - if (flags & VNODE_LOOKUP_NOCROSSMOUNT) + if (lflags & VNODE_LOOKUP_NOCROSSMOUNT) ndflags |= NOCROSSMOUNT; - if (flags & VNODE_LOOKUP_DOWHITEOUT) + if (lflags & VNODE_LOOKUP_DOWHITEOUT) ndflags |= DOWHITEOUT; /* XXX AUDITVNPATH1 needed ? */ diff --git a/config/MasterVersion b/config/MasterVersion index 4dd4f2f0c..1aa675b29 100644 --- a/config/MasterVersion +++ b/config/MasterVersion @@ -1,4 +1,4 @@ -8.2.0 +8.3.0 # The first line of this file contains the master version number for the kernel. # All other instances of the kernel version in xnu are derived from this file. 
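Note on the vfs_attrlist.c hunk above: getattrlist() now sizes its kernel buffer from the computed fixed + variable attribute lengths rather than from the caller's offer, and clamps only the final copyout. The following is a minimal standalone C sketch of that discipline, not xnu code; the names (pack_attrs, imin_local, user_bufsize) are hypothetical stand-ins:

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    static int imin_local(int a, int b) { return a < b ? a : b; }

    /*
     * Pack results into a kernel-side buffer sized to the full
     * fixed + variable length, then copy out no more than the
     * caller offered. Allocating the full size keeps the internal
     * cursors in bounds even when the caller's buffer is too small;
     * the clamp happens only at copyout time.
     */
    int pack_attrs(void *user_buf, int user_bufsize, int fixedsize, int varsize)
    {
        int allocated = fixedsize + varsize;  /* full size, not imin(user_bufsize, ...) */
        char *base = malloc(allocated);
        if (base == NULL)
            return -1;                        /* ENOMEM analogue */

        memset(base, 0, allocated);
        /* ... attribute packing fills base[4..allocated) here ... */
        *(uint32_t *)base = (uint32_t)allocated; /* first word reports the size needed */

        /* stands in for copyout(): never write past what the caller can hold */
        memcpy(user_buf, base, imin_local(user_bufsize, allocated));

        free(base);
        return 0;
    }

    int main(void)
    {
        char small[8] = { 0 };
        return pack_attrs(small, sizeof(small), 16, 32); /* offered 8, needed 48 */
    }

The point of the split is that the packing path can assume the buffer really is fixed + variable bytes long, so an undersized caller buffer can no longer push the cursors past the end; the caller simply gets a truncated copy plus the size it should retry with.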
diff --git a/config/System6.0.ppc.exports b/config/System6.0.ppc.exports index 9aeba05d6..16e230227 100644 --- a/config/System6.0.ppc.exports +++ b/config/System6.0.ppc.exports @@ -208,3 +208,8 @@ _ml_mem_backoff _pe_do_clock_test _pe_run_clock_test _scc +_pmsStart +_pmsPark +_pmsRun +_pmsRunLocal +_pmsBuild diff --git a/config/Unsupported.ppc.exports b/config/Unsupported.ppc.exports index da87c45ee..1f785c1ee 100644 --- a/config/Unsupported.ppc.exports +++ b/config/Unsupported.ppc.exports @@ -23,3 +23,9 @@ _ml_set_processor_voltage _ml_throttle _temp_patch_ptrace _temp_unpatch_ptrace +_pmsStart +_pmsPark +_pmsRun +_pmsRunLocal +_pmsBuild +_ml_mem_backoff diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h new file mode 100644 index 000000000..01e3b860c --- /dev/null +++ b/iokit/IOKit/IOHibernatePrivate.h @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KERNEL +#include +#endif + +struct IOPolledFileExtent +{ + uint64_t start; + uint64_t length; +}; +typedef struct IOPolledFileExtent IOPolledFileExtent; + +struct IOHibernateImageHeader +{ + uint64_t imageSize; + uint64_t image1Size; + + uint32_t restore1CodePage; + uint32_t restore1PageCount; + uint32_t restore1CodeOffset; + uint32_t restore1StackOffset; + + uint32_t pageCount; + uint32_t bitmapSize; + + uint32_t restore1Sum; + uint32_t image1Sum; + uint32_t image2Sum; + + uint32_t actualRestore1Sum; + uint32_t actualImage1Sum; + uint32_t actualImage2Sum; + + uint32_t actualUncompressedPages; + uint32_t conflictCount; + uint32_t nextFree; + + uint32_t signature; + uint32_t processorFlags; + + uint8_t reserved2[24]; + + uint64_t encryptStart; + uint64_t machineSignature; + + uint32_t previewSize; + uint32_t previewPageListSize; + + uint32_t diag[4]; + + uint32_t reserved[82]; // make sizeof == 512 + + uint32_t fileExtentMapSize; + IOPolledFileExtent fileExtentMap[2]; +}; +typedef struct IOHibernateImageHeader IOHibernateImageHeader; + + +struct hibernate_bitmap_t +{ + uint32_t first_page; + uint32_t last_page; + uint32_t bitmapwords; + uint32_t bitmap[0]; +}; +typedef struct hibernate_bitmap_t hibernate_bitmap_t; + +struct hibernate_page_list_t +{ + uint32_t list_size; + uint32_t page_count; + uint32_t bank_count; + hibernate_bitmap_t bank_bitmap[0]; +}; +typedef struct hibernate_page_list_t hibernate_page_list_t; + +struct hibernate_cryptwakevars_t +{ +#ifdef _AES_H + uint8_t aes_iv[AES_BLOCK_SIZE]; +#else +#warning undef _AES_H +#endif +}; +typedef struct hibernate_cryptwakevars_t hibernate_cryptwakevars_t; + +struct hibernate_cryptvars_t +{ 
+#ifdef _AES_H + uint8_t aes_iv[AES_BLOCK_SIZE]; + aes_ctx ctx; +#else +#warning undef _AES_H +#endif +}; +typedef struct hibernate_cryptvars_t hibernate_cryptvars_t; + + +enum +{ + kIOHibernateProgressCount = 19, + kIOHibernateProgressWidth = 7, + kIOHibernateProgressHeight = 16, + kIOHibernateProgressSpacing = 3, + kIOHibernateProgressOriginY = 81, + + kIOHibernateProgressSaveUnderSize = 2*5+14*2, + + kIOHibernateProgressLightGray = 230, + kIOHibernateProgressMidGray = 174, + kIOHibernateProgressDarkGray = 92 +}; + +struct hibernate_graphics_t +{ + unsigned long physicalAddress; // Base address of video memory + unsigned long mode; // + unsigned long rowBytes; // Number of bytes per pixel row + unsigned long width; // Width + unsigned long height; // Height + unsigned long depth; // Pixel Depth + + uint8_t progressSaveUnder[kIOHibernateProgressCount][kIOHibernateProgressSaveUnderSize]; +}; +typedef struct hibernate_graphics_t hibernate_graphics_t; + +#define DECLARE_IOHIBERNATEPROGRESSALPHA \ +static const uint8_t gIOHibernateProgressAlpha \ +[kIOHibernateProgressHeight][kIOHibernateProgressWidth] = \ +{ \ + { 0x00,0x63,0xd8,0xf0,0xd8,0x63,0x00 }, \ + { 0x51,0xff,0xff,0xff,0xff,0xff,0x51 }, \ + { 0xae,0xff,0xff,0xff,0xff,0xff,0xae }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 }, \ + { 0xae,0xff,0xff,0xff,0xff,0xff,0xae }, \ + { 0x54,0xff,0xff,0xff,0xff,0xff,0x54 }, \ + { 0x00,0x66,0xdb,0xf3,0xdb,0x66,0x00 } \ +}; + +#ifdef KERNEL + +#ifdef __cplusplus + +void IOHibernateSystemInit(IOPMrootDomain * rootDomain); + +IOReturn IOHibernateSystemSleep(void); +IOReturn IOHibernateSystemHasSlept(void); +IOReturn IOHibernateSystemWake(void); +IOReturn IOHibernateSystemPostWake(void); + +#endif /* __cplusplus */ + +#ifdef _SYS_CONF_H_ +typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size); + +struct kern_direct_file_io_ref_t * +kern_open_file_for_direct_io(const char * name, + kern_get_file_extents_callback_t callback, + void * callback_ref, + dev_t * device, + uint64_t * partitionbase_result, + uint64_t * maxiocount_result); +void +kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref); +int +kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len); +int get_kernel_symfile(struct proc *p, char **symfile); +#endif /* _SYS_CONF_H_ */ + +hibernate_page_list_t * +hibernate_page_list_allocate(void); + +kern_return_t +hibernate_setup(IOHibernateImageHeader * header, + uint32_t free_page_ratio, + uint32_t free_page_time, + hibernate_page_list_t ** page_list_ret, + hibernate_page_list_t ** page_list_wired_ret, + boolean_t * encryptedswap); +kern_return_t +hibernate_teardown(hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired); + +kern_return_t +hibernate_processor_setup(IOHibernateImageHeader * header); + +void +hibernate_vm_lock(void); +void +hibernate_vm_unlock(void); + +void +hibernate_page_list_setall(hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut); + +void +hibernate_page_list_setall_machine(hibernate_page_list_t * page_list, + 
hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut); +void +hibernate_page_list_discard(hibernate_page_list_t * page_list); + +void +hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired, + vm_offset_t ppnum, vm_offset_t count, uint32_t kind); + +void +hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page); +boolean_t +hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page); + +uint32_t +hibernate_page_list_count(hibernate_page_list_t *list, uint32_t set, uint32_t page); + +void +hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags); + +void +hibernate_machine_init(void); +boolean_t +hibernate_write_image(void); + +long +hibernate_machine_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4); +long +hibernate_kernel_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4); + +extern uint32_t gIOHibernateState; +extern uint32_t gIOHibernateMode; +extern uint32_t gIOHibernateFreeTime; // max time to spend freeing pages (ms) +extern uint8_t gIOHibernateRestoreStack[]; +extern uint8_t gIOHibernateRestoreStackEnd[]; +extern IOHibernateImageHeader * gIOHibernateCurrentHeader; +extern hibernate_graphics_t * gIOHibernateGraphicsInfo; +extern hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars; + +#define HIBLOG(fmt, args...) \ + { kprintf(fmt, ## args); printf(fmt, ## args); } + +#define HIBPRINT(fmt, args...) \ + { kprintf(fmt, ## args); } + +#endif /* KERNEL */ + +// gIOHibernateState, kIOHibernateStateKey +enum +{ + kIOHibernateStateInactive = 0, + kIOHibernateStateHibernating = 1, /* writing image */ + kIOHibernateStateWakingFromHibernate = 2 /* booted and restored image */ +}; + +// gIOHibernateMode, kIOHibernateModeKey +enum +{ + kIOHibernateModeOn = 0x00000001, + kIOHibernateModeSleep = 0x00000002, + kIOHibernateModeEncrypt = 0x00000004, + + kIOHibernateModeDiscardCleanInactive = 0x00000008, + kIOHibernateModeDiscardCleanActive = 0x00000010 +}; + +// IOHibernateImageHeader.signature +enum +{ + kIOHibernateHeaderSignature = 0x73696d65, + kIOHibernateHeaderInvalidSignature = 0x7a7a7a7a +}; + +// kind for hibernate_set_page_state() +enum +{ + kIOHibernatePageStateFree = 0, + kIOHibernatePageStateWiredSave = 1, + kIOHibernatePageStateUnwiredSave = 2 +}; + +#define kIOHibernateModeKey "Hibernate Mode" +#define kIOHibernateFileKey "Hibernate File" +#define kIOHibernateFreeRatioKey "Hibernate Free Ratio" +#define kIOHibernateFreeTimeKey "Hibernate Free Time" + +#define kIOHibernateStateKey "IOHibernateState" +#define kIOHibernateFeatureKey "Hibernation" +#define kIOHibernatePreviewBufferKey "IOPreviewBuffer" + +#define kIOHibernateBootImageKey "boot-image" +#define kIOHibernateBootImageKeyKey "boot-image-key" +#define kIOHibernateBootSignatureKey "boot-signature" + +#define kIOHibernateMemorySignatureKey "memory-signature" +#define kIOHibernateMemorySignatureEnvKey "mem-sig" +#define kIOHibernateMachineSignatureKey "machine-signature" + +#ifdef __cplusplus +} +#endif diff --git a/iokit/IOKit/Makefile b/iokit/IOKit/Makefile index fdcf6ccb7..963483625 100644 --- a/iokit/IOKit/Makefile +++ b/iokit/IOKit/Makefile @@ -30,7 +30,7 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386} NOT_EXPORT_HEADERS = -NOT_KF_MI_HEADERS = $(NOT_EXPORT_HEADERS) IOKitKeysPrivate.h IOCPU.h IOPolledInterface.h +NOT_KF_MI_HEADERS = $(NOT_EXPORT_HEADERS) IOKitKeysPrivate.h IOCPU.h IOHibernatePrivate.h IOPolledInterface.h NOT_LOCAL_HEADERS = @@ -40,7 +40,7 
@@ INSTALL_MI_LIST = IOBSD.h IOKitKeys.h IOKitServer.h IOReturn.h\ IOSharedLock.h IOTypes.h OSMessageNotification.h\ IODataQueueShared.h IOMessage.h -INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h +INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h INSTALL_MI_DIR = . diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index fd695d3bc..a238b773c 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -912,7 +912,6 @@ static SInt32 DefaultCompare( UInt32 cellCount, UInt32 left[], UInt32 right[] ) return( left[ cellCount ] - right[ cellCount ] ); } - void IODTGetCellCounts( IORegistryEntry * regEntry, UInt32 * sizeCount, UInt32 * addressCount) { @@ -941,10 +940,13 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry, UInt32 childSizeCells, childAddressCells; UInt32 childCells; UInt32 cell[ 5 ], offset = 0, length; + UInt32 endCell[ 5 ]; UInt32 *range; + UInt32 *lookRange; + UInt32 *startRange; UInt32 *endRanges; bool ok = true; - SInt32 diff; + SInt32 diff, endDiff; IODTPersistent *persist; IODTCompareAddressCellFunc compare; @@ -959,60 +961,81 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry, else *len = IOPhysical32( 0, cellsIn[ childAddressCells ] ); - do { - prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey )); - if( 0 == prop) { - /* end of the road */ - *phys = IOPhysical32( 0, cell[ childAddressCells - 1 ] + offset); - break; - } - - parent = regEntry->getParentEntry( gIODTPlane ); - IODTGetCellCounts( parent, &sizeCells, &addressCells ); - - if( (length = prop->getLength())) { - // search - range = (UInt32 *) prop->getBytesNoCopy(); - endRanges = range + (length / 4); - - prop = (OSData *) regEntry->getProperty( gIODTPersistKey ); - if( prop) { - persist = (IODTPersistent *) prop->getBytesNoCopy(); - compare = persist->compareFunc; - } else - compare = DefaultCompare; - - for( ok = false; - range < endRanges; - range += (childCells + addressCells) ) { - - // is cell >= range start? - diff = (*compare)( childAddressCells, cell, range ); - if( diff < 0) - continue; - - // is cell + size <= range end? - if( (diff + cell[ childCells - 1 ]) - > range[ childCells + addressCells - 1 ]) - continue; + do + { + prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey )); + if( 0 == prop) { + /* end of the road */ + *phys = IOPhysical32( 0, cell[ childAddressCells - 1 ] + offset); + break; + } - offset += diff; - ok = true; - break; - } + parent = regEntry->getParentEntry( gIODTPlane ); + IODTGetCellCounts( parent, &sizeCells, &addressCells ); + + if( (length = prop->getLength())) { + // search + startRange = (UInt32 *) prop->getBytesNoCopy(); + range = startRange; + endRanges = range + (length / 4); + + prop = (OSData *) regEntry->getProperty( gIODTPersistKey ); + if( prop) { + persist = (IODTPersistent *) prop->getBytesNoCopy(); + compare = persist->compareFunc; + } else + compare = DefaultCompare; + + for( ok = false; + range < endRanges; + range += (childCells + addressCells) ) { + + // is cell start >= range start? + diff = (*compare)( childAddressCells, cell, range ); + if( diff < 0) + continue; + + ok = (0 == cell[childCells - 1]); + if (!ok) + { + // search for cell end + bcopy(cell, endCell, childAddressCells * sizeof(UInt32)); + endCell[childAddressCells - 1] += cell[childCells - 1] - 1; + lookRange = startRange; + for( ; + lookRange < endRanges; + lookRange += (childCells + addressCells) ) + { + // is cell >= range start? 
+ endDiff = (*compare)( childAddressCells, endCell, lookRange ); + if( endDiff < 0) + continue; + if ((endDiff - cell[childCells - 1] + 1 + lookRange[childAddressCells + addressCells - 1]) + == (diff + range[childAddressCells + addressCells - 1])) + { + ok = true; + break; + } + } + if (!ok) + continue; + } + offset += diff; + break; + } - // Get the physical start of the range from our parent - bcopy( range + childAddressCells, cell, 4 * addressCells ); - bzero( cell + addressCells, 4 * sizeCells ); + // Get the physical start of the range from our parent + bcopy( range + childAddressCells, cell, 4 * addressCells ); + bzero( cell + addressCells, 4 * sizeCells ); - } /* else zero length range => pass thru to parent */ + } /* else zero length range => pass thru to parent */ regEntry = parent; childSizeCells = sizeCells; childAddressCells = addressCells; childCells = childAddressCells + childSizeCells; - - } while( ok && regEntry); + } + while( ok && regEntry); return( ok); } diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp new file mode 100644 index 000000000..0d5fdbacd --- /dev/null +++ b/iokit/Kernel/IOHibernateIO.cpp @@ -0,0 +1,2071 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + + +/* + +Sleep: + +- PMRootDomain calls IOHibernateSystemSleep() before system sleep +(devices awake, normal execution context) +- IOHibernateSystemSleep opens the hibernation file (or partition) at the bsd level, + grabs its extents and searches for a polling driver willing to work with that IOMedia. + The BSD code makes an ioctl to the storage driver to get the partition base offset to + the disk, and other ioctls to get the transfer constraints. + If successful, the file is written to make sure it's initially not bootable (in case of + later failure) and nvram set to point to the first block of the file. (Has to be done + here so blocking is possible in nvram support). + hibernate_setup() in osfmk is called to allocate page bitmaps for all dram, and + page out any pages it wants to (currently zero, but probably some percentage of memory). + It's assumed just allocating pages will cause the VM system to naturally select the best + pages for eviction. It also copies processor flags needed for the restore path and sets + a flag in the boot processor proc info. + gIOHibernateState = kIOHibernateStateHibernating. +- Regular sleep progresses - some drivers may inspect the root domain property + kIOHibernateStateKey to modify behavior. The platform driver saves state to memory + as usual but leaves motherboard I/O on. 
+- Eventually the platform calls ml_ppc_sleep() in the shutdown context on the last cpu, + at which point memory is ready to be saved. mapping_hibernate_flush() is called to get + all ppc RC bits out of the hash table and caches into the mapping structures. +- hibernate_write_image() is called (still in shutdown context, no blocking or preemption). + hibernate_page_list_setall() is called to get a bitmap of dram pages that need to be saved. + All pages are assumed to be saved (as part of the wired image) unless explicitly subtracted + by hibernate_page_list_setall(), avoiding having to find arch dependent low level bits. + The image header and block list are written. The header includes the second file extent so + only the header block is needed to read the file, regardless of filesystem. + The kernel section "__HIB" is written uncompressed to the image. This section of code and data + (only) is used to decompress the image during wake/boot. + Some additional pages are removed from the bitmaps - the buffers used for hibernation. + The bitmaps are written to the image. + More areas are removed from the bitmaps (after they have been written to the image) - the + section "__HIB" pages and interrupt stack. + Each wired page is compressed and written and then each non-wired page. Compression and + disk writes are in parallel. + The image header is written to the start of the file and the polling driver closed. + The machine powers down (or sleeps). + +Boot/Wake: + +- BootX sees the boot-image nvram variable containing the device and block number of the image, + reads the header and if the signature is correct proceeds. The boot-image variable is cleared. +- BootX reads the portion of the image used for wired pages, to memory. It's assumed this will fit + in the OF memory environment, and the image is decrypted. There is no decompression in BootX, + that is in the kernel's __HIB section. +- BootX copies the "__HIB" section to its correct position in memory, quiesces and calls its entry + hibernate_kernel_entrypoint(), passing the location of the image in memory. Translation is off, + only code & data in that section is safe to call since all the other wired pages are still + compressed in the image. +- hibernate_kernel_entrypoint() removes pages occupied by the raw image from the page bitmaps. + It uses the bitmaps to work out which pages can be uncompressed from the image to their final + location directly, and copies those that can't to interim free pages. When the image has been + completed, the copies are uncompressed, overwriting the wired image pages. + hibernate_restore_phys_page() (in osfmk since it's arch dependent, but part of the "__HIB" section) + is used to get pages into place for 64bit. +- the reset vector is called (at least on ppc), the kernel proceeds on a normal wake, with some + changes conditional on the per proc flag - before VM is turned on the boot cpu, all mappings + are removed from the software structures, and the hash table is reinitialized. +- After the platform CPU init code is called, hibernate_machine_init() is called to restore the rest + of memory, using the polled mode driver, before other threads can run or any devices are turned on. + This reduces the memory usage for BootX and allows decompression in parallel with disk reads, + for the remaining non wired pages. +- The polling driver is closed down and regular wake proceeds. 
When the kernel calls iokit to wake + (normal execution context) hibernate_teardown() in osfmk is called to release any memory, the file + is closed via bsd. + +Polled Mode I/O: + +IOHibernateSystemSleep() finds a polled mode interface to the ATA controller via a property in the +registry, specifying an object of class IOPolledInterface. + +Before the system goes to sleep it searches from the IOMedia object (could be a filesystem or +partition) that the image is going to live on, looking for polled interface properties. If it finds +one the IOMedia object is passed to a "probe" call for the interface to accept or reject. All the +interfaces found are kept in an ordered list. + +There is an Open/Close pair of calls made to each of the interfaces at various stages since there are +a few different contexts things happen in: + +- there is an Open/Close (Preflight) made before any part of the system has slept (I/O is all +up and running) and after wake - it is safe to allocate memory and do anything. The device +ignores sleep requests from that point since it's a waste of time if it goes to sleep and +immediately wakes back up for the image write. + +- there is an Open/Close (BeforeSleep) pair made around the image write operations that happen +immediately before sleep. These can't block or allocate memory - the I/O system is asleep apart +from the low level bits (motherboard I/O etc). There is only one thread running. The close can be +used to flush and set the disk to sleep. + +- there is an Open/Close (AfterSleep) pair made around the image read operations that happen +immediately after sleep. These can't block or allocate memory. This is happening after the platform +expert has woken the low level bits of the system, but most of the I/O system has not. There is only +one thread running. + +For the actual I/O, all the ops are with respect to a single IOMemoryDescriptor that was passed +(prepared) to the Preflight Open() call. There is a read/write op, buffer offset to the IOMD for +the data, an offset to the disk and length (block aligned 64 bit numbers), and completion callback. +Each I/O is async but only one is ever outstanding. The polled interface has a checkForWork call +that is called for the hardware to check for events, and complete the I/O via the callback. +The hibernate path uses the same transfer constraints the regular cluster I/O path in BSD uses +to restrict I/O ops. +*/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "RootDomainUserClient.h" +#include +#include "IOPMPowerStateQueue.h" +#include +#include + +#include +#include +#include +#include // (FWRITE, ...) 
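Note on the "Polled Mode I/O" comment above: it describes one outstanding asynchronous transfer completed by spinning on a checkForWork-style hook rather than blocking. A minimal standalone C sketch of that pattern follows, with hypothetical names (polled_io, check_for_work_stub) rather than the IOPolledInterface C++ API:

    #include <stdint.h>
    #include <stdio.h>

    #define IO_PENDING (-1)

    /* Hypothetical stand-in for the single outstanding polled transfer. */
    struct polled_io {
        volatile int status;                        /* posted by the completion callback */
        int (*check_for_work)(struct polled_io *);  /* hardware progress hook */
    };

    /* Completion callback: the only writer of status once the I/O is started. */
    static void io_complete(struct polled_io *io, int status)
    {
        io->status = status;
    }

    /* Toy hardware hook: "finishes" the transfer on the first poll. */
    static int check_for_work_stub(struct polled_io *io)
    {
        io_complete(io, 0);
        return 0;
    }

    /*
     * Wait for the one outstanding I/O without blocking: keep calling the
     * checkForWork-style hook so the hardware state machine can advance and
     * eventually fire the completion callback.
     */
    static int polled_io_done(struct polled_io *io)
    {
        while (io->status == IO_PENDING) {
            int err = io->check_for_work(io);
            if (err)
                return err;             /* the poller itself failed */
        }
        return io->status;              /* status posted by io_complete() */
    }

    int main(void)
    {
        struct polled_io io = { IO_PENDING, check_for_work_stub };
        printf("I/O status: %d\n", polled_io_done(&io));
        return 0;
    }

Because the wait loop only ever polls, this shape is usable in the shutdown and early-wake contexts described above, where blocking primitives and memory allocation are unavailable.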
+extern "C" { +#include +} + +#include +#include +#include +#include "IOHibernateInternal.h" +#include "WKdm.h" +#include "IOKitKernelInternal.h" + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +OSDefineMetaClassAndAbstractStructors(IOPolledInterface, OSObject); + +OSMetaClassDefineReservedUnused(IOPolledInterface, 0); +OSMetaClassDefineReservedUnused(IOPolledInterface, 1); +OSMetaClassDefineReservedUnused(IOPolledInterface, 2); +OSMetaClassDefineReservedUnused(IOPolledInterface, 3); +OSMetaClassDefineReservedUnused(IOPolledInterface, 4); +OSMetaClassDefineReservedUnused(IOPolledInterface, 5); +OSMetaClassDefineReservedUnused(IOPolledInterface, 6); +OSMetaClassDefineReservedUnused(IOPolledInterface, 7); +OSMetaClassDefineReservedUnused(IOPolledInterface, 8); +OSMetaClassDefineReservedUnused(IOPolledInterface, 9); +OSMetaClassDefineReservedUnused(IOPolledInterface, 10); +OSMetaClassDefineReservedUnused(IOPolledInterface, 11); +OSMetaClassDefineReservedUnused(IOPolledInterface, 12); +OSMetaClassDefineReservedUnused(IOPolledInterface, 13); +OSMetaClassDefineReservedUnused(IOPolledInterface, 14); +OSMetaClassDefineReservedUnused(IOPolledInterface, 15); + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +extern uint32_t gIOHibernateState; +uint32_t gIOHibernateMode; +static char gIOHibernateBootSignature[256+1]; +static char gIOHibernateFilename[MAXPATHLEN+1]; +static uint32_t gIOHibernateFreeRatio = 0; // free page target (percent) +uint32_t gIOHibernateFreeTime = 0*1000; // max time to spend freeing pages (ms) + +static IODTNVRAM * gIOOptionsEntry; +static IORegistryEntry * gIOChosenEntry; + +static IOPolledFileIOVars gFileVars; +static IOHibernateVars gIOHibernateVars; +static struct kern_direct_file_io_ref_t * gIOHibernateFileRef; +static hibernate_cryptvars_t gIOHibernateCryptWakeContext; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +enum { kXPRamAudioVolume = 8 }; +enum { kDefaultIOSize = 128 * 1024 }; +enum { kVideoMapSize = 32 * 1024 * 1024 }; + +#ifndef kIOMediaPreferredBlockSizeKey +#define kIOMediaPreferredBlockSizeKey "Preferred Block Size" +#endif + +#ifndef kIOBootPathKey +#define kIOBootPathKey "bootpath" +#endif +#ifndef kIOSelectedBootDeviceKey +#define kIOSelectedBootDeviceKey "boot-device" +#endif + + +enum { kIOHibernateMinPollersNeeded = 2 }; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +// copy from phys addr to MD + +static IOReturn +IOMemoryDescriptorWriteFromPhysical(IOMemoryDescriptor * md, + IOByteCount offset, addr64_t bytes, IOByteCount length) +{ + addr64_t srcAddr = bytes; + IOByteCount remaining; + + remaining = length = min(length, md->getLength() - offset); + while (remaining) { // (process another target segment?) + addr64_t dstAddr64; + IOByteCount dstLen; + + dstAddr64 = md->getPhysicalSegment64(offset, &dstLen); + if (!dstAddr64) + break; + + // Clip segment length to remaining + if (dstLen > remaining) + dstLen = remaining; + +#if 1 + bcopy_phys(srcAddr, dstAddr64, dstLen); +#else + copypv(srcAddr, dstAddr64, dstLen, + cppvPsnk | cppvFsnk | cppvNoRefSrc | cppvNoModSnk | cppvKmap); +#endif + srcAddr += dstLen; + offset += dstLen; + remaining -= dstLen; + } + + assert(!remaining); + + return remaining ? 
kIOReturnUnderrun : kIOReturnSuccess;
+}
+
+// copy from MD to phys addr
+
+static IOReturn
+IOMemoryDescriptorReadToPhysical(IOMemoryDescriptor * md,
+ IOByteCount offset, addr64_t bytes, IOByteCount length)
+{
+ addr64_t dstAddr = bytes;
+ IOByteCount remaining;
+
+ remaining = length = min(length, md->getLength() - offset);
+ while (remaining) { // (process another target segment?)
+ addr64_t srcAddr64;
+ IOByteCount dstLen;
+
+ srcAddr64 = md->getPhysicalSegment64(offset, &dstLen);
+ if (!srcAddr64)
+ break;
+
+ // Clip segment length to remaining
+ if (dstLen > remaining)
+ dstLen = remaining;
+
+#if 1
+ bcopy_phys(srcAddr64, dstAddr, dstLen);
+#else
+ copypv(srcAddr64, dstAddr, dstLen,
+ cppvPsnk | cppvFsnk | cppvNoRefSrc | cppvNoModSnk | cppvKmap);
+#endif
+ dstAddr += dstLen;
+ offset += dstLen;
+ remaining -= dstLen;
+ }
+
+ assert(!remaining);
+
+ return remaining ? kIOReturnUnderrun : kIOReturnSuccess;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired,
+ vm_offset_t ppnum, vm_offset_t count, uint32_t kind)
+{
+ count += ppnum;
+ switch (kind)
+ {
+ case kIOHibernatePageStateUnwiredSave:
+ // unwired save
+ for (; ppnum < count; ppnum++)
+ {
+ hibernate_page_bitset(page_list, FALSE, ppnum);
+ hibernate_page_bitset(page_list_wired, TRUE, ppnum);
+ }
+ break;
+ case kIOHibernatePageStateWiredSave:
+ // wired save
+ for (; ppnum < count; ppnum++)
+ {
+ hibernate_page_bitset(page_list, FALSE, ppnum);
+ hibernate_page_bitset(page_list_wired, FALSE, ppnum);
+ }
+ break;
+ case kIOHibernatePageStateFree:
+ // free page
+ for (; ppnum < count; ppnum++)
+ {
+ hibernate_page_bitset(page_list, TRUE, ppnum);
+ hibernate_page_bitset(page_list_wired, TRUE, ppnum);
+ }
+ break;
+ default:
+ panic("hibernate_set_page_state");
+ }
+}
+
+static vm_offset_t
+hibernate_page_list_iterate(hibernate_page_list_t * list,
+ void ** iterator, vm_offset_t * ppnum)
+{
+ uint32_t count, idx;
+
+ idx = (uint32_t) *iterator;
+
+ if (!idx)
+ idx = hibernate_page_list_count(list, TRUE, idx);
+
+ *ppnum = idx;
+ count = hibernate_page_list_count(list, FALSE, idx);
+ idx += count;
+ idx += hibernate_page_list_count(list, TRUE, idx);
+ *iterator = (void *) idx;
+
+ return (count);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static IOReturn
+IOHibernatePollerProbe(IOPolledFileIOVars * vars, IOService * target)
+{
+ IOReturn err = kIOReturnError;
+ int32_t idx;
+ IOPolledInterface * poller;
+
+ for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
+ {
+ poller = (IOPolledInterface *) vars->pollers->getObject(idx);
+ err = poller->probe(target);
+ if (err)
+ {
+ HIBLOG("IOPolledInterface::probe[%d] 0x%x\n", idx, err);
+ break;
+ }
+ }
+
+ return (err);
+}
+
+static IOReturn
+IOHibernatePollerOpen(IOPolledFileIOVars * vars, uint32_t state, IOMemoryDescriptor * md)
+{
+ IOReturn err = kIOReturnError;
+ int32_t idx;
+ IOPolledInterface * poller;
+
+ for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
+ {
+ poller = (IOPolledInterface *) vars->pollers->getObject(idx);
+ err = poller->open(state, md);
+ if (err)
+ {
+ HIBLOG("IOPolledInterface::open[%d] 0x%x\n", idx, err);
+ break;
+ }
+ }
+
+ return (err);
+}
+
+static IOReturn
+IOHibernatePollerClose(IOPolledFileIOVars * vars, uint32_t state)
+{
+ IOReturn err = kIOReturnError;
+ int32_t idx;
+ IOPolledInterface * poller;
+
+ for (idx = 0;
+ (poller = (IOPolledInterface *)
vars->pollers->getObject(idx)); + idx++) + { + err = poller->close(state); + if (err) + HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err); + } + + return (err); +} + +static void +IOHibernatePollerIOComplete(void * target, + void * parameter, + IOReturn status, + UInt64 actualByteCount) +{ + IOPolledFileIOVars * vars = (IOPolledFileIOVars *) parameter; + + vars->ioStatus = status; +} + +static IOReturn +IOHibernatePollerIO(IOPolledFileIOVars * vars, + uint32_t operation, uint32_t bufferOffset, + uint64_t deviceOffset, uint64_t length) +{ + + IOReturn err = kIOReturnError; + IOPolledInterface * poller; + IOPolledCompletion completion; + + completion.target = 0; + completion.action = &IOHibernatePollerIOComplete; + completion.parameter = vars; + + vars->ioStatus = -1; + + poller = (IOPolledInterface *) vars->pollers->getObject(0); + err = poller->startIO(operation, bufferOffset, deviceOffset + vars->block0, length, completion); + if (err) + HIBLOG("IOPolledInterface::startIO[%d] 0x%x\n", 0, err); + + return (err); +} + +static IOReturn +IOHibernatePollerIODone(IOPolledFileIOVars * vars) +{ + IOReturn err = kIOReturnError; + int32_t idx; + IOPolledInterface * poller; + + while (-1 == vars->ioStatus) + { + for (idx = 0; + (poller = (IOPolledInterface *) vars->pollers->getObject(idx)); + idx++) + { + err = poller->checkForWork(); + if (err) + HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err); + } + } + + if (kIOReturnSuccess != vars->ioStatus) + HIBLOG("IOPolledInterface::ioStatus 0x%x\n", vars->ioStatus); + + return (vars->ioStatus); +} + +IOReturn +IOPolledInterface::checkAllForWork(void) +{ + IOReturn err = kIOReturnNotReady; + int32_t idx; + IOPolledInterface * poller; + + IOHibernateVars * vars = &gIOHibernateVars; + + if (!vars->fileVars || !vars->fileVars->pollers) + return (err); + + for (idx = 0; + (poller = (IOPolledInterface *) vars->fileVars->pollers->getObject(idx)); + idx++) + { + err = poller->checkForWork(); + if (err) + HIBLOG("IOPolledInterface::checkAllForWork[%d] 0x%x\n", idx, err); + } + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +struct _OpenFileContext +{ + OSData * extents; + uint64_t size; +}; + +static void +file_extent_callback(void * ref, uint64_t start, uint64_t length) +{ + _OpenFileContext * ctx = (_OpenFileContext *) ref; + IOPolledFileExtent extent; + + extent.start = start; + extent.length = length; + + ctx->extents->appendBytes(&extent, sizeof(extent)); + ctx->size += length; +} + +IOReturn +IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer, + IOPolledFileIOVars ** fileVars, OSData ** fileExtents, + OSData ** imagePath) +{ + IOReturn err = kIOReturnError; + IOPolledFileIOVars * vars; + _OpenFileContext ctx; + OSData * extentsData; + OSNumber * num; + IORegistryEntry * part = 0; + OSDictionary * matching; + OSIterator * iter; + dev_t hibernate_image_dev; + uint64_t maxiobytes; + + vars = &gFileVars; + do + { + HIBLOG("sizeof(IOHibernateImageHeader) == %ld\n", sizeof(IOHibernateImageHeader)); + if (sizeof(IOHibernateImageHeader) != 512) + continue; + + vars->io = false; + vars->buffer = (uint8_t *) ioBuffer->getBytesNoCopy(); + vars->bufferHalf = 0; + vars->bufferOffset = 0; + vars->bufferSize = ioBuffer->getLength() >> 1; + + extentsData = OSData::withCapacity(32); + + ctx.extents = extentsData; + ctx.size = 0; + vars->fileRef = kern_open_file_for_direct_io(filename, + &file_extent_callback, &ctx, + &hibernate_image_dev, + &vars->block0, + &maxiobytes); + if 
(!vars->fileRef) + { + err = kIOReturnNoSpace; + break; + } + HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx\n", filename, ctx.size, + vars->block0, maxiobytes); + if (ctx.size < 1*1024*1024) // check against image size estimate! + { + err = kIOReturnNoSpace; + break; + } + + if (maxiobytes < vars->bufferSize) + vars->bufferSize = maxiobytes; + + vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy(); + + matching = IOService::serviceMatching("IOMedia"); + num = OSNumber::withNumber(major(hibernate_image_dev), 32); + matching->setObject(kIOBSDMajorKey, num); + num->release(); + num = OSNumber::withNumber(minor(hibernate_image_dev), 32); + matching->setObject(kIOBSDMinorKey, num); + num->release(); + iter = IOService::getMatchingServices(matching); + matching->release(); + if (iter) + { + part = (IORegistryEntry *) iter->getNextObject(); + part->retain(); + iter->release(); + } + + int minor, major; + IORegistryEntry * next; + IORegistryEntry * child; + OSData * data; + + num = (OSNumber *) part->getProperty(kIOBSDMajorKey); + if (!num) + break; + major = num->unsigned32BitValue(); + num = (OSNumber *) part->getProperty(kIOBSDMinorKey); + if (!num) + break; + minor = num->unsigned32BitValue(); + + hibernate_image_dev = makedev(major, minor); + + vars->pollers = OSArray::withCapacity(4); + if (!vars->pollers) + break; + + vars->blockSize = 512; + next = part; + do + { + IOPolledInterface * poller; + if ((poller = OSDynamicCast(IOPolledInterface, next->getProperty(kIOPolledInterfaceSupportKey)))) + vars->pollers->setObject(poller); + if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey)))) + vars->blockSize = num->unsigned32BitValue(); + child = next; + } + while ((next = child->getParentEntry(gIOServicePlane)) + && child->isParent(next, gIOServicePlane, true)); + + HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n", + major, minor, vars->blockSize, vars->pollers->getCount()); + if (vars->pollers->getCount() < kIOHibernateMinPollersNeeded) + continue; + + err = IOHibernatePollerProbe(vars, (IOService *) part); + if (kIOReturnSuccess != err) + break; + + err = IOHibernatePollerOpen(vars, kIOPolledPreflightState, ioBuffer); + if (kIOReturnSuccess != err) + break; + + *fileVars = vars; + *fileExtents = extentsData; + + // make imagePath + char str1[256]; + char str2[24]; + int len = sizeof(str1); + + if ((extentsData->getLength() >= sizeof(IOPolledFileExtent)) + && part->getPath(str1, &len, gIODTPlane)) + { + // (strip the plane name) + char * tail = strchr(str1, ':'); + if (!tail) + tail = str1 - 1; + data = OSData::withBytes(tail + 1, strlen(tail + 1)); + sprintf(str2, ",%qx", vars->extentMap[0]); + data->appendBytes(str2, strlen(str2)); + *imagePath = data; + } + } + while (false); + + if (kIOReturnSuccess != err) + { + HIBLOG("error 0x%x opening hibernation file\n", err); + if (vars->fileRef) + kern_close_file_for_direct_io(vars->fileRef); + } + + if (part) + part->release(); + + return (err); +} + +IOReturn +IOPolledFileClose( IOPolledFileIOVars * vars ) +{ + if (vars->pollers) + { + IOHibernatePollerClose(vars, kIOPolledPostflightState); + vars->pollers->release(); + } + + gIOHibernateFileRef = vars->fileRef; + + bzero(vars, sizeof(IOPolledFileIOVars)); + + return (kIOReturnSuccess); +} + +static IOReturn +IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position) +{ + IOPolledFileExtent * extentMap; + + extentMap = vars->extentMap; + + vars->position = position; + + while (position >= 
extentMap->length) + { + position -= extentMap->length; + extentMap++; + } + + vars->currentExtent = extentMap; + vars->extentRemaining = extentMap->length - position; + vars->extentPosition = vars->position - position; + + if (vars->bufferSize <= vars->extentRemaining) + vars->bufferLimit = vars->bufferSize; + else + vars->bufferLimit = vars->extentRemaining; + + return (kIOReturnSuccess); +} + +static IOReturn +IOPolledFileWrite(IOPolledFileIOVars * vars, + const uint8_t * bytes, IOByteCount size, + hibernate_cryptvars_t * cryptvars) +{ + IOReturn err = kIOReturnSuccess; + IOByteCount copy; + bool flush = false; + + do + { + if (!bytes && !size) + { + // seek to end of block & flush + size = vars->position & (vars->blockSize - 1); + if (size) + size = vars->blockSize - size; + flush = true; + // use some garbage for the fill + bytes = vars->buffer + vars->bufferOffset; + } + + copy = vars->bufferLimit - vars->bufferOffset; + if (copy > size) + copy = size; + else + flush = true; + + if (bytes) + { + bcopy(bytes, vars->buffer + vars->bufferHalf + vars->bufferOffset, copy); + bytes += copy; + } + else + bzero(vars->buffer + vars->bufferHalf + vars->bufferOffset, copy); + + size -= copy; + vars->bufferOffset += copy; + vars->position += copy; + + if (flush && vars->bufferOffset) + { + uint64_t offset = (vars->position - vars->bufferOffset + - vars->extentPosition + vars->currentExtent->start); + uint32_t length = (vars->bufferOffset); + + if (cryptvars && vars->encryptStart && (vars->position > vars->encryptStart)) + { + uint32_t encryptLen, encryptStart; + encryptLen = vars->position - vars->encryptStart; + if (encryptLen > length) + encryptLen = length; + encryptStart = length - encryptLen; + + // encrypt the buffer + aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart, + &cryptvars->aes_iv[0], + encryptLen / AES_BLOCK_SIZE, + vars->buffer + vars->bufferHalf + encryptStart, + &cryptvars->ctx.encrypt); + // save initial vector for following encrypts + bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE, + &cryptvars->aes_iv[0], + AES_BLOCK_SIZE); + } + + if (vars->io) + { + err = IOHibernatePollerIODone(vars); + if (kIOReturnSuccess != err) + break; + } + +if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position); +//if (length != vars->bufferSize) HIBLOG("short write of %qx ends@ %qx\n", length, offset + length); + + err = IOHibernatePollerIO(vars, kIOPolledWrite, vars->bufferHalf, offset, length); + if (kIOReturnSuccess != err) + break; + vars->io = true; + + vars->extentRemaining -= vars->bufferOffset; + if (!vars->extentRemaining) + { + vars->currentExtent++; + vars->extentRemaining = vars->currentExtent->length; + vars->extentPosition = vars->position; + if (!vars->extentRemaining) + { + err = kIOReturnOverrun; + break; + } + } + + vars->bufferHalf = vars->bufferHalf ? 
0 : vars->bufferSize; + vars->bufferOffset = 0; + if (vars->bufferSize <= vars->extentRemaining) + vars->bufferLimit = vars->bufferSize; + else + vars->bufferLimit = vars->extentRemaining; + + flush = false; + } + } + while (size); + + return (err); +} + +static IOReturn +IOPolledFileRead(IOPolledFileIOVars * vars, + uint8_t * bytes, IOByteCount size, + hibernate_cryptvars_t * cryptvars) +{ + IOReturn err = kIOReturnSuccess; + IOByteCount copy; + +// bytesWritten += size; + + do + { + copy = vars->bufferLimit - vars->bufferOffset; + if (copy > size) + copy = size; + + if (bytes) + { + bcopy(vars->buffer + vars->bufferHalf + vars->bufferOffset, bytes, copy); + bytes += copy; + } + size -= copy; + vars->bufferOffset += copy; +// vars->position += copy; + + if (vars->bufferOffset == vars->bufferLimit) + { + if (vars->io) + { + err = IOHibernatePollerIODone(vars); + if (kIOReturnSuccess != err) + break; + } + else + cryptvars = 0; + +if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position); + + vars->position += vars->lastRead; + vars->extentRemaining -= vars->lastRead; + vars->bufferLimit = vars->lastRead; + + if (!vars->extentRemaining) + { + vars->currentExtent++; + vars->extentRemaining = vars->currentExtent->length; + vars->extentPosition = vars->position; + if (!vars->extentRemaining) + { + err = kIOReturnOverrun; + break; + } + } + + if (vars->extentRemaining <= vars->bufferSize) + vars->lastRead = vars->extentRemaining; + else + vars->lastRead = vars->bufferSize; + + uint64_t offset = (vars->position + - vars->extentPosition + vars->currentExtent->start); + uint64_t length = (vars->lastRead); + +//if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length); + + err = IOHibernatePollerIO(vars, kIOPolledRead, vars->bufferHalf, offset, length); + if (kIOReturnSuccess != err) + break; + vars->io = true; + + vars->bufferHalf = vars->bufferHalf ? 
0 : vars->bufferSize; + vars->bufferOffset = 0; + + if (cryptvars) + { + uint8_t thisVector[AES_BLOCK_SIZE]; + // save initial vector for following decrypts + bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE); + bcopy(vars->buffer + vars->bufferHalf + vars->lastRead - AES_BLOCK_SIZE, + &cryptvars->aes_iv[0], AES_BLOCK_SIZE); + // decrypt the buffer + aes_decrypt_cbc(vars->buffer + vars->bufferHalf, + &thisVector[0], + vars->lastRead / AES_BLOCK_SIZE, + vars->buffer + vars->bufferHalf, + &cryptvars->ctx.decrypt); + } + } + } + while (size); + + return (err); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOHibernateSystemSleep(void) +{ + IOReturn err; + OSData * data; + OSObject * obj; + OSString * str; + OSNumber * num; + + IOHibernateVars * vars = &gIOHibernateVars; + + if (vars->fileVars && vars->fileVars->fileRef) + // already on the way down + return (kIOReturnSuccess); + + gIOHibernateState = kIOHibernateStateInactive; + + if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateModeKey))) + { + if ((num = OSDynamicCast(OSNumber, obj))) + gIOHibernateMode = num->unsigned32BitValue(); + if (kIOHibernateModeSleep & gIOHibernateMode) + // default to discard clean for safe sleep + gIOHibernateMode ^= (kIOHibernateModeDiscardCleanInactive + | kIOHibernateModeDiscardCleanActive); + + obj->release(); + } + if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFreeRatioKey))) + { + if ((num = OSDynamicCast(OSNumber, obj))) + gIOHibernateFreeRatio = num->unsigned32BitValue(); + obj->release(); + } + if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFreeTimeKey))) + { + if ((num = OSDynamicCast(OSNumber, obj))) + gIOHibernateFreeTime = num->unsigned32BitValue(); + obj->release(); + } + if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey))) + { + if ((str = OSDynamicCast(OSString, obj))) + strcpy(gIOHibernateFilename, str->getCStringNoCopy()); + obj->release(); + } + + if (!gIOHibernateMode || !gIOHibernateFilename[0]) + return (kIOReturnUnsupported); + + HIBLOG("hibernate image path: %s\n", gIOHibernateFilename); + + do + { + vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(0, 4 * page_size, page_size); + vars->ioBuffer = IOBufferMemoryDescriptor::withOptions(0, 2 * kDefaultIOSize, page_size); + + if (!vars->srcBuffer || !vars->ioBuffer) + { + err = kIOReturnNoMemory; + break; + } + + err = IOPolledFileOpen(gIOHibernateFilename, vars->ioBuffer, + &vars->fileVars, &vars->fileExtents, &data); + if (KERN_SUCCESS != err) + { + HIBLOG("IOPolledFileOpen(%x)\n", err); + break; + } + if (vars->fileVars->fileRef) + { + // invalidate the image file + gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature; + int err = kern_write_file(vars->fileVars->fileRef, 0, + (caddr_t) gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); + if (KERN_SUCCESS != err) + HIBLOG("kern_write_file(%d)\n", err); + } + + bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader)); + + boolean_t encryptedswap; + err = hibernate_setup(gIOHibernateCurrentHeader, + gIOHibernateFreeRatio, gIOHibernateFreeTime, + &vars->page_list, &vars->page_list_wired, &encryptedswap); + if (KERN_SUCCESS != err) + { + HIBLOG("hibernate_setup(%d)\n", err); + break; + } + + if (encryptedswap) + gIOHibernateMode ^= kIOHibernateModeEncrypt; + + vars->videoAllocSize = kVideoMapSize; + if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize)) + vars->videoMapping 
= 0; + + // generate crypt keys + for (uint32_t i = 0; i < sizeof(vars->wiredCryptKey); i++) + vars->wiredCryptKey[i] = random(); + for (uint32_t i = 0; i < sizeof(vars->cryptKey); i++) + vars->cryptKey[i] = random(); + + // set nvram + + IORegistryEntry * regEntry; + if (!gIOOptionsEntry) + { + regEntry = IORegistryEntry::fromPath("/options", gIODTPlane); + gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry); + if (regEntry && !gIOOptionsEntry) + regEntry->release(); + } + if (!gIOChosenEntry) + gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane); + + if (gIOOptionsEntry) + { + const OSSymbol * sym; + size_t len; + char valueString[16]; + + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); + if (sym) + { + gIOOptionsEntry->setProperty(sym, data); + sym->release(); + } + data->release(); + + vars->saveBootDevice = gIOOptionsEntry->copyProperty(kIOSelectedBootDeviceKey); + if (gIOChosenEntry) + { + OSData * bootDevice = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOBootPathKey)); + if (bootDevice) + { + sym = OSSymbol::withCStringNoCopy(kIOSelectedBootDeviceKey); + OSString * str2 = OSString::withCStringNoCopy((const char *) bootDevice->getBytesNoCopy()); + if (sym && str2) + gIOOptionsEntry->setProperty(sym, str2); + if (sym) + sym->release(); + if (str2) + str2->release(); + } + + data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMemorySignatureKey)); + if (data) + { + vars->haveFastBoot = true; + + len = sprintf(valueString, "0x%lx", *((UInt32 *)data->getBytesNoCopy())); + data = OSData::withBytes(valueString, len + 1); + sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey); + if (sym && data) + gIOOptionsEntry->setProperty(sym, data); + if (sym) + sym->release(); + if (data) + data->release(); + } + data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey)); + if (data) + gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy()); + } + + if (kIOHibernateModeEncrypt & gIOHibernateMode) + { + data = OSData::withBytes(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey)); + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey); + if (sym && data) + gIOOptionsEntry->setProperty(sym, data); + if (sym) + sym->release(); + if (data) + data->release(); + if (gIOHibernateBootSignature[0]) + { + data = OSData::withCapacity(16); + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSignatureKey); + if (sym && data) + { + char c; + uint8_t value; + for (uint32_t i = 0; (c = gIOHibernateBootSignature[i]); i++) + { + if (c >= 'a') + c -= 'a' - 10; + else if (c >= 'A') + c -= 'A' - 10; + else if (c >= '0') + c -= '0'; + else + continue; + value = (value << 4) | c; + if (i & 1) + data->appendBytes(&value, sizeof(value)); + } + gIOOptionsEntry->setProperty(sym, data); + } + if (sym) + sym->release(); + if (data) + data->release(); + } + } + + if (!vars->haveFastBoot) + { + // set boot volume to zero + IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform()); + if (platform && (kIOReturnSuccess == platform->readXPRAM(kXPRamAudioVolume, + &vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume)))) + { + uint8_t newVolume; + newVolume = vars->saveBootAudioVolume & 0xf8; + platform->writeXPRAM(kXPRamAudioVolume, + &newVolume, sizeof(newVolume)); + } + } + } + // -- + + gIOHibernateCurrentHeader->signature = kIOHibernateHeaderSignature; + gIOHibernateState = kIOHibernateStateHibernating; + } + while (false); + + return (err); +} + +/* * * * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOHibernateSystemHasSlept(void) +{ + IOHibernateVars * vars = &gIOHibernateVars; + + if ((vars->previewData = OSDynamicCast(OSData, + IOService::getPMRootDomain()->getProperty(kIOHibernatePreviewBufferKey)))) + { + vars->previewBuffer = IOMemoryDescriptor::withAddress( + (void *) vars->previewData->getBytesNoCopy(), + vars->previewData->getLength(), + kIODirectionInOut); + + if (vars->previewBuffer && (kIOReturnSuccess != vars->previewBuffer->prepare())) + { + vars->previewBuffer->release(); + vars->previewBuffer = 0; + } + if (!vars->previewBuffer) + vars->previewData = 0; + } + if (gIOOptionsEntry) + gIOOptionsEntry->sync(); + + return (kIOReturnSuccess); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +IOReturn +IOHibernateSystemWake(void) +{ + IOHibernateVars * vars = &gIOHibernateVars; + + hibernate_teardown(vars->page_list, vars->page_list_wired); + + if (vars->videoMapping) + { + if (vars->videoMapSize) + // remove mappings + IOUnmapPages(kernel_map, vars->videoMapping, vars->videoMapSize); + if (vars->videoAllocSize) + // dealloc range + kmem_free(kernel_map, trunc_page_32(vars->videoMapping), vars->videoAllocSize); + } + + if (vars->previewBuffer) + { + vars->previewBuffer->release(); + vars->previewBuffer = 0; + } + + if (vars->fileVars) + { + IOPolledFileClose(vars->fileVars); + } + + // invalidate nvram properties - (gIOOptionsEntry != 0) => nvram was touched + + OSData * data = OSData::withCapacity(4); + if (gIOOptionsEntry && data) + { + const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey); + if (sym) + { + gIOOptionsEntry->setProperty(sym, data); + sym->release(); + } + sym = OSSymbol::withCStringNoCopy(kIOSelectedBootDeviceKey); + if (sym) + { + if (vars->saveBootDevice) + { + gIOOptionsEntry->setProperty(sym, vars->saveBootDevice); + vars->saveBootDevice->release(); + } + sym->release(); + } + sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey); + if (sym) + { + gIOOptionsEntry->setProperty(sym, data); + sym->release(); + } + sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey); + if (sym) + { + gIOOptionsEntry->removeProperty(sym); + sym->release(); + } + } + if (data) + data->release(); + + if (gIOOptionsEntry) + { + if (!vars->haveFastBoot) + { + // reset boot audio volume + IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform()); + if (platform) + platform->writeXPRAM(kXPRamAudioVolume, + &vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume)); + } + + // sync now to hardware if the booter has not + if (kIOHibernateStateInactive == gIOHibernateState) + gIOOptionsEntry->sync(); + else + // just sync the variables in case a later panic syncs nvram (it won't sync variables) + gIOOptionsEntry->syncOFVariables(); + } + + if (vars->srcBuffer) + vars->srcBuffer->release(); + if (vars->ioBuffer) + vars->ioBuffer->release(); + if (vars->fileExtents) + vars->fileExtents->release(); + + bzero(vars, sizeof(*vars)); + +// gIOHibernateState = kIOHibernateStateInactive; // leave it for post wake code to see + + return (kIOReturnSuccess); +} + +IOReturn +IOHibernateSystemPostWake(void) +{ + if (gIOHibernateFileRef) + { + kern_close_file_for_direct_io(gIOHibernateFileRef); + gIOHibernateFileRef = 0; + } + return (kIOReturnSuccess); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +void +IOHibernateSystemInit(IOPMrootDomain * rootDomain) +{ + 
OSData * data = OSData::withBytesNoCopy(&gIOHibernateState, sizeof(gIOHibernateState)); + if (data) + { + rootDomain->setProperty(kIOHibernateStateKey, data); + data->release(); + } + + if (PE_parse_boot_arg("hfile", gIOHibernateFilename)) + gIOHibernateMode = kIOHibernateModeOn; + else + gIOHibernateFilename[0] = 0; + + static SYSCTL_STRING(_kern, OID_AUTO, hibernatefile, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, + gIOHibernateFilename, sizeof(gIOHibernateFilename), ""); + sysctl_register_oid(&sysctl__kern_hibernatefile); + + static SYSCTL_STRING(_kern, OID_AUTO, bootsignature, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, + gIOHibernateBootSignature, sizeof(gIOHibernateBootSignature), ""); + sysctl_register_oid(&sysctl__kern_bootsignature); + + static SYSCTL_UINT(_kern, OID_AUTO, hibernatemode, + CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, + &gIOHibernateMode, 0, ""); + sysctl_register_oid(&sysctl__kern_hibernatemode); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static void +hibernate_setup_for_wake(void) +{ +#if __ppc__ + // go slow (state needed for wake) + ml_set_processor_speed(1); +#endif +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +extern "C" boolean_t +hibernate_write_image(void) +{ + IOHibernateImageHeader * header = gIOHibernateCurrentHeader; + IOHibernateVars * vars = &gIOHibernateVars; + IOPolledFileExtent * fileExtents; + + uint32_t pageCount, pagesDone; + IOReturn err; + vm_offset_t ppnum; + IOItemCount page, count; + uint8_t * src; + uint8_t * data; + IOByteCount pageCompressedSize; + uint64_t compressedSize, uncompressedSize; + uint64_t image1Size = 0; + uint32_t bitmap_size; + bool iterDone, pollerOpen, needEncryptStart; + uint32_t restore1Sum, sum, sum1, sum2; + uint32_t tag; + uint32_t pageType; + uint32_t pageAndCount[2]; + + AbsoluteTime startTime, endTime; + AbsoluteTime allTime, compTime, decoTime; + uint64_t nsec; + uint32_t lastProgressStamp = 0; + uint32_t progressStamp; + + hibernate_cryptvars_t _cryptvars; + hibernate_cryptvars_t * cryptvars = 0; + + if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) + return (false /* sleep */ ); + + restore1Sum = sum1 = sum2 = 0; + + // encryption data. "iv" is the "initial vector". 
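Two contexts are initialized in the block below: gIOHibernateCryptWakeContext is keyed with
vars->cryptKey and kept for the wake-time read path, while the local _cryptvars is keyed with
vars->wiredCryptKey and used for the write about to happen; both keys are then wiped from memory.
The IV handling that IOPolledFileWrite() performs above -- encrypt each flushed buffer with AES-CBC,
then carry its last ciphertext block forward as the next IV -- chains the per-buffer calls into one
continuous CBC stream. A reduced sketch of that chaining (illustrative only; encrypt_chunked() is an
invented name, and only the aes_encrypt_cbc() interface from bsd/crypto/aes is assumed):

    /* Encrypt nchunks contiguous chunks in place, carrying the CBC IV across
       calls exactly as IOPolledFileWrite() does with cryptvars->aes_iv.
       chunkLen must be a multiple of AES_BLOCK_SIZE. */
    static void
    encrypt_chunked(uint8_t * buf, unsigned int nchunks, unsigned int chunkLen,
                    uint8_t iv[AES_BLOCK_SIZE], aes_encrypt_ctx * ctx)
    {
        unsigned int i;
        for (i = 0; i < nchunks; i++)
        {
            uint8_t * chunk = buf + (i * chunkLen);
            // encrypt this chunk with the carried IV
            aes_encrypt_cbc(chunk, iv, chunkLen / AES_BLOCK_SIZE, chunk, ctx);
            // the last ciphertext block becomes the next chunk's IV
            bcopy(chunk + chunkLen - AES_BLOCK_SIZE, iv, AES_BLOCK_SIZE);
        }
    }

Decrypting with the same per-chunk IV carry (as IOPolledFileRead() does above) therefore recovers
the stream no matter how the buffer flush boundaries fell during the write.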
+ if (kIOHibernateModeEncrypt & gIOHibernateMode) + { + static const unsigned char first_iv[AES_BLOCK_SIZE] + = { 0xa3, 0x63, 0x65, 0xa9, 0x0b, 0x71, 0x7b, 0x1c, + 0xdf, 0x9e, 0x5f, 0x32, 0xd7, 0x61, 0x63, 0xda }; + + cryptvars = &gIOHibernateCryptWakeContext; + bzero(cryptvars, sizeof(hibernate_cryptvars_t)); + aes_encrypt_key(vars->cryptKey, + kIOHibernateAESKeySize, + &cryptvars->ctx.encrypt); + aes_decrypt_key(vars->cryptKey, + kIOHibernateAESKeySize, + &cryptvars->ctx.decrypt); + + cryptvars = &_cryptvars; + bzero(cryptvars, sizeof(hibernate_cryptvars_t)); + aes_encrypt_key(vars->wiredCryptKey, + kIOHibernateAESKeySize, + &cryptvars->ctx.encrypt); + + bcopy(&first_iv[0], &cryptvars->aes_iv[0], AES_BLOCK_SIZE); + bzero(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey)); + bzero(&vars->cryptKey[0], sizeof(vars->cryptKey)); + bzero(gIOHibernateCryptWakeVars, sizeof(hibernate_cryptwakevars_t)); + } + + hibernate_setup_for_wake(); + + hibernate_page_list_setall(vars->page_list, + vars->page_list_wired, + &pageCount); + + HIBLOG("hibernate_page_list_setall found pageCount %d\n", pageCount); + + fileExtents = (IOPolledFileExtent *) vars->fileExtents->getBytesNoCopy(); + +#if 0 + count = vars->fileExtents->getLength() / sizeof(IOPolledFileExtent); + for (page = 0; page < count; page++) + { + HIBLOG("fileExtents[%d] %qx, %qx (%qx)\n", page, + fileExtents[page].start, fileExtents[page].length, + fileExtents[page].start + fileExtents[page].length); + } +#endif + + needEncryptStart = (0 != (kIOHibernateModeEncrypt & gIOHibernateMode)); + + AbsoluteTime_to_scalar(&compTime) = 0; + AbsoluteTime_to_scalar(&decoTime) = 0; + + clock_get_uptime(&allTime); + + do + { + compressedSize = 0; + uncompressedSize = 0; + iterDone = false; + pageType = 0; // wired pages first + + IOPolledFileSeek(vars->fileVars, sizeof(IOHibernateImageHeader)); + + HIBLOG("IOHibernatePollerOpen, ml_get_interrupts_enabled %d\n", + ml_get_interrupts_enabled()); + err = IOHibernatePollerOpen(vars->fileVars, kIOPolledBeforeSleepState, vars->ioBuffer); + HIBLOG("IOHibernatePollerOpen(%x)\n", err); + pollerOpen = (kIOReturnSuccess == err); + if (!pollerOpen) + break; + + // copy file block extent list if larger than header + + count = vars->fileExtents->getLength(); + if (count > sizeof(header->fileExtentMap)) + { + count -= sizeof(header->fileExtentMap); + err = IOPolledFileWrite(vars->fileVars, + ((uint8_t *) &fileExtents[0]) + sizeof(header->fileExtentMap), count, cryptvars); + if (kIOReturnSuccess != err) + break; + } + + // copy out restore1 code + + page = atop_32(sectHIBB); + count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page; + header->restore1CodePage = page; + header->restore1PageCount = count; + header->restore1CodeOffset = ((uint32_t) &hibernate_machine_entrypoint) - sectHIBB; + header->restore1StackOffset = ((uint32_t) &gIOHibernateRestoreStackEnd[0]) - 64 - sectHIBB; + + // sum __HIB sect, with zeros for the stack + src = (uint8_t *) trunc_page(sectHIBB); + for (page = 0; page < count; page++) + { + if ((src < &gIOHibernateRestoreStack[0]) || (src >= &gIOHibernateRestoreStackEnd[0])) + restore1Sum += hibernate_sum(src, page_size); + else + restore1Sum += 0x10000001; + src += page_size; + } + sum1 = restore1Sum; + + // write the __HIB sect, with zeros for the stack + + src = (uint8_t *) trunc_page(sectHIBB); + count = ((uint32_t) &gIOHibernateRestoreStack[0]) - trunc_page(sectHIBB); + if (count) + { + err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); + if (kIOReturnSuccess != err) + break; + } + 
err = IOPolledFileWrite(vars->fileVars, + (uint8_t *) 0, + &gIOHibernateRestoreStackEnd[0] - &gIOHibernateRestoreStack[0], + cryptvars); + if (kIOReturnSuccess != err) + break; + src = &gIOHibernateRestoreStackEnd[0]; + count = round_page(sectHIBB + sectSizeHIB) - ((uint32_t) src); + if (count) + { + err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); + if (kIOReturnSuccess != err) + break; + } + + // write the preview buffer + + addr64_t phys64; + IOByteCount segLen; + + if (vars->previewData) + { + ppnum = 0; + count = 0; + do + { + phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen); + pageAndCount[0] = atop_64(phys64); + pageAndCount[1] = atop_32(segLen); + err = IOPolledFileWrite(vars->fileVars, + (const uint8_t *) &pageAndCount, sizeof(pageAndCount), + cryptvars); + if (kIOReturnSuccess != err) + break; + count += segLen; + ppnum += sizeof(pageAndCount); + } + while (phys64); + if (kIOReturnSuccess != err) + break; + + src = (uint8_t *) vars->previewData->getBytesNoCopy(); + count = vars->previewData->getLength(); + + header->previewPageListSize = ppnum; + header->previewSize = count + ppnum; + + for (page = 0; page < count; page += page_size) + sum1 += hibernate_sum(src + page, page_size); + + err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars); + if (kIOReturnSuccess != err) + break; + } + + // mark areas for no save + + for (count = 0; + (phys64 = vars->ioBuffer->getPhysicalSegment64(count, &segLen)); + count += segLen) + { + hibernate_set_page_state(vars->page_list, vars->page_list_wired, + atop_64(phys64), atop_32(segLen), + kIOHibernatePageStateFree); + pageCount -= atop_32(segLen); + } + + for (count = 0; + (phys64 = vars->srcBuffer->getPhysicalSegment64(count, &segLen)); + count += segLen) + { + hibernate_set_page_state(vars->page_list, vars->page_list_wired, + atop_64(phys64), atop_32(segLen), + kIOHibernatePageStateFree); + pageCount -= atop_32(segLen); + } + + // copy out bitmap of pages available for trashing during restore + + bitmap_size = vars->page_list_wired->list_size; + src = (uint8_t *) vars->page_list_wired; + err = IOPolledFileWrite(vars->fileVars, src, bitmap_size, cryptvars); + if (kIOReturnSuccess != err) + break; + + // mark more areas for no save, but these are not available + // for trashing during restore + +#if !__i386__ + page = atop_32(sectHIBB); + count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page; +#else + // XXX + page = atop_32(sectHIBB & 0x3FFFFFFF); + count = atop_32(round_page((sectHIBB + sectSizeHIB) & 0x3FFFFFFF)) - page; +#endif + hibernate_set_page_state(vars->page_list, vars->page_list_wired, + page, count, + kIOHibernatePageStateFree); + pageCount -= count; + + + + if (vars->previewBuffer) for (count = 0; + (phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen)); + count += segLen) + { + hibernate_set_page_state(vars->page_list, vars->page_list_wired, + atop_64(phys64), atop_32(segLen), + kIOHibernatePageStateFree); + pageCount -= atop_32(segLen); + } + + src = (uint8_t *) vars->srcBuffer->getBytesNoCopy(); + + void * iter = 0; + pagesDone = 0; + + HIBLOG("writing %d pages\n", pageCount); + + do + { + count = hibernate_page_list_iterate(pageType ? 
vars->page_list : vars->page_list_wired, + &iter, &ppnum); +// kprintf("[%d](%x : %x)\n", pageType, ppnum, count); + + iterDone = !count; + + pageAndCount[0] = ppnum; + pageAndCount[1] = count; + err = IOPolledFileWrite(vars->fileVars, + (const uint8_t *) &pageAndCount, sizeof(pageAndCount), + cryptvars); + if (kIOReturnSuccess != err) + break; + + for (page = 0; page < count; page++) + { + err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(ppnum), page_size); + if (err) + { + HIBLOG("IOMemoryDescriptorWriteFromPhysical %d [%d] %x\n", __LINE__, ppnum, err); + break; + } + + sum = hibernate_sum(src, page_size); + + clock_get_uptime(&startTime); + + pageCompressedSize = WKdm_compress ((WK_word*) src, (WK_word*) (src + page_size), PAGE_SIZE_IN_WORDS); + + clock_get_uptime(&endTime); + ADD_ABSOLUTETIME(&compTime, &endTime); + SUB_ABSOLUTETIME(&compTime, &startTime); + + if (kIOHibernateModeEncrypt & gIOHibernateMode) + pageCompressedSize = (pageCompressedSize + AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1); + + if (pageCompressedSize > page_size) + { +// HIBLOG("------------lose: %d\n", pageCompressedSize); + pageCompressedSize = page_size; + } + + if (pageCompressedSize != page_size) + data = (src + page_size); + else + data = src; + + tag = pageCompressedSize | kIOHibernateTagSignature; + + if (pageType) + sum2 += sum; + else + sum1 += sum; + + if (needEncryptStart && (ppnum >= atop_32(sectDATAB))) + { + // start encrypting partway into the data about to be written + vars->fileVars->encryptStart = (vars->fileVars->position + AES_BLOCK_SIZE - 1) + & ~(AES_BLOCK_SIZE - 1); + needEncryptStart = false; + } + + err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars); + if (kIOReturnSuccess != err) + break; + + err = IOPolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars); + if (kIOReturnSuccess != err) + break; + + compressedSize += pageCompressedSize; + if (pageCompressedSize) + uncompressedSize += page_size; + ppnum++; + pagesDone++; + + if (0 == (8191 & pagesDone)) + { + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); + absolutetime_to_nanoseconds(endTime, &nsec); + progressStamp = nsec / 750000000ULL; + if (progressStamp != lastProgressStamp) + { + lastProgressStamp = progressStamp; + HIBPRINT("pages %d (%d%%)\n", pagesDone, (100 * pagesDone) / pageCount); + } + } + } + if (kIOReturnSuccess != err) + break; + if (iterDone && !pageType) + { + err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); + if (kIOReturnSuccess != err) + break; + + iterDone = false; + pageType = 1; + iter = 0; + image1Size = vars->fileVars->position; + if (cryptvars) + { + bcopy(&cryptvars->aes_iv[0], + &gIOHibernateCryptWakeContext.aes_iv[0], + sizeof(cryptvars->aes_iv)); + cryptvars = &gIOHibernateCryptWakeContext; + } + HIBLOG("image1Size %qd\n", image1Size); + } + } + while (!iterDone); + if (kIOReturnSuccess != err) + break; + err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); + if (kIOReturnSuccess != err) + break; + + // Header: + + header->imageSize = vars->fileVars->position; + header->image1Size = image1Size; + header->bitmapSize = bitmap_size; + header->pageCount = pageCount; + header->encryptStart = vars->fileVars->encryptStart; + + header->restore1Sum = restore1Sum; + header->image1Sum = sum1; + header->image2Sum = sum2; + + count = vars->fileExtents->getLength(); + if (count > sizeof(header->fileExtentMap)) + { + header->fileExtentMapSize = count; + count = sizeof(header->fileExtentMap); + } + else + 
header->fileExtentMapSize = sizeof(header->fileExtentMap); + bcopy(&fileExtents[0], &header->fileExtentMap[0], count); + + IOPolledFileSeek(vars->fileVars, 0); + err = IOPolledFileWrite(vars->fileVars, + (uint8_t *) header, sizeof(IOHibernateImageHeader), + cryptvars); + if (kIOReturnSuccess != err) + break; + err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars); + if (kIOReturnSuccess != err) + break; + err = IOHibernatePollerIODone(vars->fileVars); + if (kIOReturnSuccess != err) + break; + } + while (false); + + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); + absolutetime_to_nanoseconds(endTime, &nsec); + HIBLOG("all time: %qd ms, ", + nsec / 1000000ULL); + + absolutetime_to_nanoseconds(compTime, &nsec); + HIBLOG("comp time: %qd ms, ", + nsec / 1000000ULL); + + absolutetime_to_nanoseconds(decoTime, &nsec); + HIBLOG("deco time: %qd ms, ", + nsec / 1000000ULL); + + HIBLOG("\nimage %qd, uncompressed %qd (%d), compressed %qd (%d%%), sum1 %x, sum2 %x\n", + header->imageSize, + uncompressedSize, atop_32(uncompressedSize), compressedSize, + (int) ((compressedSize * 100ULL) / uncompressedSize), + sum1, sum2); + + if (pollerOpen) + IOHibernatePollerClose(vars->fileVars, kIOPolledBeforeSleepState); + + HIBLOG("hibernate_write_image done(%x)\n", err); + + // should we come back via regular wake, set the state in memory. + gIOHibernateState = kIOHibernateStateInactive; + + if ((kIOReturnSuccess == err) && !(kIOHibernateModeSleep & gIOHibernateMode)) + return (true /* power down */ ); + else + return (false /* sleep */ ); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +DECLARE_IOHIBERNATEPROGRESSALPHA + +static void +ProgressUpdate(hibernate_graphics_t * display, uint8_t * screen, int32_t firstBlob, int32_t select) +{ + uint32_t rowBytes, pixelShift; + uint32_t x, y; + int32_t blob, lastBlob; + uint32_t alpha, in, color, result; + uint8_t * out; + uint32_t saveindex[kIOHibernateProgressCount] = { 0 }; + + pixelShift = display->depth >> 4; + if (pixelShift < 1) + return; + + rowBytes = display->rowBytes; + + screen += ((display->width + - kIOHibernateProgressCount * (kIOHibernateProgressWidth + kIOHibernateProgressSpacing)) << (pixelShift - 1)) + + (display->height - kIOHibernateProgressOriginY - kIOHibernateProgressHeight) * rowBytes; + + lastBlob = (select < kIOHibernateProgressCount) ? select : (kIOHibernateProgressCount - 1); + + screen += (firstBlob * (kIOHibernateProgressWidth + kIOHibernateProgressSpacing)) << pixelShift; + + for (y = 0; y < kIOHibernateProgressHeight; y++) + { + out = screen + y * rowBytes; + for (blob = firstBlob; blob <= lastBlob; blob++) + { + color = (blob < select) ? 
kIOHibernateProgressLightGray : kIOHibernateProgressMidGray; + for (x = 0; x < kIOHibernateProgressWidth; x++) + { + alpha = gIOHibernateProgressAlpha[y][x]; + result = color; + if (alpha) + { + if (0xff != alpha) + { + in = display->progressSaveUnder[blob][saveindex[blob]++]; + result = ((255 - alpha) * in + alpha * result + 0xff) / 255; + } + if (1 == pixelShift) + { + result >>= 3; + *((uint16_t *)out) = (result << 10) | (result << 5) | result; // 16 + } + else + *((uint32_t *)out) = (result << 16) | (result << 8) | result; // 32 + } + out += (1 << pixelShift); + } + out += (kIOHibernateProgressSpacing << pixelShift); + } + } +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +extern "C" void +hibernate_machine_init(void) +{ + IOReturn err; + uint32_t sum; + uint32_t pagesDone; + AbsoluteTime allTime, endTime; + uint64_t nsec; + uint32_t lastProgressStamp = 0; + uint32_t progressStamp; + uint64_t progressZeroPosition = 0; + uint32_t blob, lastBlob = (uint32_t) -1L; + hibernate_cryptvars_t * cryptvars = 0; + + IOHibernateVars * vars = &gIOHibernateVars; + + if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents) + return; + + if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode) + hibernate_page_list_discard(vars->page_list); + + + sum = gIOHibernateCurrentHeader->actualImage1Sum; + pagesDone = gIOHibernateCurrentHeader->actualUncompressedPages; + + HIBLOG("hibernate_machine_init: state %d, image pages %d, sum was %x, image1Size %qx, conflictCount %d, nextFree %x\n", + gIOHibernateState, pagesDone, sum, gIOHibernateCurrentHeader->image1Size, + gIOHibernateCurrentHeader->conflictCount, gIOHibernateCurrentHeader->nextFree); + + if (kIOHibernateStateWakingFromHibernate != gIOHibernateState) + { + HIBLOG("regular wake\n"); + return; + } + + HIBPRINT("diag %x %x %x %x\n", + gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], + gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]); + + HIBPRINT("video %x %d %d %d\n", + gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, + gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height); + + if (vars->videoMapping && gIOHibernateGraphicsInfo->physicalAddress) + { + vars->videoMapSize = round_page(gIOHibernateGraphicsInfo->height + * gIOHibernateGraphicsInfo->rowBytes); + IOMapPages(kernel_map, + vars->videoMapping, gIOHibernateGraphicsInfo->physicalAddress, + vars->videoMapSize, kIOMapInhibitCache ); + } + + uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();; + uint32_t decoOffset; + + clock_get_uptime(&allTime); + + HIBLOG("IOHibernatePollerOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled()); + err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0); + HIBLOG("IOHibernatePollerOpen(%x)\n", err); + + if (gIOHibernateCurrentHeader->previewSize) + progressZeroPosition = gIOHibernateCurrentHeader->previewSize + + gIOHibernateCurrentHeader->fileExtentMapSize + - sizeof(gIOHibernateCurrentHeader->fileExtentMap) + + ptoa_64(gIOHibernateCurrentHeader->restore1PageCount); + + IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size); + + if (vars->videoMapping) + { + lastBlob = ((vars->fileVars->position - progressZeroPosition) * kIOHibernateProgressCount) + / (gIOHibernateCurrentHeader->imageSize - progressZeroPosition); + ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, 0, lastBlob); + } + + cryptvars = 
(kIOHibernateModeEncrypt & gIOHibernateMode) ? &gIOHibernateCryptWakeContext : 0; + if (kIOHibernateModeEncrypt & gIOHibernateMode) + { + cryptvars = &gIOHibernateCryptWakeContext; + bcopy(&gIOHibernateCryptWakeVars->aes_iv[0], + &cryptvars->aes_iv[0], + sizeof(cryptvars->aes_iv)); + } + + // kick off the read ahead + vars->fileVars->io = false; + vars->fileVars->bufferHalf = 0; + vars->fileVars->bufferLimit = 0; + vars->fileVars->lastRead = 0; + vars->fileVars->bufferOffset = vars->fileVars->bufferLimit; + + IOPolledFileRead(vars->fileVars, 0, 0, cryptvars); + vars->fileVars->bufferOffset = vars->fileVars->bufferLimit; + // -- + + HIBLOG("hibernate_machine_init reading\n"); + + uint32_t * header = (uint32_t *) src; + sum = 0; + + do + { + unsigned int count; + unsigned int page; + uint32_t tag; + vm_offset_t ppnum, compressedSize; + + IOPolledFileRead(vars->fileVars, src, 8, cryptvars); + + ppnum = header[0]; + count = header[1]; + +// HIBPRINT("(%x, %x)\n", ppnum, count); + + if (!count) + break; + + for (page = 0; page < count; page++) + { + IOPolledFileRead(vars->fileVars, (uint8_t *) &tag, 4, cryptvars); + + compressedSize = kIOHibernateTagLength & tag; + if (!compressedSize) + { + ppnum++; + pagesDone++; + continue; + } + + IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars); + + if (compressedSize != page_size) + { + decoOffset = page_size; + WKdm_decompress((WK_word*) src, (WK_word*) (src + decoOffset), PAGE_SIZE_IN_WORDS); + } + else + decoOffset = 0; + + sum += hibernate_sum((src + decoOffset), page_size); + + err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size); + if (err) + HIBLOG("IOMemoryDescriptorReadToPhysical [%d] %x\n", ppnum, err); + + ppnum++; + pagesDone++; + + if (vars->videoMapping && (0 == (255 & pagesDone))) + { + blob = ((vars->fileVars->position - progressZeroPosition) * kIOHibernateProgressCount) + / (gIOHibernateCurrentHeader->imageSize - progressZeroPosition); + if (blob != lastBlob) + { + ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, lastBlob, blob); + lastBlob = blob; + } + } + + if (0 == (8191 & pagesDone)) + { + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); + absolutetime_to_nanoseconds(endTime, &nsec); + progressStamp = nsec / 750000000ULL; + if (progressStamp != lastProgressStamp) + { + lastProgressStamp = progressStamp; + HIBPRINT("pages %d (%d%%)\n", pagesDone, + (100 * pagesDone) / gIOHibernateCurrentHeader->pageCount); + } + } + } + } + while (true); + + gIOHibernateCurrentHeader->actualImage2Sum = sum; + + if (vars->fileVars->io) + (void) IOHibernatePollerIODone(vars->fileVars); + + err = IOHibernatePollerClose(vars->fileVars, kIOPolledAfterSleepState); + + if (vars->videoMapping) + ProgressUpdate(gIOHibernateGraphicsInfo, + (uint8_t *) vars->videoMapping, 0, kIOHibernateProgressCount); + + clock_get_uptime(&endTime); + SUB_ABSOLUTETIME(&endTime, &allTime); + absolutetime_to_nanoseconds(endTime, &nsec); + + HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %qd ms\n", + pagesDone, sum, nsec / 1000000ULL); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h new file mode 100644 index 000000000..59783732f --- /dev/null +++ b/iokit/Kernel/IOHibernateInternal.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. 
+ * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include + +#ifdef __cplusplus + +enum { kIOHibernateAESKeySize = 128 }; /* bits */ + +struct IOHibernateVars +{ + hibernate_page_list_t * page_list; + hibernate_page_list_t * page_list_wired; + class IOBufferMemoryDescriptor * ioBuffer; + class IOBufferMemoryDescriptor * srcBuffer; + class IOMemoryDescriptor * previewBuffer; + OSData * previewData; + OSData * fileExtents; + OSObject * saveBootDevice; + + struct IOPolledFileIOVars * fileVars; + vm_offset_t videoMapping; + vm_size_t videoAllocSize; + vm_size_t videoMapSize; + uint8_t haveFastBoot; + uint8_t saveBootAudioVolume; + uint8_t wiredCryptKey[kIOHibernateAESKeySize / 8]; + uint8_t cryptKey[kIOHibernateAESKeySize / 8]; +}; +typedef struct IOHibernateVars IOHibernateVars; + + +struct IOPolledFileIOVars +{ + struct kern_direct_file_io_ref_t * fileRef; + class OSArray * pollers; + IOByteCount blockSize; + uint8_t * buffer; + IOByteCount bufferSize; + IOByteCount bufferLimit; + IOByteCount bufferOffset; + IOByteCount bufferHalf; + IOByteCount extentRemaining; + IOByteCount lastRead; + uint64_t block0; + uint64_t position; + uint64_t extentPosition; + uint64_t encryptStart; + IOPolledFileExtent * extentMap; + IOPolledFileExtent * currentExtent; + bool io; + IOReturn ioStatus; +}; +typedef struct IOPolledFileIOVars IOPolledFileIOVars; + +#endif /* __cplusplus */ + +enum +{ + kIOHibernateTagSignature = 0x53000000, + kIOHibernateTagLength = 0x00001fff, +}; + +#ifdef __cplusplus +extern "C" +#endif /* __cplusplus */ +uint32_t +hibernate_sum(uint8_t *buf, int32_t len); + +extern vm_offset_t sectHIBB; +extern int sectSizeHIB; +extern vm_offset_t sectDATAB; +extern int sectSizeDATA; + diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c new file mode 100644 index 000000000..3c6f16866 --- /dev/null +++ b/iokit/Kernel/IOHibernateRestoreKernel.c @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "WKdm.h"
+#include "IOHibernateInternal.h"
+
+/*
+This code is linked into the kernel but is part of the "__HIB" section, which means
+it's used by code running in the special context of restoring the kernel text and data
+from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
+it calls or references need to be careful to touch only memory that is also in the "__HIB" section.
+*/
+
+uint32_t gIOHibernateState;
+
+static IOHibernateImageHeader _hibernateHeader;
+IOHibernateImageHeader * gIOHibernateCurrentHeader = &_hibernateHeader;
+
+static hibernate_graphics_t _hibernateGraphics;
+hibernate_graphics_t * gIOHibernateGraphicsInfo = &_hibernateGraphics;
+
+static hibernate_cryptwakevars_t _cryptWakeVars;
+hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars = &_cryptWakeVars;
+
+#if __i386__
+extern void acpi_wake_prot_entry(void);
+#endif
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#define BASE 65521L /* largest prime smaller than 65536 */
+#define NMAX 5000
+// NMAX (was 5521) the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+
+#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
+#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf) DO8(buf,0); DO8(buf,8);
+
+uint32_t
+hibernate_sum(uint8_t *buf, int32_t len)
+{
+ unsigned long s1 = 1; // adler & 0xffff;
+ unsigned long s2 = 0; // (adler >> 16) & 0xffff;
+ int k;
+
+ while (len > 0) {
+ k = len < NMAX ?
len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#if __ppc__ +static __inline__ unsigned int cntlzw(unsigned int num) +{ + unsigned int result; + __asm__ volatile("cntlzw %0, %1" : "=r" (result) : "r" (num)); + return result; +} +#elif __i386__ +static __inline__ unsigned int cntlzw(unsigned int num) +{ + unsigned int result; + __asm__ volatile( "bsrl %1, %0\n\t" + "cmovel %2, %0" + : "=r" (result) + : "rm" (num), "r" (63)); + return 31 ^ result; +} +#else +#error arch +#endif + +void +hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page) +{ + uint32_t bank; + hibernate_bitmap_t * bitmap = &list->bank_bitmap[0]; + + for (bank = 0; bank < list->bank_count; bank++) + { + if ((page >= bitmap->first_page) && (page <= bitmap->last_page)) + { + page -= bitmap->first_page; + if (set) + bitmap->bitmap[page >> 5] |= (0x80000000 >> (page & 31)); + //setbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]); + else + bitmap->bitmap[page >> 5] &= ~(0x80000000 >> (page & 31)); + //clrbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]); + break; + } + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + } +} + +boolean_t +hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page) +{ + boolean_t result = TRUE; + uint32_t bank; + hibernate_bitmap_t * bitmap = &list->bank_bitmap[0]; + + for (bank = 0; bank < list->bank_count; bank++) + { + if ((page >= bitmap->first_page) && (page <= bitmap->last_page)) + { + page -= bitmap->first_page; + result = (0 != (bitmap->bitmap[page >> 5] & (0x80000000 >> (page & 31)))); + break; + } + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + } + return (result); +} + +// count bits clear or set (set == TRUE) starting at index page. 
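hibernate_page_list_count(), below, measures that run across the banked bitmaps, treating the gaps
between banks as set. Because the bitmaps store bits MSB-first (hibernate_page_bitset() above uses
0x80000000 >> (page & 31)), a count-leading-zeros on a word whose counted bits have been normalized
to zero yields the run length directly. The single-bank core of the technique, as an illustrative
sketch (count_run() is an invented name; the portable __builtin_clz stands in for the cntlzw/bsrl
inlines above, with the zero-word case handled explicitly since __builtin_clz(0) is undefined):

    /* Count the run of bits equal to 'set' starting at 'page' in one bank. */
    static uint32_t
    count_run(const uint32_t * bitmap, uint32_t nwords, uint32_t page, boolean_t set)
    {
        uint32_t count = 0;
        uint32_t index = page >> 5;
        uint32_t bit   = page & 31;

        while (index < nwords)
        {
            uint32_t bits = bitmap[index];
            if (set)
                bits = ~bits;    // normalize: the bits being counted read as zeros
            bits <<= bit;        // discard bits before the starting page
            count += bits ? __builtin_clz(bits) : 32;
            if (bits)
                break;           // a non-counted bit ends the run inside this word
            count -= bit;        // first word only contributed (32 - bit) bits
            bit = 0;
            index++;
        }
        return count;
    }

The real function below adds the multi-bank walk on top of this core, crediting the inter-bank gaps
as set pages when counting set bits.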
+uint32_t +hibernate_page_list_count(hibernate_page_list_t * list, uint32_t set, uint32_t page) +{ + uint32_t bank, count; + hibernate_bitmap_t * bitmap; + + bitmap = &list->bank_bitmap[0]; + count = 0; + + for (bank = 0; bank < list->bank_count; bank++) + { + // bits between banks are "set" + if (set && (page < bitmap->first_page)) + { + count += bitmap->first_page - page; + page = bitmap->first_page; + } + if ((page >= bitmap->first_page) && (page <= bitmap->last_page)) + { + uint32_t index, bit, bits; + + index = (page - bitmap->first_page) >> 5; + bit = (page - bitmap->first_page) & 31; + + while (TRUE) + { + bits = bitmap->bitmap[index]; + if (set) + bits = ~bits; + bits = (bits << bit); + count += cntlzw(bits); + if (bits) + break; + count -= bit; + + while (++index < bitmap->bitmapwords) + { + bits = bitmap->bitmap[index]; + if (set) + bits = ~bits; + count += cntlzw(bits); + if (bits) + break; + } + if (bits) + break; + if (!set) + break; + // bits between banks are "set" + bank++; + if (bank >= list->bank_count) + break; + count -= (bitmap->last_page + 1); + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + count += bitmap->first_page; + index = 0; + bit = 0; + } + break; + } + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + } + + return (count); +} + + +static uint32_t +hibernate_page_list_grab(hibernate_page_list_t * map, uint32_t * _nextFree) +{ + uint32_t nextFree = *_nextFree; + + if (!nextFree) + nextFree = hibernate_page_list_count(map, 0, 0); + + *_nextFree = nextFree + 1 + hibernate_page_list_count(map, 0, nextFree + 1); + + return (nextFree); +} + +static uint32_t +store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, + uint32_t * buffer, uint32_t ppnum) +{ + uint64_t dst; + uint32_t sum; + + dst = ptoa_64(ppnum); +#if __ppc__ + if (ppnum < 0x00100000) + buffer = (uint32_t *) (uint32_t) dst; +#elif __i386__ + if (ppnum < atop_32(0xC0000000)) { + buffer = (uint32_t *) (uint32_t) dst; + } +#endif + + if (compressedSize != PAGE_SIZE) + { + WKdm_decompress((WK_word*) src, (WK_word*) buffer, PAGE_SIZE >> 2); + src = buffer; + } + + sum = hibernate_sum((uint8_t *) src, PAGE_SIZE); + + if (((uint64_t) (uint32_t) src) == dst) + src = 0; + + hibernate_restore_phys_page((uint64_t) (uint32_t) src, dst, PAGE_SIZE, procFlags); + + return (sum); +} + +static void +bcopy_internal(const void *src, void *dst, uint32_t len) +{ + const char *s = src; + char *d = dst; + uint32_t idx = 0; + + while (idx < len) + { + d[idx] = s[idx]; + idx++; + } +} + +long +hibernate_kernel_entrypoint(IOHibernateImageHeader * header, + void * p2, void * p3, __unused void * p4) +{ + typedef void (*ResetProc)(void); + uint32_t idx; + uint32_t * src; + uint32_t * buffer; + uint32_t * pageIndexSource; + hibernate_page_list_t * map; + uint32_t count; + uint32_t ppnum; + uint32_t page; + uint32_t conflictCount; + uint32_t compressedSize; + uint32_t uncompressedPages; + uint32_t copyPageListHead; + uint32_t * copyPageList; + uint32_t copyPageIndex; + uint32_t sum; + uint32_t nextFree; + uint32_t lastImagePage; + uint32_t lastMapPage; + uint32_t lastPageIndexPage; + + + bcopy_internal(header, + gIOHibernateCurrentHeader, + sizeof(IOHibernateImageHeader)); + + if (p2) + bcopy_internal(p2, + gIOHibernateGraphicsInfo, + sizeof(hibernate_graphics_t)); + else + gIOHibernateGraphicsInfo->physicalAddress = gIOHibernateGraphicsInfo->depth = 0; + + if (p3) + bcopy_internal(p3, + gIOHibernateCryptWakeVars, + sizeof(hibernate_cryptvars_t)); + + src = (uint32_t 
*) + (((uint32_t) &header->fileExtentMap[0]) + + header->fileExtentMapSize + + ptoa_32(header->restore1PageCount)); + + if (header->previewSize) + { + pageIndexSource = src; + map = (hibernate_page_list_t *)(((uint32_t) pageIndexSource) + header->previewSize); + src = (uint32_t *) (((uint32_t) pageIndexSource) + header->previewPageListSize); + } + else + { + pageIndexSource = 0; + map = (hibernate_page_list_t *) src; + src = (uint32_t *) (((uint32_t) map) + header->bitmapSize); + } + + lastPageIndexPage = atop_32(src); + + lastImagePage = atop_32(((uint32_t) header) + header->image1Size); + + lastMapPage = atop_32(((uint32_t) map) + header->bitmapSize); + + // knock all the image pages to be used out of free map + for (ppnum = atop_32(header); ppnum <= lastImagePage; ppnum++) + { + hibernate_page_bitset(map, FALSE, ppnum); + } + + nextFree = 0; + buffer = (uint32_t *) ptoa_32(hibernate_page_list_grab(map, &nextFree)); + + sum = gIOHibernateCurrentHeader->actualRestore1Sum; + gIOHibernateCurrentHeader->diag[0] = (uint32_t) header; + gIOHibernateCurrentHeader->diag[1] = sum; + + uncompressedPages = 0; + conflictCount = 0; + copyPageListHead = 0; + copyPageList = 0; + copyPageIndex = PAGE_SIZE >> 2; + + compressedSize = PAGE_SIZE; + + while (1) + { + if (pageIndexSource) + { + ppnum = pageIndexSource[0]; + count = pageIndexSource[1]; + pageIndexSource += 2; + if (!count) + { + pageIndexSource = 0; + src = (uint32_t *) (((uint32_t) map) + gIOHibernateCurrentHeader->bitmapSize); + ppnum = src[0]; + count = src[1]; + src += 2; + } + } + else + { + ppnum = src[0]; + count = src[1]; + if (!count) + break; + src += 2; + } + + for (page = 0; page < count; page++, ppnum++) + { + uint32_t tag; + int conflicts; + + if (!pageIndexSource) + { + tag = *src++; + compressedSize = kIOHibernateTagLength & tag; + } + + conflicts = (((ppnum >= atop_32(map)) && (ppnum <= lastMapPage)) + || ((ppnum >= atop_32(src)) && (ppnum <= lastImagePage))); + + if (pageIndexSource) + conflicts |= ((ppnum >= atop_32(pageIndexSource)) && (ppnum <= lastPageIndexPage)); + + if (!conflicts) + { + if (compressedSize) + sum += store_one_page(gIOHibernateCurrentHeader->processorFlags, + src, compressedSize, buffer, ppnum); + uncompressedPages++; + } + else + { + uint32_t bufferPage; + uint32_t * dst; + + conflictCount++; + + // alloc new buffer page + bufferPage = hibernate_page_list_grab(map, &nextFree); + + if (copyPageIndex > ((PAGE_SIZE >> 2) - 3)) + { + // alloc new copy list page + uint32_t pageListPage = hibernate_page_list_grab(map, &nextFree); + // link to current + if (copyPageList) + copyPageList[1] = pageListPage; + else + copyPageListHead = pageListPage; + copyPageList = (uint32_t *) ptoa_32(pageListPage); + copyPageList[1] = 0; + copyPageIndex = 2; + } + + copyPageList[copyPageIndex++] = ppnum; + copyPageList[copyPageIndex++] = bufferPage; + copyPageList[copyPageIndex++] = compressedSize; + copyPageList[0] = copyPageIndex; + + dst = (uint32_t *) ptoa_32(bufferPage); + for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++) + dst[idx] = src[idx]; + } + src += ((compressedSize + 3) >> 2); + } + } + + // -- copy back conflicts + + copyPageList = (uint32_t *) ptoa_32(copyPageListHead); + while (copyPageList) + { + for (copyPageIndex = 2; copyPageIndex < copyPageList[0]; copyPageIndex += 3) + { + ppnum = copyPageList[copyPageIndex + 0]; + src = (uint32_t *) ptoa_32(copyPageList[copyPageIndex + 1]); + compressedSize = copyPageList[copyPageIndex + 2]; + + sum += store_one_page(gIOHibernateCurrentHeader->processorFlags, + src, 
compressedSize, buffer, ppnum); + uncompressedPages++; + } + copyPageList = (uint32_t *) ptoa_32(copyPageList[1]); + } + + // -- image has been destroyed... + + gIOHibernateCurrentHeader->actualImage1Sum = sum; + gIOHibernateCurrentHeader->actualUncompressedPages = uncompressedPages; + gIOHibernateCurrentHeader->conflictCount = conflictCount; + gIOHibernateCurrentHeader->nextFree = nextFree; + + gIOHibernateState = kIOHibernateStateWakingFromHibernate; + +#if __ppc__ + ResetProc proc; + proc = (ResetProc) 0x100; + __asm__ volatile("ori 0, 0, 0" : : ); + proc(); +#elif __i386__ + ResetProc proc; + proc = (ResetProc) acpi_wake_prot_entry; + + proc(); +#endif + + return -1; +} + diff --git a/iokit/Kernel/IOPMrootDomain.cpp b/iokit/Kernel/IOPMrootDomain.cpp index b811ffbf8..7c95249a5 100644 --- a/iokit/Kernel/IOPMrootDomain.cpp +++ b/iokit/Kernel/IOPMrootDomain.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -33,40 +33,11 @@ #include "IOKit/pwr_mgt/IOPowerConnection.h" #include "IOPMPowerStateQueue.h" #include +#include -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - - -#include - -OSDefineMetaClassAndAbstractStructors(IOPolledInterface, OSObject); - -OSMetaClassDefineReservedUnused(IOPolledInterface, 0); -OSMetaClassDefineReservedUnused(IOPolledInterface, 1); -OSMetaClassDefineReservedUnused(IOPolledInterface, 2); -OSMetaClassDefineReservedUnused(IOPolledInterface, 3); -OSMetaClassDefineReservedUnused(IOPolledInterface, 4); -OSMetaClassDefineReservedUnused(IOPolledInterface, 5); -OSMetaClassDefineReservedUnused(IOPolledInterface, 6); -OSMetaClassDefineReservedUnused(IOPolledInterface, 7); -OSMetaClassDefineReservedUnused(IOPolledInterface, 8); -OSMetaClassDefineReservedUnused(IOPolledInterface, 9); -OSMetaClassDefineReservedUnused(IOPolledInterface, 10); -OSMetaClassDefineReservedUnused(IOPolledInterface, 11); -OSMetaClassDefineReservedUnused(IOPolledInterface, 12); -OSMetaClassDefineReservedUnused(IOPolledInterface, 13); -OSMetaClassDefineReservedUnused(IOPolledInterface, 14); -OSMetaClassDefineReservedUnused(IOPolledInterface, 15); - -IOReturn -IOPolledInterface::checkAllForWork(void) -{ - return (kIOReturnSuccess); -} - - -/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ - +#ifdef __ppc__ +#include +#endif extern "C" void kprintf(const char *, ...); @@ -217,6 +188,7 @@ static void disk_sync_callout(thread_call_param_t p0, thread_call_param_t p1) IOService *rootDomain = (IOService *) p0; unsigned long pmRef = (unsigned long) p1; + IOHibernateSystemSleep(); sync_internal(); rootDomain->allowPowerChange(pmRef); } @@ -314,6 +286,7 @@ bool IOPMrootDomain::start ( IOService * nub ) temp_entry->release(); } + IOHibernateSystemInit(this); registerService(); // let clients find us @@ -353,6 +326,10 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) const OSSymbol *fileserver_string = OSSymbol::withCString("AutoRestartOnPowerLoss"); const OSSymbol *wakeonlid_string = OSSymbol::withCString("WakeOnLid"); const OSSymbol *wakeonac_string = OSSymbol::withCString("WakeOnACChange"); + const OSSymbol *hibernatemode_string = OSSymbol::withCString(kIOHibernateModeKey); + const OSSymbol *hibernatefile_string = OSSymbol::withCString(kIOHibernateFileKey); + const OSSymbol *hibernatefreeratio_string = OSSymbol::withCString(kIOHibernateFreeRatioKey); + const OSSymbol *hibernatefreetime_string 
= OSSymbol::withCString(kIOHibernateFreeTimeKey); const OSSymbol *timezone_string = OSSymbol::withCString("TimeZoneOffsetSeconds"); if(!dict) @@ -381,6 +358,26 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj) setProperty(stall_halt_string, b); } + if ( hibernatemode_string + && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatemode_string)))) + { + setProperty(hibernatemode_string, n); + } + if ( hibernatefreeratio_string + && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreeratio_string)))) + { + setProperty(hibernatefreeratio_string, n); + } + if ( hibernatefreetime_string + && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreetime_string)))) + { + setProperty(hibernatefreetime_string, n); + } + if ( hibernatefile_string + && (str = OSDynamicCast(OSString, dict->getObject(hibernatefile_string)))) + { + setProperty(hibernatefile_string, str); + } // Relay AutoWake setting to its controller if( auto_wake_string @@ -582,8 +579,24 @@ void IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup(void) // same thread. //********************************************************************************* +static int pmsallsetup = 0; + IOReturn IOPMrootDomain::setAggressiveness ( unsigned long type, unsigned long newLevel ) { +#ifdef __ppc__ + if(pmsExperimental & 3) kprintf("setAggressiveness: type = %08X, newlevel = %08X\n", type, newLevel); + if(pmsExperimental & 1) { /* Is experimental mode enabled? */ + if(pmsInstalled && (type == kPMSetProcessorSpeed)) { /* We want to look at all processor speed changes if stepper is installed */ + if(pmsallsetup) return kIOReturnSuccess; /* If already running, just eat this */ + kprintf("setAggressiveness: starting stepper...\n"); + pmsallsetup = 1; /* Remember we did this */ + pmsPark(); + pmsStart(); /* Get it all started up... */ + return kIOReturnSuccess; /* Leave now... */ + } + } +#endif + if ( pm_vars->PMcommandGate ) { pm_vars->PMcommandGate->runAction(broadcast_aggressiveness,(void *)type,(void *)newLevel); } @@ -659,13 +672,17 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) // re-enable this timer for next sleep idleSleepPending = false; - IOLog("System Sleep\n"); + IOLog("System %sSleep\n", gIOHibernateState ? "Safe" : ""); + + IOHibernateSystemHasSlept(); + pm_vars->thePlatform->sleepKernel(); // The CPU(s) are off at this point. When they're awakened by CPU interrupt, // code will resume execution here. // Now we're waking... + IOHibernateSystemWake(); // stay awake for at least 30 seconds clock_interval_to_deadline(30, kSecondScale, &deadline); @@ -690,7 +707,7 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState ) tellClients(kIOMessageSystemWillPowerOn); // tell the tree we're waking - IOLog("System Wake\n"); + IOLog("System %sWake\n", gIOHibernateState ? "SafeSleep " : ""); systemWake(); // Allow drivers to request extra processing time before clamshell @@ -1118,6 +1135,7 @@ void IOPMrootDomain::tellChangeUp ( unsigned long stateNum) { if ( stateNum == ON_STATE ) { + IOHibernateSystemPostWake(); return tellClients(kIOMessageSystemHasPoweredOn); } } @@ -1217,6 +1235,8 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, // We will ack within 20 seconds params->returnValue = 20 * 1000 * 1000; + if (gIOHibernateState) + params->returnValue += gIOHibernateFreeTime * 1000; //add in time we could spend freeing pages if ( ! 
OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) )
 {
diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp
index a5b07dd86..1fa9a6ec7 100644
--- a/iokit/Kernel/IOPlatformExpert.cpp
+++ b/iokit/Kernel/IOPlatformExpert.cpp
@@ -89,25 +89,17 @@ bool IOPlatformExpert::start( IOService * provider )
 {
     IORangeAllocator * physicalRanges;
     OSData * busFrequency;
+    uint32_t debugFlags;
 
     if (!super::start(provider))
       return false;
-    
+
+    // Override the mapper present flag if requested by boot arguments.
+    if (PE_parse_boot_arg("dart", &debugFlags) && (debugFlags == 0))
+	removeProperty(kIOPlatformMapperPresentKey);
+
     // Register the presence or lack thereof a system 
     // PCI address mapper with the IOMapper class
-
-#if 1
-    IORegistryEntry * regEntry = IORegistryEntry::fromPath("/u3/dart", gIODTPlane);
-    if (!regEntry)
-	regEntry = IORegistryEntry::fromPath("/dart", gIODTPlane);
-    if (regEntry) {
-	int debugFlags;
-	if (!PE_parse_boot_arg("dart", &debugFlags) || debugFlags)
-	    setProperty(kIOPlatformMapperPresentKey, kOSBooleanTrue);
-	regEntry->release();
-    }
-#endif
-
     IOMapper::setMapperRequired(0 != getProperty(kIOPlatformMapperPresentKey));
 
     gIOInterruptControllers = OSDictionary::withCapacity(1);
diff --git a/iokit/Kernel/IOServicePM.cpp b/iokit/Kernel/IOServicePM.cpp
index af7e11dd0..a20e0a6a0 100644
--- a/iokit/Kernel/IOServicePM.cpp
+++ b/iokit/Kernel/IOServicePM.cpp
@@ -2264,15 +2264,22 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
 {
     IOReturn k = IOPMAckImplied;
     unsigned long childPower;
-    IOService *theChild = (IOService *)(theNub->copyChildEntry(gIOPowerPlane));
-
-    theNub->setAwaitingAck(true);	// in case they don't ack
+    IOService *theChild;
 
-    if ( ! theChild )
+    theChild = (IOService *)(theNub->copyChildEntry(gIOPowerPlane));
+    if(!theChild)
     {
+        // The child has been detached since we grabbed the child iterator.
+        // Decrement pending_acks, already incremented in notifyAll,
+        // to account for this unexpected departure.
+        priv->head_note_pendingAcks--;
         return true;
     }
 
+    // Unless the child handles the notification immediately and returns
+    // kIOPMAckImplied, we'll be awaiting its acknowledgement later.
+    theNub->setAwaitingAck(true);
+
     if ( is_prechange )
     {
         k = theChild->powerDomainWillChangeTo(priv->head_note_outputFlags,theNub);
@@ -2284,7 +2291,7 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
     if ( k == IOPMAckImplied )
     {
         // yes
-        priv->head_note_pendingAcks -=1;
+        priv->head_note_pendingAcks--;
         theNub->setAwaitingAck(false);
         childPower = theChild->currentPowerConsumption();
         if ( childPower == kIOPMUnknown )
diff --git a/iokit/Kernel/WKdm.h b/iokit/Kernel/WKdm.h
new file mode 100644
index 000000000..be3ca2d1f
--- /dev/null
+++ b/iokit/Kernel/WKdm.h
@@ -0,0 +1,227 @@
+/* direct-mapped partial matching compressor with simple 22/10 split
+ *
+ * Compresses buffers using a dictionary-based match and partial match
+ * (high bits only or full match) scheme.
+ *
+ * Paul Wilson -- wilson@cs.utexas.edu
+ * Scott F. Kaplan -- sfkaplan@cs.utexas.edu
+ * September 1997
+ */
+
+/* compressed output format, in memory order
+ * 1. a four-word HEADER containing four one-word values:
+ *    i.   a one-word code saying what algorithm compressed the data
+ *    ii.  an integer WORD offset into the page saying
+ *         where the queue position area starts
+ *    iii. an integer WORD offset into the page saying where
+ *         the low-bits area starts
+ *    iv.
an integer WORD offset into the page saying where the + * low-bits area ends + * + * 2. a 64-word TAGS AREA holding one two-bit tag for each word in + * the original (1024-word) page, packed 16 per word + * + * 3. a variable-sized FULL WORDS AREA (always word aligned and an + * integral number of words) holding full-word patterns that + * were not in the dictionary when encoded (i.e., dictionary misses) + * + * 4. a variable-sized QUEUE POSITIONS AREA (always word aligned and + * an integral number of words) holding four-bit queue positions, + * packed eight per word. + * + * 5. a variable-sized LOW BITS AREA (always word aligned and an + * integral number of words) holding ten-bit low-bit patterns + * (from partial matches), packed three per word. + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ============================================================ */ +/* Included files */ + +//#include +//#include +//#include +//#include + +typedef unsigned long WK_word; + +/* at the moment we have dependencies on the page size. That should + * be changed to work for any power-of-two size that's at least 16 + * words, or something like that + */ + +#define PAGE_SIZE_IN_WORDS 1024 +#define PAGE_SIZE_IN_BYTES 4096 + +#define DICTIONARY_SIZE 16 + +/* + * macros defining the basic layout of stuff in a page + */ +#define HEADER_SIZE_IN_WORDS 4 +#define TAGS_AREA_OFFSET 4 +#define TAGS_AREA_SIZE 64 + +/* the next few are used during compression to write the header */ +#define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \ + (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf) +#define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \ + (compr_dest_buf[2] = lb_start_addr - compr_dest_buf) +#define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \ + (compr_dest_buf[3] = lb_end_addr - compr_dest_buf) + +/* the next few are only use during decompression to read the header */ +#define TAGS_AREA_START(decomp_src_buf) \ + (decomp_src_buf + TAGS_AREA_OFFSET) +#define TAGS_AREA_END(decomp_src_buf) \ + (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE) +#define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf) +#define QPOS_AREA_START(decomp_src_buf) \ + (decomp_src_buf + decomp_src_buf[1]) +#define LOW_BITS_AREA_START(decomp_src_buf) \ + (decomp_src_buf + (decomp_src_buf[2])) +#define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf) +#define LOW_BITS_AREA_END(decomp_src_buf) \ + (decomp_src_buf + (decomp_src_buf[3])) + +/* ============================================================ */ +/* Types and structures */ + +/* A structure to store each element of the dictionary. */ +typedef WK_word DictionaryElement; + +/* ============================================================ */ +/* Misc constants */ + +#define BITS_PER_WORD 32 +#define BYTES_PER_WORD 4 +#define NUM_LOW_BITS 10 +#define LOW_BITS_MASK 0x3FF +#define ALL_ONES_MASK 0xFFFFFFFF + +#define TWO_BITS_PACKING_MASK 0x03030303 +#define FOUR_BITS_PACKING_MASK 0x0F0F0F0F +#define TEN_LOW_BITS_MASK 0x000003FF +#define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00 + +/* Tag values. NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED. + * Check for conditionals doing arithmetic on these things + * before changing them + */ +#define ZERO_TAG 0x0 +#define PARTIAL_TAG 0x1 +#define MISS_TAG 0x2 +#define EXACT_TAG 0x3 + +#define BITS_PER_BYTE 8 + +/* ============================================================ */ +/* Global macros */ + +/* Shift out the low bits of a pattern to give the high bits pattern. 
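+   For example, with the 22/10 split used here (NUM_LOW_BITS == 10),
+   HIGH_BITS(0xAABBCCDD) == 0x2AAEF3 and LOW_BITS(0xAABBCCDD) == 0xDD.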
+ The stripped patterns are used for initial tests of partial + matches. */ +#define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS) + +/* String the high bits of a pattern so the low order bits can + be included in an encoding of a partial match. */ +#define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK) + +#if defined DEBUG_WK +#define DEBUG_PRINT_1(string) printf (string) +#define DEBUG_PRINT_2(string,value) printf(string, value) +#else +#define DEBUG_PRINT_1(string) +#define DEBUG_PRINT_2(string, value) +#endif + +/* Set up the dictionary before performing compression or + decompression. Each element is loaded with some value, the + high-bits version of that value, and a next pointer. */ +#define PRELOAD_DICTIONARY { \ + dictionary[0] = 1; \ + dictionary[1] = 1; \ + dictionary[2] = 1; \ + dictionary[3] = 1; \ + dictionary[4] = 1; \ + dictionary[5] = 1; \ + dictionary[6] = 1; \ + dictionary[7] = 1; \ + dictionary[8] = 1; \ + dictionary[9] = 1; \ + dictionary[10] = 1; \ + dictionary[11] = 1; \ + dictionary[12] = 1; \ + dictionary[13] = 1; \ + dictionary[14] = 1; \ + dictionary[15] = 1; \ +} + +/* these are the constants for the hash function lookup table. + * Only zero maps to zero. The rest of the tabale is the result + * of appending 17 randomizations of the multiples of 4 from + * 4 to 56. Generated by a Scheme script in hash.scm. + */ +#define HASH_LOOKUP_TABLE_CONTENTS { \ + 0, 52, 8, 56, 16, 12, 28, 20, 4, 36, 48, 24, 44, 40, 32, 60, \ + 8, 12, 28, 20, 4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \ + 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44, 4, 8, 40, \ + 60, 32, 20, 44, 4, 36, 52, 24, 16, 56, 48, 12, 28, 16, 8, 40, \ + 36, 28, 32, 12, 4, 44, 52, 20, 24, 48, 60, 56, 40, 48, 8, 32, \ + 28, 36, 4, 44, 20, 56, 60, 24, 52, 16, 12, 12, 4, 48, 20, 8, \ + 52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \ + 52, 16, 32, 4, 48, 8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \ + 48, 8, 4, 60, 24, 56, 44, 12, 8, 36, 24, 28, 16, 60, 20, 56, \ + 32, 40, 48, 12, 4, 44, 52, 44, 40, 12, 56, 8, 36, 24, 60, 28, \ + 48, 4, 32, 20, 16, 52, 60, 12, 24, 36, 8, 4, 16, 56, 48, 44, \ + 40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44, 4, \ + 20, 60, 8, 48, 48, 52, 12, 20, 32, 44, 36, 28, 4, 40, 24, 8, \ + 56, 60, 16, 36, 32, 8, 40, 4, 52, 24, 44, 20, 12, 28, 48, 56, \ + 16, 60, 4, 52, 60, 48, 20, 16, 56, 44, 24, 8, 40, 12, 32, 28, \ + 36, 24, 32, 12, 4, 20, 16, 60, 36, 28, 8, 52, 40, 48, 44, 56 \ +} + +#define HASH_TO_DICT_BYTE_OFFSET(pattern) \ + (hashLookupTable[((pattern) >> 10) & 0xFF]) + +extern const char hashLookupTable[]; + +/* EMIT... macros emit bytes or words into the intermediate arrays + */ + +#define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr = byte_value; fill_ptr++;} +#define EMIT_WORD(fill_ptr,word_value) {*fill_ptr = word_value; fill_ptr++;} + +/* RECORD... 
macros record the results of modeling in the intermediate
+ * arrays
+ */
+
+#define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); }
+
+#define RECORD_EXACT(queue_posn) EMIT_BYTE(next_tag,EXACT_TAG); \
+                                 EMIT_BYTE(next_qp,(queue_posn));
+
+#define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \
+   EMIT_BYTE(next_tag,PARTIAL_TAG);                   \
+   EMIT_BYTE(next_qp,(queue_posn));                   \
+   EMIT_WORD(next_low_bits,(low_bits_pattern)) }
+
+#define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \
+                                  EMIT_WORD(next_full_patt,(word_pattern));
+
+void
+WKdm_decompress (WK_word* src_buf,
+                 WK_word* dest_buf,
+                 unsigned int words);
+unsigned int
+WKdm_compress (WK_word* src_buf,
+               WK_word* dest_buf,
+               unsigned int num_input_words);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/iokit/Kernel/WKdmCompress.c b/iokit/Kernel/WKdmCompress.c
new file mode 100644
index 000000000..aa9d1b541
--- /dev/null
+++ b/iokit/Kernel/WKdmCompress.c
@@ -0,0 +1,328 @@
+#include "WKdm.h"
+
+/***********************************************************************
+ * THE PACKING ROUTINES
+ */
+
+/* WK_pack_2bits()
+ * Pack some multiple of four words holding two-bit tags (in the low
+ * two bits of each byte) into an integral number of words, i.e.,
+ * one fourth as many.
+ * NOTE: Pad the input out with zeroes to a multiple of four words!
+ */
+static WK_word*
+WK_pack_2bits(WK_word* source_buf,
+              WK_word* source_end,
+              WK_word* dest_buf) {
+
+  register WK_word* src_next = source_buf;
+  WK_word* dest_next = dest_buf;
+
+  while (src_next < source_end) {
+    register WK_word temp = src_next[0];
+    temp |= (src_next[1] << 2);
+    temp |= (src_next[2] << 4);
+    temp |= (src_next[3] << 6);
+
+    dest_next[0] = temp;
+    dest_next++;
+    src_next += 4;
+  }
+
+  return dest_next;
+
+}
+
+/* WK_pack_4bits()
+ * Pack an even number of words holding 4-bit patterns in the low bits
+ * of each byte into half as many words.
+ * note: pad out the input with zeroes to an even number of words!
+ */
+
+static WK_word*
+WK_pack_4bits(WK_word* source_buf,
+              WK_word* source_end,
+              WK_word* dest_buf) {
+  register WK_word* src_next = source_buf;
+  WK_word* dest_next = dest_buf;
+
+  /* this loop should probably be unrolled */
+  while (src_next < source_end) {
+    register WK_word temp = src_next[0];
+    temp |= (src_next[1] << 4);
+
+    dest_next[0] = temp;
+    dest_next++;
+    src_next += 2;
+  }
+
+  return dest_next;
+
+}
+
+/* WK_pack_3_tenbits()
+ * Pack a sequence of three ten-bit items into one word.
+ * note: pad out the input with zeroes to a multiple of three words!
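+ * e.g. three items (a,b,c) pack as a | (b << 10) | (c << 20), so the
+ * items (1,2,3) become the single word 0x00300801.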
+ */ +static WK_word* +WK_pack_3_tenbits(WK_word* source_buf, + WK_word* source_end, + WK_word* dest_buf) { + + register WK_word* src_next = source_buf; + WK_word* dest_next = dest_buf; + + /* this loop should probably be unrolled */ + while (src_next < source_end) { + register WK_word temp = src_next[0]; + temp |= (src_next[1] << 10); + temp |= (src_next[2] << 20); + + dest_next[0] = temp; + dest_next++; + src_next += 3; + } + + return dest_next; + +} + +/*************************************************************************** + * WKdm_compress()---THE COMPRESSOR + */ + +unsigned int +WKdm_compress (WK_word* src_buf, + WK_word* dest_buf, + unsigned int num_input_words) +{ + DictionaryElement dictionary[DICTIONARY_SIZE]; + + /* arrays that hold output data in intermediate form during modeling */ + /* and whose contents are packed into the actual output after modeling */ + + /* sizes of these arrays should be increased if you want to compress + * pages larger than 4KB + */ + WK_word tempTagsArray[300]; /* tags for everything */ + WK_word tempQPosArray[300]; /* queue positions for matches */ + WK_word tempLowBitsArray[1200]; /* low bits for partial matches */ + + /* boundary_tmp will be used for keeping track of what's where in + * the compressed page during packing + */ + WK_word* boundary_tmp; + + /* Fill pointers for filling intermediate arrays (of queue positions + * and low bits) during encoding. + * Full words go straight to the destination buffer area reserved + * for them. (Right after where the tags go.) + */ + WK_word* next_full_patt; + char* next_tag = (char *) tempTagsArray; + char* next_qp = (char *) tempQPosArray; + WK_word* next_low_bits = tempLowBitsArray; + + WK_word* next_input_word = src_buf; + WK_word* end_of_input = src_buf + num_input_words; + + PRELOAD_DICTIONARY; + + next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16); + +#ifdef WK_DEBUG + printf("\nIn WKdm_compress\n"); + printf("About to actually compress, src_buf is %u\n", src_buf); + printf("dictionary is at %u\n", dictionary); + printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt); + fflush(stdout); +#endif + + while (next_input_word < end_of_input) + { + WK_word *dict_location; + WK_word dict_word; + WK_word input_word = *next_input_word; + + /* compute hash value, which is a byte offset into the dictionary, + * and add it to the base address of the dictionary. 
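       * (HASH_TO_DICT_BYTE_OFFSET(w) is hashLookupTable[(w >> 10) & 0xFF]:
       * it hashes the low eight bits of the high-bits pattern to one of
       * the sixteen word-aligned byte offsets 0..60 in the 64-byte
       * dictionary, hence "direct-mapped".)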
Cast back and + * forth to/from char * so no shifts are needed + */ + dict_location = + (WK_word *) + (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)); + + dict_word = *dict_location; + + if (input_word == dict_word) + { + RECORD_EXACT(dict_location - dictionary); + } + else if (input_word == 0) { + RECORD_ZERO; + } + else + { + WK_word input_high_bits = HIGH_BITS(input_word); + if (input_high_bits == HIGH_BITS(dict_word)) { + RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word)); + *dict_location = input_word; + } + else { + RECORD_MISS(input_word); + *dict_location = input_word; + } + } + next_input_word++; + } + +#ifdef WK_DEBUG + printf("AFTER MODELING in WKdm_compress()\n"); fflush(stdout); + printf("tempTagsArray holds %u bytes\n", + next_tag - (char *) tempTagsArray); + printf("tempQPosArray holds %u bytes\n", + next_qp - (char *) tempQPosArray); + printf("tempLowBitsArray holds %u bytes\n", + (char *) next_low_bits - (char *) tempLowBitsArray); + + printf("next_full_patt is %u\n", + (unsigned long) next_full_patt); + + printf(" i.e., there are %u full patterns\n", + next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16))); + fflush(stdout); + + { int i; + WK_word *arr =(dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)); + + printf(" first 20 full patterns are: \n"); + for (i = 0; i < 20; i++) { + printf(" %d", arr[i]); + } + printf("\n"); + } +#endif + + /* Record (into the header) where we stopped writing full words, + * which is where we will pack the queue positions. (Recall + * that we wrote the full words directly into the dest buffer + * during modeling. + */ + + SET_QPOS_AREA_START(dest_buf,next_full_patt); + + /* Pack the tags into the tags area, between the page header + * and the full words area. We don't pad for the packer + * because we assume that the page size is a multiple of 16. + */ + +#ifdef WK_DEBUG + printf("about to pack %u bytes holding tags\n", + next_tag - (char *) tempTagsArray); + + { int i; + char* arr = (char *) tempTagsArray; + + printf(" first 200 tags are: \n"); + for (i = 0; i < 200; i++) { + printf(" %d", arr[i]); + } + printf("\n"); + } +#endif + + boundary_tmp = WK_pack_2bits(tempTagsArray, + (WK_word *) next_tag, + dest_buf + HEADER_SIZE_IN_WORDS); + +#ifdef WK_DEBUG + printf("packing tags stopped at %u\n", boundary_tmp); +#endif + + /* Pack the queue positions into the area just after + * the full words. We have to round up the source + * region to a multiple of two words. + */ + + { + unsigned int num_bytes_to_pack = next_qp - (char *) tempQPosArray; + unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8); + unsigned int num_source_words = num_packed_words * 2; + WK_word* endQPosArray = tempQPosArray + num_source_words; + + /* Pad out the array with zeros to avoid corrupting real packed + values. 
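+       e.g. 13 queue-position bytes round up to (13 + 7) >> 3 == 2 packed
+       words, i.e. 4 source words; the 3 pad bytes must already be zero.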
*/
+    for (;   /* next_qp is already set as desired */
+         next_qp < (char*)endQPosArray;
+         next_qp++) {
+      *next_qp = 0;
+    }
+
+#ifdef WK_DEBUG
+    printf("about to pack %u (bytes holding) queue posns.\n",
+           num_bytes_to_pack);
+    printf("packing them from %u words into %u words\n",
+           num_source_words, num_packed_words);
+    printf("dest is range %u to %u\n",
+           next_full_patt, next_full_patt + num_packed_words);
+    { int i;
+      char *arr = (char *) tempQPosArray;
+      printf("  first 200 queue positions are: \n");
+      for (i = 0; i < 200; i++) {
+        printf(" %d", arr[i]);
+      }
+      printf("\n");
+    }
+#endif
+
+    boundary_tmp = WK_pack_4bits(tempQPosArray,
+                                 endQPosArray,
+                                 next_full_patt);
+#ifdef WK_DEBUG
+    printf("Packing of queue positions stopped at %u\n", boundary_tmp);
+#endif /* WK_DEBUG */
+
+    /* Record (into the header) where we stopped packing queue positions,
+     * which is where we will start packing low bits.
+     */
+    SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp);
+
+  }
+
+  /* Pack the low bit patterns into the area just after
+   * the queue positions.  We have to round up the source
+   * region to a multiple of three words.
+   */
+
+  {
+    unsigned int num_tenbits_to_pack =
+      next_low_bits - tempLowBitsArray;
+    unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3);
+    unsigned int num_source_words = num_packed_words * 3;
+    WK_word* endLowBitsArray = tempLowBitsArray + num_source_words;
+
+    /* Pad out the array with zeros to avoid corrupting real packed
+       values. */
+
+    for (;   /* next_low_bits is already set as desired */
+         next_low_bits < endLowBitsArray;
+         next_low_bits++) {
+      *next_low_bits = 0;
+    }
+
+#ifdef WK_DEBUG
+    printf("about to pack low bits\n");
+    printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack);
+    printf("endLowBitsArray is %u\n", endLowBitsArray);
+#endif
+
+    boundary_tmp = WK_pack_3_tenbits (tempLowBitsArray,
+                                      endLowBitsArray,
+                                      boundary_tmp);
+
+    SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp);
+
+  }
+
+  return ((char *) boundary_tmp - (char *) dest_buf);
+}
diff --git a/iokit/Kernel/WKdmDecompress.c b/iokit/Kernel/WKdmDecompress.c
new file mode 100644
index 000000000..062f2d32f
--- /dev/null
+++ b/iokit/Kernel/WKdmDecompress.c
@@ -0,0 +1,283 @@
+#include
+#include "WKdm.h"
+
+/***************************************************************************
+ * THE UNPACKING ROUTINES should GO HERE
+ */
+
+const char hashLookupTable [] = HASH_LOOKUP_TABLE_CONTENTS;
+
+#if 0
+#define GET_NEXT_TAG tags[tagsIndex++]
+#define GET_NEXT_FULL_PATTERN fullPatterns[fullPatternsIndex++]
+#define GET_NEXT_LOW_BITS lowBits[lowBitsIndex++]
+#define GET_NEXT_DICTIONARY_INDEX dictionaryIndices[dictionaryIndicesIndex++]
+#endif
+
+/* WK_unpack_2bits takes any number of words containing 16 two-bit values
+ * and unpacks them into four times as many words containing those
+ * two-bit values as bytes (with the low two bits of each byte holding
+ * the actual value).
+ */
+static WK_word*
+WK_unpack_2bits(WK_word *input_buf,
+                WK_word *input_end,
+                WK_word *output_buf) {
+
+  register WK_word *input_next = input_buf;
+  register WK_word *output_next = output_buf;
+  register WK_word packing_mask = TWO_BITS_PACKING_MASK;
+
+  /* loop to repeatedly grab one input word and unpack it into
+   * 4 output words.  This loop could be unrolled a little---it's
+   * designed to be easy to do that.
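+   * For example, an input word 0x000000E4 (low byte 0b11100100) leaves
+   * the low bytes of the four output words holding 0, 1, 2 and 3, i.e.
+   * the tags ZERO_TAG, PARTIAL_TAG, MISS_TAG and EXACT_TAG in order.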
+   */
+  while (input_next < input_end) {
+    register WK_word temp = input_next[0];
+    DEBUG_PRINT_2("Unpacked tags word: %.8x\n", temp);
+    output_next[0] = temp & packing_mask;
+    output_next[1] = (temp >> 2) & packing_mask;
+    output_next[2] = (temp >> 4) & packing_mask;
+    output_next[3] = (temp >> 6) & packing_mask;
+
+    output_next += 4;
+    input_next++;
+  }
+
+  return output_next;
+
+}
+
+/* WK_unpack_4bits consumes any number of words (between input_buf
+ * and input_end) holding 8 4-bit values per word, and unpacks them
+ * into twice as many words, with each value in a separate byte.
+ * (The four-bit values occupy the low halves of the bytes in the
+ * result).
+ */
+static WK_word*
+WK_unpack_4bits(WK_word *input_buf,
+                WK_word *input_end,
+                WK_word *output_buf) {
+
+  register WK_word *input_next = input_buf;
+  register WK_word *output_next = output_buf;
+  register WK_word packing_mask = FOUR_BITS_PACKING_MASK;
+
+
+  /* loop to repeatedly grab one input word and unpack it into
+   * 2 output words.  This loop should probably be unrolled
+   * a little---it's designed to be easy to do that.
+   */
+  while (input_next < input_end) {
+    register WK_word temp = input_next[0];
+    DEBUG_PRINT_2("Unpacked dictionary indices word: %.8x\n", temp);
+    output_next[0] = temp & packing_mask;
+    output_next[1] = (temp >> 4) & packing_mask;
+
+    output_next += 2;
+    input_next++;
+  }
+
+  return output_next;
+
+}
+
+/* WK_unpack_3_tenbits unpacks three 10-bit items from (the low 30 bits of)
+ * a 32-bit word
+ */
+static WK_word*
+WK_unpack_3_tenbits(WK_word *input_buf,
+                    WK_word *input_end,
+                    WK_word *output_buf) {
+
+  register WK_word *input_next = input_buf;
+  register WK_word *output_next = output_buf;
+  register WK_word packing_mask = LOW_BITS_MASK;
+
+  /* loop to fetch words of input, splitting each into three
+   * words of output with 10 meaningful low bits.  This loop
+   * probably ought to be unrolled and maybe coiled
+   */
+  while (input_next < input_end) {
+    register WK_word temp = input_next[0];
+
+    output_next[0] = temp & packing_mask;
+    output_next[1] = (temp >> 10) & packing_mask;
+    output_next[2] = temp >> 20;
+
+    input_next++;
+    output_next += 3;
+  }
+
+  return output_next;
+
+}
+
+/*********************************************************************
+ * WKdm_decompress --- THE DECOMPRESSOR
+ * Expects WORD pointers to the source and destination buffers
+ * and a page size in words.
The page size had better be 1024 unless + * somebody finds the places that are dependent on the page size and + * fixes them + */ + +void +WKdm_decompress (WK_word* src_buf, + WK_word* dest_buf, + __unused unsigned int words) +{ + + DictionaryElement dictionary[DICTIONARY_SIZE]; + + /* arrays that hold output data in intermediate form during modeling */ + /* and whose contents are packed into the actual output after modeling */ + + /* sizes of these arrays should be increased if you want to compress + * pages larger than 4KB + */ + WK_word tempTagsArray[300]; /* tags for everything */ + WK_word tempQPosArray[300]; /* queue positions for matches */ + WK_word tempLowBitsArray[1200]; /* low bits for partial matches */ + + PRELOAD_DICTIONARY; + +#ifdef WK_DEBUG + printf("\nIn DECOMPRESSOR\n"); + printf("tempTagsArray is at %u\n", (unsigned long int) tempTagsArray); + printf("tempQPosArray is at %u\n", (unsigned long int) tempQPosArray); + printf("tempLowBitsArray is at %u\n", (unsigned long int) tempLowBitsArray); + + printf(" first four words of source buffer are:\n"); + printf(" %u\n %u\n %u\n %u\n", + src_buf[0], src_buf[1], src_buf[2], src_buf[3]); + + { int i; + WK_word *arr =(src_buf + TAGS_AREA_OFFSET + (PAGE_SIZE_IN_WORDS / 16)); + + printf(" first 20 full patterns are: \n"); + for (i = 0; i < 20; i++) { + printf(" %d", arr[i]); + } + printf("\n"); + } +#endif + + WK_unpack_2bits(TAGS_AREA_START(src_buf), + TAGS_AREA_END(src_buf), + tempTagsArray); + +#ifdef WK_DEBUG + { int i; + char* arr = (char *) tempTagsArray; + + printf(" first 200 tags are: \n"); + for (i = 0; i < 200; i++) { + printf(" %d", arr[i]); + } + printf("\n"); + } +#endif + + WK_unpack_4bits(QPOS_AREA_START(src_buf), + QPOS_AREA_END(src_buf), + tempQPosArray); + +#ifdef WK_DEBUG + { int i; + char* arr = (char *) tempQPosArray; + + printf(" first 200 queue positions are: \n"); + for (i = 0; i < 200; i++) { + printf(" %d", arr[i]); + } + printf("\n"); + } +#endif + + WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), + LOW_BITS_AREA_END(src_buf), + tempLowBitsArray); + +#ifdef WK_DEBUG + printf("AFTER UNPACKING, about to enter main block \n"); +#endif + + { + register char *next_tag = (char *) tempTagsArray; + char *tags_area_end = + ((char *) tempTagsArray) + PAGE_SIZE_IN_WORDS; + char *next_q_pos = (char *) tempQPosArray; + WK_word *next_low_bits = tempLowBitsArray; + WK_word *next_full_word = FULL_WORD_AREA_START(src_buf); + + WK_word *next_output = dest_buf; + +#ifdef WK_DEBUG + printf("next_output is %u\n", next_output); + + printf("next_tag is %u \n", next_tag); + printf("tags_area_end is %u\n", tags_area_end); + printf("next_q_pos is %u\n", next_q_pos); + printf("next_low_bits is %u\n", next_low_bits); + printf("next_full_word is %u\n", next_full_word); +#endif + + /* this loop should probably be unrolled. Maybe we should unpack + * as 4 bit values, giving two consecutive tags, and switch on + * that 16 ways to decompress 2 words at a whack + */ + while (next_tag < tags_area_end) { + + char tag = next_tag[0]; + + switch(tag) { + + case ZERO_TAG: { + *next_output = 0; + break; + } + case EXACT_TAG: { + WK_word *dict_location = dictionary + *(next_q_pos++); + /* no need to replace dict. 
entry if matched exactly */ + *next_output = *dict_location; + break; + } + case PARTIAL_TAG: { + WK_word *dict_location = dictionary + *(next_q_pos++); + { + WK_word temp = *dict_location; + + /* strip out low bits */ + temp = ((temp >> NUM_LOW_BITS) << NUM_LOW_BITS); + + /* add in stored low bits from temp array */ + temp = temp | *(next_low_bits++); + + *dict_location = temp; /* replace old value in dict. */ + *next_output = temp; /* and echo it to output */ + } + break; + } + case MISS_TAG: { + WK_word missed_word = *(next_full_word++); + WK_word *dict_location = + (WK_word *) + (((char *) dictionary) + HASH_TO_DICT_BYTE_OFFSET(missed_word)); + *dict_location = missed_word; + *next_output = missed_word; + break; + } + } + next_tag++; + next_output++; + } + +#ifdef WK_DEBUG + printf("AFTER DECOMPRESSING\n"); + printf("next_output is %u\n", (unsigned long int) next_output); + printf("next_tag is %u\n", (unsigned long int) next_tag); + printf("next_full_word is %u\n", (unsigned long int) next_full_word); + printf("next_q_pos is %u\n", (unsigned long int) next_q_pos); +#endif + } +} diff --git a/iokit/conf/Makefile.template b/iokit/conf/Makefile.template index b24d19114..52571ea9d 100644 --- a/iokit/conf/Makefile.template +++ b/iokit/conf/Makefile.template @@ -87,6 +87,11 @@ LDOBJS = $(OBJS) $(COMPONENT).o: $(LDOBJS) @echo "creating $(COMPONENT).o" + $(SEG_HACK) __HIB IOHibernateRestoreKernel.o -o _IOHibernateRestoreKernel.o + mv _IOHibernateRestoreKernel.o IOHibernateRestoreKernel.o + $(SEG_HACK) __HIB WKdmDecompress.o -o _WKdmDecompress.o + mv _WKdmDecompress.o WKdmDecompress.o + @echo [ updating $(COMPONENT).o ${IOKIT_KERNEL_CONFIG} ] $(LD) $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} diff --git a/iokit/conf/files b/iokit/conf/files index f4aa60ae1..ab719eb34 100644 --- a/iokit/conf/files +++ b/iokit/conf/files @@ -6,6 +6,11 @@ OPTIONS/kdebug optional kdebug # libIOKit +iokit/Kernel/WKdmCompress.c optional iokitcpp +iokit/Kernel/WKdmDecompress.c optional iokitcpp +iokit/Kernel/IOHibernateIO.cpp optional iokitcpp +iokit/Kernel/IOHibernateRestoreKernel.c optional iokitcpp + iokit/Kernel/IOLib.c optional iokitcpp iokit/Kernel/IOLocks.cpp optional iokitcpp iokit/Kernel/IOConditionLock.cpp optional iokitcpp diff --git a/libkern/libkern/Makefile b/libkern/libkern/Makefile index cf3af2a63..41c7ce91f 100644 --- a/libkern/libkern/Makefile +++ b/libkern/libkern/Makefile @@ -36,10 +36,11 @@ INSTALL_MI_LIST = OSByteOrder.h OSDebug.h OSReturn.h OSTypes.h INSTALL_MI_DIR = libkern +INSTALL_MI_LCL_GEN_LIST = OSCrossEndian.h + EXPORT_MI_LIST = ${DATAFILES} -EXPORT_MI_GEN_LIST = \ - version.h +EXPORT_MI_GEN_LIST = version.h EXPORT_MI_DIR = libkern diff --git a/libkern/libkern/OSCrossEndian.h b/libkern/libkern/OSCrossEndian.h new file mode 100644 index 000000000..0131455d1 --- /dev/null +++ b/libkern/libkern/OSCrossEndian.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * This private header exports 3 APIs.
+ * _OSRosettaCheck()    - An inline function that returns true if we are
+ *                        currently running under Rosetta.
+ * IF_ROSETTA()         - Which is used as a regular conditional
+ *                        expression that is true only if the current
+ *                        code is executing in the Rosetta
+ *                        translation space.
+ * ROSETTA_ONLY(exprs)  - Which is used to create a block of code that
+ *                        only executes if we are running in Rosetta.
+ *
+ * for example
+ *
+ * IF_ROSETTA() {
+ *     // Do cross-endian swapping of input data
+ *     outdata = OSSwap??(indata);
+ * }
+ * else {
+ *     // Pass the data straight through
+ *     outdata = indata;
+ * }
+ *
+ * outdata = indata;
+ * ROSETTA_ONLY(
+ *     // Do cross-endian swapping of input data
+ *     outdata = OSSwap??(outdata);
+ * );
+ */
+
+#ifndef _LIBKERN_OSCROSSENDIAN_H
+#define _LIBKERN_OSCROSSENDIAN_H
+
+#if __ppc__
+
+static __inline__ int _OSRosettaCheck(void)
+{
+    int isCrossEndian;
+
+    __asm__ ( "b 0f\n"
+              "    .long 0x14400004\n"
+              "    li %0,1\n"
+              "0:"
+              : "=r" (isCrossEndian) : "0" (0)
+    );
+
+    return isCrossEndian;
+}
+
+#else
+
+static __inline__ int _OSRosettaCheck(void) { return 0; }
+
+#endif
+
+#define IF_ROSETTA() if (__builtin_expect(_OSRosettaCheck(), 0) )
+
+#define ROSETTA_ONLY(exprs) \
+do { \
+    IF_ROSETTA() { \
+	exprs \
+    } \
+} while(0)
+
+#endif /* _LIBKERN_OSCROSSENDIAN_H */
diff --git a/osfmk/conf/Makefile.i386 b/osfmk/conf/Makefile.i386
index ae239e230..e7e996b62 100644
--- a/osfmk/conf/Makefile.i386
+++ b/osfmk/conf/Makefile.i386
@@ -34,10 +34,9 @@ OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
 
 $(OBJS_WERROR):	WERROR=-Werror
 
-
-
 # Files that must go in the __HIB segment:
 HIB_FILES= \
+	hibernate_restore.o \
 	gdt.o \
 	idt.o
 
diff --git a/osfmk/conf/Makefile.ppc b/osfmk/conf/Makefile.ppc
index 60251be9b..80f3bd542 100644
--- a/osfmk/conf/Makefile.ppc
+++ b/osfmk/conf/Makefile.ppc
@@ -19,7 +19,7 @@ db_disasm.o : ppc_disasm.h
 
 # Files that must go in the __HIB segment:
 HIB_FILES= \
-
+	hibernate_restore.o
 
 ######################################################################
 #END	Machine dependent Makefile fragment for ppc
diff --git a/osfmk/conf/files b/osfmk/conf/files
index 7514d7843..3a2b613e0 100644
--- a/osfmk/conf/files
+++ b/osfmk/conf/files
@@ -175,6 +175,7 @@ osfmk/kern/wait_queue.c standard
 osfmk/kern/xpr.c optional xpr_debug
 osfmk/kern/zalloc.c standard
 osfmk/kern/bsd_kern.c optional mach_bsd
+osfmk/kern/hibernate.c standard
 ./mach/clock_server.c standard
 ./mach/clock_priv_server.c standard
 ./mach/clock_reply_user.c standard
diff --git a/osfmk/conf/files.i386 b/osfmk/conf/files.i386
index abd788656..1121b7b74 100644
--- a/osfmk/conf/files.i386
+++ b/osfmk/conf/files.i386
@@ -111,6 +111,8 @@ osfmk/kdp/ml/i386/kdp_machdep.c optional mach_kdp
 osfmk/kdp/ml/i386/kdp_vm.c optional mach_kdp
 
+osfmk/i386/hibernate_i386.c standard
+osfmk/i386/hibernate_restore.s standard
 
 # DUMMIES TO FORCE GENERATION OF .h FILES
 osfmk/OPTIONS/ln optional ln
diff --git a/osfmk/conf/files.ppc b/osfmk/conf/files.ppc
index d985923de..e03ce61b9 100644
---
a/osfmk/conf/files.ppc +++ b/osfmk/conf/files.ppc @@ -64,6 +64,8 @@ osfmk/ppc/Diagnostics.c standard osfmk/ppc/PPCcalls.c standard osfmk/ppc/vmachmon.c standard osfmk/ppc/vmachmon_asm.s standard +osfmk/ppc/pms.c standard +osfmk/ppc/pmsCPU.c standard osfmk/ppc/Firmware.s standard osfmk/ppc/FirmwareC.c standard @@ -110,6 +112,8 @@ osfmk/console/panic_dialog.c optional vc device-driver osfmk/console/video_console.c optional vc device-driver osfmk/console/ppc/video_scroll.s optional vc device-driver +osfmk/ppc/hibernate_ppc.c standard +osfmk/ppc/hibernate_restore.s standard # DUMMIES TO FORCE GENERATION OF .h FILES OPTIONS/bm optional bm diff --git a/osfmk/device/iokit_rpc.c b/osfmk/device/iokit_rpc.c index d6fb27e2e..c1e4c2d51 100644 --- a/osfmk/device/iokit_rpc.c +++ b/osfmk/device/iokit_rpc.c @@ -457,7 +457,7 @@ kern_return_t IOMapPages(vm_map_t map, vm_offset_t va, vm_offset_t pa, #if __ppc__ // Set up a block mapped area - pmap_map_block(pmap, (addr64_t)va, (ppnum_t)(pa >> 12), length, prot, flags, 0); + pmap_map_block(pmap, (addr64_t)va, (ppnum_t)(pa >> 12), (uint32_t)(length >> 12), prot, flags, 0); #else // enter each page's physical address in the target map diff --git a/osfmk/i386/acpi.c b/osfmk/i386/acpi.c index 51623e41c..e748f6161 100644 --- a/osfmk/i386/acpi.c +++ b/osfmk/i386/acpi.c @@ -28,6 +28,8 @@ #include #include + +#include #include extern void acpi_sleep_cpu(acpi_sleep_callback, void * refcon); @@ -54,16 +56,21 @@ acpi_install_wake_handler(void) return ACPI_WAKE_ADDR; } -typedef struct acpi_sleep_callback_data { +typedef struct acpi_hibernate_callback_data { acpi_sleep_callback func; void *refcon; -} acpi_sleep_callback_data; +} acpi_hibernate_callback_data; static void -acpi_sleep_do_callback(void *refcon) +acpi_hibernate(void *refcon) { - acpi_sleep_callback_data *data = (acpi_sleep_callback_data *)refcon; + boolean_t hib; + + acpi_hibernate_callback_data *data = (acpi_hibernate_callback_data *)refcon; + if (current_cpu_datap()->cpu_hibernate) { + hib = hibernate_write_image(); + } (data->func)(data->refcon); @@ -73,7 +80,8 @@ acpi_sleep_do_callback(void *refcon) void acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) { - acpi_sleep_callback_data data; + acpi_hibernate_callback_data data; + boolean_t did_hibernate; /* shutdown local APIC before passing control to BIOS */ lapic_shutdown(); @@ -86,12 +94,23 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) * Will not return until platform is woken up, * or if sleep failed. 
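+	 * When this is a hibernate sleep (cpu_hibernate is set), the
+	 * acpi_hibernate() handler passed to acpi_sleep_cpu() first writes
+	 * the memory image with hibernate_write_image() and only then
+	 * invokes the saved sleep callback.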
*/ - acpi_sleep_cpu(acpi_sleep_do_callback, &data); + acpi_sleep_cpu(acpi_hibernate, &data); /* reset UART if kprintf is enabled */ if (FALSE == disableSerialOuput) serial_init(); + if (current_cpu_datap()->cpu_hibernate) { + * (int *) CM1 = 0; + * (int *) CM2 = 0; + * (int *) CM3 = 0; + + current_cpu_datap()->cpu_hibernate = 0; + + did_hibernate = TRUE; + } else { + did_hibernate = FALSE; + } /* restore MTRR settings */ mtrr_update_cpu(); @@ -99,6 +118,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* set up PAT following boot processor power up */ pat_init(); + if (did_hibernate) { + hibernate_machine_init(); + } + /* re-enable and re-init local apic */ if (lapic_probe()) lapic_init(); @@ -106,4 +129,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon) /* let the realtime clock reset */ rtc_sleep_wakeup(); + if (did_hibernate) { + enable_preemption(); + } } diff --git a/osfmk/i386/cpu.c b/osfmk/i386/cpu.c index 4578329c7..f16989952 100644 --- a/osfmk/i386/cpu.c +++ b/osfmk/i386/cpu.c @@ -134,7 +134,13 @@ cpu_machine_init( cpu = get_cpu_number(); PE_cpu_machine_init(cpu_datap(cpu)->cpu_id, TRUE); - +#if 0 + if (cpu_datap(cpu)->hibernate) + { + cpu_datap(cpu)->hibernate = 0; + hibernate_machine_init(); + } +#endif ml_init_interrupt(); } diff --git a/osfmk/i386/cpu_data.h b/osfmk/i386/cpu_data.h index 7d455b4a1..172738bc9 100644 --- a/osfmk/i386/cpu_data.h +++ b/osfmk/i386/cpu_data.h @@ -128,7 +128,7 @@ typedef struct cpu_data int cpu_kdb_is_slave; int cpu_kdb_active; #endif /* MACH_KDB */ - int cpu_reserved1; + int cpu_hibernate; } cpu_data_t; extern cpu_data_t *cpu_data_ptr[]; diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c new file mode 100644 index 000000000..d243fcd56 --- /dev/null +++ b/osfmk/i386/hibernate_i386.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. 
+ * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define KERNEL + +#include +#include + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* This assumes that + * - we never will want to read or write memory below the start of kernel text + * - kernel text and data isn't included in pmap memory regions + */ + +extern void *sectTEXTB; +extern char *first_avail; + +hibernate_page_list_t * +hibernate_page_list_allocate(void) +{ + vm_offset_t base; + vm_size_t size; + uint32_t bank; + uint32_t pages, page_count; + hibernate_page_list_t * list; + hibernate_bitmap_t * bitmap; + pmap_memory_region_t * regions; + pmap_memory_region_t * rp; + uint32_t num_regions, num_alloc_regions; + + page_count = 0; + + /* Make a list of the maximum number of regions needed */ + num_alloc_regions = 1 + pmap_memory_region_count; + + /* Allocate our own list of memory regions so we can sort them in order. */ + regions = (pmap_memory_region_t *)kalloc(sizeof(pmap_memory_region_t) * num_alloc_regions); + if (!regions) + return (0); + + /* Fill in the actual regions we will be returning. */ + rp = regions; + + /* XXX should check for non-volatile memory region below kernel space. */ + /* Kernel region is first. */ + base = (vm_offset_t)(sectTEXTB) & 0x3FFFFFFF; + rp->base = atop_32(base); + rp->end = atop_32((vm_offset_t)first_avail) - 1; + rp->alloc = 0; + num_regions = 1; + + /* Remaining memory regions. Consolidate adjacent regions. */ + for (bank = 0; bank < (uint32_t) pmap_memory_region_count; bank++) + { + if ((rp->end + 1) == pmap_memory_regions[bank].base) { + rp->end = pmap_memory_regions[bank].end; + } else { + ++rp; + ++num_regions; + rp->base = pmap_memory_regions[bank].base; + rp->end = pmap_memory_regions[bank].end; + rp->alloc = 0; + } + } + + /* Size the hibernation bitmap */ + size = sizeof(hibernate_page_list_t); + page_count = 0; + for (bank = 0, rp = regions; bank < num_regions; bank++, rp++) { + pages = rp->end + 1 - rp->base; + page_count += pages; + size += sizeof(hibernate_bitmap_t) + ((pages + 31) >> 5) * sizeof(uint32_t); + } + + list = (hibernate_page_list_t *)kalloc(size); + if (!list) + return (list); + + list->list_size = size; + list->page_count = page_count; + list->bank_count = num_regions; + + /* Convert to hibernation bitmap. */ + /* This assumes that ranges are in order and do not overlap. 
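+       Each bank needs (last_page + 1 - first_page + 31) >> 5 bitmap words;
+       a 100-page bank, for example, takes (100 + 31) >> 5 == 4 words, and
+       the next hibernate_bitmap_t header starts immediately after them.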
*/
+    bitmap = &list->bank_bitmap[0];
+    for (bank = 0, rp = regions; bank < num_regions; bank++, rp++)
+    {
+        bitmap->first_page = rp->base;
+        bitmap->last_page  = rp->end;
+        bitmap->bitmapwords = (bitmap->last_page + 1
+                               - bitmap->first_page + 31) >> 5;
+        kprintf("HIB: Bank %d: 0x%x end 0x%x\n", bank,
+                ptoa_32(bitmap->first_page),
+                ptoa_32(bitmap->last_page));
+        bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
+    }
+
+    kfree((void *)regions, sizeof(pmap_memory_region_t) * num_alloc_regions);
+    return (list);
+}
+
+void
+hibernate_page_list_setall_machine(hibernate_page_list_t * page_list,
+                                   hibernate_page_list_t * page_list_wired,
+                                   uint32_t * pagesOut)
+{
+    KernelBootArgs_t * bootArgs = (KernelBootArgs_t *)PE_state.bootArgs;
+    MemoryRange *      mptr;
+    uint32_t           bank;
+    uint32_t           page, count;
+
+    for (bank = 0, mptr = bootArgs->memoryMap; bank < bootArgs->memoryMapCount; bank++, mptr++)
+    {
+        if (kMemoryRangeNVS != mptr->type) continue;
+        kprintf("Base NVS region 0x%x + 0x%x\n", (vm_offset_t)mptr->base, (vm_size_t)mptr->length);
+        /* Round to page size.  Hopefully this does not overlap any reserved areas. */
+        page  = atop_32(trunc_page((vm_offset_t)mptr->base));
+        count = atop_32(round_page((vm_offset_t)mptr->base + (vm_size_t)mptr->length)) - page;
+        kprintf("Rounded NVS region 0x%x size 0x%x\n", page, count);
+
+        hibernate_set_page_state(page_list, page_list_wired, page, count, 1);
+        pagesOut -= count;
+    }
+}
+
+kern_return_t
+hibernate_processor_setup(IOHibernateImageHeader * header)
+{
+    current_cpu_datap()->cpu_hibernate = 1;
+    header->processorFlags = 0;
+    return (KERN_SUCCESS);
+}
+
+void
+hibernate_vm_lock(void)
+{
+    if (FALSE /* getPerProc()->hibernate */)
+    {
+        vm_page_lock_queues();
+        mutex_lock(&vm_page_queue_free_lock);
+    }
+}
+
+void
+hibernate_vm_unlock(void)
+{
+    if (FALSE /* getPerProc()->hibernate */)
+    {
+        mutex_unlock(&vm_page_queue_free_lock);
+        vm_page_unlock_queues();
+    }
+}
diff --git a/osfmk/i386/hibernate_restore.s b/osfmk/i386/hibernate_restore.s
new file mode 100644
index 000000000..c01f23c50
--- /dev/null
+++ b/osfmk/i386/hibernate_restore.s
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include
+#include
+
+#include
+#include
+
+/*
+This code is linked into the kernel but is part of the "__HIB" section, which means
+it's used by code running in the special context of restoring the kernel text and data
+from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
+it calls or references (i.e. hibernate_restore_phys_page())
+needs to be careful to only touch memory also in the "__HIB" section.
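+This is also why the restore path carries its own trivial bcopy_internal(),
+and why the Makefile runs SEG_HACK to move IOHibernateRestoreKernel.o and
+WKdmDecompress.o into __HIB: ordinary kernel text and data may not have been
+restored yet when this code runs.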
+*/ + +/* + * GAS won't handle an intersegment jump with a relocatable offset. + */ +#define LJMP(segment,address) \ + .byte 0xea ;\ + .long address ;\ + .word segment + +#define KVTOPHYS (-KERNELBASE) +#define KVTOLINEAR LINEAR_KERNELBASE + +#define PA(addr) ((addr)+KVTOPHYS) +#define VA(addr) ((addr)-KVTOPHYS) + +/* Location of temporary page tables */ +#define HPTD 0x80000 + +#define KERNEL_MAP_SIZE ( 4 * 1024 * 1024) + +/* + * fillkpt + * eax = page frame address + * ebx = index into page table + * ecx = how many pages to map + * base = base address of page dir/table + * prot = protection bits + */ +#define fillkpt(base, prot) \ + shll $2,%ebx ; \ + addl base,%ebx ; \ + orl $(PTE_V), %eax ; \ + orl prot,%eax ; \ +1: movl %eax,(%ebx) ; \ + addl $(PAGE_SIZE),%eax ; /* increment physical address */ \ + addl $4,%ebx ; /* next pte */ \ + loop 1b + +/* + * fillpse + * eax = physical page address + * ebx = index into page table + * ecx = how many pages to map + * base = base address of page dir/table + * prot = protection bits + */ +#define fillpse(base, prot) \ + shll $2,%ebx ; \ + addl base,%ebx ; \ + orl $(PTE_V|PTE_PS), %eax ; \ + orl prot,%eax ; \ +1: movl %eax,(%ebx) ; \ + addl $(1 << PDESHIFT),%eax ; /* increment physical address 4Mb */ \ + addl $4,%ebx ; /* next entry */ \ + loop 1b + +/* + * fillkptphys(base, prot) + * eax = physical address + * ecx = how many pages to map + * base = base of page table + * prot = protection bits + */ +#define fillkptphys(base, prot) \ + movl %eax, %ebx ; \ + shrl $(PAGE_SHIFT), %ebx ; \ + fillkpt(base, prot) + +/* + * Hibernation code restarts here. Steal some pages from 0x10000 + * to 0x90000 for pages tables and directories etc to temporarily + * map the hibernation code (put at 0x100000 (phys) by the booter + * and linked to 0xC0100000 by the linker) to 0xC0100000 so it can + * execute. It's self-contained and won't make any references outside + * of itself. 
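+ * (The temporary tables at HPTD use 4MB PSE mappings: physical memory is
+ * identity-mapped up to 0xC0000000 and physical zero is mapped again at
+ * 0xC0000000, so the jump to the kernel's linked virtual addresses works.)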
+ * + * On the way down it has to save IdlePTD (and if PAE also IdlePDPT) + * and after it runs it has to restore those and load IdlePTD (or + * IdlePDPT if PAE) into %cr3 to re-establish the original mappings + */ + + .align ALIGN + .globl EXT(hibernate_machine_entrypoint) +LEXT(hibernate_machine_entrypoint) + cli + + mov %eax, %edi + + POSTCODE(0x1) + + /* Map physical memory from zero to 0xC0000000 */ + xorl %eax, %eax + xorl %ebx, %ebx + movl $(KPTDI), %ecx + fillpse( $(HPTD), $(PTE_W) ) + + /* Map 0 again at 0xC0000000 */ + xorl %eax, %eax + movl $(KPTDI), %ebx + movl $(KERNEL_MAP_SIZE >> PDESHIFT), %ecx + fillpse( $(HPTD), $(PTE_W) ) + + movl $(HPTD), %eax + movl %eax, %cr3 + + POSTCODE(0x3) + + movl %cr4,%eax + orl $(CR4_PSE),%eax + movl %eax,%cr4 /* enable page size extensions */ + movl %cr0, %eax + orl $(CR0_PG|CR0_WP|CR0_PE), %eax + movl %eax, %cr0 /* ready paging */ + + POSTCODE(0x4) + + lgdt PA(EXT(gdtptr)) /* load GDT */ + lidt PA(EXT(idtptr)) /* load IDT */ + + POSTCODE(0x5) + + LJMP (KERNEL_CS,EXT(hstart)) /* paging on and go to correct vaddr */ + +/* Hib restart code now running with correct addresses */ +LEXT(hstart) + POSTCODE(0x6) + + mov $(KERNEL_DS),%ax /* set kernel data segment */ + mov %ax,%ds + mov %ax,%es + mov %ax,%ss + + mov $0,%ax /* fs must be zeroed; */ + mov %ax,%fs /* some bootstrappers don`t do this */ + mov %ax,%gs + + lea EXT(gIOHibernateRestoreStackEnd),%esp /* switch to the bootup stack */ + + POSTCODE(0x7) + + xorl %eax, %eax /* Video memory - N/A */ + pushl %eax + mov %edi, %eax /* Pointer to hibernate header */ + pushl %eax + call EXT(hibernate_kernel_entrypoint) + /* NOTREACHED */ + hlt + + + +/* +void +hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags); +*/ + + .align 5 + .globl EXT(hibernate_restore_phys_page) + + /* XXX doesn't handle 64-bit addresses yet */ + /* XXX can only deal with exactly one page */ +LEXT(hibernate_restore_phys_page) + pushl %edi + pushl %esi + + movl 8+ 4(%esp),%esi /* source virtual address */ + addl $0, %esi + jz 2f /* If source == 0, nothing to do */ + + + movl 8+ 12(%esp),%edi /* destination physical address */ + cmpl $(LINEAR_KERNELBASE), %edi + jl 1f /* no need to map, below 0xC0000000 */ + + movl %edi, %eax /* destination physical address */ + /* Map physical address to virt. address 0xffc00000 (4GB - 4MB) */ + andl $0xFFC00000, %eax + orl $(PTE_V | PTE_PS | PTE_W), %eax + movl %eax, (HPTD + (0x3FF * 4)) + orl $0xFFC00000, %edi + invlpg (%edi) + +1: + movl 8+ 20(%esp),%edx /* number of bytes */ + cld +/* move longs*/ + movl %edx,%ecx + sarl $2,%ecx + rep + movsl +/* move bytes*/ + movl %edx,%ecx + andl $3,%ecx + rep + movsb +2: + popl %esi + popl %edi + ret diff --git a/osfmk/i386/i386_vm_init.c b/osfmk/i386/i386_vm_init.c index d66ffd4ef..e419a5374 100644 --- a/osfmk/i386/i386_vm_init.c +++ b/osfmk/i386/i386_vm_init.c @@ -263,6 +263,8 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args) // save other regions if (kMemoryRangeNVS == mptr->type) { + // Mark this as a memory range (for hibernation), + // but don't count as usable memory pmptr->base = base; pmptr->end = ((mptr->base + mptr->length + I386_PGBYTES - 1) >> I386_PGSHIFT) - 1; pmptr->alloc = pmptr->end; diff --git a/osfmk/i386/start.s b/osfmk/i386/start.s index a9f8c45b0..830c1284b 100644 --- a/osfmk/i386/start.s +++ b/osfmk/i386/start.s @@ -92,16 +92,21 @@ EXT(_kick_buffer_): /* * Interrupt and bootup stack for initial processor. */ + /* in the __HIB section since the hibernate restore code uses this stack. 
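
hibernate_restore_phys_page() above copies with rep movsl and finishes the remainder with rep movsb, after temporarily mapping any destination above LINEAR_KERNELBASE through a single 4 MB window at 0xFFC00000. The word-then-byte copy, sketched in C (the function name is illustrative):

#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Move `len` bytes as 32-bit words, then finish the 0-3 leftover bytes,
   mirroring the sarl/rep movsl then andl/rep movsb sequence above. */
static void copy_words_then_bytes(void *dst, const void *src, uint32_t len)
{
    uint32_t *d32 = dst;
    const uint32_t *s32 = src;
    uint32_t words = len >> 2;              /* sarl $2,%ecx  */
    while (words--) *d32++ = *s32++;        /* rep movsl     */
    uint8_t *d8 = (uint8_t *)d32;
    const uint8_t *s8 = (const uint8_t *)s32;
    uint32_t bytes = len & 3;               /* andl $3,%ecx  */
    while (bytes--) *d8++ = *s8++;          /* rep movsb     */
}

int main(void)
{
    uint32_t src[2] = { 0x11223344, 0x55667788 }, dst[2] = { 0, 0 };
    copy_words_then_bytes(dst, src, 7);     /* one word + three bytes */
    assert(memcmp(dst, src, 7) == 0);
    return 0;
}
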
*/ .section __HIB, __data .align ALIGN .globl EXT(intstack) EXT(intstack): + .globl EXT(gIOHibernateRestoreStack) +EXT(gIOHibernateRestoreStack): .set ., .+INTSTACK_SIZE .globl EXT(eintstack) EXT(eintstack:) + .globl EXT(gIOHibernateRestoreStackEnd) +EXT(gIOHibernateRestoreStackEnd): /* * Pointers to GDT and IDT. These contain linear addresses. diff --git a/osfmk/ipc/ipc_kmsg.c b/osfmk/ipc/ipc_kmsg.c index 5fde45534..2edc40559 100644 --- a/osfmk/ipc/ipc_kmsg.c +++ b/osfmk/ipc/ipc_kmsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -775,7 +775,7 @@ ipc_kmsg_get_from_kernel( ipc_port_t dest_port; assert(size >= sizeof(mach_msg_header_t)); - assert((size & 3) == 0); +// assert((size & 3) == 0); assert(IP_VALID((ipc_port_t) msg->msgh_remote_port)); dest_port = (ipc_port_t)msg->msgh_remote_port; diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c new file mode 100644 index 000000000..e43ee941e --- /dev/null +++ b/osfmk/kern/hibernate.c @@ -0,0 +1,446 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define KERNEL + +#include +#include +#include + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static vm_page_t hibernate_gobble_queue; + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +static void +hibernate_page_list_zero(hibernate_page_list_t *list) +{ + uint32_t bank; + hibernate_bitmap_t * bitmap; + + bitmap = &list->bank_bitmap[0]; + for (bank = 0; bank < list->bank_count; bank++) + { + uint32_t bit, last_bit; + uint32_t *bitmap_word; + + bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2); + + // Set out-of-bound bits at end of bitmap. + bitmap_word = &bitmap->bitmap[bitmap->bitmapwords - 1]; + last_bit = ((bitmap->last_page - bitmap->first_page) & 31); + for (bit = 31; bit > last_bit; bit--) { + *bitmap_word |= (0x80000000 >> bit); + } + + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + } +} + + +static boolean_t +consider_discard(vm_page_t m) +{ + register vm_object_t object = 0; + int refmod_state; + boolean_t discard = FALSE; + + do + { + if(m->private) + panic("consider_discard: private"); + + if (!vm_object_lock_try(m->object)) + break; + + object = m->object; + + if (m->wire_count != 0) + break; + if (m->precious) + break; + + if (m->busy || !object->alive) + /* + * Somebody is playing with this page. 
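
hibernate_page_list_zero() above also forces the slack bits past last_page in a bank's final 32-bit word to 1; since a zero bit means "save this page", the pad bits can then never be mistaken for real pages. The same computation in isolation, with example numbers:

#include <stdint.h>
#include <assert.h>

/* Set the unused bits past last_page in a bank's final bitmap word,
   exactly as the loop above does. */
static void set_pad_bits(uint32_t *last_word,
                         uint32_t first_page, uint32_t last_page)
{
    uint32_t last_bit = (last_page - first_page) & 31;
    for (uint32_t bit = 31; bit > last_bit; bit--)
        *last_word |= (0x80000000u >> bit);
}

int main(void)
{
    /* pages 0x100..0x11A: 27 valid bits in the last word, 5 pad bits */
    uint32_t w = 0;
    set_pad_bits(&w, 0x100, 0x11A);
    assert(w == 0x1F);
    return 0;
}
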
+ */ + break; + + if (m->absent || m->unusual || m->error) + /* + * If it's unusual in anyway, ignore it + */ + break; + + if (m->cleaning) + break; + + if (!m->dirty) + { + refmod_state = pmap_get_refmod(m->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + m->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) + m->dirty = TRUE; + } + + /* + * If it's clean we can discard the page on wakeup. + */ + discard = !m->dirty; + } + while (FALSE); + + if (object) + vm_object_unlock(object); + + return (discard); +} + + +static void +discard_page(vm_page_t m) +{ + if (m->absent || m->unusual || m->error) + /* + * If it's unusual in anyway, ignore + */ + return; + + if (!m->no_isync) + { + int refmod_state = pmap_disconnect(m->phys_page); + + if (refmod_state & VM_MEM_REFERENCED) + m->reference = TRUE; + if (refmod_state & VM_MEM_MODIFIED) + m->dirty = TRUE; + } + + if (m->dirty) + panic("discard_page(%p) dirty", m); + if (m->laundry) + panic("discard_page(%p) laundry", m); + if (m->private) + panic("discard_page(%p) private", m); + if (m->fictitious) + panic("discard_page(%p) fictitious", m); + + vm_page_free(m); +} + +/* + Bits zero in the bitmaps => needs to be saved. All pages default to be saved, + pages known to VM to not need saving are subtracted. + Wired pages to be saved are present in page_list_wired, pageable in page_list. +*/ + +void +hibernate_page_list_setall(hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut) +{ + uint64_t start, end, nsec; + vm_page_t m; + uint32_t pages = page_list->page_count; + uint32_t count_zf = 0, count_inactive = 0, count_active = 0; + uint32_t count_wire = pages; + uint32_t count_discard_active = 0, count_discard_inactive = 0; + uint32_t i; + + HIBLOG("hibernate_page_list_setall start\n"); + + clock_get_uptime(&start); + + hibernate_page_list_zero(page_list); + hibernate_page_list_zero(page_list_wired); + + m = (vm_page_t) hibernate_gobble_queue; + while(m) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + m = (vm_page_t) m->pageq.next; + } + + m = (vm_page_t) vm_page_queue_free; + while(m) + { + pages--; + count_wire--; + hibernate_page_bitset(page_list, TRUE, m->phys_page); + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + m = (vm_page_t) m->pageq.next; + } + + queue_iterate( &vm_page_queue_zf, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + count_discard_inactive++; + } + else + count_zf++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + queue_iterate( &vm_page_queue_inactive, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) + && consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + count_discard_inactive++; + } + else + count_inactive++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + queue_iterate( &vm_page_queue_active, + m, + vm_page_t, + pageq ) + { + if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) + && consider_discard(m)) + { + hibernate_page_bitset(page_list, TRUE, m->phys_page); + count_discard_active++; + } + else + count_active++; + count_wire--; + hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); + } + + // pull wired from hibernate_bitmap + + uint32_t bank; + 
hibernate_bitmap_t * bitmap; + hibernate_bitmap_t * bitmap_wired; + + bitmap = &page_list->bank_bitmap[0]; + bitmap_wired = &page_list_wired->bank_bitmap[0]; + for (bank = 0; bank < page_list->bank_count; bank++) + { + for (i = 0; i < bitmap->bitmapwords; i++) + bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i]; + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords]; + bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords]; + } + + // machine dependent adjustments + hibernate_page_list_setall_machine(page_list, page_list_wired, &pages); + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL); + + HIBLOG("pages %d, wire %d, act %d, inact %d, zf %d, could discard act %d inact %d\n", + pages, count_wire, count_active, count_inactive, count_zf, + count_discard_active, count_discard_inactive); + + *pagesOut = pages; +} + +void +hibernate_page_list_discard(hibernate_page_list_t * page_list) +{ + uint64_t start, end, nsec; + vm_page_t m; + vm_page_t next; + uint32_t count_discard_active = 0, count_discard_inactive = 0; + + clock_get_uptime(&start); + + m = (vm_page_t) queue_first(&vm_page_queue_zf); + while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + discard_page(m); + count_discard_inactive++; + } + m = next; + } + + m = (vm_page_t) queue_first(&vm_page_queue_inactive); + while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + discard_page(m); + count_discard_inactive++; + } + m = next; + } + + m = (vm_page_t) queue_first(&vm_page_queue_active); + while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m)) + { + next = (vm_page_t) m->pageq.next; + if (hibernate_page_bittst(page_list, m->phys_page)) + { + discard_page(m); + count_discard_active++; + } + m = next; + } + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d\n", + nsec / 1000000ULL, + count_discard_active, count_discard_inactive); +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +kern_return_t +hibernate_setup(IOHibernateImageHeader * header, + uint32_t free_page_ratio, + uint32_t free_page_time, + hibernate_page_list_t ** page_list_ret, + hibernate_page_list_t ** page_list_wired_ret, + boolean_t * encryptedswap) +{ + hibernate_page_list_t * page_list = NULL; + hibernate_page_list_t * page_list_wired = NULL; + vm_page_t m; + uint32_t i, gobble_count; + + *page_list_ret = NULL; + *page_list_wired_ret = NULL; + + + page_list = hibernate_page_list_allocate(); + if (!page_list) + return (KERN_RESOURCE_SHORTAGE); + page_list_wired = hibernate_page_list_allocate(); + if (!page_list_wired) + { + kfree(page_list, page_list->list_size); + return (KERN_RESOURCE_SHORTAGE); + } + + *encryptedswap = dp_encryption; + + // pages we could force out to reduce hibernate image size + gobble_count = (((uint64_t) page_list->page_count) * ((uint64_t) free_page_ratio)) / 100; + + // no failures hereafter + + hibernate_processor_setup(header); + + HIBLOG("hibernate_alloc_pages flags %08lx, gobbling %d pages\n", + header->processorFlags, gobble_count); + + if (gobble_count) + { + uint64_t start, end, timeout, nsec; + 
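
The bank loop just above folds the wired set into the pageable list with bitmap[i] |= ~bitmap_wired[i]. In the zero-means-save encoding, that marks every page the wired image will save as already handled in the pageable list, so nothing is written twice. In miniature:

#include <stdint.h>
#include <assert.h>

int main(void)
{
    /* 0 bit = "save this page" in both lists */
    uint32_t page_list  = 0xF0F0F0F0;   /* pageable-save candidates */
    uint32_t wired_list = 0xFF00FF00;   /* pages the wired pass saves */
    page_list |= ~wired_list;           /* drop wired pages from this set */
    assert(page_list == 0xF0FFF0FF);
    return 0;
}
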
clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout); + clock_get_uptime(&start); + + for (i = 0; i < gobble_count; i++) + { + while (VM_PAGE_NULL == (m = vm_page_grab())) + { + clock_get_uptime(&end); + if (end >= timeout) + break; + VM_PAGE_WAIT(); + } + if (!m) + break; + m->busy = FALSE; + vm_page_gobble(m); + + m->pageq.next = (queue_entry_t) hibernate_gobble_queue; + hibernate_gobble_queue = m; + } + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL); + } + + *page_list_ret = page_list; + *page_list_wired_ret = page_list_wired; + + return (KERN_SUCCESS); +} + +kern_return_t +hibernate_teardown(hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired) +{ + vm_page_t m, next; + uint32_t count = 0; + + m = (vm_page_t) hibernate_gobble_queue; + while(m) + { + next = (vm_page_t) m->pageq.next; + vm_page_free(m); + count++; + m = next; + } + hibernate_gobble_queue = VM_PAGE_NULL; + + if (count) + HIBLOG("Freed %d pages\n", count); + + if (page_list) + kfree(page_list, page_list->list_size); + if (page_list_wired) + kfree(page_list_wired, page_list_wired->list_size); + + return (KERN_SUCCESS); +} + diff --git a/osfmk/kern/machine.c b/osfmk/kern/machine.c index b29a1ee40..122d2ae6d 100644 --- a/osfmk/kern/machine.c +++ b/osfmk/kern/machine.c @@ -82,6 +82,7 @@ #include #include +#include /* * Exported variables: @@ -266,6 +267,8 @@ processor_doshutdown( simple_unlock(&pset->sched_lock); processor_unlock(processor); + hibernate_vm_lock(); + processor_lock(processor); simple_lock(&pset->sched_lock); } @@ -276,6 +279,8 @@ processor_doshutdown( simple_unlock(&pset->sched_lock); processor_unlock(processor); + if (pcount == 1) + hibernate_vm_unlock(); /* * Continue processor shutdown in shutdown context. diff --git a/osfmk/kern/sched_prim.c b/osfmk/kern/sched_prim.c index 01b353cc1..aaece698a 100644 --- a/osfmk/kern/sched_prim.c +++ b/osfmk/kern/sched_prim.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -96,6 +96,10 @@ #include +#ifdef __ppc__ +#include +#endif + #define DEFAULT_PREEMPTION_RATE 100 /* (1/s) */ int default_preemption_rate = DEFAULT_PREEMPTION_RATE; @@ -2523,7 +2527,13 @@ idle_thread(void) lcount = &processor->runq.count; gcount = &processor->processor_set->runq.count; - (void)splsched(); + + (void)splsched(); /* Turn interruptions off */ + +#ifdef __ppc__ + pmsDown(); /* Step power down. Note: interruptions must be disabled for this call */ +#endif + while ( (*threadp == THREAD_NULL) && (*gcount == 0) && (*lcount == 0) ) { @@ -2546,6 +2556,10 @@ idle_thread(void) pset = processor->processor_set; simple_lock(&pset->sched_lock); +#ifdef __ppc__ + pmsStep(0); /* Step up out of idle power, may start timer for next step */ +#endif + state = processor->state; if (state == PROCESSOR_DISPATCHING) { /* diff --git a/osfmk/mach/ppc/syscall_sw.h b/osfmk/mach/ppc/syscall_sw.h index 3fc7c6106..b1aedddae 100644 --- a/osfmk/mach/ppc/syscall_sw.h +++ b/osfmk/mach/ppc/syscall_sw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
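
The gobble sizing above takes free_page_ratio percent of the total page count, widening to 64 bits so the multiply cannot overflow for large memory sizes. Standalone:

#include <stdint.h>
#include <assert.h>

static uint32_t gobble_target(uint32_t page_count, uint32_t free_page_ratio)
{
    return (uint32_t)(((uint64_t)page_count * free_page_ratio) / 100);
}

int main(void)
{
    /* 2 GB of 4 KB pages at a 30% ratio: grab up to 157286 pages */
    assert(gobble_target(524288, 30) == 157286);
    return 0;
}
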
* * @APPLE_LICENSE_HEADER_START@ * @@ -65,7 +65,7 @@ ppc_trap(CHUDCall,0x6009) ppc_trap(ppcNull,0x600A) ppc_trap(perfmon_control,0x600B) ppc_trap(ppcNullinst,0x600C) - +ppc_trap(pmsCntrl,0x600D) #endif /* _MACH_SYSCALL_SW_H_ */ #endif /* _MACH_PPC_SYSCALL_SW_H_ */ diff --git a/osfmk/ppc/Firmware.s b/osfmk/ppc/Firmware.s index 736859122..d13104750 100644 --- a/osfmk/ppc/Firmware.s +++ b/osfmk/ppc/Firmware.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -2347,7 +2347,6 @@ stsslbm: slbmfee r6,r5 cmplwi r5,64 addi r4,r4,16 blt stsslbm - mtmsr r0 isync diff --git a/osfmk/ppc/Makefile b/osfmk/ppc/Makefile index b978cc676..5649d653b 100644 --- a/osfmk/ppc/Makefile +++ b/osfmk/ppc/Makefile @@ -20,6 +20,7 @@ EXPORT_ONLY_FILES = \ machine_routines.h \ mappings.h \ savearea.h \ + pms.h \ simple_lock.h INSTALL_MD_DIR = ppc diff --git a/osfmk/ppc/PPCcalls.h b/osfmk/ppc/PPCcalls.h index 5d5f75f51..e8beeb20d 100644 --- a/osfmk/ppc/PPCcalls.h +++ b/osfmk/ppc/PPCcalls.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -54,7 +54,7 @@ PPCcallEnt PPCcalls[] = { PPCcall(ppcNull), /* 0x600A Null PPC syscall */ PPCcall(perfmon_control), /* 0x600B performance monitor */ PPCcall(ppcNullinst), /* 0x600C Instrumented Null PPC syscall */ - PPCcall(dis), /* 0x600D disabled */ + PPCcall(pmsCntrl), /* 0x600D Power Management Stepper */ PPCcall(dis), /* 0x600E disabled */ PPCcall(dis), /* 0x600F disabled */ PPCcall(dis), /* 0x6010 disabled */ diff --git a/osfmk/ppc/aligned_data.s b/osfmk/ppc/aligned_data.s index 7145a2493..4f703ebba 100644 --- a/osfmk/ppc/aligned_data.s +++ b/osfmk/ppc/aligned_data.s @@ -172,15 +172,20 @@ EXT(dbspecrs): * Boot processor Interrupt and debug stacks go here. */ + /* in the __HIB section since the hibernate restore code uses this stack. */ .section __HIB, __data .align PPC_PGSHIFT .globl EXT(intstack) EXT(intstack): + .globl EXT(gIOHibernateRestoreStack) +EXT(gIOHibernateRestoreStack): .set .,.+INTSTACK_SIZE + .globl EXT(gIOHibernateRestoreStackEnd) +EXT(gIOHibernateRestoreStackEnd): /* back to the regular __DATA section. */ diff --git a/osfmk/ppc/asm.h b/osfmk/ppc/asm.h index 842167e3e..320af7d1f 100644 --- a/osfmk/ppc/asm.h +++ b/osfmk/ppc/asm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -456,10 +456,6 @@ #define busFIR 0x0A00 #define busFIRrst 0x0A10 -; PowerTune -#define PowerTuneControlReg 0x0AA001 -#define PowerTuneStatusReg 0x408001 - ; HID4 #define hid4RMCI 23 #define hid4FAlgn 24 @@ -662,6 +658,10 @@ #define GUSMstgttim 0x00000038 #define GUSMstgttoff 0x00000004 +/* PowerTune */ +#define PowerTuneControlReg 0x0AA001 +#define PowerTuneStatusReg 0x408001 + /* Tags are placed before Immediately Following Code (IFC) for the debugger * to be able to deduce where to find various registers when backtracing * diff --git a/osfmk/ppc/cpu.c b/osfmk/ppc/cpu.c index f70af13a4..cd42fdd12 100644 --- a/osfmk/ppc/cpu.c +++ b/osfmk/ppc/cpu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
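
The PPCcalls table above is a plain array of handlers indexed by the low bits of the 0x600x firmware-call number, with unused slots pointing at a disabled stub; the pmsCntrl hookup simply fills slot 0xD. A toy version of the dispatch, under the assumption that all names below are illustrative:

#include <stdint.h>
#include <stdio.h>

typedef int (*ppc_call_t)(void);

static int dis_stub(void)      { return -1; }   /* disabled slot */
static int pmsCntrl_stub(void) { return 0;  }   /* new 0x600D handler */

static ppc_call_t calls_demo[16] = {
    [0x0] = dis_stub,
    [0xD] = pmsCntrl_stub,     /* the slot this patch fills in */
};

int main(void)
{
    uint32_t trap = 0x600D;
    ppc_call_t fn = calls_demo[trap & 0xF];
    printf("trap 0x%X -> %d\n", (unsigned)trap, fn ? fn() : -1);
    return 0;
}
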
* * @APPLE_LICENSE_HEADER_START@ * @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -47,6 +48,8 @@ #include #include #include +#include +#include decl_mutex_data(static,ppt_lock); @@ -117,6 +120,8 @@ cpu_init( mttbu(proc_info->save_tbu); mttb(proc_info->save_tbl); } + + setTimerReq(); /* Now that the time base is sort of correct, request the next timer pop */ proc_info->cpu_type = CPU_TYPE_POWERPC; proc_info->cpu_subtype = (cpu_subtype_t)proc_info->pf.rptdProc; @@ -150,6 +155,24 @@ cpu_machine_init( PE_cpu_machine_init(proc_info->cpu_id, !(proc_info->cpu_flags & BootDone)); + if (proc_info->hibernate) { + uint32_t tbu, tbl; + + do { + tbu = mftbu(); + tbl = mftb(); + } while (mftbu() != tbu); + + proc_info->hibernate = 0; + hibernate_machine_init(); + + // hibernate_machine_init() could take minutes and we don't want timeouts + // to fire as soon as scheduling starts. Reset timebase so it appears + // no time has elapsed, as it would for regular sleep. + mttb(0); + mttbu(tbu); + mttb(tbl); + } if (proc_info != mproc_info) { while (!((mproc_info->cpu_flags) & SignalReady)) @@ -167,6 +190,7 @@ cpu_machine_init( thread_wakeup(&proc_info->cpu_flags); } simple_unlock(&SignalReadyLock); + pmsPark(); /* Timers should be cool now, park the power management stepper */ } } @@ -183,30 +207,29 @@ cpu_per_proc_alloc( void *interrupt_stack=0; void *debugger_stack=0; - if ((proc_info = (struct per_proc_info*)kalloc(PAGE_SIZE)) == (struct per_proc_info*)0) - return (struct per_proc_info *)NULL;; + if ((proc_info = (struct per_proc_info*)kalloc(sizeof(struct per_proc_info))) == (struct per_proc_info*)0) + return (struct per_proc_info *)NULL; if ((interrupt_stack = kalloc(INTSTACK_SIZE)) == 0) { - kfree(proc_info, PAGE_SIZE); - return (struct per_proc_info *)NULL;; + kfree(proc_info, sizeof(struct per_proc_info)); + return (struct per_proc_info *)NULL; } -#if MACH_KDP || MACH_KDB + if ((debugger_stack = kalloc(KERNEL_STACK_SIZE)) == 0) { - kfree(proc_info, PAGE_SIZE); + kfree(proc_info, sizeof(struct per_proc_info)); kfree(interrupt_stack, INTSTACK_SIZE); - return (struct per_proc_info *)NULL;; + return (struct per_proc_info *)NULL; } -#endif bzero((void *)proc_info, sizeof(struct per_proc_info)); + proc_info->pp2ndPage = (addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)proc_info + 0x1000) << PAGE_SHIFT; /* Set physical address of the second page */ proc_info->next_savearea = (uint64_t)save_get_init(); proc_info->pf = BootProcInfo.pf; proc_info->istackptr = (vm_offset_t)interrupt_stack + INTSTACK_SIZE - FM_SIZE; proc_info->intstack_top_ss = proc_info->istackptr; -#if MACH_KDP || MACH_KDB proc_info->debstackptr = (vm_offset_t)debugger_stack + KERNEL_STACK_SIZE - FM_SIZE; proc_info->debstack_top_ss = proc_info->debstackptr; -#endif /* MACH_KDP || MACH_KDB */ + return proc_info; } @@ -225,7 +248,7 @@ cpu_per_proc_free( return; kfree((void *)(proc_info->intstack_top_ss - INTSTACK_SIZE + FM_SIZE), INTSTACK_SIZE); kfree((void *)(proc_info->debstack_top_ss - KERNEL_STACK_SIZE + FM_SIZE), KERNEL_STACK_SIZE); - kfree((void *)proc_info, PAGE_SIZE); + kfree((void *)proc_info, sizeof(struct per_proc_info)); /* Release the per_proc */ } @@ -248,7 +271,7 @@ cpu_per_proc_register( cpu = real_ncpus; proc_info->cpu_number = cpu; PerProcTable[cpu].ppe_vaddr = proc_info; - PerProcTable[cpu].ppe_paddr = ((addr64_t)pmap_find_phys(kernel_pmap, (vm_offset_t)proc_info)) << PAGE_SHIFT; + PerProcTable[cpu].ppe_paddr = (addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)proc_info) << PAGE_SHIFT; eieio(); real_ncpus++; 
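
The mftbu/mftb/mftbu loop above is the standard way to read the 64-bit timebase from 32-bit PPC code: re-read the upper half until it is unchanged across the lower-half read, so a carry between the two halves cannot produce a value off by 2^32. A sketch with stub readers that simulate exactly that carry:

#include <stdint.h>
#include <assert.h>

static uint64_t fake_tb = 0xFFFFFFFFull;  /* lower half about to carry */
static uint32_t mftbu_stub(void) { return (uint32_t)(fake_tb >> 32); }
static uint32_t mftb_stub(void)  { fake_tb += 2; return (uint32_t)fake_tb; }

static uint64_t read_timebase(void)
{
    uint32_t tbu, tbl;
    do {
        tbu = mftbu_stub();
        tbl = mftb_stub();
    } while (mftbu_stub() != tbu);    /* upper half moved: try again */
    return ((uint64_t)tbu << 32) | tbl;
}

int main(void)
{
    /* without the retry this would read 0x0000000000000001 */
    assert(read_timebase() == 0x100000003ull);
    return 0;
}
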
mutex_unlock(&ppt_lock); @@ -281,7 +304,13 @@ cpu_start( proc_info->interrupts_enabled = 0; proc_info->pending_ast = AST_NONE; proc_info->istackptr = proc_info->intstack_top_ss; - proc_info->rtcPop = 0xFFFFFFFFFFFFFFFFULL; + proc_info->rtcPop = EndOfAllTime; + proc_info->FPU_owner = 0; + proc_info->VMX_owner = 0; + proc_info->pms.pmsStamp = 0; /* Dummy transition time */ + proc_info->pms.pmsPop = EndOfAllTime; /* Set the pop way into the future */ + proc_info->pms.pmsState = pmsParked; /* Park the stepper */ + proc_info->pms.pmsCSetCmd = pmsCInit; /* Set dummy initial hardware state */ mp = (mapping_t *)(&proc_info->ppUMWmp); mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1; mp->mpSpace = invalSpace; @@ -697,6 +726,8 @@ cpu_sync_timebase( while (*(volatile int *)&(syncClkSpot.done) == FALSE) continue; + setTimerReq(); /* Start the timer */ + (void)ml_set_interrupts_enabled(intr); } diff --git a/osfmk/ppc/db_low_trace.c b/osfmk/ppc/db_low_trace.c index 9bf612b48..72454fed5 100644 --- a/osfmk/ppc/db_low_trace.c +++ b/osfmk/ppc/db_low_trace.c @@ -440,7 +440,7 @@ void db_check_pmaps(db_expr_t addr, int have_addr, db_expr_t count, char * modif /* * Displays iokit junk * - * dp + * di */ void db_piokjunk(void); diff --git a/osfmk/ppc/exception.h b/osfmk/ppc/exception.h index 1994d53a8..41ccba26b 100644 --- a/osfmk/ppc/exception.h +++ b/osfmk/ppc/exception.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -44,6 +44,8 @@ #include #include #include +#include +#include /* Per processor CPU features */ #pragma pack(4) /* Make sure the structure stays as we defined it */ @@ -129,12 +131,10 @@ struct procFeatures { unsigned int pfPowerModes; /* 0x07C */ #define pmDPLLVmin 0x00010000 #define pmDPLLVminb 15 -#define pmPowerTune 0x00000004 -#define pmPowerTuneb 29 +#define pmType 0x000000FF +#define pmPowerTune 0x00000003 #define pmDFS 0x00000002 -#define pmDFSb 30 #define pmDualPLL 0x00000001 -#define pmDualPLLb 31 unsigned int pfPowerTune0; /* 0x080 */ unsigned int pfPowerTune1; /* 0x084 */ unsigned int rsrvd88[6]; /* 0x088 */ @@ -344,13 +344,7 @@ struct per_proc_info { void * pp_cbfr; void * pp_chud; uint64_t rtclock_tick_deadline; - struct rtclock_timer { - uint64_t deadline; - uint32_t - /*boolean_t*/ is_set:1, - has_expired:1, - :0; - } rtclock_timer; + rtclock_timer_t rtclock_timer; unsigned int ppbbTaskEnv; /* BlueBox Task Environment */ /* PPC cache line boundary here - 160 */ @@ -377,7 +371,7 @@ struct per_proc_info { ppnum_t VMMareaPhys; /* vmm state page physical addr */ unsigned int VMMXAFlgs; /* vmm extended flags */ unsigned int FAMintercept; /* vmm FAM Exceptions to intercept */ - unsigned int ppinfo_reserved1; + unsigned int hibernate; /* wake from hibernate */ uint32_t save_tbl; uint32_t save_tbu; @@ -518,10 +512,28 @@ struct per_proc_info { hwCtrs hwCtr; /* Hardware exception counters */ /* - A00 */ - - unsigned int processor[384]; /* processor structure */ + addr64_t pp2ndPage; /* Physical address of the second page of the per_proc */ + uint32_t pprsvd0A08[6]; +/* - A20 */ + pmsd pms; /* Power Management Stepper control */ + unsigned int pprsvd0A40[368]; /* Reserved out to next page boundary */ /* - 1000 */ +/* + * This is the start of the second page of the per_proc block. Because we do not + * allocate physically contiguous memory, it may be physically discontiguous from the + * first page. 
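
cpu_per_proc_alloc() above records pp2ndPage with a separate pmap lookup because kalloc memory is only virtually contiguous; the physical address of the second page generally is not paddr(first page) + 0x1000. Illustrated with a toy translation standing in for pmap_find_phys():

#include <stdint.h>
#include <stdio.h>

typedef uint64_t addr64_t;
#define PAGE_SHIFT 12

/* Toy VA-to-physical-page translation standing in for pmap_find_phys():
   it pretends the allocator handed out two non-adjacent physical pages. */
static uint32_t toy_find_phys(addr64_t va)
{
    return (va & 0x1000) ? 0x8041u : 0x10233u;
}

int main(void)
{
    addr64_t proc_info = 0xC0002000;   /* page-aligned virtual address */
    addr64_t first  = (addr64_t)toy_find_phys(proc_info) << PAGE_SHIFT;
    addr64_t second = (addr64_t)toy_find_phys(proc_info + 0x1000) << PAGE_SHIFT;
    /* second != first + 0x1000: the pages are only virtually adjacent */
    printf("first 0x%llx second 0x%llx\n",
           (unsigned long long)first, (unsigned long long)second);
    return 0;
}
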
Currently there isn't anything here that is accessed translation off, + * but if we need it, pp2ndPage contains the physical address. + * + * Note that the boot processor's per_proc is statically allocated, so it will be a + * V=R contiguous area. That allows access during early boot before we turn translation on + * for the first time. + */ + + unsigned int processor[384]; /* processor structure */ + + unsigned int pprsvd1[640]; /* Reserved out to next page boundary */ +/* - 2000 */ }; @@ -529,7 +541,7 @@ struct per_proc_info { /* - * Macro to conver a processor_t processor to its attached per_proc_info_t per_proc + * Macro to convert a processor_t processor to its attached per_proc_info_t per_proc */ #define PROCESSOR_TO_PER_PROC(x) \ ((struct per_proc_info*)((unsigned int)(x) \ @@ -540,9 +552,9 @@ extern struct per_proc_info BootProcInfo; #define MAX_CPUS 256 struct per_proc_entry { - addr64_t ppe_paddr; + addr64_t ppe_paddr; /* Physical address of the first page of per_proc, 2nd is in pp2ndPage. */ unsigned int ppe_pad4[1]; - struct per_proc_info *ppe_vaddr; + struct per_proc_info *ppe_vaddr; /* Virtual address of the per_proc */ }; extern struct per_proc_entry PerProcTable[MAX_CPUS-1]; @@ -550,7 +562,7 @@ extern struct per_proc_entry PerProcTable[MAX_CPUS-1]; extern char *trap_type[]; -#endif /* ndef ASSEMBLER */ /* with this savearea should be redriven */ +#endif /* ndef ASSEMBLER */ /* with this savearea should be redriven */ /* cpu_flags defs */ #define SIGPactive 0x8000 diff --git a/osfmk/ppc/genassym.c b/osfmk/ppc/genassym.c index 46bbd060c..a30f7da5e 100644 --- a/osfmk/ppc/genassym.c +++ b/osfmk/ppc/genassym.c @@ -68,6 +68,8 @@ #include #include +/* Undefine standard offsetof because it is different than the one here */ +#undef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE)0)->MEMBER) #define DECLARE(SYM,VAL) \ @@ -129,6 +131,7 @@ int main(int argc, char *argv[]) DECLARE("PP_INTSTACK_TOP_SS", offsetof(struct per_proc_info *, intstack_top_ss)); DECLARE("PP_DEBSTACKPTR", offsetof(struct per_proc_info *, debstackptr)); DECLARE("PP_DEBSTACK_TOP_SS", offsetof(struct per_proc_info *, debstack_top_ss)); + DECLARE("PP_HIBERNATE", offsetof(struct per_proc_info *, hibernate)); DECLARE("FPUowner", offsetof(struct per_proc_info *, FPU_owner)); DECLARE("VMXowner", offsetof(struct per_proc_info *, VMX_owner)); DECLARE("holdQFret", offsetof(struct per_proc_info *, holdQFret)); @@ -225,14 +228,12 @@ int main(int argc, char *argv[]) DECLARE("pfPowerModes", offsetof(struct per_proc_info *, pf.pfPowerModes)); DECLARE("pfPowerTune0", offsetof(struct per_proc_info *, pf.pfPowerTune0)); DECLARE("pfPowerTune1", offsetof(struct per_proc_info *, pf.pfPowerTune1)); + DECLARE("pmType", pmType); DECLARE("pmDPLLVmin", pmDPLLVmin); DECLARE("pmDPLLVminb", pmDPLLVminb); DECLARE("pmPowerTune", pmPowerTune); - DECLARE("pmPowerTuneb", pmPowerTuneb); DECLARE("pmDFS", pmDFS); - DECLARE("pmDFSb", pmDFSb); DECLARE("pmDualPLL", pmDualPLL); - DECLARE("pmDualPLLb", pmDualPLLb); DECLARE("pfPTEG", offsetof(struct per_proc_info *, pf.pfPTEG)); DECLARE("pfMaxVAddr", offsetof(struct per_proc_info *, pf.pfMaxVAddr)); DECLARE("pfMaxPAddr", offsetof(struct per_proc_info *, pf.pfMaxPAddr)); @@ -1367,6 +1368,7 @@ int main(int argc, char *argv[]) DECLARE("lgVerCode", offsetof(struct lowglo *, lgVerCode)); DECLARE("lgPPStart", offsetof(struct lowglo *, lgPPStart)); + DECLARE("maxDec", offsetof(struct lowglo *, lgMaxDec)); DECLARE("mckFlags", offsetof(struct lowglo *, lgMckFlags)); DECLARE("lgPMWvaddr", offsetof(struct 
lowglo *, lgPMWvaddr)); DECLARE("lgUMWvaddr", offsetof(struct lowglo *, lgUMWvaddr)); diff --git a/osfmk/ppc/hibernate_ppc.c b/osfmk/ppc/hibernate_ppc.c new file mode 100644 index 000000000..7bf11a53e --- /dev/null +++ b/osfmk/ppc/hibernate_ppc.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define KERNEL + +#include +#include + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +hibernate_page_list_t * +hibernate_page_list_allocate(void) +{ + vm_size_t size; + uint32_t bank; + uint32_t pages, page_count; + hibernate_page_list_t * list; + hibernate_bitmap_t * bitmap; + + page_count = 0; + size = sizeof(hibernate_page_list_t); + + for (bank = 0; bank < (uint32_t) pmap_mem_regions_count; bank++) + { + size += sizeof(hibernate_bitmap_t); + pages = pmap_mem_regions[bank].mrEnd + 1 - pmap_mem_regions[bank].mrStart; + page_count += pages; + size += ((pages + 31) >> 5) * sizeof(uint32_t); + } + + list = kalloc(size); + if (!list) + return (list); + + list->list_size = size; + list->page_count = page_count; + list->bank_count = pmap_mem_regions_count; + + bitmap = &list->bank_bitmap[0]; + for (bank = 0; bank < list->bank_count; bank++) + { + bitmap->first_page = pmap_mem_regions[bank].mrStart; + bitmap->last_page = pmap_mem_regions[bank].mrEnd; + bitmap->bitmapwords = (pmap_mem_regions[bank].mrEnd + 1 + - pmap_mem_regions[bank].mrStart + 31) >> 5; + + bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords]; + } + return (list); +} + +void +hibernate_page_list_setall_machine(hibernate_page_list_t * page_list, + hibernate_page_list_t * page_list_wired, + uint32_t * pagesOut) +{ + uint32_t page, count, PCAsize; + + /* Get total size of PCA table */ + PCAsize = round_page((hash_table_size / PerProcTable[0].ppe_vaddr->pf.pfPTEG) + * sizeof(PCA_t)); + + page = atop_64(hash_table_base - PCAsize); + count = atop_64(hash_table_size + PCAsize); + + hibernate_set_page_state(page_list, page_list_wired, page, count, 0); + pagesOut -= count; + + HIBLOG("removed hash, pca: %d pages\n", count); + + save_snapshot(); +} + +kern_return_t +hibernate_processor_setup(IOHibernateImageHeader * header) +{ + header->processorFlags = PerProcTable[0].ppe_vaddr->pf.Available; + + PerProcTable[0].ppe_vaddr->hibernate = 1; + + return (KERN_SUCCESS); +} + +void +hibernate_vm_lock(void) +{ + if (getPerProc()->hibernate) + { + 
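
hibernate_page_list_allocate() above sizes the list as one header plus, per bank, a bitmap header and one 32-bit word for every 32 pages. The word count in isolation:

#include <stdint.h>
#include <assert.h>

static uint32_t bitmap_words(uint32_t first_page, uint32_t last_page)
{
    return (last_page + 1 - first_page + 31) >> 5;   /* round up to words */
}

int main(void)
{
    /* a bank covering pages 0 through 0x7F000 inclusive */
    assert(bitmap_words(0, 0x7F000) == 16257);
    return 0;
}
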
vm_page_lock_queues(); + mutex_lock(&vm_page_queue_free_lock); + } +} + +void +hibernate_vm_unlock(void) +{ + if (getPerProc()->hibernate) + { + mutex_unlock(&vm_page_queue_free_lock); + vm_page_unlock_queues(); + } +} + +void ml_ppc_sleep(void) +{ + struct per_proc_info *proc_info; + boolean_t dohalt; + + proc_info = getPerProc(); + if (!proc_info->hibernate) + { + ml_ppc_do_sleep(); + return; + } + + { + uint64_t start, end, nsec; + + HIBLOG("mapping_hibernate_flush start\n"); + clock_get_uptime(&start); + + mapping_hibernate_flush(); + + clock_get_uptime(&end); + absolutetime_to_nanoseconds(end - start, &nsec); + HIBLOG("mapping_hibernate_flush time: %qd ms\n", nsec / 1000000ULL); + } + + dohalt = hibernate_write_image(); + + if (dohalt) + { + // off + HIBLOG("power off\n"); + if (PE_halt_restart) + (*PE_halt_restart)(kPEHaltCPU); + } + else + { + // sleep + HIBLOG("sleep\n"); + + // should we come back via regular wake, set the state in memory. + PerProcTable[0].ppe_vaddr->hibernate = 0; + + PE_cpu_machine_quiesce(proc_info->cpu_id); + return; + } +} + diff --git a/osfmk/ppc/hibernate_restore.s b/osfmk/ppc/hibernate_restore.s new file mode 100644 index 000000000..1d46fe2a6 --- /dev/null +++ b/osfmk/ppc/hibernate_restore.s @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2004 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#include +#include +#include + +/* +This code is linked into the kernel but part of the "__HIB" section, which means +its used by code running in the special context of restoring the kernel text and data +from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything +it calls or references (ie. hibernate_restore_phys_page()) +needs to be careful to only touch memory also in the "__HIB" section. +*/ + +/* +void +hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags); +*/ + + .align 5 + .globl EXT(hibernate_restore_phys_page) + .globl EXT(hibernate_machine_entrypoint) + +LEXT(hibernate_restore_phys_page) + + andi. 
r0, r8, pf64Bit + bne hibernate_restore_phys_page64 + + srwi r10,r7,5 ; r10 <- 32-byte chunks to xfer + mtctr r10 + cmpwi r4, 0 + beq hibernate_restore_phys_pageFlush + +hibernate_restore_phys_pageCopy: + lwz r0,0(r4) + lwz r2,4(r4) + lwz r7,8(r4) + lwz r8,12(r4) + lwz r9,16(r4) + lwz r10,20(r4) + lwz r11,24(r4) + lwz r12,28(r4) + + dcbz 0,r6 ; avoid prefetch of next cache line + stw r0,0(r6) + stw r2,4(r6) + stw r7,8(r6) + stw r8,12(r6) + stw r9,16(r6) + stw r10,20(r6) + stw r11,24(r6) + stw r12,28(r6) + + dcbf 0, r6 + sync + icbi 0, r6 + isync + sync + + addi r4,r4,32 + addi r6,r6,32 + + bdnz hibernate_restore_phys_pageCopy ; loop if more chunks + blr + +hibernate_restore_phys_pageFlush: + dcbf 0, r6 + sync + icbi 0, r6 + isync + sync + + addi r6,r6,32 + bdnz hibernate_restore_phys_pageFlush ; loop if more chunks + blr + + +hibernate_restore_phys_page64: + rlwinm r3,r3,0,1,0 ; Duplicate high half of long long paddr into top of reg + rlwimi r3,r4,0,0,31 ; Combine bottom of long long to full 64-bits + rlwinm r4,r5,0,1,0 ; Duplicate high half of long long paddr into top of reg + rlwimi r4,r6,0,0,31 ; Combine bottom of long long to full 64-bits + + mfmsr r9 ; Get the MSR + li r0,1 ; Note - we use this in a couple places below + rldimi r9,r0,63,MSR_SF_BIT ; set SF on in MSR we will copy with + mtmsrd r9 ; turn 64-bit addressing on + isync ; wait for it to happen + + srwi r10,r7,7 ; r10 <- 128-byte chunks to xfer + mtctr r10 + cmpdi r3, 0 + beq hibernate_restore_phys_page64Flush + +hibernate_restore_phys_page64Copy: + ld r0,0(r3) + ld r2,8(r3) + ld r7,16(r3) + ld r8,24(r3) + ld r9,32(r3) + ld r10,40(r3) + ld r11,48(r3) + ld r12,56(r3) + + dcbz128 0,r4 ; avoid prefetch of next cache line + std r0,0(r4) + std r2,8(r4) + std r7,16(r4) + std r8,24(r4) + std r9,32(r4) + std r10,40(r4) + std r11,48(r4) + std r12,56(r4) + + ld r0,64(r3) ; load 2nd half of chunk + ld r2,72(r3) + ld r7,80(r3) + ld r8,88(r3) + ld r9,96(r3) + ld r10,104(r3) + ld r11,112(r3) + ld r12,120(r3) + + std r0,64(r4) + std r2,72(r4) + std r7,80(r4) + std r8,88(r4) + std r9,96(r4) + std r10,104(r4) + std r11,112(r4) + std r12,120(r4) + + dcbf 0, r4 + sync + icbi 0, r4 + isync + sync + + addi r3,r3,128 + addi r4,r4,128 + + bdnz hibernate_restore_phys_page64Copy ; loop if more chunks + + +hibernate_restore_phys_page64Done: + mfmsr r9 ; Get the MSR we used to copy + rldicl r9,r9,0,MSR_SF_BIT+1 ; clear SF + mtmsrd r9 ; turn 64-bit mode off + isync ; wait for it to happen + blr + +hibernate_restore_phys_page64Flush: + dcbf 0, r4 + sync + icbi 0, r4 + isync + sync + + addi r4,r4,128 + + bdnz hibernate_restore_phys_page64Flush ; loop if more chunks + b hibernate_restore_phys_page64Done + +LEXT(hibernate_machine_entrypoint) + b EXT(hibernate_kernel_entrypoint) + diff --git a/osfmk/ppc/hw_exception.s b/osfmk/ppc/hw_exception.s index f2099e2a7..4c66797de 100644 --- a/osfmk/ppc/hw_exception.s +++ b/osfmk/ppc/hw_exception.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -434,7 +434,7 @@ noassist: cmplwi r15,0x7000 ; Do we have a fast path trap? ; and the savearea/pcb as the first parameter. ; It is up to the callee to enable interruptions if ; they should be. We are in a state here where -; both interrupts and preemption is ok, but because we could +; both interrupts and preemption are ok, but because we could ; be calling diagnostic code we will not enable. 
; ; Also, the callee is responsible for finding any parameters @@ -504,11 +504,11 @@ LEXT(ppcscret) */ ksystrace: - mr r4,r30 ; Pass in saved state + mr r4,r30 ; Pass in saved state bl EXT(syscall_trace) - cmplw r31,r29 ; Is this syscall in the table? - add r31,r27,r28 ; Point right to the syscall table entry + cmplw r31,r29 ; Is this syscall in the table? + add r31,r27,r28 ; Point right to the syscall table entry bge- .L_call_server_syscall_exception ; The syscall number is invalid @@ -520,20 +520,20 @@ ksystrace: .L_ksystrace_munge: cmplwi r0,0 ; do we have a munger to call? - mtctr r0 ; Set the function call address - addi r3,r30,saver3 ; Pointer to args from save area - addi r4,r1,FM_ARG0+ARG_SIZE ; Pointer for munged args + mtctr r0 ; Set the function call address + addi r3,r30,saver3 ; Pointer to args from save area + addi r4,r1,FM_ARG0+ARG_SIZE ; Pointer for munged args beq-- .L_ksystrace_trapcall ; just make the trap call - bctrl ; Call the munge function + bctrl ; Call the munge function .L_ksystrace_trapcall: - lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address - mtctr r0 ; Set the function call address - addi r3,r1,FM_ARG0+ARG_SIZE ; Pointer to munged args + lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address + mtctr r0 ; Set the function call address + addi r3,r1,FM_ARG0+ARG_SIZE ; Pointer to munged args bctrl - mr r4,r30 ; Pass in the savearea - bl EXT(syscall_trace_end) ; Trace the exit of the system call + mr r4,r30 ; Pass in the savearea + bl EXT(syscall_trace_end) ; Trace the exit of the system call b .L_mach_return @@ -553,27 +553,27 @@ ksystrace: ; Call a function that can print out our syscall info ; Note that we don t care about any volatiles yet ; - lwz r10,ACT_TASK(r13) ; Get our task + lwz r10,ACT_TASK(r13) ; Get our task lwz r0,saver0+4(r30) - lis r8,hi16(EXT(kdebug_enable)) ; Get top of kdebug_enable + lis r8,hi16(EXT(kdebug_enable)) ; Get top of kdebug_enable lis r28,hi16(EXT(mach_trap_table)) ; Get address of table ori r8,r8,lo16(EXT(kdebug_enable)) ; Get bottom of kdebug_enable - lwz r8,0(r8) ; Get kdebug_enable + lwz r8,0(r8) ; Get kdebug_enable - lwz r7,TASK_SYSCALLS_MACH(r10) ; Get the current count - neg r31,r0 ; Make this positive - mr r3,r31 ; save it - slwi r27,r3,4 ; multiply by 16 - slwi r3,r3,2 ; and the original by 4 + lwz r7,TASK_SYSCALLS_MACH(r10) ; Get the current count + neg r31,r0 ; Make this positive + mr r3,r31 ; save it + slwi r27,r3,4 ; multiply by 16 + slwi r3,r3,2 ; and the original by 4 ori r28,r28,lo16(EXT(mach_trap_table)) ; Get address of table - add r27,r27,r3 ; for a total of 20x (5 words/entry) - addi r7,r7,1 ; Bump TASK_SYSCALLS_MACH count - cmplwi r8,0 ; Is kdebug_enable non-zero - stw r7,TASK_SYSCALLS_MACH(r10) ; Save count - bne-- ksystrace ; yes, tracing enabled + add r27,r27,r3 ; for a total of 20x (5 words/entry) + addi r7,r7,1 ; Bump TASK_SYSCALLS_MACH count + cmplwi r8,0 ; Is kdebug_enable non-zero + stw r7,TASK_SYSCALLS_MACH(r10) ; Save count + bne-- ksystrace ; yes, tracing enabled - cmplwi r31,MACH_TRAP_TABLE_COUNT ; Is this syscall in the table? - add r31,r27,r28 ; Point right to the syscall table entry + cmplwi r31,MACH_TRAP_TABLE_COUNT ; Is this syscall in the table? 
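
The index arithmetic in this hunk computes a 20-byte stride (five words per mach_trap_table entry) without a multiply: 20x is formed as 16x + 4x with two shifts and an add. The same in C:

#include <stdint.h>
#include <assert.h>

static uint32_t trap_table_offset(uint32_t syscall_num)
{
    return (syscall_num << 4) + (syscall_num << 2);   /* 16x + 4x = 20x */
}

int main(void)
{
    assert(trap_table_offset(3) == 60);   /* entry 3 starts 60 bytes in */
    return 0;
}
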
+ add r31,r27,r28 ; Point right to the syscall table entry bge-- .L_call_server_syscall_exception ; The syscall number is invalid @@ -586,15 +586,15 @@ ksystrace: .L_kernel_syscall_munge: cmplwi r0,0 ; test for null munger mtctr r0 ; Set the function call address - addi r3,r30,saver3 ; Pointer to args from save area - addi r4,r1,FM_ARG0+ARG_SIZE ; Pointer for munged args - beq-- .L_kernel_syscall_trapcall ; null munger - skip to trap call - bctrl ; Call the munge function + addi r3,r30,saver3 ; Pointer to args from save area + addi r4,r1,FM_ARG0+ARG_SIZE ; Pointer for munged args + beq-- .L_kernel_syscall_trapcall ; null munger - skip to trap call + bctrl ; Call the munge function .L_kernel_syscall_trapcall: - lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address - mtctr r0 ; Set the function call address - addi r3,r1,FM_ARG0+ARG_SIZE ; Pointer to munged args + lwz r0,MACH_TRAP_FUNCTION(r31) ; Pick up the function address + mtctr r0 ; Set the function call address + addi r3,r1,FM_ARG0+ARG_SIZE ; Pointer to munged args #if FPFLOOD stfd f31,emfp31(r25) ; (TEST/DEBUG) @@ -611,12 +611,12 @@ ksystrace: */ .L_mach_return: - srawi r0,r3,31 ; properly extend the return code - cmpi cr0,r3,KERN_INVALID_ARGUMENT ; deal with invalid system calls - mr r31,r16 ; Move the current thread pointer - stw r0, saver3(r30) ; stash the high part of the return code - stw r3,saver3+4(r30) ; Stash the low part of the return code - beq- cr0,.L_mach_invalid_ret ; otherwise fall through into the normal return path + srawi r0,r3,31 ; properly extend the return code + cmpi cr0,r3,KERN_INVALID_ARGUMENT ; deal with invalid system calls + mr r31,r16 ; Move the current thread pointer + stw r0, saver3(r30) ; stash the high part of the return code + stw r3,saver3+4(r30) ; Stash the low part of the return code + beq-- cr0,.L_mach_invalid_ret ; otherwise fall through into the normal return path .L_mach_invalid_arg: @@ -681,12 +681,12 @@ scrnotkern: * we want to pass the error code back to the caller */ lwz r0,saver0+4(r30) ; reload the original syscall number - neg r28,r0 ; Make this positive - mr r4,r28 ; save a copy - slwi r27,r4,4 ; multiply by 16 - slwi r4,r4,2 ; and another 4 + neg r28,r0 ; Make this positive + mr r4,r28 ; save a copy + slwi r27,r4,4 ; multiply by 16 + slwi r4,r4,2 ; and another 4 lis r28,hi16(EXT(mach_trap_table)) ; Get address of table - add r27,r27,r4 ; for a total of 20x (5 words/entry) + add r27,r27,r4 ; for a total of 20x (5 words/entry) ori r28,r28,lo16(EXT(mach_trap_table)) ; Get address of table add r28,r27,r28 ; Point right to the syscall table entry lwz r27,MACH_TRAP_FUNCTION(r28) ; Pick up the function address @@ -957,7 +957,7 @@ ihbootnover: ; (TEST/DEBUG) mr r4,r30 lwz r5,savedsisr(r30) ; Get the DSISR lwz r6,savedar+4(r30) ; Get the DAR - + #if FPFLOOD stfd f31,emfp31(r25) ; (TEST/DEBUG) #endif @@ -982,7 +982,7 @@ LEXT(ihandler_ret) ; Marks our return point from debugger entry lwz r10,ACT_PER_PROC(r8) ; Get the per_proc block lwz r7,SAVflags(r3) ; Pick up the flags - lwz r9,SAVprev+4(r3) ; Get previous save area + lwz r9,SAVprev+4(r3) ; Get previous save area cmplwi cr1,r8,0 ; Are we still initializing? lwz r12,savesrr1+4(r3) ; Get the MSR we will load on return andis. r11,r7,hi16(SAVrststk) ; Is this the first on the stack? diff --git a/osfmk/ppc/hw_lock.s b/osfmk/ppc/hw_lock.s index 7ebbedecd..01b1bdced 100644 --- a/osfmk/ppc/hw_lock.s +++ b/osfmk/ppc/hw_lock.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -1080,7 +1080,7 @@ mlckslow1: bl lockDisa ; Go get a lock on the mutex's interlock lock mr. r4,r3 ; Did we get it? lwz r3,FM_ARG0(r1) ; Restore the lock address - bne+ mlGotInt ; We got it just fine... + bne++ mlGotInt ; We got it just fine... mr r4,r11 ; Saved lock addr lis r3,hi16(mutex_failed1) ; Get the failed mutex message ori r3,r3,lo16(mutex_failed1) ; Get the failed mutex message @@ -1297,9 +1297,9 @@ L_mutex_try_slow: bne- mtFail ; Someone's got it already... bl lockDisa ; Go get a lock on the mutex's interlock lock - mr. r4,r3 ; Did we get it? */ + mr. r4,r3 ; Did we get it? lwz r3,FM_ARG0(r1) ; Restore the lock address - bne+ mtGotInt ; We got it just fine... + bne++ mtGotInt ; We got it just fine... mr r4,r11 ; Saved lock addr lis r3,hi16(mutex_failed2) ; Get the failed mutex message ori r3,r3,lo16(mutex_failed2) ; Get the failed mutex message @@ -1490,7 +1490,7 @@ L_mutex_unlock_slow: bl lockDisa ; Go get a lock on the mutex's interlock lock mr. r4,r3 ; Did we get it? lwz r3,FM_ARG0(r1) ; Restore the lock address - bne+ muGotInt ; We got it just fine... + bne++ muGotInt ; We got it just fine... mr r4,r11 ; Saved lock addr lis r3,hi16(mutex_failed3) ; Get the failed mutex message ori r3,r3,lo16(mutex_failed3) ; Get the failed mutex message diff --git a/osfmk/ppc/hw_vm.s b/osfmk/ppc/hw_vm.s index 988b3e373..e13bd5cfb 100644 --- a/osfmk/ppc/hw_vm.s +++ b/osfmk/ppc/hw_vm.s @@ -31,8 +31,6 @@ #include #include #include - -#define INSTRUMENT 0 .text @@ -199,53 +197,28 @@ hamRescan: lwz r4,mpVAddr(r31) ; Get the new vaddr top half mr r29,r4 ; Save top half of vaddr for later mr r30,r5 ; Save bottom half of vaddr for later -#if INSTRUMENT - mfspr r0,pmc1 ; INSTRUMENT - saveinstr[16] - Take stamp before mapSearchFull - stw r0,0x6100+(16*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r0,pmc2 ; INSTRUMENT - Get stamp - stw r0,0x6100+(16*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r0,pmc3 ; INSTRUMENT - Get stamp - stw r0,0x6100+(16*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r0,pmc4 ; INSTRUMENT - Get stamp - stw r0,0x6100+(16*16)+0xC(0) ; INSTRUMENT - Save it -#endif - bl EXT(mapSearchFull) ; Go see if we can find it -#if INSTRUMENT - mfspr r0,pmc1 ; INSTRUMENT - saveinstr[14] - Take stamp after mapSearchFull - stw r0,0x6100+(17*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r0,pmc2 ; INSTRUMENT - Get stamp - stw r0,0x6100+(17*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r0,pmc3 ; INSTRUMENT - Get stamp - stw r0,0x6100+(17*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r0,pmc4 ; INSTRUMENT - Get stamp - stw r0,0x6100+(17*16)+0xC(0) ; INSTRUMENT - Save it -#endif - - rlwinm r0,r24,0,mpType ; Isolate the mapping type - rlwinm r23,r23,12,0,19 ; Convert standard block size to bytes - cmplwi r0,mpNest ; Is this a nested type? - cmplwi cr1,r0,mpLinkage ; Linkage type? - cror cr0_eq,cr1_eq,cr0_eq ; Nested or linkage type? 
+ li r22,lo16(0x800C) ; Get 0xFFFF800C + rlwinm r0,r24,mpBSub+1,31,31 ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu + addi r23,r23,1 ; Get actual length + rlwnm r22,r22,r0,27,31 ; Rotate to get 12 or 25 lis r0,0x8000 ; Get 0xFFFFFFFF80000000 - li r22,0 ; Assume high part of size is 0 - bne++ hamNoNest ; This is not a nested or linkage type - - rlwinm r22,r23,16,16,31 ; Convert partially converted size to segments - rlwinm r23,r23,16,0,3 ; Finish shift - -hamNoNest: add r0,r0,r0 ; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit + slw r9,r23,r22 ; Isolate the low part + rlwnm r22,r23,r22,22,31 ; Extract the high order + addic r23,r9,-4096 ; Get the length to the last page + add r0,r0,r0 ; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit + addme r22,r22 ; Do high order as well... mr. r3,r3 ; Did we find a mapping here? - or r0,r0,r30 ; Make sure a carry will propagate all the way in 64-bit - crmove cr5_eq,cr0_eq ; Remember that if we found the mapping + or r0,r30,r0 ; Fill high word of 64-bit with 1s so we will properly carry + bne-- hamOverlay ; We found a mapping, this is no good, can not double map... + addc r9,r0,r23 ; Add size to get last page in new range or. r0,r4,r5 ; Are we beyond the end? adde r8,r29,r22 ; Add the rest of the length on - bne-- cr5,hamOverlay ; Yeah, this is no good, can not double map... rlwinm r9,r9,0,0,31 ; Clean top half of sum beq++ hamFits ; We are at the end... - + cmplw cr1,r9,r5 ; Is the bottom part of our end less? cmplw r8,r4 ; Is our end before the next (top part) crand cr0_eq,cr0_eq,cr1_lt ; Is the second half less and the first half equal? @@ -283,32 +256,10 @@ hamFits: mr. r21,r21 ; Do we already have the exclusive lock? .align 5 -hamGotX: -#if INSTRUMENT - mfspr r3,pmc1 ; INSTRUMENT - saveinstr[18] - Take stamp before mapSearchFull - stw r3,0x6100+(18*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r3,pmc2 ; INSTRUMENT - Get stamp - stw r3,0x6100+(18*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r3,pmc3 ; INSTRUMENT - Get stamp - stw r3,0x6100+(18*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r3,pmc4 ; INSTRUMENT - Get stamp - stw r4,0x6100+(18*16)+0xC(0) ; INSTRUMENT - Save it -#endif - mr r3,r28 ; Get the pmap to insert into +hamGotX: mr r3,r28 ; Get the pmap to insert into mr r4,r31 ; Point to the mapping bl EXT(mapInsert) ; Insert the mapping into the list -#if INSTRUMENT - mfspr r4,pmc1 ; INSTRUMENT - saveinstr[19] - Take stamp before mapSearchFull - stw r4,0x6100+(19*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r4,pmc2 ; INSTRUMENT - Get stamp - stw r4,0x6100+(19*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r4,pmc3 ; INSTRUMENT - Get stamp - stw r4,0x6100+(19*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r4,pmc4 ; INSTRUMENT - Get stamp - stw r4,0x6100+(19*16)+0xC(0) ; INSTRUMENT - Save it -#endif - rlwinm r11,r24,mpPcfgb+2,mpPcfg>>6 ; Get the index into the page config table lhz r8,mpSpace(r31) ; Get the address space lwz r11,lgpPcfg(r11) ; Get the page config @@ -384,18 +335,7 @@ hamReturn: bt++ pf64Bitb,hamR64 ; Yes... hamR64: mtmsrd r17 ; Restore enables/translation/etc. 
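
The li r22,lo16(0x800C) sequence above packs both block-size shifts into one immediate: sign extension makes the register 0xFFFF800C, whose low five bits are 12 (a 4 KB bsu), while rotating left one bit before masking yields 25 (a 32 MB bsu). Checked in C:

#include <stdint.h>
#include <assert.h>

static uint32_t rotl32(uint32_t x, unsigned n)
{
    return n ? (x << n) | (x >> (32 - n)) : x;
}

static uint32_t block_shift(int is_32mb)   /* 0 = 4 KB bsu, 1 = 32 MB bsu */
{
    return rotl32(0xFFFF800Cu, is_32mb) & 31;
}

int main(void)
{
    assert(block_shift(0) == 12);   /* log2(4 KB)  */
    assert(block_shift(1) == 25);   /* log2(32 MB) */
    return 0;
}
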
isync -hamReturnC: -#if INSTRUMENT - mfspr r0,pmc1 ; INSTRUMENT - saveinstr[20] - Take stamp before mapSearchFull - stw r0,0x6100+(20*16)+0x0(0) ; INSTRUMENT - Save it - mfspr r0,pmc2 ; INSTRUMENT - Get stamp - stw r0,0x6100+(20*16)+0x4(0) ; INSTRUMENT - Save it - mfspr r0,pmc3 ; INSTRUMENT - Get stamp - stw r0,0x6100+(20*16)+0x8(0) ; INSTRUMENT - Save it - mfspr r0,pmc4 ; INSTRUMENT - Get stamp - stw r0,0x6100+(20*16)+0xC(0) ; INSTRUMENT - Save it -#endif - lwz r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Get the return +hamReturnC: lwz r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1) ; Get the return lwz r17,FM_ARG0+0x00(r1) ; Save a register lwz r18,FM_ARG0+0x04(r1) ; Save a register lwz r19,FM_ARG0+0x08(r1) ; Save a register @@ -948,13 +888,18 @@ hrmPanic: lis r0,hi16(Choke) ; System abend .align 5 -hrmBlock32: - lhz r23,mpSpace(r31) ; Get the address space hash +hrmBlock32: lis r29,0xD000 ; Get shift to 32MB bsu + rlwinm r24,r20,mpBSub+1+2,29,29 ; Rotate to get 0 if 4K bsu or 13 if 32MB bsu lhz r25,mpBSize(r31) ; Get the number of pages in block + lhz r23,mpSpace(r31) ; Get the address space hash lwz r9,mpBlkRemCur(r31) ; Get our current remove position + rlwnm r29,r29,r24,28,31 ; Rotate to get 0 or 13 + addi r25,r25,1 ; Account for zero-based counting ori r0,r20,mpRIP ; Turn on the remove in progress flag + slw r25,r25,r29 ; Adjust for 32MB if needed mfsdr1 r29 ; Get the hash table base and size rlwinm r24,r23,maxAdrSpb,32-maxAdrSpb-maxAdrSpb,31-maxAdrSpb ; Get high order of hash + subi r25,r25,1 ; Convert back to zero-based counting lwz r27,mpVAddr+4(r31) ; Get the base vaddr sub r4,r25,r9 ; Get number of pages left cmplw cr1,r9,r25 ; Have we already hit the end? @@ -1279,13 +1224,18 @@ hrmRetn64: rldicr r8,r31,0,51 ; Find start of page .align 5 -hrmBlock64: +hrmBlock64: lis r29,0xD000 ; Get shift to 32MB bsu + rlwinm r10,r20,mpBSub+1+2,29,29 ; Rotate to get 0 if 4K bsu or 13 if 32MB bsu lhz r24,mpSpace(r31) ; Get the address space hash lhz r25,mpBSize(r31) ; Get the number of pages in block lwz r9,mpBlkRemCur(r31) ; Get our current remove position + rlwnm r29,r29,r10,28,31 ; Rotate to get 0 or 13 + addi r25,r25,1 ; Account for zero-based counting ori r0,r20,mpRIP ; Turn on the remove in progress flag + slw r25,r25,r29 ; Adjust for 32MB if needed mfsdr1 r29 ; Get the hash table base and size ld r27,mpVAddr(r31) ; Get the base vaddr + subi r25,r25,1 ; Convert back to zero-based counting rlwinm r5,r29,0,27,31 ; Isolate the size sub r4,r25,r9 ; Get number of pages left cmplw cr1,r9,r25 ; Have we already hit the end? @@ -2545,7 +2495,7 @@ hcmNextPhys32: addi r3,r3,physEntrySize ; Next phys_entry hcmNextMap32: - rlwinm. r4,r4,0,0,25 ; Clean and test mapping address + rlwinm. r4,r4,0,~ppFlags ; Clean and test mapping address beq hcmNoMap32 ; Did not find one... lwz r0,mpPte(r4) ; Grab the offset to the PTE @@ -2688,7 +2638,7 @@ LEXT(hw_walk_phys) blt hwpSrc32 ; Do TLB invalidate/purge/merge/reload for each mapping beq hwpMSrc32 ; Do TLB merge for each mapping -hwpQSrc32: rlwinm. r31,r31,0,0,25 ; Clean and test mapping address +hwpQSrc32: rlwinm. r31,r31,0,~ppFlags ; Clean and test mapping address beq hwpNone32 ; Did not find one... bctrl ; Call the op function @@ -2698,7 +2648,7 @@ hwpQSrc32: rlwinm. r31,r31,0,0,25 ; Clean and test mapping address b hwpQSrc32 ; Check it out... .align 5 -hwpMSrc32: rlwinm. r31,r31,0,0,25 ; Clean and test mapping address +hwpMSrc32: rlwinm. r31,r31,0,~ppFlags ; Clean and test mapping address beq hwpNone32 ; Did not find one... 
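
hrmBlock32/hrmBlock64 above keep mpBSize zero-based: to convert a count of 32 MB units into 4 KB pages they add one, shift by the bsu-selected amount (32 MB / 4 KB = 2^13), and subtract one again. In isolation:

#include <stdint.h>
#include <assert.h>

static uint32_t scale_zero_based(uint32_t n_minus_1, unsigned shift)
{
    return ((n_minus_1 + 1) << shift) - 1;
}

int main(void)
{
    /* two 32 MB units = 16384 4 KB pages, stored zero-based as 16383 */
    assert(scale_zero_based(1, 13) == 16383);
    return 0;
}
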
bl mapMergeRC32 ; Merge reference and change into mapping and physent @@ -3541,11 +3491,11 @@ mapFLPNitr: lwz r3,mrPhysTab(r9) ; Get the actual table address lwz r5,mrStart(r9) ; Get start of table entry lwz r0,mrEnd(r9) ; Get end of table entry addi r9,r9,mrSize ; Point to the next slot - cmplwi cr2,r3,0 ; Are we at the end of the table? + cmplwi cr7,r3,0 ; Are we at the end of the table? cmplw r2,r5 ; See if we are in this table cmplw cr1,r2,r0 ; Check end also sub r4,r2,r5 ; Calculate index to physical entry - beq-- cr2,mapFLPNmiss ; Leave if we did not find an entry... + beq-- cr7,mapFLPNmiss ; Leave if we did not find an entry... cror cr0_lt,cr0_lt,cr1_gt ; Set CR0_LT if it is NOT this entry slwi r4,r4,3 ; Get offset to physical entry diff --git a/osfmk/ppc/interrupt.c b/osfmk/ppc/interrupt.c index 170ad2227..aba525045 100644 --- a/osfmk/ppc/interrupt.c +++ b/osfmk/ppc/interrupt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -86,9 +86,15 @@ struct savearea * interrupt( } #endif + now = mach_absolute_time(); /* Find out what time it is */ + + if(now >= proc_info->pms.pmsPop) { /* Is it time for power management state change? */ + pmsStep(1); /* Yes, advance step */ + now = mach_absolute_time(); /* Get the time again since we ran a bit */ + } + thread = current_thread(); /* Find ourselves */ if(thread->machine.qactTimer != 0) { /* Is the timer set? */ - clock_get_uptime(&now); /* Find out what time it is */ if (thread->machine.qactTimer <= now) { /* It is set, has it popped? */ thread->machine.qactTimer = 0; /* Clear single shot timer */ if((unsigned int)thread->machine.vmmControl & 0xFFFFFFFE) { /* Are there any virtual machines? 
*/ @@ -97,7 +103,7 @@ struct savearea * interrupt( } } - rtclock_intr(0, ssp, 0); + rtclock_intr(ssp); break; case T_INTERRUPT: @@ -130,10 +136,8 @@ struct savearea * interrupt( default: -#if MACH_KDP || MACH_KDB - if (!Call_Debugger(type, ssp)) -#endif - unresolved_kernel_trap(type, ssp, dsisr, dar, NULL); + if (!Call_Debugger(type, ssp)) + unresolved_kernel_trap(type, ssp, dsisr, dar, NULL); break; } diff --git a/osfmk/ppc/io_map.c b/osfmk/ppc/io_map.c index 60a1688ad..3c47658ec 100644 --- a/osfmk/ppc/io_map.c +++ b/osfmk/ppc/io_map.c @@ -68,7 +68,7 @@ io_map(phys_addr, size) (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded), /* Map as I/O page */ - size >> 12, VM_PROT_READ|VM_PROT_WRITE); + (size >> 12), VM_PROT_READ|VM_PROT_WRITE); return (start + (phys_addr & PAGE_MASK)); /* Pass back the physical address */ @@ -117,7 +117,7 @@ vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size) (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded), /* Map as I/O page */ - size >> 12, VM_PROT_READ|VM_PROT_WRITE); + (size >> 12), VM_PROT_READ|VM_PROT_WRITE); return (start + (phys_addr & PAGE_MASK)); } diff --git a/osfmk/ppc/lowglobals.h b/osfmk/ppc/lowglobals.h index 512cef481..debb37c22 100644 --- a/osfmk/ppc/lowglobals.h +++ b/osfmk/ppc/lowglobals.h @@ -60,7 +60,9 @@ typedef struct lowglo { uint64_t lgPMWvaddr; /* 5020 physical memory window virtual address */ uint64_t lgUMWvaddr; /* 5028 user memory window virtual address */ unsigned int lgVMMforcedFeats; /* 5030 VMM boot-args forced feature flags */ - unsigned int lgRsv034[19]; /* 5034 reserved */ + unsigned int lgMaxDec; /* 5034 Maximum decrementer we can set */ + unsigned int lgPmsCtlp; /* 5038 Pointer to power management stepper control */ + unsigned int lgRsv03C[17]; /* 503C reserved */ traceWork lgTrcWork; /* 5080 Tracing control block - trcWork */ unsigned int lgRsv0A0[24]; /* 50A0 reserved */ struct Saveanchor lgSaveanchor; /* 5100 Savearea anchor - saveanchor */ diff --git a/osfmk/ppc/lowmem_vectors.s b/osfmk/ppc/lowmem_vectors.s index a3b0e9e5a..54b63596f 100644 --- a/osfmk/ppc/lowmem_vectors.s +++ b/osfmk/ppc/lowmem_vectors.s @@ -369,26 +369,26 @@ notDCache: mtcrf 255,r13 ; Restore CRs .L_handlerC00: mtsprg 3,r11 ; Save R11 mtsprg 2,r13 ; Save R13 - rlwinm r11,r0,0,0xFFFFFFF8 ; mask off low 3 bits of syscall number - xori r13,r11,0x7FF0 ; start to check for the 0x7FFx traps - addi r11,r11,8 ; make a 0 iff this is a 0xFFFFFFF8 trap - cntlzw r13,r13 ; set bit 0x20 iff a 0x7FFx trap - cntlzw r11,r11 ; set bit 0x20 iff a 0xFFFFFFF8 trap + rlwinm r11,r0,0,0xFFFFFFF8 ; mask off low 3 bits of syscall number + xori r13,r11,0x7FF0 ; start to check for the 0x7FFx traps + addi r11,r11,8 ; make a 0 iff this is a 0xFFFFFFF8 trap + cntlzw r13,r13 ; set bit 0x20 iff a 0x7FFx trap + cntlzw r11,r11 ; set bit 0x20 iff a 0xFFFFFFF8 trap xoris r0,r0,0x8000 ; Flip bit to make 0 iff 0x80000000 - rlwimi r11,r13,31,0x10 ; move 0x7FFx bit into position + rlwimi r11,r13,31,0x10 ; move 0x7FFx bit into position cntlzw r13,r0 ; Set bit 0x20 iff 0x80000000 xoris r0,r0,0x8000 ; Flip bit to restore R0 rlwimi r11,r13,2,0x80 ; Set bit 0x80 iff CutTrace - xori r13,r0,0x6004 ; start to check for 0x6004 - rlwimi r11,r0,1,0xE ; move in low 3 bits of syscall number - cntlzw r13,r13 ; set bit 0x20 iff 0x6004 - rlwinm r11,r11,0,0,30 ; clear out bit 31 - rlwimi r11,r13,1,0x40 ; move 0x6004 bit into position 
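
The lowglobals change above carves lgMaxDec (5034) and lgPmsCtlp (5038) out of the old lgRsv034 pad; because this structure overlays a fixed low-memory area, every field must keep its documented offset. A sketch of a compile-time guard for that invariant, assuming the structure corresponds to the area beginning at 0x5000 as the offset comments indicate:

    #include <stddef.h>

    /* Build breaks if a field drifts: the array size becomes -1 on mismatch. */
    typedef char lgMaxDec_at_5034[(offsetof(struct lowglo, lgMaxDec) == 0x34) ? 1 : -1];
    typedef char lgPmsCtlp_at_5038[(offsetof(struct lowglo, lgPmsCtlp) == 0x38) ? 1 : -1];
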
- lhz r11,lo16(scTable)(r11) ; get branch address from sc table - mfctr r13 ; save caller's ctr in r13 - mtctr r11 ; set up branch to syscall handler - mfsprg r11,0 ; get per_proc, which most UFTs use - bctr ; dispatch (r11 in sprg3, r13 in sprg2, ctr in r13, per_proc in r11) + xori r13,r0,0x6004 ; start to check for 0x6004 + rlwimi r11,r0,1,0xE ; move in low 3 bits of syscall number + cntlzw r13,r13 ; set bit 0x20 iff 0x6004 + rlwinm r11,r11,0,0,30 ; clear out bit 31 + rlwimi r11,r13,1,0x40 ; move 0x6004 bit into position + lhz r11,lo16(scTable)(r11) ; get branch address from sc table + mfctr r13 ; save caller's ctr in r13 + mtctr r11 ; set up branch to syscall handler + mfsprg r11,0 ; get per_proc, which most UFTs use + bctr ; dispatch (r11 in sprg3, r13 in sprg2, ctr in r13, per_proc in r11) /* * Trace - generated by single stepping @@ -850,84 +850,84 @@ scTable: ; ABCD E * sprg3 = holds caller's r11 */ -; Handle "vmm_dispatch" (0x6004), of which only some selectors are UFTs. +; Handle "vmm_dispatch" (0x6004), of which only some selectors are UFTs. uftVMM: - mtctr r13 ; restore caller's ctr - lwz r11,spcFlags(r11) ; get the special flags word from per_proc - mfcr r13 ; save caller's entire cr (we use all fields below) + mtctr r13 ; restore caller's ctr + lwz r11,spcFlags(r11) ; get the special flags word from per_proc + mfcr r13 ; save caller's entire cr (we use all fields below) rlwinm r11,r11,16,16,31 ; Extract spcFlags upper bits andi. r11,r11,hi16(runningVM|FamVMena|FamVMmode) cmpwi cr0,r11,hi16(runningVM|FamVMena|FamVMmode) ; Test in VM FAM - bne-- uftNormal80 ; not eligible for FAM UFTs + bne-- uftNormal80 ; not eligible for FAM UFTs cmpwi cr5,r3,kvmmResumeGuest ; Compare r3 with kvmmResumeGuest cmpwi cr2,r3,kvmmSetGuestRegister ; Compare r3 with kvmmSetGuestRegister cror cr1_eq,cr5_lt,cr2_gt ; Set true if out of VMM Fast syscall range - bt-- cr1_eq,uftNormalFF ; Exit if out of range (the others are not UFTs) + bt-- cr1_eq,uftNormalFF ; Exit if out of range (the others are not UFTs) b EXT(vmm_ufp) ; handle UFT range of vmm_dispatch syscall - -; Handle blue box UFTs (syscalls -1 and -2). + +; Handle blue box UFTs (syscalls -1 and -2). uftIsPreemptiveTask: uftIsPreemptiveTaskEnv: - mtctr r13 ; restore caller's ctr - lwz r11,spcFlags(r11) ; get the special flags word from per_proc - mfcr r13,0x80 ; save caller's cr0 so we can use it - andi. r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need + mtctr r13 ; restore caller's ctr + lwz r11,spcFlags(r11) ; get the special flags word from per_proc + mfcr r13,0x80 ; save caller's cr0 so we can use it + andi. r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need cmplwi r11,bbNoMachSC ; See if we are trapping syscalls - blt-- uftNormal80 ; No... - cmpwi r0,-2 ; is this call IsPreemptiveTaskEnv? + blt-- uftNormal80 ; No... + cmpwi r0,-2 ; is this call IsPreemptiveTaskEnv? 
rlwimi r13,r11,bbPreemptivebit-cr0_eq,cr0_eq,cr0_eq ; Copy preemptive task flag into user cr0_eq mfsprg r11,0 ; Get the per proc once more - bne++ uftRestoreThenRFI ; do not load r0 if IsPreemptiveTask + bne++ uftRestoreThenRFI ; do not load r0 if IsPreemptiveTask lwz r0,ppbbTaskEnv(r11) ; Get the shadowed taskEnv (only difference) - b uftRestoreThenRFI ; restore modified cr0 and return + b uftRestoreThenRFI ; restore modified cr0 and return -; Handle "Thread Info" UFT (0x7FF2) +; Handle "Thread Info" UFT (0x7FF2) - .globl EXT(uft_uaw_nop_if_32bit) + .globl EXT(uft_uaw_nop_if_32bit) uftThreadInfo: - lwz r3,UAW+4(r11) ; get user assist word, assuming a 32-bit processor + lwz r3,UAW+4(r11) ; get user assist word, assuming a 32-bit processor LEXT(uft_uaw_nop_if_32bit) - ld r3,UAW(r11) ; get the whole doubleword if 64-bit (patched to nop if 32-bit) - mtctr r13 ; restore caller's ctr - b uftRFI ; done + ld r3,UAW(r11) ; get the whole doubleword if 64-bit (patched to nop if 32-bit) + mtctr r13 ; restore caller's ctr + b uftRFI ; done -; Handle "Facility Status" UFT (0x7FF3) +; Handle "Facility Status" UFT (0x7FF3) uftFacilityStatus: - lwz r3,spcFlags(r11) ; get "special flags" word from per_proc - mtctr r13 ; restore caller's ctr - b uftRFI ; done + lwz r3,spcFlags(r11) ; get "special flags" word from per_proc + mtctr r13 ; restore caller's ctr + b uftRFI ; done -; Handle "Load MSR" UFT (0x7FF4). This is not used on 64-bit processors, though it would work. +; Handle "Load MSR" UFT (0x7FF4). This is not used on 64-bit processors, though it would work. uftLoadMSR: - mfsrr1 r11 ; get caller's MSR - mtctr r13 ; restore caller's ctr - mfcr r13,0x80 ; save caller's cr0 so we can test PR - rlwinm. r11,r11,0,MSR_PR_BIT,MSR_PR_BIT ; really in the kernel? - bne- uftNormal80 ; do not permit from user mode - mfsprg r11,0 ; restore per_proc + mfsrr1 r11 ; get caller's MSR + mtctr r13 ; restore caller's ctr + mfcr r13,0x80 ; save caller's cr0 so we can test PR + rlwinm. r11,r11,0,MSR_PR_BIT,MSR_PR_BIT ; really in the kernel? + bne- uftNormal80 ; do not permit from user mode + mfsprg r11,0 ; restore per_proc mtsrr1 r3 ; Set new MSR -; Return to caller after UFT. When called: -; r11 = per_proc ptr -; r13 = callers cr0 in upper nibble (if uftRestoreThenRFI called) -; sprg2 = callers r13 -; sprg3 = callers r11 +; Return to caller after UFT. When called: +; r11 = per_proc ptr +; r13 = callers cr0 in upper nibble (if uftRestoreThenRFI called) +; sprg2 = callers r13 +; sprg3 = callers r11 -uftRestoreThenRFI: ; WARNING: can drop down to here - mtcrf 0x80,r13 ; restore caller's cr0 +uftRestoreThenRFI: ; WARNING: can drop down to here + mtcrf 0x80,r13 ; restore caller's cr0 uftRFI: - .globl EXT(uft_nop_if_32bit) + .globl EXT(uft_nop_if_32bit) LEXT(uft_nop_if_32bit) - b uftX64 ; patched to NOP if 32-bit processor + b uftX64 ; patched to NOP if 32-bit processor uftX32: lwz r11,pfAvailable(r11) ; Get the feature flags mfsprg r13,2 ; Restore R13 @@ -1217,20 +1217,20 @@ ctbail64: mtcrf 0x80,r25 ; Restore the used condition register field -; Handle a system call that is not a UFT and which thus goes upstairs. +; Handle a system call that is not a UFT and which thus goes upstairs. -uftNormalFF: ; here with entire cr in r13 - mtcr r13 ; restore all 8 fields +uftNormalFF: ; here with entire cr in r13 + mtcr r13 ; restore all 8 fields b uftNormalSyscall1 ; Join common... 
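
The .L_handlerC00 code above, and the scTable dispatch it feeds, classifies a system-call number into a small table index without branching: one bit each for the 0x7FFx group, the 0xFFFFFFF8 blue-box group, vmm_dispatch (0x6004), and CutTrace (0x80000000), plus the low three bits of the call number. A hedged C restatement of the index computation, for readability only:

    static unsigned int sc_table_index(uint32_t r0)          /* r0 = syscall number */
    {
        unsigned int idx = (r0 << 1) & 0xE;                  /* low 3 bits of the number */
        if ((r0 & 0xFFFFFFF8u) == 0x00007FF0u) idx |= 0x10;  /* 0x7FF0-0x7FF7 UFTs */
        if ((r0 & 0xFFFFFFF8u) == 0xFFFFFFF8u) idx |= 0x20;  /* blue box group (-1/-2 handled) */
        if (r0 == 0x00006004u)                 idx |= 0x40;  /* vmm_dispatch selectors */
        if (r0 == 0x80000000u)                 idx |= 0x80;  /* CutTrace */
        return idx;                                          /* halfword offset into scTable */
    }
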
- -uftNormal80: ; here with callers cr0 in r13 - mtcrf 0x80,r13 ; restore cr0 + +uftNormal80: ; here with callers cr0 in r13 + mtcrf 0x80,r13 ; restore cr0 b uftNormalSyscall1 ; Join common... - -uftNormalSyscall: ; r13 = callers ctr - mtctr r13 ; restore ctr + +uftNormalSyscall: ; r13 = callers ctr + mtctr r13 ; restore ctr uftNormalSyscall1: - li r11,T_SYSTEM_CALL|T_FAM ; this is a system call (and fall through) + li r11,T_SYSTEM_CALL|T_FAM ; this is a system call (and fall through) /*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>*/ @@ -1257,7 +1257,7 @@ uftNormalSyscall1: * misses, so these stores won't take all that long. Except the first line that is because * we can't do a DCBZ if the L1 D-cache is off. The rest we will skip if they are * off also. - * + * * Note that if we are attempting to sleep (as opposed to nap or doze) all interruptions * are ignored. */ @@ -1548,7 +1548,7 @@ noPerfMonSave32: lwz r25,traceMask(0) ; Get the trace mask li r0,SAVgeneral ; Get the savearea type value lhz r19,PP_CPU_NUMBER(r2) ; Get the logical processor number - rlwinm r22,r11,30,0,31 ; Divide interrupt code by 4 + rlwinm r22,r11,30,0,31 ; Divide interrupt code by 2 stb r0,SAVflags+2(r13) ; Mark valid context addi r22,r22,10 ; Adjust code so we shift into CR5 li r23,trcWork ; Get the trace work area address @@ -3661,8 +3661,16 @@ EXT(mckFlags): .long 0 ; 5028 user memory window virtual address .long 0 ; 502C user memory window virtual address .long 0 ; 5030 VMM boot-args forced feature flags - .long 0 ; 5034 reserved - .long 0 ; 5038 reserved + + .globl EXT(maxDec) +EXT(maxDec): + .long 0x7FFFFFFF ; 5034 maximum decrementer value + + + .globl EXT(pmsCtlp) +EXT(pmsCtlp): + .long 0 ; 5038 Pointer to power management stepper control + .long 0 ; 503C reserved .long 0 ; 5040 reserved .long 0 ; 5044 reserved diff --git a/osfmk/ppc/machine_cpu.h b/osfmk/ppc/machine_cpu.h index 02c5d403e..438dfd533 100644 --- a/osfmk/ppc/machine_cpu.h +++ b/osfmk/ppc/machine_cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -36,7 +36,7 @@ extern void cpu_doshutdown( extern void cpu_signal_handler( void); -typedef void (*broadcastFunc) (int); +typedef void (*broadcastFunc) (uint32_t); int32_t cpu_broadcast(uint32_t *, broadcastFunc, uint32_t); diff --git a/osfmk/ppc/machine_routines.c b/osfmk/ppc/machine_routines.c index e424bed03..749446f77 100644 --- a/osfmk/ppc/machine_routines.c +++ b/osfmk/ppc/machine_routines.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -380,9 +380,9 @@ ml_processor_register( else proc_info->time_base_enable = (void(*)(cpu_id_t, boolean_t ))NULL; - if (proc_info->pf.pfPowerModes & pmPowerTune) { - proc_info->pf.pfPowerTune0 = in_processor_info->power_mode_0; - proc_info->pf.pfPowerTune1 = in_processor_info->power_mode_1; + if((proc_info->pf.pfPowerModes & pmType) == pmPowerTune) { + proc_info->pf.pfPowerTune0 = in_processor_info->power_mode_0; + proc_info->pf.pfPowerTune1 = in_processor_info->power_mode_1; } donap = in_processor_info->supports_nap; /* Assume we use requested nap */ @@ -626,57 +626,64 @@ void ml_set_processor_speed(unsigned long speed) { struct per_proc_info *proc_info; - uint32_t powerModes, cpu; + uint32_t cpu; kern_return_t result; boolean_t current_state; unsigned int i; proc_info = PerProcTable[master_cpu].ppe_vaddr; - powerModes = proc_info->pf.pfPowerModes; - if (powerModes & pmDualPLL) { - - ml_set_processor_speed_dpll(speed); - - } else if (powerModes & pmDFS) { - - for (cpu = 0; cpu < real_ncpus; cpu++) { - /* - * cpu_signal() returns after .5ms if it fails to signal a running cpu - * retry cpu_signal() for .1s to deal with long interrupt latency at boot - */ - for (i=200; i>0; i--) { - current_state = ml_set_interrupts_enabled(FALSE); - if (cpu != cpu_number()) { - if (PerProcTable[cpu].ppe_vaddr->cpu_flags & SignalReady) - /* - * Target cpu is off-line, skip - */ + switch (proc_info->pf.pfPowerModes & pmType) { /* Figure specific type */ + case pmDualPLL: + + ml_set_processor_speed_dpll(speed); + break; + + case pmDFS: + + for (cpu = 0; cpu < real_ncpus; cpu++) { + /* + * cpu_signal() returns after .5ms if it fails to signal a running cpu + * retry cpu_signal() for .1s to deal with long interrupt latency at boot + */ + for (i=200; i>0; i--) { + current_state = ml_set_interrupts_enabled(FALSE); + if (cpu != cpu_number()) { + if (PerProcTable[cpu].ppe_vaddr->cpu_flags & SignalReady) + /* + * Target cpu is off-line, skip + */ + result = KERN_SUCCESS; + else { + simple_lock(&spsLock); + result = cpu_signal(cpu, SIGPcpureq, CPRQsps, speed); + if (result == KERN_SUCCESS) + thread_sleep_simple_lock(&spsLock, &spsLock, THREAD_UNINT); + simple_unlock(&spsLock); + } + } else { + ml_set_processor_speed_dfs(speed); result = KERN_SUCCESS; - else { - simple_lock(&spsLock); - result = cpu_signal(cpu, SIGPcpureq, CPRQsps, speed); - if (result == KERN_SUCCESS) - thread_sleep_simple_lock(&spsLock, &spsLock, THREAD_UNINT); - simple_unlock(&spsLock); } - } else { - ml_set_processor_speed_dfs(speed); - result = KERN_SUCCESS; + (void) ml_set_interrupts_enabled(current_state); + if (result == KERN_SUCCESS) + break; } - (void) ml_set_interrupts_enabled(current_state); - if (result == KERN_SUCCESS) - break; + if (result != KERN_SUCCESS) + panic("ml_set_processor_speed(): Fail to set cpu%d speed\n", cpu); } - if (result != KERN_SUCCESS) - panic("ml_set_processor_speed(): Fail to set cpu%d speed\n", cpu); - } - - } else if (powerModes & pmPowerTune) { - - ml_set_processor_speed_powertune(speed); + break; + + case pmPowerTune: + + ml_set_processor_speed_powertune(speed); + break; + + default: + break; } + return; } /* diff --git a/osfmk/ppc/machine_routines_asm.s b/osfmk/ppc/machine_routines_asm.s index 854928434..afd81129a 100644 --- a/osfmk/ppc/machine_routines_asm.s +++ b/osfmk/ppc/machine_routines_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. 
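
Two hunks above replace bit tests on pfPowerModes with a comparison against the extracted pmType field (ml_processor_register) and a switch on it (ml_set_processor_speed). The point of the change: the power-management type is an enumerated field, not a set of independent flag bits, so a plain AND can fire for the wrong mode. A sketch with made-up encodings to show the hazard:

    #define pmType      0x000000FF      /* assumed field mask, for illustration */
    #define pmDFS       0x00000001      /* made-up encodings: */
    #define pmDualPLL   0x00000002
    #define pmPowerTune 0x00000003      /* shares bits with both values above */

    /* (modes & pmPowerTune) is also non-zero for pmDFS and pmDualPLL here;
       masking the field first and comparing for equality is exact. */
    #define IS_POWERTUNE(modes)  (((modes) & pmType) == pmPowerTune)
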
* * @APPLE_LICENSE_HEADER_START@ * @@ -989,10 +989,8 @@ LEXT(machine_idle_ret) ; Force a line boundry here .align 5 - .globl EXT(ml_ppc_sleep) -LEXT(ml_ppc_sleep) - .globl EXT(ml_ppc_do_sleep) + LEXT(ml_ppc_do_sleep) #if 0 @@ -2027,8 +2025,6 @@ LEXT(ml_set_processor_speed_powertune) mfsprg r31,1 ; Get the current activation lwz r31,ACT_PER_PROC(r31) ; Get the per_proc block - lwz r30, pfPowerModes(r31) ; Get the supported power modes - rlwinm r28, r3, 31-dnap, dnap, dnap ; Shift the 1 bit to the dnap+32 bit rlwinm r3, r3, 2, 29, 29 ; Shift the 1 to a 4 and mask addi r3, r3, pfPowerTune0 ; Add in the pfPowerTune0 offset @@ -2137,7 +2133,11 @@ spsDPLL2: /* -** ml_set_processor_speed_dfs() +** ml_set_processor_speed_dfs(divideby) +** divideby == 0 then divide by 1 (full speed) +** divideby == 1 then divide by 2 (half speed) +** divideby == 2 then divide by 4 (quarter speed) +** divideby == 3 then divide by 4 (quarter speed) - preferred ** */ ; Force a line boundry here @@ -2145,19 +2145,13 @@ spsDPLL2: .globl EXT(ml_set_processor_speed_dfs) LEXT(ml_set_processor_speed_dfs) - mfsprg r5,1 ; Get the current activation - lwz r5,ACT_PER_PROC(r5) ; Get the per_proc block - - cmplwi r3, 0 ; full speed? - mfspr r3, hid1 ; Get the current HID1 - rlwinm r3, r3, 0, hid1dfs1+1, hid1dfs0-1 ; assume full speed, clear dfs bits - beq spsDFS - oris r3, r3, hi16(hid1dfs1m) ; slow, set half speed dfs1 bit -spsDFS: - stw r3, pfHID1(r5) ; Save the new hid1 value + mfspr r4,hid1 ; Get the current HID1 + mfsprg r5,0 ; Get the per_proc_info + rlwimi r4,r3,31-hid1dfs1,hid1dfs0,hid1dfs1 ; Stick the new divider bits in + stw r4,pfHID1(r5) ; Save the new hid1 value sync - mtspr hid1, r3 ; Set the new HID1 + mtspr hid1,r4 ; Set the new HID1 sync isync blr @@ -2272,3 +2266,49 @@ mhrcalc: mftb r8 ; Get time now sub r3,r2,r9 ; How many ticks? mtmsrd r12,1 ; Flip EE on if needed blr ; Leave... + + +; +; int setPop(time) +; +; Calculates the number of ticks to the supplied event and +; sets the decrementer. Never set the time for less that the +; minimum, which is 10, nor more than maxDec, which is usually 0x7FFFFFFF +; and never more than that but can be set by root. +; +; + + .align 7 + .globl EXT(setPop) + +#define kMin 10 + +LEXT(setPop) + +spOver: mftbu r8 ; Get upper time + addic r2,r4,-kMin ; Subtract minimum from target + mftb r9 ; Get lower + addme r11,r3 ; Do you have any bits I could borrow? + mftbu r10 ; Get upper again + subfe r0,r0,r0 ; Get -1 if we went negative 0 otherwise + subc r7,r2,r9 ; Subtract bottom and get carry + cmplw r8,r10 ; Did timebase upper tick? + subfe r6,r8,r11 ; Get the upper difference accounting for borrow + lwz r12,maxDec(0) ; Get the maximum decrementer size + addme r0,r0 ; Get -1 or -2 if anything negative, 0 otherwise + addic r2,r6,-1 ; Set carry if diff < 2**32 + srawi r0,r0,1 ; Make all foxes + subi r10,r12,kMin ; Adjust maximum for minimum adjust + andc r7,r7,r0 ; Pin time at 0 if under minimum + subfe r2,r2,r2 ; 0 if diff > 2**32, -1 otherwise + sub r7,r7,r10 ; Negative if duration is less than (max - min) + or r2,r2,r0 ; If the duration is negative, it isn't too big + srawi r0,r7,31 ; -1 if duration is too small + and r7,r7,r2 ; Clear duration if high part too big + and r7,r7,r0 ; Clear duration if low part too big + bne-- spOver ; Timer ticked... + add r3,r7,r12 ; Add back the max for total + mtdec r3 ; Set the decrementer + blr ; Leave... 
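
The setPop routine above is worth a plain-language model: it measures the distance from "now" to the requested pop time, clamps it between kMin (10 ticks) and maxDec, arms the decrementer, and retries if the timebase upper word ticked during the read. A hedged C equivalent, where mtdec() stands in for the mtdec instruction (the real code does all of this branch-free):

    #define kMin 10
    extern uint32_t maxDec;                    /* the low-memory global added above */
    extern uint64_t mach_absolute_time(void);
    extern void mtdec(uint32_t ticks);         /* assumed wrapper for the instruction */

    static void setPop_sketch(uint64_t deadline)
    {
        uint64_t now = mach_absolute_time();
        uint64_t dur = (deadline > now) ? (deadline - now) : 0;

        if (dur < kMin)   dur = kMin;          /* never pop sooner than the minimum */
        if (dur > maxDec) dur = maxDec;        /* never exceed the decrementer limit */
        mtdec((uint32_t)dur);
    }
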
+ + diff --git a/osfmk/ppc/mappings.c b/osfmk/ppc/mappings.c index ebeef928b..2f80bc88c 100644 --- a/osfmk/ppc/mappings.c +++ b/osfmk/ppc/mappings.c @@ -272,7 +272,7 @@ addr64_t mapping_remove(pmap_t pmap, addr64_t va) { /* Remove a single mapping * perm Mapping is permanent * cache inhibited Cache inhibited (used if use attribute or block set ) * guarded Guarded access (used if use attribute or block set ) - * size size of block (not used if not block) + * size size of block in pages - 1 (not used if not block) * prot VM protection bits * attr Cachability/Guardedness * @@ -337,6 +337,12 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, pattr = flags & (mmFlgCInhib | mmFlgGuarded); /* Use requested attributes */ mflags |= mpBlock; /* Show that this is a block */ + + if(size > pmapSmallBlock) { /* Is it one? */ + if(size & 0x00001FFF) return mapRtBadSz; /* Fail if bigger than 256MB and not a 32MB multiple */ + size = size >> 13; /* Convert to 32MB chunks */ + mflags = mflags | mpBSu; /* Show 32MB basic size unit */ + } } wimg = 0x2; /* Set basic PPC wimg to 0b0010 - Coherent */ @@ -348,7 +354,7 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, if(flags & mmFlgPerm) mflags |= mpPerm; /* Set permanent mapping */ size = size - 1; /* Change size to offset */ - if(size > 0xFFFF) return 1; /* Leave if size is too big */ + if(size > 0xFFFF) return mapRtBadSz; /* Leave if size is too big */ nlists = mapSetLists(pmap); /* Set number of lists this will be on */ @@ -371,7 +377,7 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, switch (rc) { case mapRtOK: - return 0; /* Mapping added successfully */ + return mapRtOK; /* Mapping added successfully */ case mapRtRemove: /* Remove in progress */ (void)mapping_remove(pmap, colladdr); /* Lend a helping hand to another CPU doing block removal */ @@ -379,12 +385,12 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags, case mapRtMapDup: /* Identical mapping already present */ mapping_free(mp); /* Free duplicate mapping */ - return 0; /* Return success */ + return mapRtOK; /* Return success */ case mapRtSmash: /* Mapping already present but does not match new mapping */ mapping_free(mp); /* Free duplicate mapping */ - return (colladdr | 1); /* Return colliding address, with some dirt added to avoid - confusion if effective address is 0 */ + return (colladdr | mapRtSmash); /* Return colliding address, with some dirt added to avoid + confusion if effective address is 0 */ default: panic("mapping_make: hw_add_map failed - collision addr = %016llX, code = %02X, pmap = %08X, va = %016llX, mapping = %08X\n", colladdr, rc, pmap, va, mp); /* Die dead */ @@ -1739,6 +1745,23 @@ void mapping_phys_unused(ppnum_t pa) { } +void mapping_hibernate_flush(void) +{ + int bank; + unsigned int page; + struct phys_entry * entry; + + for (bank = 0; bank < pmap_mem_regions_count; bank++) + { + entry = (struct phys_entry *) pmap_mem_regions[bank].mrPhysTab; + for (page = pmap_mem_regions[bank].mrStart; page <= pmap_mem_regions[bank].mrEnd; page++) + { + hw_walk_phys(entry, hwpNoop, hwpNoop, hwpNoop, 0, hwpPurgePTE); + entry++; + } + } +} + diff --git a/osfmk/ppc/mappings.h b/osfmk/ppc/mappings.h index 9d7a7c759..6d910c3de 100644 --- a/osfmk/ppc/mappings.h +++ b/osfmk/ppc/mappings.h @@ -336,7 +336,8 @@ typedef struct mappingblok { #define mapRtMapDup 7 #define mapRtGuest 8 #define mapRtEmpty 9 -#define mapRtSmash 0xA /* Mapping already exists and doesn't match 
new mapping */ +#define mapRtSmash 10 /* Mapping already exists and doesn't match new mapping */ +#define mapRtBadSz 11 /* Requested size too big or more than 256MB and not mult of 32MB */ /* * This struct describes available physical page configurations @@ -400,6 +401,7 @@ extern phys_entry_t *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex); /* extern int mapalc1(struct mappingblok *mb); /* Finds and allcates a 1-bit mapping entry */ extern int mapalc2(struct mappingblok *mb); /* Finds and allcates a 2-bit mapping entry */ extern void ignore_zero_fault(boolean_t type); /* Sets up to ignore or honor any fault on page 0 access for the current thread */ +extern void mapping_hibernate_flush(void); extern void mapping_fake_zone_info( /* return mapping usage stats as a fake zone info */ int *count, diff --git a/osfmk/ppc/misc_asm.s b/osfmk/ppc/misc_asm.s index 33d39c41b..8ca2624a2 100644 --- a/osfmk/ppc/misc_asm.s +++ b/osfmk/ppc/misc_asm.s @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -257,6 +257,12 @@ ENTRY(mfsda, TAG_NO_FRAME_USED) mfspr r3,sda blr + .globl EXT(hid1get) +LEXT(hid1get) + + mfspr r3,hid1 ; Get the HID1 + blr + .globl EXT(hid0get64) LEXT(hid0get64) diff --git a/osfmk/ppc/model_dep.c b/osfmk/ppc/model_dep.c index db35a19ae..b88e4af64 100644 --- a/osfmk/ppc/model_dep.c +++ b/osfmk/ppc/model_dep.c @@ -305,7 +305,11 @@ void machine_init(void) { clock_config(); +/* Note that we must initialize the stepper tables AFTER the clock is configured!!!!! */ + if(pmsExperimental & 1) pmsCPUConf(); /* (EXPERIMENTAL) Initialize the stepper tables */ perfmon_init(); + return; + } void slave_machine_init(void) diff --git a/osfmk/ppc/pmap.c b/osfmk/ppc/pmap.c index 7453b8aa5..875ee6912 100644 --- a/osfmk/ppc/pmap.c +++ b/osfmk/ppc/pmap.c @@ -259,23 +259,30 @@ void pmap_map_physical() { unsigned region; + uint64_t msize, size; + addr64_t paddr, vaddr, colladdr; /* Iterate over physical memory regions, block mapping each into the kernel's address map */ for (region = 0; region < (unsigned)pmap_mem_regions_count; region++) { - addr64_t paddr = ((addr64_t)pmap_mem_regions[region].mrStart << 12); - addr64_t size = (((addr64_t)pmap_mem_regions[region].mrEnd + 1) << 12) - paddr; + paddr = ((addr64_t)pmap_mem_regions[region].mrStart << 12); /* Get starting physical address */ + size = (((addr64_t)pmap_mem_regions[region].mrEnd + 1) << 12) - paddr; + + vaddr = paddr + lowGlo.lgPMWvaddr; /* Get starting virtual address */ + while (size > 0) { - /* Block mappings are limited to 256M, so we map in blocks of up to 256M */ - addr64_t vaddr = paddr + lowGlo.lgPMWvaddr; - unsigned msize = ((size > 0x10000000)? 0x10000000 : size); - addr64_t colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12), - (mmFlgBlock | mmFlgPerm), (msize >> 12), - (VM_PROT_READ | VM_PROT_WRITE)); + + msize = ((size > 0x0000020000000000ULL) ? 
0x0000020000000000ULL : size); /* Get size, but no more than 2TBs */ + + colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12), + (mmFlgBlock | mmFlgPerm), (msize >> 12), + (VM_PROT_READ | VM_PROT_WRITE)); if (colladdr) { - panic ("pmap_map_physical: collision with previously mapped range - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n", + panic ("pmap_map_physical: mapping failure - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n", vaddr, (paddr >> 12), (msize >> 12), colladdr); } - paddr += msize; + + vaddr = vaddr + (uint64_t)msize; /* Point to the next virtual addr */ + paddr = paddr + (uint64_t)msize; /* Point to the next physical addr */ size -= msize; } } @@ -290,19 +297,28 @@ pmap_map_physical() void pmap_map_iohole(addr64_t paddr, addr64_t size) { + + addr64_t vaddr, colladdr, msize; + uint32_t psize; + + vaddr = paddr + lowGlo.lgPMWvaddr; /* Get starting virtual address */ + while (size > 0) { - addr64_t vaddr = paddr + lowGlo.lgPMWvaddr; - unsigned msize = ((size > 0x10000000)? 0x10000000 : size); - addr64_t colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12), - (mmFlgBlock | mmFlgPerm | mmFlgGuarded | mmFlgCInhib), (msize >> 12), - (VM_PROT_READ | VM_PROT_WRITE)); + + msize = ((size > 0x0000020000000000ULL) ? 0x0000020000000000ULL : size); /* Get size, but no more than 2TBs */ + + colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12), + (mmFlgBlock | mmFlgPerm | mmFlgGuarded | mmFlgCInhib), (msize >> 12), + (VM_PROT_READ | VM_PROT_WRITE)); if (colladdr) { - panic ("pmap_map_iohole: collision with previously mapped range - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n", - vaddr, (paddr >> 12), (msize >> 12), colladdr); + panic ("pmap_map_iohole: mapping failed - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n", + vaddr, (paddr >> 12), (msize >> 12), colladdr); } - paddr += msize; + + vaddr = vaddr + (uint64_t)msize; /* Point to the next virtual addr */ + paddr = paddr + (uint64_t)msize; /* Point to the next physical addr */ size -= msize; - } + } } /* @@ -1108,11 +1124,13 @@ pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot, * not be changed. The block must be unmapped and then remapped with the new stuff. * We also do not keep track of reference or change flags. * + * Any block that is larger than 256MB must be a multiple of 32MB. We panic if it is not. + * * Note that pmap_map_block_rc is the same but doesn't panic if collision. 
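
These loops depend on mapping_make's new size contract: size is a 4 KB page count, and anything over pmapSmallBlock (65536 pages, 256 MB) must be a whole number of 32 MB chunks, which mapping_make then carries in 32 MB units with mpBSu set. A compact C restatement of the conversion this patch adds:

    #define pmapSmallBlock 65536               /* 256 MB in 4 KB pages (pmap.h) */

    static int block_units(uint32_t *size, unsigned int *mflags)
    {
        if (*size > pmapSmallBlock) {          /* big block? */
            if (*size & 0x00001FFF)
                return mapRtBadSz;             /* not a 32 MB multiple: reject */
            *size >>= 13;                      /* pages -> 32 MB chunks */
            *mflags |= mpBSu;                  /* flag the 32 MB basic size unit */
        }
        return mapRtOK;                        /* small blocks stay in pages */
    }
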
* */ -void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags) { /* Map an autogenned block */ +void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags) { /* Map an autogenned block */ unsigned int mflags; addr64_t colva; @@ -1125,20 +1143,19 @@ void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_pro // kprintf("pmap_map_block: (%08X) va = %016llX, pa = %08X, size = %08X, prot = %08X, attr = %08X, flags = %08X\n", /* (BRINGUP) */ // current_thread(), va, pa, size, prot, attr, flags); /* (BRINGUP) */ - mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ if(flags) mflags |= mmFlgPerm; /* Mark permanent if requested */ - colva = mapping_make(pmap, va, pa, mflags, (size >> 12), prot); /* Enter the mapping into the pmap */ + colva = mapping_make(pmap, va, pa, mflags, size, prot); /* Enter the mapping into the pmap */ if(colva) { /* If there was a collision, panic */ - panic("pmap_map_block: collision at %016llX, pmap = %08X\n", colva, pmap); + panic("pmap_map_block: mapping error %d, pmap = %08X, va = %016llX\n", (uint32_t)(colva & mapRetCode), pmap, va); } return; /* Return */ } -int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags) { /* Map an autogenned block */ +int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags) { /* Map an autogenned block */ unsigned int mflags; addr64_t colva; @@ -1150,8 +1167,8 @@ int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_p mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1); /* Convert to our mapping_make flags */ if(flags) mflags |= mmFlgPerm; /* Mark permanent if requested */ - - colva = mapping_make(pmap, va, pa, mflags, (size >> 12), prot); /* Enter the mapping into the pmap */ + + colva = mapping_make(pmap, va, pa, mflags, size, prot); /* Enter the mapping into the pmap */ if(colva) return 0; /* If there was a collision, fail */ @@ -1625,7 +1642,7 @@ void pmap_switch(pmap_t map) * subord = the pmap that goes into the grand * vstart = start of range in pmap to be inserted * nstart = start of range in pmap nested pmap - * size = Size of nest area (up to 16TB) + * size = Size of nest area (up to 2TB) * * Inserts a pmap into another. This is used to implement shared segments. * On the current PPC processors, this is limited to segment (256MB) aligned @@ -1634,8 +1651,6 @@ void pmap_switch(pmap_t map) * We actually kinda allow recursive nests. The gating factor is that we do not allow * nesting on top of something that is already mapped, i.e., the range must be empty. * - * - * * Note that we depend upon higher level VM locks to insure that things don't change while * we are doing this. For example, VM should not be doing any pmap enters while it is nesting * or do 2 nests at once. 
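
The reworked pmap_map_block panic above also shows the new in-band error convention: mapping_make returns mapRtOK (zero) on success, or the colliding effective address with a small code (mapRtSmash, mapRtBadSz, ...) OR'd into its low bits. A hedged decoding sketch, assuming mapRetCode masks just those low-order code bits:

    static void report_block_failure(pmap_t pmap, addr64_t colva)
    {
        unsigned int rc = (unsigned int)(colva & mapRetCode);  /* the return code */
        addr64_t     ea = colva & ~((addr64_t)mapRetCode);     /* address, code stripped */

        panic("block mapping failed - code = %d, pmap = %08X, ea = %016llX\n",
              rc, pmap, ea);
    }
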
@@ -1648,9 +1663,8 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t n int nlists; mapping_t *mp; - if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this for multiples of 256MB */ - if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */ + if((size >> 25) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 2TB */ if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */ if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */ @@ -1658,13 +1672,13 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t n panic("pmap_nest: size is invalid - %016llX\n", size); } - msize = (size >> 28) - 1; /* Change size to blocks of 256MB */ + msize = (size >> 25) - 1; /* Change size to blocks of 32MB */ nlists = mapSetLists(grand); /* Set number of lists this will be on */ mp = mapping_alloc(nlists); /* Get a spare mapping block */ - mp->mpFlags = 0x01000000 | mpNest | mpPerm | nlists; + mp->mpFlags = 0x01000000 | mpNest | mpPerm | mpBSu | nlists; /* Make this a permanent nested pmap with a 32MB basic size unit */ /* Set the flags. Make sure busy count is 1 */ mp->mpSpace = subord->space; /* Set the address space/pmap lookup ID */ mp->u.mpBSize = msize; /* Set the size */ @@ -1800,10 +1814,10 @@ void MapUserMemoryWindowInit(void) { mp = mapping_alloc(nlists); /* Get a spare mapping block */ - mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | nlists; + mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | mpBSu | nlists; /* Make this a permanent nested pmap with a 32MB basic size unit */ /* Set the flags. Make sure busy count is 1 */ mp->mpSpace = kernel_pmap->space; /* Set the address space/pmap lookup ID */ - mp->u.mpBSize = 1; /* Set the size to 2 segments */ + mp->u.mpBSize = 15; /* Set the size to 2 segments in 32MB chunks - 1 */ mp->mpPte = 0; /* Means nothing */ mp->mpPAddr = 0; /* Means nothing */ mp->mpVAddr = lowGlo.lgUMWvaddr; /* Set the address range we cover */ @@ -2042,9 +2056,3 @@ coredumpok( { return TRUE; } - -/* -;;; Local Variables: *** -;;; tab-width:4 *** -;;; End: *** -*/ diff --git a/osfmk/ppc/pmap.h b/osfmk/ppc/pmap.h index 2d88a66f5..e8b137b64 100644 --- a/osfmk/ppc/pmap.h +++ b/osfmk/ppc/pmap.h @@ -250,6 +250,7 @@ extern pmapTransTab *pmapTrans; /* Space to pmap translate table */ #define PHYS_MEM_WINDOW_VADDR 0x0000000100000000ULL #define IO_MEM_WINDOW_VADDR 0x0000000080000000ULL #define IO_MEM_WINDOW_SIZE 0x0000000080000000ULL +#define pmapSmallBlock 65536 #define pmap_kernel() (kernel_pmap) #define pmap_resident_count(pmap) ((pmap)->stats.resident_count) @@ -302,8 +303,8 @@ extern void invalidate_icache(vm_offset_t va, unsigned length, boolean_t phys); extern void invalidate_icache64(addr64_t va, unsigned length, boolean_t phys); extern void pmap_sync_page_data_phys(ppnum_t pa); extern void pmap_sync_page_attributes_phys(ppnum_t pa); -extern void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags); -extern int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags); +extern void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags); +extern int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags); extern kern_return_t pmap_nest(pmap_t grand, 
pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size); extern kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr); diff --git a/osfmk/ppc/pms.c b/osfmk/ppc/pms.c new file mode 100644 index 000000000..58c47f9b7 --- /dev/null +++ b/osfmk/ppc/pms.c @@ -0,0 +1,682 @@ +/* + * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern int real_ncpus; + +static uint32_t pmsSyncrolator = 0; /* Only one control operation at a time please */ +uint32_t pmsBroadcastWait = 0; /* Number of outstanding broadcasts */ + +int pmsInstalled = 0; /* Power Management Stepper can run and has table installed */ +int pmsExperimental = 0; /* Power Management Stepper in experimental mode */ +decl_simple_lock_data(,pmsBuildLock) /* Make sure only one guy can replace table at the same time */ + +static pmsDef *altDpmsTab = 0; /* Alternate step definition table */ +static uint32_t altDpmsTabSize = 0; /* Size of alternate step definition table */ + +pmsDef pmsDummy = { /* This is the dummy step for initialization. All it does is to park */ + .pmsLimit = 0, /* Time doesn't matter for a park */ + .pmsStepID = pmsMaxStates - 1, /* Use the very last ID number for the dummy */ + .pmsSetCmd = pmsParkIt, /* Force us to be parked */ + .sf.pmsSetFuncInd = 0, /* No platform call for this one */ + .pmsDown = pmsPrepSleep, /* We always park */ + .pmsNext = pmsPrepSleep /* We always park */ +}; + +pmsStat pmsStatsd[4][pmsMaxStates]; /* Generate enough statistics blocks for 4 processors */ + +pmsCtl pmsCtls = { /* Power Management Stepper control */ + .pmsStats = &pmsStatsd +}; + +pmsSetFunc_t pmsFuncTab[pmsSetFuncMax] = {0}; /* This is the function index table */ +pmsQueryFunc_t pmsQueryFunc = 0; /* Pointer to pmsQuery function */ +uint32_t pmsPlatformData = 0; /* Data provided by and passed to platform functions */ + + +/* + * Do any initialization needed + */ + +void pmsInit(void) { + + int i; + + simple_lock_init(&pmsBuildLock, 0); /* Initialize the build lock */ + for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy; /* Initialize the table to dummy steps */ + + return; +} + + +/* + * Start the power management stepper on all processors + * + * All processors must be parked. This should be called when the hardware + * is ready to step. Probably only at boot and after wake from sleep. 
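
For orientation, a hedged usage sketch of the stepper API this new file defines: a platform layer validates and installs a step program with pmsBuild, then releases all processors from park with pmsStart. The table and function-table names below are illustrative, not part of this patch:

    extern pmsDef       myStepTable[];               /* hypothetical step program */
    extern pmsSetFunc_t myFuncTab[pmsSetFuncMax];    /* entry 0 must be left 0 */

    static void my_platform_pms_bringup(uint32_t nsteps)
    {
        kern_return_t kr;

        kr = pmsBuild(myStepTable, nsteps * sizeof(pmsDef),
                      myFuncTab, /* platformData */ 0, /* queryFunc */ 0);
        if (kr == KERN_SUCCESS)
            pmsStart();          /* all CPUs leave park and begin stepping */
    }
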
+ * + */ + + void pmsStart(void) { + + boolean_t intr; + + if(!pmsInstalled) return; /* We can't do this if no table installed */ + + intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ + pmsRun(pmsStartUp); /* Start running the stepper everywhere */ + (void)ml_set_interrupts_enabled(intr); /* Restore interruptions */ + + return; + + } + + +/* + * Park the stepper execution. This will force the stepper on this + * processor to abandon its current step and stop. No changes to the + * hardware state is made and any previous step is lost. + * + * This is used as the initial state at startup and when the step table + * is being changed. + * + */ + +void pmsPark(void) { + + boolean_t intr; + + if(!pmsInstalled) return; /* We can't do this if no table installed */ + + intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ + pmsSetStep(pmsParked, 0); /* Park the stepper */ + (void)ml_set_interrupts_enabled(intr); /* Restore interruptions */ + + return; + +} + + +/* + * Steps down to a lower power. + * Interrupts must be off... + */ + +void pmsDown(void) { + + struct per_proc_info *pp; + uint32_t nstate; + + pp = getPerProc(); /* Get our per_proc */ + + if(!pmsInstalled || pp->pms.pmsState == pmsParked) return; /* No stepping if parked or not installed */ + + nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsDown; /* Get the downward step */ + pmsSetStep(nstate, 0); /* Step to it */ + return; +} + + +/* + * Steps up to a higher power. The "timer" parameter is true if the + * step was driven due to the pms timer expiring. + * + * Interrupts must be off... + */ + +void pmsStep(int timer) { + + struct per_proc_info *pp; + uint32_t nstate; + int dir; + + pp = getPerProc(); /* Get our per_proc */ + + if(!pmsInstalled || pp->pms.pmsState == pmsParked) return; /* No stepping if parked or not installed */ + + nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsNext; /* Assume a normal step */ + dir = 1; /* A normal step is a step up */ + + if(timer && (pmsCtls.pmsDefs[pp->pms.pmsState]->pmsSetCmd == pmsDelay)) { /* If the timer expired and we are in a delay step, use the delay branch */ + nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsTDelay; /* Get the delayed step */ + dir = 0; /* Delayed steps are a step down for accounting purposes. */ + } + + pmsSetStep(nstate, dir); /* Step to it */ + return; +} + + +/* + * Set a specific step + * + * We do not do statistics if exiting park + * Interrupts must be off... + * + */ + +void pmsSetStep(uint32_t nstep, int dir) { + + struct per_proc_info *pp; + uint32_t pstate, ret, nCSetCmd, mCSetCmd; + pmsDef *pnstate, *pcstate; + uint64_t tb, nt, dur; + int cpu, frompark; + + pp = getPerProc(); /* Get our per_proc */ + cpu = cpu_number(); /* Get our processor */ + + while(1) { /* Keep stepping until we get a delay */ + + if(pp->pms.pmsCSetCmd & pmsMustCmp) { /* Do we have to finish the delay before changing? */ + while(mach_absolute_time() < pp->pms.pmsPop); /* Yes, spin here... */ + } + + if((nstep == pmsParked) || ((uint32_t)pmsCtls.pmsDefs[nstep]->pmsSetCmd == pmsParkIt)) { /* Are we parking? */ + + tb = mach_absolute_time(); /* What time is it? 
*/ + pp->pms.pmsStamp = tb; /* Show transition now */ + pp->pms.pmsPop = HalfwayToForever; /* Set the pop way into the future */ + pp->pms.pmsState = pmsParked; /* Make sure we are parked */ + setTimerReq(); /* Cancel our timer if going */ + return; + } + + pnstate = pmsCtls.pmsDefs[nstep]; /* Point to the state definition */ + pstate = pp->pms.pmsState; /* Save the current step */ + pp->pms.pmsState = nstep; /* Set the current to the next step */ + + if(pnstate->pmsSetCmd != pmsDelay) { /* If this is not a delayed state, change the actual hardware now */ + if(pnstate->pmsSetCmd & pmsCngCPU) pmsCPUSet(pnstate->pmsSetCmd); /* We have some CPU work to do... */ + if((uint32_t)pnstate->sf.pmsSetFunc) pnstate->sf.pmsSetFunc(pnstate->pmsSetCmd, cpu, pmsPlatformData); /* Tell the platform to set power mode */ + + mCSetCmd = pnstate->pmsSetCmd & (pmsCngXClk | pmsCngCPU | pmsCngVolt); /* Isolate just the change flags */ + mCSetCmd = (mCSetCmd - (mCSetCmd >> 7)) | pmsSync | pmsMustCmp | pmsPowerID; /* Form mask of bits that come from new command */ + nCSetCmd = pp->pms.pmsCSetCmd & ~mCSetCmd; /* Clear changing bits */ + nCSetCmd = nCSetCmd | (pnstate->pmsSetCmd & mCSetCmd); /* Flip on the changing bits and the always copy bits */ + + pp->pms.pmsCSetCmd = nCSetCmd; /* Set it for real */ + } + + tb = mach_absolute_time(); /* What time is it? */ + pp->pms.pmsPop = tb + pnstate->pmsLimit; /* Set the next pop */ + + if((pnstate->pmsSetCmd != pmsDelay) && (pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0)) { /* Is this a synchronous command with a delay? */ + while(mach_absolute_time() < pp->pms.pmsPop); /* Yes, spin here and wait it out... */ + } + +/* + * Gather some statistics + */ + + dur = tb - pp->pms.pmsStamp; /* Get the amount of time we were in the old step */ + pp->pms.pmsStamp = tb; /* Set the new timestamp */ + if(!(pstate == pmsParked)) { /* Only take stats if we were not parked */ + pcstate = pmsCtls.pmsDefs[pstate]; /* Get the previous step */ + pmsCtls.pmsStats[cpu][pcstate->pmsStepID].stTime[dir] += dur; /* Accumulate the total time in the old step */ + pmsCtls.pmsStats[cpu][pcstate->pmsStepID].stCnt[dir] += 1; /* Count transitions */ + } + +/* + * See if we are done chaining steps + */ + + if((pnstate->pmsSetCmd == pmsDelay) + || (!(pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0))) { /* Is this not syncronous and a non-zero delay or a delayed step? */ + setTimerReq(); /* Start the timers ticking */ + break; /* We've stepped as far as we're going to... */ + } + + nstep = pnstate->pmsNext; /* Chain on to the next */ + } + + return; + +} + +/* + * Either park the stepper or force the step on a parked stepper for local processor only + * + */ + +void pmsRunLocal(uint32_t nstep) { + + struct per_proc_info *pp; + uint32_t cstate, ret, lastState; + pmsDef *pnstate, *pcstate; + uint64_t tb, nt, dur; + int cpu, i, j; + boolean_t intr; + + if(!pmsInstalled) return; /* Ignore this if no step programs installed... */ + + intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ + + pp = getPerProc(); /* Get our per_proc */ + + if(nstep == pmsStartUp) { /* Should we start up? */ + pmsCPUInit(); /* Get us up to full with high voltage and park */ + nstep = pmsNormHigh; /* Change request to transition to normal high */ + } + + lastState = pp->pms.pmsState; /* Remember if we are parked now */ + + pmsSetStep(nstep, 1); /* Step to the new state */ + + if((lastState == pmsParked) && (pp->pms.pmsState != pmsParked)) { /* Did we just unpark? 
*/ + cpu = cpu_number(); /* Get our processor */ + for(i = 0; i < pmsMaxStates; i++) { /* Step through the steps and clear the statistics since we were parked */ + pmsCtls.pmsStats[cpu][i].stTime[0] = 0; /* Clear accumulated time - downward */ + pmsCtls.pmsStats[cpu][i].stTime[1] = 0; /* Clear accumulated time - forward */ + pmsCtls.pmsStats[cpu][i].stCnt[0] = 0; /* Clear transition count - downward */ + pmsCtls.pmsStats[cpu][i].stCnt[1] = 0; /* Clear transition count - forward */ + } + } + + (void)ml_set_interrupts_enabled(intr); /* Restore interruptions */ + + return; + +} + +/* + * Control the Power Management Stepper. + * Called from user state by the superuser via a ppc system call. + * Interruptions disabled. + * + */ + +int pmsCntrl(struct savearea *save) { + + uint32_t request, nstep, reqsize, result, presult; + int ret, cpu; + kern_return_t kret; + pmsDef *ndefs; + struct per_proc_info *pp; + + pp = getPerProc(); /* Get our per_proc */ + cpu = cpu_number(); /* Get our processor */ + + if(!is_suser()) { /* We are better than most, */ + save->save_r3 = KERN_FAILURE; /* so we will only talk to the superuser. */ + return 1; /* Turn up our noses, say "harrumph," and walk away... */ + } + + if(save->save_r3 >= pmsCFree) { /* Can we understand the request? */ + save->save_r3 = KERN_INVALID_ARGUMENT; /* What language are these guys talking in, anyway? */ + return 1; /* Cock head like a confused puppy and run away... */ + } + + request = (int)save->save_r3; /* Remember the request */ + reqsize = (uint32_t)save->save_r5; /* Get the size of the config table */ + + if(request == pmsCQuery) { /* Are we just checking? */ + result = pmsCPUquery() & pmsCPU; /* Get the processor data and make sure there is no slop */ + presult = 0; /* Assume nothing */ + if((uint32_t)pmsQueryFunc) presult = pmsQueryFunc(cpu, pmsPlatformData); /* Go get the platform state */ + result = result | (presult & (pmsXClk | pmsVoltage | pmsPowerID)); /* Merge the platform state with no slop */ + save->save_r3 = result; /* Tell 'em... */ + return 1; + } + + if(request == pmsCExperimental) { /* Enter experimental mode? */ + + if(pmsInstalled || (pmsExperimental & 1)) { /* Are we already running or in experimental? */ + save->save_r3 = KERN_FAILURE; /* Fail, since we are already running */ + return 1; + } + + pmsExperimental |= 1; /* Flip us into experimental but don't change other flags */ + + pmsCPUConf(); /* Configure for this machine */ + pmsStart(); /* Start stepping */ + save->save_r3 = KERN_SUCCESS; /* We are victorious... */ + return 1; + + } + + if(request == pmsCCnfg) { /* Do some up-front checking before we commit to doing this */ + if((reqsize > (pmsMaxStates * sizeof(pmsDef))) || (reqsize < (pmsFree * sizeof(pmsDef)))) { /* Check that the size is reasonable */ + save->save_r3 = KERN_NO_SPACE; /* Tell them that they messed up */ + return 1; /* l8r... */ + } + } + + +/* + * We are committed after here. If there are any errors detected, we shouldn't die, but we + * will be stuck in park. + * + * Also, we can possibly end up on another processor after the broadcast. + * + */ + + if(!hw_compare_and_store(0, 1, &pmsSyncrolator)) { /* Are we already doing this? */ + save->save_r3 = KERN_RESOURCE_SHORTAGE; /* Tell them that we are already busy and to try again */ + return 1; /* G'wan away and don't bother me... 
*/ + } + save->save_r3 = KERN_SUCCESS; /* Assume success */ + +// NOTE: We will block in the following code until everyone has finished the prepare + + pmsRun(pmsPrepCng); /* Get everyone parked and in a proper state for step table changes, including me */ + + if(request == pmsCPark) { /* Is all we're supposed to do park? */ + pmsSyncrolator = 0; /* Free us up */ + return 1; /* Well, then we're done... */ + } + + switch(request) { /* Select the routine */ + + case pmsCStart: /* Starts normal steppping */ + nstep = pmsNormHigh; /* Set the request */ + break; + + case pmsCFLow: /* Forces low power */ + nstep = pmsLow; /* Set request */ + break; + + case pmsCFHigh: /* Forces high power */ + nstep = pmsHigh; /* Set request */ + break; + + case pmsCCnfg: /* Loads new stepper program */ + + if(!(ndefs = (pmsDef *)kalloc(reqsize))) { /* Get memory for the whole thing */ + save->save_r3 = KERN_INVALID_ADDRESS; /* Return invalid address */ + pmsSyncrolator = 0; /* Free us up */ + return 1; /* All done... */ + } + + ret = copyin((user_addr_t)((unsigned int)(save->save_r4)), (void *)ndefs, reqsize); /* Get the new config table */ + if(ret) { /* Hmmm, something went wrong with the copyin */ + save->save_r3 = KERN_INVALID_ADDRESS; /* Return invalid address */ + kfree((vm_offset_t)ndefs, reqsize); /* Free up the copied in data */ + pmsSyncrolator = 0; /* Free us up */ + return 1; /* All done... */ + } + + kret = pmsBuild(ndefs, reqsize, 0, 0, 0); /* Go build and replace the tables. Make sure we keep the old platform stuff */ + if(kret) { /* Hmmm, something went wrong with the compilation */ + save->save_r3 = kret; /* Pass back the passed back return code */ + kfree((vm_offset_t)ndefs, reqsize); /* Free up the copied in data */ + pmsSyncrolator = 0; /* Free us up */ + return 1; /* All done... */ + } + + nstep = pmsNormHigh; /* Set the request */ + break; + + default: + panic("pmsCntrl: stepper control is so very, very confused = %08X\n", request); + + } + + pmsRun(nstep); /* Get everyone into step */ + pmsSyncrolator = 0; /* Free us up */ + return 1; /* All done... */ + +} + +/* + * Broadcast a change to all processors including ourselves. + * This must transition before broadcasting because we may block and end up on a different processor. + * + * This will block until all processors have transitioned, so + * obviously, this can block. + * + * Called with interruptions disabled. + * + */ + +void pmsRun(uint32_t nstep) { + + pmsRunLocal(nstep); /* If we aren't parking (we are already parked), transition ourselves */ + (void)cpu_broadcast(&pmsBroadcastWait, pmsRemote, nstep); /* Tell everyone else to do it too */ + + return; + +} + +/* + * Receive a broadcast and react. + * This is called from the interprocessor signal handler. + * We wake up the initiator after we are finished. + * + */ + +void pmsRemote(uint32_t nstep) { + + pmsRunLocal(nstep); /* Go set the step */ + if(!hw_atomic_sub(&pmsBroadcastWait, 1)) { /* Drop the wait count */ + thread_wakeup((event_t)&pmsBroadcastWait); /* If we were the last, wake up the signaller */ + } + return; +} + + +/* + * Build the tables needed for the stepper. This includes both the step definitions and the step control table. + * + * We most absolutely need to be parked before this happens because we're gonna change the table. + * We're going to have to be pretty complete about checking for errors. + * Also, a copy is always made because we don't want to be crippled by not being able to change + * the table or description formats. 
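
One line in pmsSetStep above deserves unpacking: mCSetCmd = (mCSetCmd - (mCSetCmd >> 7)) | pmsSync | pmsMustCmp | pmsPowerID. Each "change" flag sits exactly 7 bits above the value field it governs, so subtracting the flag word shifted right by 7 expands every set flag into a mask covering its field, all flags at once and branch-free:

    static uint32_t change_field_mask(uint32_t setcmd)
    {
        uint32_t m = setcmd & (pmsCngXClk | pmsCngCPU | pmsCngVolt); /* change flags only */
        return m - (m >> 7);                  /* each flag becomes its field's mask */
    }
    /* Worked example: pmsCngXClk | pmsCngVolt = 0x80008000; shifted right 7 it is
       0x01000100, and the difference 0x7F007F00 is exactly pmsXClk | pmsVoltage.
       Fields never borrow from one another because each flag sits above its own
       field; pmsCngCPU's mask also sweeps pmsSync/pmsMustCmp, which the caller
       ORs in unconditionally anyway. */
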
+ * + * We pass in a table of external functions and the new stepper def uses the corresponding + * indexes rather than actual function addresses. This is done so that a proper table can be + * built with the control syscall. It can't supply addresses, so the index has to do. We + * internalize the table so our caller does not need to keep it. Note that passing in a 0 + * will use the current function table. Also note that entry 0 is reserved and must be 0, + * we will check and fail the build. + * + * The platformData parameter is a 32-bit word of data that is passed unaltered to the set function. + * + * The queryFunc parameter is the address of a function that will return the current state of the platform. + * The format of the data returned is the same as the platform specific portions of pmsSetCmd, i.e., pmsXClk, + * pmsVoltage, and any part of pmsPowerID that is maintained by the platform hardware (an example would be + * the values of the gpios that correspond to pmsPowerID). The value should be constructed by querying + * hardware rather than returning a value cached by software. One of the intents of this function is to + * help recover lost or determine initial power states. + * + */ + +kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc) { + + int steps, newsize, i, cstp, nstps, oldAltSize, xdsply; + uint32_t setf; + uint64_t nlimit; + pmsDef *newpd, *oldAlt; + boolean_t intr; + + xdsply = (pmsExperimental & 3) != 0; /* Turn on kprintfs if requested or in experimental mode */ + + if(pdsize % sizeof(pmsDef)) return KERN_INVALID_ARGUMENT; /* Length not multiple of definition size */ + + steps = pdsize / sizeof(pmsDef); /* Get the number of steps supplied */ + + if((steps >= pmsMaxStates) || (steps < pmsFree)) /* Complain if too big or too small */ + return KERN_INVALID_ARGUMENT; /* Squeak loudly!!! 
*/
+
+ if((uint32_t)functab && (uint32_t)functab[0]) /* Verify that if they supplied a new function table, entry 0 is 0 */
+ return KERN_INVALID_ARGUMENT; /* Fail because they didn't reserve entry 0 */
+
+ if(xdsply) kprintf("\n StepID Down Next HWSel HWfun Limit\n");
+
+ for(i = 0; i < steps; i++) { /* Step through and verify the definitions */
+
+ if(xdsply) kprintf(" %6d %6d %6d %08X %6d %20lld\n", pd[i].pmsStepID, pd[i].pmsDown,
+ pd[i].pmsNext, pd[i].pmsSetCmd,
+ pd[i].sf.pmsSetFuncInd, pd[i].pmsLimit);
+
+ if((pd[i].pmsLimit != 0) && (pd[i].pmsLimit < 100ULL)) {
+ if(xdsply) kprintf("error step %3d: pmsLimit too small\n", i);
+ return KERN_INVALID_ARGUMENT; /* Has to be 100µS or more */
+ }
+
+ if((pd[i].pmsLimit != 0xFFFFFFFFFFFFFFFFULL) && (pd[i].pmsLimit > (HalfwayToForever / 1000ULL))) {
+ if(xdsply) kprintf("error step %3d: pmsLimit too big\n", i);
+ return KERN_INVALID_ARGUMENT; /* Can't be too big */
+ }
+
+ if(pd[i].pmsStepID != i) {
+ if(xdsply) kprintf("error step %3d: step ID does not match (%d)\n", i, pd[i].pmsStepID);
+ return KERN_INVALID_ARGUMENT; /* ID must match */
+ }
+
+ if(pd[i].sf.pmsSetFuncInd >= pmsSetFuncMax) {
+ if(xdsply) kprintf("error step %3d: function invalid (%d)\n", i, pd[i].sf.pmsSetFuncInd);
+ return KERN_INVALID_ARGUMENT; /* Fail if this function is not in the table */
+ }
+
+ if((pd[i].pmsDown != pmsParked) && pd[i].pmsDown >= steps) {
+ if(xdsply) kprintf("error step %3d: pmsDown out of range (%d)\n", i, pd[i].pmsDown);
+ return KERN_INVALID_ARGUMENT; /* Step down must be in the table or park */
+ }
+
+ if((pd[i].pmsNext != pmsParked) && pd[i].pmsNext >= steps) {
+ if(xdsply) kprintf("error step %3d: pmsNext out of range (%d)\n", i, pd[i].pmsNext);
+ return KERN_INVALID_ARGUMENT; /* Step up must be in the table or park */
+ }
+
+ if((pd[i].pmsSetCmd == pmsDelay) && (pd[i].pmsTDelay >= steps)) {
+ if(xdsply) kprintf("error step %3d: pmsTDelay out of range (%d)\n", i, pd[i].pmsTDelay);
+ return KERN_INVALID_ARGUMENT; /* Delayed step must be in the table */
+ }
+
+ if((pd[i].pmsSetCmd == pmsDelay) && (pd[i].pmsLimit == 0xFFFFFFFFFFFFFFFFULL)) {
+ if(xdsply) kprintf("error step %3d: delay time limit must not be infinite\n", i);
+ return KERN_INVALID_ARGUMENT; /* Delayed step must have a time limit */
+ }
+
+ }
+
+/*
+ * Verify that there are no infinite synchronous forward loops in the table
+ */
+
+ if(xdsply) kprintf("\nInitial scan passed, start in loop check\n");
+ for(i = 0; i < steps; i++) { /* Start with each step. Inefficient, but who cares */
+
+ cstp = i; /* Set starting point */
+ nstps = 0; /* Initialize chain length counter */
+ while(1) { /* Do until we hit the end */
+ if(pd[cstp].pmsSetCmd == pmsParkIt) break; /* Parking always terminates a chain so no endless loop here */
+ if(pd[cstp].pmsSetCmd == pmsDelay) break; /* Delayed steps always terminate a chain so no endless loop here */
+ if((pd[cstp].pmsLimit != 0) && ((pd[cstp].pmsSetCmd & pmsSync) != pmsSync)) break; /* If time limit is not 0 and not synchronous, no endless loop */
+ if(pd[cstp].pmsNext == pmsParked) break; /* If the next step is parked, no endless loop */
+
+ cstp = pd[cstp].pmsNext; /* Chain to the next */
+ nstps = nstps + 1; /* Count this step */
+ if(nstps >= steps) { /* We've stepped for more steps than we have, must be an endless loop! */
+ if(xdsply) kprintf("error step %3d: infinite pmsNext loop\n", i);
+ return KERN_INVALID_ARGUMENT; /* Suggest to our caller that they can't program...
*/ + } + } + } + + if((pmsExperimental & 4) && (pmsInstalled) && ((uint32_t)functab != 0)) { /* If we are already initted and experimental is locked in, and we are doing first */ + if(xdsply) kprintf("Experimental locked, ignoring driver pmsBuild\n"); + return KERN_RESOURCE_SHORTAGE; /* Just ignore the request. */ + } + + + +/* + * Well, things look ok, let's do it to it... + */ + + if(xdsply) kprintf("Loop check passed, building and installing table\n"); + + newsize = steps * sizeof(pmsDef); /* Get the size needed for the definition blocks */ + + if(!(newpd = (pmsDef *)kalloc(newsize))) { /* Get memory for the whole thing */ + return KERN_RESOURCE_SHORTAGE; /* No storage... */ + } + + bzero((void *)newpd, newsize); /* Make it pretty */ + +/* + * Ok, this is it, finish intitializing, switch the tables, and pray... + * We want no interruptions at all and we need to lock the table. Everybody should be parked, + * so no one should ever touch this. The lock is to keep multiple builders safe. It probably + * will never ever happen, but paranoia is a good thing... + */ + + intr = ml_set_interrupts_enabled(FALSE); /* No interruptions in here */ + simple_lock(&pmsBuildLock); /* Lock out everyone... */ + + if(platformData) pmsPlatformData = platformData; /* Remember the platform data word passed in if any was... */ + if((uint32_t)queryFunc) pmsQueryFunc = queryFunc; /* Remember the query function passed in, if it was... */ + + oldAlt = altDpmsTab; /* Remember any old alternate we had */ + oldAltSize = altDpmsTabSize; /* Remember its size */ + + altDpmsTab = newpd; /* Point to the new table */ + altDpmsTabSize = newsize; /* Set the size */ + + if((uint32_t)functab) { /* Did we get a new function table? */ + for(i = 0; i < pmsSetFuncMax; i++) pmsFuncTab[i] = functab[i]; /* Copy in the new table */ + } + + for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy; /* Initialize the table to point to the dummy step */ + + for(i = 0; i < steps; i++) { /* Replace the step table entries */ + if(pd[i].pmsLimit == 0xFFFFFFFFFFFFFFFFULL) nlimit = century; /* Default to 100 years */ + else nlimit = pd[i].pmsLimit; /* Otherwise use what was supplied */ + + nanoseconds_to_absolutetime(nlimit * 1000ULL, &newpd[i].pmsLimit); /* Convert microseconds to nanoseconds and then to ticks */ + + setf = pd[i].sf.pmsSetFuncInd; /* Make convienient */ + newpd[i].sf.pmsSetFunc = pmsFuncTab[setf]; /* Replace the index with the function address */ + + newpd[i].pmsStepID = pd[i].pmsStepID; /* Set the step ID */ + newpd[i].pmsSetCmd = pd[i].pmsSetCmd; /* Set the hardware selector ID */ + newpd[i].pmsDown = pd[i].pmsDown; /* Set the downward step */ + newpd[i].pmsNext = pd[i].pmsNext; /* Set the next setp */ + newpd[i].pmsTDelay = pd[i].pmsTDelay; /* Set the delayed setp */ + pmsCtls.pmsDefs[i] = &newpd[i]; /* Copy it in */ + } + + pmsCtlp = (uint32_t)&pmsCtls; /* Point to the new pms table */ + + pmsInstalled = 1; /* The stepper has been born or born again... */ + + simple_unlock(&pmsBuildLock); /* Free play! */ + (void)ml_set_interrupts_enabled(intr); /* Interrupts back the way there were */ + + if((uint32_t)oldAlt) kfree((vm_offset_t)oldAlt, oldAltSize); /* If we already had an alternate, free it */ + + if(xdsply) kprintf("Stepper table installed\n"); + + return KERN_SUCCESS; /* We're in fate's hands now... */ +} diff --git a/osfmk/ppc/pms.h b/osfmk/ppc/pms.h new file mode 100644 index 000000000..799b9a462 --- /dev/null +++ b/osfmk/ppc/pms.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2004-2005 Apple Computer, Inc. 
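The calling convention described above is easiest to see with a concrete caller. The sketch below is illustrative only: the driver function, its parameter meanings, and all names are assumptions, not part of this patch; the pmsSetFunc_t signature, the reserved entry 0, and the pmsBuild prototype are the ones declared in pms.h below.

    /* Hypothetical platform driver registering one set function with the stepper.
     * pms.h types this as void (*pmsSetFunc_t)(uint32_t, uint32_t, uint32_t); what
     * the three words mean is platform-defined, so the body is left empty here. */
    static void my_set_func(uint32_t a, uint32_t b, uint32_t c)
    {
    	/* program the external clock / voltage hardware here */
    }

    static pmsSetFunc_t my_functab[pmsSetFuncMax] = {
    	0,		/* entry 0 is reserved and must be 0, or pmsBuild fails the build */
    	my_set_func,	/* a step selects this slot with .sf.pmsSetFuncInd = 1 */
    };

    /* The driver then hands pmsBuild a step table whose entries carry the index 1;
     * pmsBuild internalizes both tables, replacing the index with my_functab[1]:
     *
     *	ret = pmsBuild(my_steps, sizeof(my_steps), my_functab, 0, (pmsQueryFunc_t)0);
     *
     * Passing 0 for the function table, platform data, or query function keeps
     * whatever was registered before, as the comment block above describes. */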
diff --git a/osfmk/ppc/pms.h b/osfmk/ppc/pms.h
new file mode 100644
index 000000000..799b9a462
--- /dev/null
+++ b/osfmk/ppc/pms.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifdef KERNEL_PRIVATE
+
+#ifndef _PPC_PMS_H_
+#define _PPC_PMS_H_
+
+#define pmsMaxStates 64
+#define HalfwayToForever 0x7FFFFFFFFFFFFFFFULL
+#define century 790560000000000ULL
+
+typedef void (*pmsSetFunc_t)(uint32_t, uint32_t, uint32_t);	/* Function used to set hardware power state */
+typedef uint32_t (*pmsQueryFunc_t)(uint32_t, uint32_t);		/* Function used to query hardware power state */
+
+typedef struct pmsStat {
+	uint64_t stTime[2];	/* Total time until switch to next step */
+	uint32_t stCnt[2];	/* Number of times switched to next step */
+} pmsStat;
+
+typedef struct pmsDef {
+	uint64_t pmsLimit;	/* Max time in this state in microseconds */
+	uint32_t pmsStepID;	/* Unique ID for this step */
+	uint32_t pmsSetCmd;	/* Command to select power state */
+#define pmsCngXClk	0x80000000	/* Change external clock */
+#define pmsXUnk		0x7F		/* External clock unknown */
+#define pmsXClk		0x7F000000	/* External clock frequency */
+#define pmsCngCPU	0x00800000	/* Change CPU parameters */
+#define pmsSync		0x00400000	/* Make changes synchronously, i.e., spin until delay finished */
+#define pmsMustCmp	0x00200000	/* Delay must complete before next change */
+#define pmsCPU		0x001F0000	/* CPU frequency */
+#define pmsCPUUnk	0x1F		/* CPU frequency unknown */
+#define pmsCngVolt	0x00008000	/* Change voltage */
+#define pmsVoltage	0x00007F00	/* Voltage */
+#define pmsVoltUnk	0x7F		/* Voltage unknown */
+#define pmsPowerID	0x000000FF	/* Identify power state to HW */
+
+/* Special commands - various things */
+#define pmsDelay	0xFFFFFFFD	/* Delayed step, no processor or platform changes. Timer expiration causes transition to pmsTDelay */
+#define pmsParkIt	0xFFFFFFFF	/* Enters the parked state. No processor or platform changes. Timers cancelled */
+#define pmsCInit	((pmsXUnk << 24) | (pmsCPUUnk << 16) | (pmsVoltUnk << 8))	/* Initial current set command value */
+/* Note: pmsSetFuncInd is an index into a table of function pointers and pmsSetFunc is the address
+ * of a function. Initially, when you create a step table, this field is set as an index into
+ * a table of function addresses that gets passed as a parameter to pmsBuild. When pmsBuild
+ * internalizes the step and function tables, it converts the index to the function address.
+ */
+	union sf {
+		pmsSetFunc_t pmsSetFunc;	/* Function used to set platform power state */
+		uint32_t pmsSetFuncInd;		/* Index to function in function table */
+	} sf;
+
+	uint32_t pmsDown;	/* Next state if going lower */
+	uint32_t pmsNext;	/* Normal next state */
+	uint32_t pmsTDelay;	/* State if command was pmsDelay and timer expired */
+} pmsDef;
+
+typedef struct pmsCtl {
+	pmsStat (*pmsStats)[pmsMaxStates];	/* Pointer to statistics information, 0 if not enabled */
+	pmsDef *pmsDefs[pmsMaxStates];		/* Indexed pointers to steps */
+} pmsCtl;
+
+/*
+ * Note that this block is in the middle of the per_proc and the size (32 bytes)
+ * can't be changed without moving it.
+ */
+
+typedef struct pmsd {
+	uint32_t pmsState;	/* Current power management state */
+	uint32_t pmsCSetCmd;	/* Current select command */
+	uint64_t pmsPop;	/* Time of next step */
+	uint64_t pmsStamp;	/* Time of transition to current state */
+	uint64_t pmsTime;	/* Total time in this state */
+} pmsd;
+
+/*
+ * Required power management step programs
+ */
+
+enum {
+	pmsIdle = 0,		/* Power state in idle loop */
+	pmsNorm = 1,		/* Normal step - usually low power */
+	pmsNormHigh = 2,	/* Highest power in normal step */
+	pmsBoost = 3,		/* Boost/overdrive step */
+	pmsLow = 4,		/* Lowest non-idle power state, no transitions */
+	pmsHigh = 5,		/* Power step for full on, no transitions */
+	pmsPrepCng = 6,		/* Prepare for step table change */
+	pmsPrepSleep = 7,	/* Prepare for sleep */
+	pmsOverTemp = 8,	/* Machine is too hot */
+	pmsEnterNorm = 9,	/* Enter into the normal step program */
+	pmsFree = 10,		/* First available empty step */
+	pmsStartUp = 0xFFFFFFFE,	/* Start stepping */
+	pmsParked = 0xFFFFFFFF	/* Power parked - used when changing stepping table */
+};
+
+/*
+ * Power Management Stepper Control requests
+ */
+
+enum {
+	pmsCPark = 0,		/* Parks the stepper */
+	pmsCStart = 1,		/* Starts normal stepping */
+	pmsCFLow = 2,		/* Forces low power */
+	pmsCFHigh = 3,		/* Forces high power */
+	pmsCCnfg = 4,		/* Loads new stepper program */
+	pmsCQuery = 5,		/* Query current step and state */
+	pmsCExperimental = 6,	/* Enter experimental mode */
+	pmsCFree = 7		/* Next control command to be assigned */
+};
+
+extern pmsCtl pmsCtls;	/* Power Management Stepper control */
+extern uint32_t pmsCtlp;
+extern uint32_t pmsBroadcastWait;	/* Number of outstanding broadcasts */
+extern pmsDef pmsDefault[];
+extern int pmsInstalled;
+extern int pmsExperimental;
+
+#define pmsSetFuncMax 32
+extern pmsSetFunc_t pmsFuncTab[pmsSetFuncMax];
+extern pmsQueryFunc_t pmsQueryFunc;
+extern uint32_t pmsPlatformData;
+
+extern int pmsCntrl(struct savearea *save);
+extern void pmsInit(void);
+extern void pmsStep(int timer);
+extern void pmsDown(void);
+extern void pmsSetStep(uint32_t nstep, int dir);
+extern void pmsRemote(uint32_t nstep);
+extern void pmsCPUSet(uint32_t sel);
+extern uint32_t pmsCPUquery(void);
+extern void pmsCPUConf(void);
+extern void pmsCPUInit(void);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc);
+extern void pmsRun(uint32_t nstep);
+extern void pmsRunLocal(uint32_t nstep);
+extern void pmsPark(void);
+extern void pmsStart(void);
+
+#ifdef __cplusplus
}
+#endif
+
+#endif /* _PPC_PMS_H_ */
+#endif /* KERNEL_PRIVATE */
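To make the pmsSetCmd encoding concrete, here is a small sketch of how the masks above compose into one command word. The helper and its arguments are invented for illustration, and what a given 5-bit CPU code or 7-bit voltage code selects is platform hardware specific; the masks, shifts, and change bits are the ones defined in pms.h above, assumed to be in scope.

    static uint32_t pms_make_setcmd(uint32_t cpucode, uint32_t voltcode, uint32_t powerid)
    {
    	uint32_t cmd = 0;

    	cmd |= pmsCngCPU  | ((cpucode  << 16) & pmsCPU);	/* change the CPU frequency field (0x001F0000) */
    	cmd |= pmsCngVolt | ((voltcode <<  8) & pmsVoltage);	/* change the voltage field (0x00007F00) */
    	cmd |= pmsSync;						/* spin until the change completes */
    	cmd |= powerid & pmsPowerID;				/* power state ID presented to the hardware */
    	return cmd;
    }

pmsCInit, by contrast, loads every field with its "unknown" code, which is the state assumed at boot before anything has been queried or set.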
diff --git a/osfmk/ppc/pmsCPU.c b/osfmk/ppc/pmsCPU.c
new file mode 100644
index 000000000..3350292f0
--- /dev/null
+++ b/osfmk/ppc/pmsCPU.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+pmsDef pmsDefault[] = {
+	{
+		.pmsLimit = century,	/* We can normally stay here for 100 years */
+		.pmsStepID = pmsIdle,	/* Unique identifier to this step */
+		.pmsSetCmd = 0,		/* Dummy platform power level */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsIdle,	/* We stay here */
+		.pmsNext = pmsNorm	/* Next step */
+	},
+	{
+		.pmsLimit = century,	/* We can normally stay here for 100 years */
+		.pmsStepID = pmsNorm,	/* Unique identifier to this step */
+		.pmsSetCmd = 0,		/* Dummy platform power level */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsIdle,	/* Down to idle */
+		.pmsNext = pmsNorm	/* Next step */
+	},
+	{
+		.pmsLimit = century,	/* We can normally stay here for 100 years */
+		.pmsStepID = pmsNormHigh,	/* Unique identifier to this step */
+		.pmsSetCmd = 0,		/* Dummy platform power level */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsIdle,	/* Down to idle */
+		.pmsNext = pmsNormHigh	/* Next step */
+	},
+	{
+		.pmsLimit = century,	/* We can normally stay here for 100 years */
+		.pmsStepID = pmsBoost,	/* Unique identifier to this step */
+		.pmsSetCmd = 0,		/* Dummy platform power level */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsIdle,	/* Step down */
+		.pmsNext = pmsBoost	/* Next step */
+	},
+	{
+		.pmsLimit = century,	/* We can normally stay here for 100 years */
+		.pmsStepID = pmsLow,	/* Unique identifier to this step */
+		.pmsSetCmd = 0,		/* Dummy platform power level */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsLow,	/* We always stay here */
+		.pmsNext = pmsLow	/* We always stay here */
+	},
+	{
+		.pmsLimit = century,	/* We can normally stay here for 100 years */
+		.pmsStepID = pmsHigh,	/* Unique identifier to this step */
+		.pmsSetCmd = 0,		/* Dummy platform power level */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsHigh,	/* We always stay here */
+		.pmsNext = pmsHigh	/* We always stay here */
+	},
+	{
+		.pmsLimit = 0,		/* Time doesn't matter for a prepare for change */
+		.pmsStepID = pmsPrepCng,	/* Unique identifier to this step */
+		.pmsSetCmd = pmsParkIt,	/* Force us to be parked */
+		.sf.pmsSetFuncInd = 0,	/* Dummy platform set function */
+		.pmsDown = pmsPrepCng,	/* We always stay here */
+		.pmsNext = pmsPrepCng	/* We always stay here */
+	},
+	{
+		.pmsLimit = 0,		/* Time doesn't matter for a prepare for sleep */
+		.pmsStepID = pmsPrepSleep,	/* Unique
identifier to this step */ + .pmsSetCmd = pmsParkIt, /* Force us to be parked */ + .sf.pmsSetFuncInd = 0, /* Dummy platform set function */ + .pmsDown = pmsPrepSleep, /* We always stay here */ + .pmsNext = pmsPrepSleep /* We always stay here */ + }, + { + .pmsLimit = 0, /* Time doesn't matter for a prepare for sleep */ + .pmsStepID = pmsOverTemp, /* Unique identifier to this step */ + .pmsSetCmd = 0, /* Dummy platform power level */ + .sf.pmsSetFuncInd = 0, /* Dummy platform set function */ + .pmsDown = pmsOverTemp, /* We always stay here */ + .pmsNext = pmsOverTemp /* We always stay here */ + } +}; + + + +/* + * This is where the CPU part of the stepper code lives. + * + * It also contains the "hacked kext" experimental code. This is/was used for + * experimentation and bringup. It should neither live long nor prosper. + * + */ + +/* + * Set the processor frequency and stuff + */ + +void pmsCPUSet(uint32_t sel) { + int nvoltage, nfreq; + uint32_t oldaack; + struct per_proc_info *pp; + + pp = getPerProc(); /* Get our per_proc */ + + if(!((sel ^ pp->pms.pmsCSetCmd) & pmsCPU)) return; /* If there aren't any changes, bail now... */ + + nfreq = (sel & pmsCPU) >> 16; /* Isolate the new frequency */ + + switch(pp->pf.pfPowerModes & pmType) { /* Figure out what type to do */ + + case pmDFS: /* This is a DFS machine */ + ml_set_processor_speed_dfs(nfreq); /* Yes, set it */ + break; + + case pmDualPLL: + ml_set_processor_speed_dpll(nfreq); /* THIS IS COMPLETELY UNTESTED!!! */ + break; + + case pmPowerTune: /* This is a PowerTune machine */ + ml_set_processor_speed_powertune(nfreq); /* Diddle the deal */ + break; + + default: /* Not this time dolt!!! */ + panic("pmsCPUSet: unsupported power manager type: %08X\n", pp->pf.pfPowerModes); + break; + + } + +} + +/* + * This code configures the initial step tables. It should be called after the timebase frequency is initialized. + */ + +void pmsCPUConf(void) { + + int i; + kern_return_t ret; + pmsSetFunc_t pmsDfltFunc[pmsSetFuncMax]; /* List of functions for the external power control to use */ + + for(i = 0; i < pmsSetFuncMax; i++) pmsDfltFunc[i] = 0; /* Clear this */ + + + ret = pmsBuild((pmsDef *)&pmsDefault, sizeof(pmsDefault), pmsDfltFunc, 0, (pmsQueryFunc_t)0); /* Configure the default stepper */ + +pCCfinish: + if(ret != KERN_SUCCESS) { /* Some screw up? */ + panic("pmsCPUConf: initial stepper table build failed, ret = %08X\n", ret); /* Squeal */ + } + + pmsSetStep(pmsHigh, 1); /* Slew to high speed */ + pmsPark(); /* Then park */ + return; +} + +/* + * This function should be called once for each processor to force the + * processor to the correct voltage and frequency. + */ + +void pmsCPUInit(void) { + + int cpu; + + cpu = cpu_number(); /* Who are we? 
*/ + + kprintf("************ Initializing stepper hardware, cpu %d ******************\n", cpu); /* (BRINGUP) */ + + pmsSetStep(pmsHigh, 1); /* Slew to high speed */ + pmsPark(); /* Then park */ + + kprintf("************ Stepper hardware initialized, cpu %d ******************\n", cpu); /* (BRINGUP) */ + + return; +} + +uint32_t pmsCPUquery(void) { + + uint32_t result; + struct per_proc_info *pp; + uint64_t scdata; + + pp = getPerProc(); /* Get our per_proc */ + + switch(pp->pf.pfPowerModes & pmType) { /* Figure out what type to do */ + + case pmDFS: /* This is a DFS machine */ + result = hid1get(); /* Get HID1 */ + result = (result >> 6) & 0x00030000; /* Isolate the DFS bits */ + break; + + case pmPowerTune: /* This is a PowerTune machine */ + (void)ml_scom_read(PowerTuneStatusReg, &scdata); /* Get the current power level */ + result = (scdata >> (32 + 8)) & 0x00030000; /* Shift the data to align with the set command */ + break; + + default: /* Query not supported for this kind */ + result = 0; /* Return highest if not supported */ + break; + + } + + return result; +} + + diff --git a/osfmk/ppc/ppc_init.c b/osfmk/ppc/ppc_init.c index 71168cd3a..f1dfa33b4 100644 --- a/osfmk/ppc/ppc_init.c +++ b/osfmk/ppc/ppc_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -48,6 +48,8 @@ #include #include #include +#include +#include #include @@ -118,6 +120,7 @@ patch_entry_t patch_table[] = { {NULL, 0x00000000, PATCH_END_OF_TABLE, 0} }; + /* * Forward definition */ @@ -153,7 +156,7 @@ ppc_init( BootProcInfo.cpu_number = 0; BootProcInfo.cpu_flags = 0; - BootProcInfo.istackptr = 0; /* we're on the interrupt stack */ + BootProcInfo.istackptr = 0; /* we're on the interrupt stack */ BootProcInfo.intstack_top_ss = (vm_offset_t)&intstack + INTSTACK_SIZE - FM_SIZE; BootProcInfo.debstack_top_ss = (vm_offset_t)&debstack + KERNEL_STACK_SIZE - FM_SIZE; BootProcInfo.debstackptr = BootProcInfo.debstack_top_ss; @@ -162,17 +165,27 @@ ppc_init( BootProcInfo.FPU_owner = 0; BootProcInfo.VMX_owner = 0; BootProcInfo.pp_cbfr = console_per_proc_alloc(TRUE); - BootProcInfo.rtcPop = 0xFFFFFFFFFFFFFFFFULL; + BootProcInfo.rtcPop = EndOfAllTime; + BootProcInfo.pp2ndPage = (addr64_t)&BootProcInfo; /* Initial physical address of the second page */ + + BootProcInfo.pms.pmsStamp = 0; /* Dummy transition time */ + BootProcInfo.pms.pmsPop = EndOfAllTime; /* Set the pop way into the future */ + + BootProcInfo.pms.pmsState = pmsParked; /* Park the power stepper */ + BootProcInfo.pms.pmsCSetCmd = pmsCInit; /* Set dummy initial hardware state */ + mp = (mapping_t *)BootProcInfo.ppUMWmp; mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1; mp->mpSpace = invalSpace; + pmsInit(); /* Initialize the stepper */ + thread_bootstrap(); thread = current_thread(); thread->machine.curctx = &thread->machine.facctx; thread->machine.facctx.facAct = thread; - thread->machine.umwSpace = invalSpace; /* Initialize user memory window space to invalid */ + thread->machine.umwSpace = invalSpace; /* Initialize user memory window space to invalid */ thread->machine.preemption_count = 1; cpu_bootstrap(); @@ -185,33 +198,34 @@ ppc_init( static_memory_end = round_page(args->topOfKernelData);; - PE_init_platform(FALSE, args); /* Get platform expert set up */ + PE_init_platform(FALSE, args); /* Get platform expert set up */ if (!PE_parse_boot_arg("novmx", &novmx)) novmx=0; /* Special run without VMX? 
*/ - if(novmx) { /* Yeah, turn it off */ - BootProcInfo.pf.Available &= ~pfAltivec; /* Turn off Altivec available */ + if(novmx) { /* Yeah, turn it off */ + BootProcInfo.pf.Available &= ~pfAltivec; /* Turn off Altivec available */ __asm__ volatile("mtsprg 2,%0" : : "r" (BootProcInfo.pf.Available)); /* Set live value */ } if (!PE_parse_boot_arg("fn", &forcenap)) forcenap = 0; /* If force nap not set, make 0 */ else { - if(forcenap < 2) forcenap = forcenap + 1; /* Else set 1 for off, 2 for on */ - else forcenap = 0; /* Clear for error case */ + if(forcenap < 2) forcenap = forcenap + 1; /* Else set 1 for off, 2 for on */ + else forcenap = 0; /* Clear for error case */ } - if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags=0; /* Set diagnostic flags */ - if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts=0; /* Set lcks options */ + if (!PE_parse_boot_arg("pmsx", &pmsExperimental)) pmsExperimental = 0; /* Check if we should start in experimental power management stepper mode */ + if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts = 0; /* Set lcks options */ + if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags = 0; /* Set diagnostic flags */ if(dgWork.dgFlags & enaExpTrace) trcWork.traceMask = 0xFFFFFFFF; /* If tracing requested, enable it */ - if(PE_parse_boot_arg("ctrc", &cputrace)) { /* See if tracing is limited to a specific cpu */ + if(PE_parse_boot_arg("ctrc", &cputrace)) { /* See if tracing is limited to a specific cpu */ trcWork.traceMask = (trcWork.traceMask & 0xFFFFFFF0) | (cputrace & 0xF); /* Limit to 4 */ } if(!PE_parse_boot_arg("tb", &trcWork.traceSize)) { /* See if non-default trace buffer size */ #if DEBUG - trcWork.traceSize = 32; /* Default 32 page trace table for DEBUG */ + trcWork.traceSize = 32; /* Default 32 page trace table for DEBUG */ #else - trcWork.traceSize = 8; /* Default 8 page trace table for RELEASE */ + trcWork.traceSize = 8; /* Default 8 page trace table for RELEASE */ #endif } @@ -228,7 +242,7 @@ ppc_init( else wcte = (wcte != 0); /* Force to 0 or 1 */ if (!PE_parse_boot_arg("mcklog", &mckFlags)) mckFlags = 0; /* If machine check flags not specified, clear */ - else if(mckFlags > 1) mckFlags = 0; /* If bogus, clear */ + else if(mckFlags > 1) mckFlags = 0; /* If bogus, clear */ if (!PE_parse_boot_arg("ht_shift", &hash_table_shift)) /* should we use a non-default hash table size? */ hash_table_shift = 0; /* no, use default size */ @@ -257,9 +271,7 @@ ppc_init( } } } - - PE_init_platform(TRUE, args); - + machine_startup(args); } diff --git a/osfmk/ppc/ppc_vm_init.c b/osfmk/ppc/ppc_vm_init.c index 21a24e3f3..cca618d4f 100644 --- a/osfmk/ppc/ppc_vm_init.c +++ b/osfmk/ppc/ppc_vm_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -64,6 +64,8 @@ extern int disableConsoleOutput; struct shadowBAT shadow_BAT; + + /* * NOTE: mem_size is bogus on large memory machines. We will pin it to 0x80000000 if there is more than 2 GB * This is left only for compatibility and max_mem should be used. 
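Returning to the boot-argument block in ppc_init.c above: each option follows the same defaulting idiom, relying on PE_parse_boot_arg leaving the variable untouched and returning false when the argument is absent from the boot line. A condensed sketch of the pattern, with a made-up argument name:

    uint32_t myopt;				/* hypothetical option, for illustration only */

    if (!PE_parse_boot_arg("myopt", &myopt))	/* true only if present on the boot command line */
    	myopt = 0;				/* absent: apply the default explicitly */

The "fn" argument additionally re-encodes the user's 0/1 into an internal tri-state (0 = unspecified, 1 = force nap off, 2 = force nap on), which is why the parsed value is incremented before use.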
@@ -329,6 +331,8 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args) */ hw_start_trans(); /* Start translating */ + PE_init_platform(TRUE, args); /* Initialize this right off the bat */ + #if 0 GratefulDebInit((bootBumbleC *)&(args->Video)); /* Initialize the GratefulDeb debugger */ diff --git a/osfmk/ppc/rtclock.c b/osfmk/ppc/rtclock.c index bd97881bd..ecd5ee24f 100644 --- a/osfmk/ppc/rtclock.c +++ b/osfmk/ppc/rtclock.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -44,6 +44,8 @@ #include #include #include +#include +#include #include @@ -53,8 +55,6 @@ int sysclk_config(void); int sysclk_init(void); -void treqs(uint32_t dec); - kern_return_t sysclk_gettime( mach_timespec_t *cur_time); @@ -140,21 +140,12 @@ static void nanotime_to_absolutetime( uint32_t nanosecs, uint64_t *result); -static int deadline_to_decrementer( - uint64_t deadline, - uint64_t now); - static void rtclock_alarm_expire( timer_call_param_t p0, timer_call_param_t p1); /* global data declarations */ -#define DECREMENTER_MAX 0x7FFFFFFFUL -#define DECREMENTER_MIN 0xAUL - -natural_t rtclock_decrementer_min; - decl_simple_lock_data(static,rtclock_lock) /* @@ -234,28 +225,16 @@ sysclk_config(void) int sysclk_init(void) { - uint64_t abstime, nexttick; - int decr1, decr2; - struct rtclock_timer *mytimer; + uint64_t abstime; struct per_proc_info *pp; - decr1 = decr2 = DECREMENTER_MAX; - pp = getPerProc(); - mytimer = &pp->rtclock_timer; abstime = mach_absolute_time(); - nexttick = abstime + rtclock_tick_interval; - pp->rtclock_tick_deadline = nexttick; - decr1 = deadline_to_decrementer(nexttick, abstime); - - if (mytimer->is_set) - decr2 = deadline_to_decrementer(mytimer->deadline, abstime); - - if (decr1 > decr2) - decr1 = decr2; - - treqs(decr1); + pp->rtclock_tick_deadline = abstime + rtclock_tick_interval; /* Get the time we need to pop */ + pp->rtcPop = pp->rtclock_tick_deadline; /* Set the rtc pop time the same for now */ + + (void)setTimerReq(); /* Start the timers going */ return (1); } @@ -595,6 +574,43 @@ clock_set_calendar_microtime( commpage_set_timestamp(0,0,0,0); + /* + * Cancel any adjustment in progress. + */ + if (rtclock_calend.adjdelta < 0) { + uint64_t now, t64; + uint32_t delta, t32; + + delta = -rtclock_calend.adjdelta; + + sys = rtclock_calend.epoch; + microsys = rtclock_calend.microepoch; + + now = mach_absolute_time(); + + if (now > rtclock_calend.epoch1) + t64 = now - rtclock_calend.epoch1; + else + t64 = 0; + + t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor; + + if (t32 > delta) + TIME_ADD(sys, 0, microsys, (t32 - delta), USEC_PER_SEC); + + rtclock_calend.epoch = sys; + rtclock_calend.microepoch = microsys; + + sys = t64 = now / rtclock_sec_divisor; + now -= (t64 * rtclock_sec_divisor); + microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor; + + TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC); + } + + rtclock_calend.epoch1 = 0; + rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0; + /* * Calculate the new calendar epoch based on * the new value and the system clock. @@ -613,12 +629,6 @@ clock_set_calendar_microtime( rtclock_calend.epoch = secs; rtclock_calend.microepoch = microsecs; - /* - * Cancel any adjustment in progress. 
-	 */
-	rtclock_calend.epoch1 = 0;
-	rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0;
-
 	simple_unlock(&rtclock_lock);
 
 	/*
@@ -877,9 +887,9 @@ void
 clock_set_timer_deadline(
 	uint64_t			deadline)
 {
-	uint64_t			abstime;
 	int				decr;
-	struct rtclock_timer	*mytimer;
+	uint64_t			abstime;
+	rtclock_timer_t			*mytimer;
 	struct per_proc_info		*pp;
 	spl_t				s;
@@ -887,21 +897,15 @@ clock_set_timer_deadline(
 	pp = getPerProc();
 	mytimer = &pp->rtclock_timer;
 	mytimer->deadline = deadline;
-	mytimer->is_set = TRUE;
-	if (!mytimer->has_expired) {
-		abstime = mach_absolute_time();
-		if ( mytimer->deadline < pp->rtclock_tick_deadline ) {
-			decr = deadline_to_decrementer(mytimer->deadline, abstime);
-			if ( rtclock_decrementer_min != 0 &&
-				 rtclock_decrementer_min < (natural_t)decr )
-				decr = rtclock_decrementer_min;
-
-			treqs(decr);
-
-			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
-				| DBG_FUNC_NONE, decr, 2, 0, 0, 0);
-		}
+
+	if (!mytimer->has_expired && (deadline < pp->rtclock_tick_deadline)) {	/* Has the timer already expired or is it less than what is set? */
+		pp->rtcPop = deadline;		/* Yes, set the new rtc pop time */
+		decr = setTimerReq();		/* Start the timers going */
+
+		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
+			| DBG_FUNC_NONE, decr, 2, 0, 0, 0);
 	}
+
 	splx(s);
 }
@@ -917,64 +921,67 @@ clock_set_timer_func(
 	UNLOCK_RTC(s);
 }
 
-void
-rtclock_intr(
-	int			device,
-	struct savearea	*ssp,
-	spl_t			old);
-
 /*
  * Real-time clock device interrupt.
  */
 void
-rtclock_intr(
-	__unused int		device,
-	struct savearea		*ssp,
-	__unused spl_t		old_spl)
-{
+rtclock_intr(struct savearea *ssp) {
+
 	uint64_t	abstime;
-	int		decr1, decr2;
-	struct rtclock_timer	*mytimer;
+	int		decr;
+	rtclock_timer_t	*mytimer;
 	struct per_proc_info	*pp;
 
-	decr1 = decr2 = DECREMENTER_MAX;
-
 	pp = getPerProc();
+	mytimer = &pp->rtclock_timer;
 
 	abstime = mach_absolute_time();
-	if ( pp->rtclock_tick_deadline <= abstime ) {
+	if (pp->rtclock_tick_deadline <= abstime) {	/* Have we passed the pop time? */
 		clock_deadline_for_periodic_event(rtclock_tick_interval, abstime,
						&pp->rtclock_tick_deadline);
 		hertz_tick(USER_MODE(ssp->save_srr1), ssp->save_srr0);
+		abstime = mach_absolute_time();		/* Refresh the current time since we went away */
 	}
 
-	mytimer = &pp->rtclock_timer;
-
-	abstime = mach_absolute_time();
-	if ( mytimer->is_set &&
-	     mytimer->deadline <= abstime ) {
-		mytimer->has_expired = TRUE; mytimer->is_set = FALSE;
-		(*rtclock_timer_expire)(abstime);
+	if (mytimer->deadline <= abstime) {		/* Have we expired the deadline? */
+		mytimer->has_expired = TRUE;		/* Remember that we popped */
+		mytimer->deadline = EndOfAllTime;	/* Set timer request to the end of all time in case we have no more events */
+		(*rtclock_timer_expire)(abstime);	/* Process pop */
 		mytimer->has_expired = FALSE;
 	}
 
-	abstime = mach_absolute_time();
-	decr1 = deadline_to_decrementer(pp->rtclock_tick_deadline, abstime);
+	pp->rtcPop = (pp->rtclock_tick_deadline < mytimer->deadline) ?	/* Get shortest pop */
+		pp->rtclock_tick_deadline :		/* It was the periodic timer */
+		mytimer->deadline;			/* Actually, an event request */
+
+	decr = setTimerReq();				/* Request the timer pop */
 
-	if (mytimer->is_set)
-		decr2 = deadline_to_decrementer(mytimer->deadline, abstime);
+	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
+		| DBG_FUNC_NONE, decr, 3, 0, 0, 0);
+}
 
-	if (decr1 > decr2)
-		decr1 = decr2;
+/*
+ * Request an interruption at a specific time
+ *
+ * Sets the decrementer to pop at the right time based on the timebase.
+ * The value is chosen by comparing the rtc request with the power management
+ * request. We may add other values at a future time.
+ *
+ */
+
+int setTimerReq(void) {
 
-	if ( rtclock_decrementer_min != 0 &&
-		 rtclock_decrementer_min < (natural_t)decr1 )
-		decr1 = rtclock_decrementer_min;
+	struct per_proc_info	*pp;
+	int			decr;
+	uint64_t		nexttime;
+
+	pp = getPerProc();	/* Get per_proc */
 
-	treqs(decr1);
+	nexttime = pp->rtcPop;	/* Assume main timer */
 
-	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
-		| DBG_FUNC_NONE, decr1, 3, 0, 0, 0);
+	decr = setPop((pp->pms.pmsPop < nexttime) ? pp->pms.pmsPop : nexttime);	/* Schedule timer pop */
+
+	return decr;	/* Pass back what we actually set */
 }
 
 static void
@@ -989,22 +996,6 @@ rtclock_alarm_expire(
 	clock_alarm_intr(SYSTEM_CLOCK, &timestamp);
 }
 
-static int
-deadline_to_decrementer(
-	uint64_t		deadline,
-	uint64_t		now)
-{
-	uint64_t		delt;
-
-	if (deadline <= now)
-		return DECREMENTER_MIN;
-	else {
-		delt = deadline - now;
-		return (delt >= (DECREMENTER_MAX + 1))? DECREMENTER_MAX:
-			((delt >= (DECREMENTER_MIN + 1))? (delt - 1): DECREMENTER_MIN);
-	}
-}
-
 static void
 nanotime_to_absolutetime(
 	uint32_t		secs,
@@ -1110,23 +1101,3 @@ machine_delay_until(
 	} while (now < deadline);
 }
 
-/*
- * Request a decrementer pop
- *
- */
-
-void treqs(uint32_t dec) {
-
-
-	struct per_proc_info *pp;
-	uint64_t nowtime, newtime;
-
-	nowtime = mach_absolute_time();		/* What time is it? */
-	pp = getPerProc();			/* Get our processor block */
-	newtime = nowtime + (uint64_t)dec;	/* Get requested pop time */
-	pp->rtcPop = newtime;			/* Copy it */
-
-	mtdec((uint32_t)(newtime - nowtime));	/* Set decrementer */
-	return;
-
-}
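The rework above replaces the old per-call decrementer arithmetic (deadline_to_decrementer and treqs) with absolute deadlines arbitrated in one place. Below is a standalone model of that arbitration with invented names; the real code keeps the intermediate minimum in pp->rtcPop and hands the final value to setPop():

    #include <stdint.h>

    #define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL	/* sentinel, as in rtclock.h below */

    /* rtclock_intr() reduces the periodic tick deadline and the one-shot event
     * deadline to a single value; setTimerReq() then folds in the power-stepper
     * pop and arms the one decrementer for the earliest of the three. */
    static uint64_t next_decrementer_pop(uint64_t tick_deadline,
    				     uint64_t event_deadline,
    				     uint64_t pms_pop)
    {
    	uint64_t rtcPop = (tick_deadline < event_deadline) ? tick_deadline
    							   : event_deadline;
    	return (pms_pop < rtcPop) ? pms_pop : rtcPop;
    }

A deadline that is not in use is simply parked at EndOfAllTime, so it can never win the comparison.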
diff --git a/osfmk/ppc/rtclock.h b/osfmk/ppc/rtclock.h
new file mode 100644
index 000000000..4c2800d7d
--- /dev/null
+++ b/osfmk/ppc/rtclock.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * @APPLE_FREE_COPYRIGHT@
+ */
+/*
+ * File:	rtclock.h
+ * Purpose:	Routines for handling the machine dependent
+ *		real-time clock.
+ */
+
+#ifndef _PPC_RTCLOCK_H_
+#define _PPC_RTCLOCK_H_
+
+#define EndOfAllTime 0xFFFFFFFFFFFFFFFFULL
+
+extern void rtclock_intr(struct savearea *ssp);
+extern int setTimerReq(void);
+
+#pragma pack(push,4)
+struct rtclock_timer_t {
+	uint64_t	deadline;
+	uint32_t
+	/*boolean_t*/	is_set:1,
+			has_expired:1,
+			:0;
+};
+#pragma pack(pop)
+typedef struct rtclock_timer_t rtclock_timer_t;
+
+#endif /* _PPC_RTCLOCK_H_ */
diff --git a/osfmk/ppc/savearea.h b/osfmk/ppc/savearea.h
index 640b063fc..492b0dda3 100644
--- a/osfmk/ppc/savearea.h
+++ b/osfmk/ppc/savearea.h
@@ -67,7 +67,7 @@ typedef struct savearea_comm {
 /* offset 0x040 */
 	uint64_t	save_misc0;	/* Various stuff */
-	uint64_t	save_misc1;	/* Various stuff */
+	uint64_t	save_misc1;	/* Various stuff - snapshot chain during hibernation */
 	unsigned int	sac_alloc;	/* Bitmap of allocated slots */
 	unsigned int	save_054;
 	unsigned int	save_misc2;
diff --git a/osfmk/ppc/serial_io.c b/osfmk/ppc/serial_io.c
index 848c8c25b..176ff88f3 100644
--- a/osfmk/ppc/serial_io.c
+++ b/osfmk/ppc/serial_io.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -99,7 +99,7 @@ struct scc_tty scc_tty[NSCC_LINE];
 extern unsigned int disableSerialOuput;
 int serial_initted = 0;
-unsigned int scc_parm_done = 0;	/* (TEST/DEBUG) */
+unsigned int scc_parm_done = 0;
 extern unsigned int serialmode;
@@ -182,7 +182,7 @@ boolean_t scc_funnel_initted = FALSE;
 * Adapt/Probe/Attach functions
 */
 boolean_t	scc_uses_modem_control = FALSE;/* patch this with adb */
-decl_simple_lock_data(,scc_stomp)	/* (TEST/DEBUG) */
+decl_simple_lock_data(,scc_stomp)
 
/* This is called VERY early on in the init and therefore has to have
 * hardcoded addresses of the serial hardware control registers.
The @@ -210,7 +210,7 @@ initialize_serial( caddr_t scc_phys_base, int32_t serial_baud ) return; } - simple_lock_init(&scc_stomp, FALSE); /* (TEST/DEBUG) */ + simple_lock_init(&scc_stomp, FALSE); if (serial_baud == -1) serial_baud = DEFAULT_SPEED; @@ -241,7 +241,7 @@ initialize_serial( caddr_t scc_phys_base, int32_t serial_baud ) scc_read_reg_zero(regs, 0, bits);/* Clear the status */ } - scc_parm_done = 1; /* (TEST/DEBUG) */ + scc_parm_done = 1; } serial_initted = TRUE; @@ -323,12 +323,14 @@ scc_getc(int unit, int line, boolean_t wait, boolean_t raw) register scc_regmap_t regs; unsigned char c, value; int rcvalue, from_line; + uint32_t fcrmunge; spl_t s = splhigh(); DECL_FUNNEL_VARS FUNNEL_ENTER(&SCC_FUNNEL); - simple_lock(&scc_stomp); /* (TEST/DEBUG) */ + + simple_lock(&scc_stomp); regs = scc_softc[0].regs; /* @@ -344,7 +346,7 @@ again: break; if (!wait) { - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); splx(s); FUNNEL_EXIT(&SCC_FUNNEL); return -1; @@ -362,14 +364,14 @@ again: if (console_is_serial() && c == ('_' & 0x1f)) { /* Drop into the debugger */ - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); Debugger("Serial Line Request"); - simple_lock(&scc_stomp); /* (TEST/DEBUG) */ + simple_lock(&scc_stomp); scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS); if (wait) { goto again; } - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); splx(s); FUNNEL_EXIT(&SCC_FUNNEL); return -1; @@ -390,7 +392,7 @@ again: scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS); - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); splx(s); FUNNEL_EXIT(&SCC_FUNNEL); @@ -408,14 +410,16 @@ scc_putc(int unit, int line, int c) scc_regmap_t regs; spl_t s; unsigned char value; + uint32_t fcrmunge; DECL_FUNNEL_VARS + if (disableSerialOuput) return 0; s = splhigh(); FUNNEL_ENTER(&SCC_FUNNEL); - simple_lock(&scc_stomp); /* (TEST/DEBUG) */ + simple_lock(&scc_stomp); regs = scc_softc[0].regs; @@ -435,7 +439,7 @@ scc_putc(int unit, int line, int c) break; } while (1); scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS); - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); splx(s); @@ -485,7 +489,7 @@ scc_param(struct scc_tty *tp) assert(FUNNEL_IN_USE(&SCC_FUNNEL)); s = splhigh(); - simple_lock(&scc_stomp); /* (TEST/DEBUG) */ + simple_lock(&scc_stomp); chan = scc_chan(tp->t_dev); scc = &scc_softc[0]; @@ -497,29 +501,29 @@ scc_param(struct scc_tty *tp) if ((sr->flags & (TF_ODDP|TF_EVENP)) == (tp->t_flags & (TF_ODDP|TF_EVENP)) && sr->speed == tp->t_ispeed) { assert(FUNNEL_IN_USE(&SCC_FUNNEL)); - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ - splx(s); /* (TEST/DEBUG) */ - return 0; /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); + splx(s); + return 0; } if(scc_parm_done) { - scc_write_reg(regs, chan, 3, SCC_WR3_RX_8_BITS|SCC_WR3_RX_ENABLE); /* (TEST/DEBUG) */ - sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE; /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 1, sr->wr1); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 15, SCC_WR15_ENABLE_ESCC); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 7, SCC_WR7P_RX_FIFO); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 0, SCC_IE_NEXT_CHAR); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 0, SCC_RESET_EXT_IP); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 0, SCC_RESET_EXT_IP); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 9, SCC_WR9_MASTER_IE|SCC_WR9_NV); /* (TEST/DEBUG) */ - scc_read_reg_zero(regs, 0, bits); /* (TEST/DEBUG) */ - sr->wr1 = 
SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE; /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 1, sr->wr1); /* (TEST/DEBUG) */ - scc_write_reg(regs, chan, 0, SCC_IE_NEXT_CHAR); /* (TEST/DEBUG) */ - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ - splx(s); /* (TEST/DEBUG) */ - return 0; /* (TEST/DEBUG) */ + scc_write_reg(regs, chan, 3, SCC_WR3_RX_8_BITS|SCC_WR3_RX_ENABLE); + sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE; + scc_write_reg(regs, chan, 1, sr->wr1); + scc_write_reg(regs, chan, 15, SCC_WR15_ENABLE_ESCC); + scc_write_reg(regs, chan, 7, SCC_WR7P_RX_FIFO); + scc_write_reg(regs, chan, 0, SCC_IE_NEXT_CHAR); + scc_write_reg(regs, chan, 0, SCC_RESET_EXT_IP); + scc_write_reg(regs, chan, 0, SCC_RESET_EXT_IP); + scc_write_reg(regs, chan, 9, SCC_WR9_MASTER_IE|SCC_WR9_NV); + scc_read_reg_zero(regs, 0, bits); + sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE; + scc_write_reg(regs, chan, 1, sr->wr1); + scc_write_reg(regs, chan, 0, SCC_IE_NEXT_CHAR); + simple_unlock(&scc_stomp); + splx(s); + return 0; } sr->flags = tp->t_flags; @@ -529,7 +533,7 @@ scc_param(struct scc_tty *tp) if (tp->t_ispeed == 0) { sr->wr5 &= ~SCC_WR5_DTR; scc_write_reg(regs, chan, 5, sr->wr5); - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); splx(s); assert(FUNNEL_IN_USE(&SCC_FUNNEL)); @@ -635,7 +639,7 @@ scc_param(struct scc_tty *tp) sr->wr5 |= SCC_WR5_TX_ENABLE; scc_write_reg(regs, chan, 5, sr->wr5); - simple_unlock(&scc_stomp); /* (TEST/DEBUG) */ + simple_unlock(&scc_stomp); splx(s); assert(FUNNEL_IN_USE(&SCC_FUNNEL)); @@ -671,6 +675,8 @@ serial_keyboard_start(void) panic("serial_keyboard_start: we can't get back here\n"); } +static int ptestxxx = 0; + void serial_keyboard_poll(void) { @@ -678,6 +684,7 @@ serial_keyboard_poll(void) uint64_t next; extern void cons_cinput(char ch); /* The BSD routine that gets characters */ + while(1) { /* Do this for a while */ chr = scc_getc(0, 1, 0, 1); /* Get a character if there is one */ if(chr < 0) break; /* The serial buffer is empty */ diff --git a/osfmk/ppc/skiplists.s b/osfmk/ppc/skiplists.s index 5acc66143..d5653260d 100644 --- a/osfmk/ppc/skiplists.s +++ b/osfmk/ppc/skiplists.s @@ -149,34 +149,26 @@ mapSrch64d: ; never for the most-common case of finding a scalar mapping. The full searches ; must check _in_ the inner loop, to get the prev ptrs right. - mr. r9,r9 ; was there a prev ptr? - li r3,0 ; assume we are going to return null - ld r4,pmapSkipLists(r6) ; assume prev ptr null... so next is first - beq-- mapSrch64Exit ; prev ptr was null, search failed - lwz r0,mpFlags(r9) ; get flag bits from prev mapping - ld r10,mpVAddr(r9) ; re-fetch base address of prev ptr - ld r4,mpList0(r9) ; get 64-bit ptr to next mapping, if any - lhz r11,mpBSize(r9) ; get #pages/#segments in block/submap mapping - - rlwinm r0,r0,0,mpType ; isolate mapping type code - cmplwi cr1,r0,mpBlock ; cr1_eq <- block type? - cmplwi r0,mpNest ; cr0_eq <- nested type? - cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested type? - cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? - cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? 
- - rldicr r10,r10,0,51 ; zero low 12 bits of mapping va - bne mapSrch64Exit ; prev mapping was just a scalar page, search failed - sldi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq cr1,mapSrch64f ; we guessed right, it was a block mapping - addi r11,r11,1 ; mpBSize is 1 too low - sldi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments - subi r0,r11,4096 ; get address of last page in submap -mapSrch64f: - add r10,r10,r0 ; r10 <- last page in this mapping - cmpld r5,r10 ; does this mapping cover our page? - bgt mapSrch64Exit ; no, search failed - mr r3,r9 ; yes, we found it + mr. r9,r9 ; was there a prev ptr? + li r3,0 ; assume we are going to return null + ld r4,pmapSkipLists(r6) ; assume prev ptr null... so next is first + beq-- mapSrch64Exit ; prev ptr was null, search failed + lwz r0,mpFlags(r9) ; get flag bits from prev mapping + lhz r11,mpBSize(r9) ; get #pages/#segments in block/submap mapping + + rlwinm r0,r0,mpBSub+1,31,31 ; 0 if 4K bsu or 1 if 32MB bsu + ld r10,mpVAddr(r9) ; re-fetch base address of prev ptr + ori r0,r0,0x3216 ; OR in 0x00003216 (0x3200 and a base rotate of 22) + addi r11,r11,1 ; Convert 0-based to 1-based + rlwnm r0,r0,r0,27,31 ; Rotate to get 12 or 25 + ld r4,mpList0(r9) ; get 64-bit ptr to next mapping, if any + sld r11,r11,r0 ; Get the length in bytes + rldicr r10,r10,0,51 ; zero low 12 bits of mapping va + subi r0,r11,4096 ; get offset last page in mapping + add r10,r10,r0 ; r10 <- last page in this mapping + cmpld r5,r10 ; does this mapping cover our page? + bgt mapSrch64Exit ; no, search failed + mr r3,r9 ; yes, we found it ; found the mapping ; r2 = count of nodes visited @@ -245,34 +237,26 @@ mapSrch32d: ; never for the most-common case of finding a scalar mapping. The full searches ; must check _in_ the inner loop, to get the prev ptrs right. - mr. r9,r9 ; was there a prev ptr? - li r3,0 ; assume we are going to return null - lwz r4,pmapSkipLists+4(r6) ; assume prev ptr null... so next is first - beq- mapSrch32Exit ; prev ptr was null, search failed - lwz r0,mpFlags(r9) ; get flag bits from prev mapping - lwz r10,mpVAddr+4(r9) ; re-fetch base address of prev ptr - lwz r4,mpList0+4(r9) ; get ptr to next mapping, if any - - rlwinm r0,r0,0,mpType ; isolate mapping type code - cmplwi cr1,r0,mpBlock ; cr1_eq <- block type? - cmplwi r0,mpNest ; cr0_eq <- nested type? - cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested type? - cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? - cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? - - bne mapSrch32Exit ; prev mapping was just a scalar page, search failed - lhz r11,mpBSize(r9) ; get #pages/#segments in block/submap mapping - rlwinm r10,r10,0,0,19 ; zero low 12 bits of block mapping va - slwi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq cr1,mapSrch32f ; we guessed right, it was a block mapping - addi r11,r11,1 ; mpBSize is 1 too low - slwi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments - subi r0,r11,4096 ; get address of last page in submap -mapSrch32f: - add r10,r10,r0 ; r10 <- last page in this mapping - cmplw r5,r10 ; does this mapping cover our page? - bgt mapSrch32Exit ; no, search failed - mr r3,r9 ; yes, we found it + mr. r9,r9 ; was there a prev ptr? + li r3,0 ; assume we are going to return null + lwz r4,pmapSkipLists+4(r6) ; assume prev ptr null... 
so next is first + beq- mapSrch32Exit ; prev ptr was null, search failed + lwz r0,mpFlags(r9) ; get flag bits from prev mapping + lhz r11,mpBSize(r9) ; get #pages/#segments in block/submap mapping + lwz r10,mpVAddr+4(r9) ; re-fetch base address of prev ptr + + rlwinm r0,r0,mpBSub+1,31,31 ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu + addi r11,r11,1 ; Convert 0-based to 1-based + ori r0,r0,0x3216 ; OR in 0x00003216 (0x3200 and a base rotate of 22) + rlwnm r0,r0,r0,27,31 ; Rotate to get 12 or 25 + lwz r4,mpList0+4(r9) ; get ptr to next mapping, if any + slw r11,r11,r0 ; Get length in bytes + rlwinm r10,r10,0,0,19 ; zero low 12 bits of block mapping va + subi r0,r11,4096 ; get address of last page in submap + add r10,r10,r0 ; r10 <- last page in this mapping + cmplw r5,r10 ; does this mapping cover our page? + bgt mapSrch32Exit ; no, search failed + mr r3,r9 ; yes, we found it ; found the mapping ; r2 = count of nodes visited @@ -378,35 +362,36 @@ LEXT(mapSearchFull) ; r7 = current skip list number * 8 ; r8 = ptr to skip list vector of mapping pointed to by r9 ; r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap) - ; r10 = prev mappings va, or 0 if r9==pmap + ; r10 = lowest expected next va, 0 at the beginning of the search ; r12 = ptr to the skipListPrev vector in the per-proc .align 5 mapSrchFull64a: ; loop over each mapping - ld r4,mpVAddr(r3) ; get va for this mapping (plus flags in low 12 bits) - addi r2,r2,1 ; count mappings visited - lwz r0,mpFlags(r3) ; get mapping flag bits - - cmpld cr0,r10,r4 ; make sure VAs come in strictly ascending order + addi r2,r2,1 ; count mappings visited + lwz r0,mpFlags(r3) ; get mapping flag bits + lhz r11,mpBSize(r3) ; get #pages/#segments in block/submap mapping + ld r4,mpVAddr(r3) ; get va for this mapping (plus flags in low 12 bits) + + rlwinm r0,r0,mpBSub+1,31,31 ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu + addi r11,r11,1 ; Convert 0-based to 1-based + ori r0,r0,0x3216 ; OR in 0x00003216 (0x3200 and a base rotate of 22) + rlwnm r0,r0,r0,27,31 ; Rotate to get 12 or 25 + sld r11,r11,r0 ; Get the length in bytes rldicr r4,r4,0,51 ; zero low 12 bits of mapping va - cmpld cr1,r5,r4 ; compare the vas - bge-- cr0,mapSkipListPanic ; die if keys are out of order + addic. r0,r11,-4096 ; get offset last page in mapping (set cr0_eq if 1 page) - rlwinm r0,r0,0,mpType ; isolate mapping type code - cmplwi r0,mpNest ; cr0_eq <- nested type? - cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? - cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- nested type or linkage type? - cmplwi cr5,r0,mpBlock ; cr5_eq <- block type? - cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + cmpld cr5,r10,r4 ; make sure VAs come in strictly ascending order + cmpld cr1,r5,r4 ; compare the vas + bgt-- cr5,mapSkipListPanic ; die if keys are out of order blt cr1,mapSrchFull64d ; key is less, try next list beq cr1,mapSrchFull64Found ; this is the correct mapping - beq-- cr0,mapSrchFull64e ; handle block mapping or nested pmap + bne-- cr0,mapSrchFull64e ; handle mapping larger than one page mapSrchFull64b: la r8,mpList0(r3) ; point to skip list vector in this mapping mr r9,r3 ; current becomes previous ldx r3,r7,r8 ; get ptr to next mapping in current list - mr r10,r4 ; remember prev ptrs VA + addi r10,r4,0x1000 ; Get the lowest VA we can get next mapSrchFull64c: mr. r3,r3 ; was there another mapping on current list? 
bne++ mapSrchFull64a ; was another, so loop @@ -427,13 +412,6 @@ mapSrchFull64d: ; the end of the block to see if key fits within it. mapSrchFull64e: - lhz r11,mpBSize(r3) ; get #pages/#segments in block/submap mapping (if nonscalar) - sldi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq cr5,mapSrchFull64f ; we guessed right, it was a block mapping - addi r11,r11,1 ; mpBSize is 1 too low - sldi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments - subi r0,r11,4096 ; get address of last page in submap -mapSrchFull64f: add r4,r4,r0 ; r4 <- last page in this mapping cmpld r5,r4 ; does this mapping cover our page? bgt mapSrchFull64b ; no, try next mapping (r4 is advanced to end of range) @@ -467,35 +445,36 @@ mapSrchFull64Found: ; WARNING: can drop down to here ; r7 = current skip list number * 8 ; r8 = ptr to skip list vector of mapping pointed to by r9 ; r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap) - ; r10 = prev mappings va, or 0 if r9==pmap + ; r10 = lowest expected next va, 0 at the beginning of the search ; r12 = ptr to the skipListPrev vector in the per-proc .align 4 mapSrchFull32a: ; loop over each mapping - lwz r4,mpVAddr+4(r3) ; get va for this mapping (plus flags in low 12 bits) - addi r2,r2,1 ; count mappings visited - lwz r0,mpFlags(r3) ; get mapping flag bits - - cmplw cr0,r10,r4 ; make sure VAs come in strictly ascending order - rlwinm r4,r4,0,0,19 ; zero low 12 bits of mapping va - cmplw cr1,r5,r4 ; compare the vas - bge- cr0,mapSkipListPanic ; die if keys are out of order - - rlwinm r0,r0,0,mpType ; isolate mapping type code - cmplwi cr5,r0,mpLinkage ; cr5_eq <- linkage type? - cmplwi r0,mpNest ; cr0_eq <- nested type? - cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- linkage type or nested type? - cmplwi cr5,r0,mpBlock ; cr5_eq <- block type? - cror cr0_eq,cr5_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? + addi r2,r2,1 ; count mappings visited + lwz r0,mpFlags(r3) ; get mapping flag bits + lhz r11,mpBSize(r3) ; get #pages/#segments in block/submap mapping + lwz r4,mpVAddr+4(r3) ; get va for this mapping (plus flags in low 12 bits) + + rlwinm r0,r0,mpBSub+1,31,31 ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu + addi r11,r11,1 ; Convert 0-based to 1-based + ori r0,r0,0x3216 ; OR in 0x00003216 (0x3200 and a base rotate of 22) + rlwnm r0,r0,r0,27,31 ; Rotate to get 12 or 25 + slw r11,r11,r0 ; Get the length in bytes + rlwinm r4,r4,0,0,19 ; zero low 12 bits of mapping va + addic. r0,r11,-4096 ; get offset last page in mapping (set cr0_eq if 1 page) - blt cr1,mapSrchFull32d ; key is less than this va, try next list - beq- cr1,mapSrchFull32Found ; this is the correct mapping - beq- cr0,mapSrchFull32e ; handle block mapping or nested pmap + cmplw cr0,r10,r4 ; make sure VAs come in strictly ascending order + cmplw cr1,r5,r4 ; compare the vas + bgt- cr0,mapSkipListPanic ; die if keys are out of order + + blt cr1,mapSrchFull32d ; key is less than this va, try next list + beq cr1,mapSrchFull32Found ; this is the correct mapping + bne- cr0,mapSrchFull32e ; handle mapping larger than one page mapSrchFull32b: la r8,mpList0+4(r3) ; point to skip list vector in this mapping mr r9,r3 ; current becomes previous lwzx r3,r7,r8 ; get ptr to next mapping in current list - mr r10,r4 ; remember prev ptrs VA + addi r10,r4,0x1000 ; Get the lowest VA we can get next mapSrchFull32c: mr. 
r3,r3 ; next becomes current bne+ mapSrchFull32a ; was another, so loop @@ -516,13 +495,6 @@ mapSrchFull32d: ; the end of the block to see if our key fits within it. mapSrchFull32e: - lhz r11,mpBSize(r3) ; get #pages/#segments in block/submap mapping (if nonscalar) - slwi r0,r11,12 ; assume block mapping, get size in bytes - 4k - beq cr5,mapSrchFull32f ; we guessed right, it was a block mapping - addi r11,r11,1 ; mpBSize is 1 too low - slwi r11,r11,28 ; in a nested pmap, mpBSize is in units of segments - subi r0,r11,4096 ; get address of last page in submap -mapSrchFull32f: add r4,r4,r0 ; r4 <- last page in this mapping cmplw r5,r4 ; does this mapping cover our page? bgt mapSrchFull32b ; no, try next mapping @@ -1089,25 +1061,17 @@ mapVer64a: ; Do some additional checks (so we only do them once per mapping.) ; First, if a block mapping or nested pmap, compute block end. - rlwinm r29,r29,0,mpType ; isolate mapping type code - cmplwi r29,mpNest ; cr0_eq <- nested type? - cmplwi cr1,r29,mpLinkage ; cr1_eq <- linkage type? - cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- linkage type or nested type? - cmplwi cr1,r29,mpBlock ; cr1_eq <- block type? - cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- block or nested or linkage type? - - subi r21,r21,1 ; count mappings in this pmap - bne++ mapVer64b ; not nested or pmap - lhz r27,mpBSize(r26) ; get #pages or #segments - sldi r29,r27,12 ; assume block mapping, units are (pages-1) - beq cr1,mapVer64b ; guessed correctly - addi r27,r27,1 ; units of nested pmap are (#segs-1) - sldi r29,r27,28 ; convert to #bytes - subi r29,r29,4096 ; get offset to last byte in nested pmap + lhz r27,mpBSize(r26) ; get #pages or #segments + rlwinm r29,r29,mpBSub+1,31,31 ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu + addi r27,r27,1 ; units of nested pmap are (#segs-1) + ori r29,r29,0x3216 ; OR in 0x00003216 (0x3200 and a base rotate of 22) + rlwnm r29,r29,r29,27,31 ; Rotate to get 12 or 25 + subi r21,r21,1 ; count mappings in this pmap + sld r29,r27,r29 ; Get the length in bytes + subi r29,r29,4096 ; get offset to last byte in nested pmap ; Here with r29 = size of block - 4k, or 0 if mapping is a scalar page. -mapVer64b: add r24,r28,r29 ; r24 <- address of last valid page in this mapping la r28,mpList0(r26) ; get base of this mappings vector lwz r27,mpFlags(r26) ; Get the number of lists @@ -1213,32 +1177,22 @@ mapVer32a: ; Do some additional checks (so we only do them once per mapping.) ; First, make sure upper words of the mpList vector are 0. - subi r21,r21,1 ; count mappings in this pmap + lhz r27,mpBSize(r26) ; get #blocks + rlwinm r29,r29,mpBSub+1,31,31 ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu + addi r27,r27,1 ; units of nested pmap are (#segs-1) + ori r29,r29,0x3216 ; OR in 0x00003216 (0x3200 and a base rotate of 22) + rlwnm r29,r29,r29,27,31 ; Rotate to get 12 or 25 + subi r21,r21,1 ; count mappings in this pmap + slw r29,r27,r29 ; Get the length in bytes + subi r29,r29,4096 ; get offset to last byte in nested pmap + lwz r24,mpFlags(r26) ; Get number of lists la r30,mpList0(r26) ; point to base of skiplist vector andi. r24,r24,mpLists ; Clean the number of lists bl mapVerUpperWordsAre0 ; make sure upper words are all 0 (uses r24 and r27) - - ; Then, if a block mapping or nested pmap, compute block end. - - rlwinm r29,r29,0,mpType ; isolate mapping type code - cmplwi cr1,r29,mpLinkage ; cr1_eq <- linkage type? - cmplwi r29,mpNest ; cr0_eq <- nested type? - cror cr0_eq,cr1_eq,cr0_eq ; cr0_eq <- linkage type or nested type? - cmplwi cr1,r29,mpBlock ; cr1_eq <- block type? 
-	cror	cr0_eq,cr1_eq,cr0_eq	; cr0_eq <- block or nested or linkage type?
-
-	bne+	mapVer32b		; not block or nested type
-	lhz	r27,mpBSize(r26)	; get #pages or #segments
-	slwi	r29,r27,12		; assume block mapping, units are pages
-	beq	cr1,mapVer32b		; guessed correctly
-	addi	r27,r27,1		; units of nested pmap are (#segs-1)
-	slwi	r29,r27,28		; convert to #bytes
-	subi	r29,r29,4096		; get offset to last byte in nested pmap
-
+
 ; Here with r29 = size of block - 4k, or 0 if mapping is a scalar page.
-mapVer32b:
 	add	r24,r28,r29		; r24 <- address of last valid page in this mapping
 	la	r28,mpList0+4(r26)	; get base of this mappings vector
 	lwz	r27,mpFlags(r26)	; Get the number of lists
diff --git a/osfmk/ppc/start.s b/osfmk/ppc/start.s
index c5d46ed5c..8222e4c39 100644
--- a/osfmk/ppc/start.s
+++ b/osfmk/ppc/start.s
@@ -425,6 +425,13 @@ noVector:
 			bt	bootCPU,run32
 
 			mfsprg	r30,0				; Phys per proc
+			lwz	r29,PP_HIBERNATE(r30)
+			andi.	r29, r29, 1
+			beq	noHashTableInit			; Skip following if not waking from hibernate
+			bl	EXT(hw_clear_maps)		; Mark all maps as absent from hash table
+			bl	EXT(hw_hash_init)		; Clear hash table
+			bl	EXT(save_snapshot_restore)	; Reset save area chains
+noHashTableInit:
 			bl	EXT(hw_setup_trans)		; Set up hardware needed for translation
 			bl	EXT(hw_start_trans)		; Start translating
diff --git a/osfmk/ppc/vmachmon_asm.s b/osfmk/ppc/vmachmon_asm.s
index 2d5cdb785..538553085 100644
--- a/osfmk/ppc/vmachmon_asm.s
+++ b/osfmk/ppc/vmachmon_asm.s
@@ -1223,7 +1223,7 @@ swap64:	lwz	r22,vmmXAFlgs(r27)	; Get the eXtended Architecture flags
 			lwz	r15,vmmppcpc(r5)		; First line of context
 			lis	r22,hi16(MSR_IMPORT_BITS)	; Get the MSR bits that are controllable by user
 			lwz	r23,vmmppcmsr(r5)
-			ori	r22,r25,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
+			ori	r22,r22,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
 			lwz	r17,vmmppcr0(r5)
 			lwz	r18,vmmppcr1(r5)
 			and	r23,r23,r22			; Keep only the controllable bits
@@ -1259,7 +1259,7 @@ sw64x1:	ld	r15,vmmppcXpc(r5)	; First line of context
 			lis	r22,hi16(MSR_IMPORT_BITS)	; Get the MSR bits that are controllable by user (we will also allow 64-bit here)
 			sldi	r0,r0,63			; Get 64-bit bit
 			ld	r23,vmmppcXmsr(r5)
-			ori	r22,r25,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
+			ori	r22,r22,lo16(MSR_IMPORT_BITS)	; Get the rest of the MSR bits that are controllable by user
 			ld	r17,vmmppcXr0(r5)
 			or	r22,r22,r0			; Add the 64-bit bit
 			ld	r18,vmmppcXr1(r5)
diff --git a/osfmk/vm/vm_fault.c b/osfmk/vm/vm_fault.c
index c601cb0da..75d1c3f23 100644
--- a/osfmk/vm/vm_fault.c
+++ b/osfmk/vm/vm_fault.c
@@ -3081,8 +3081,8 @@ FastPmapEnter:
 				(entry->object.vm_object->shadow_offset))
 				+ entry->offset
 				+ (laddr - entry->vme_start)
-				- ldelta)>>12,
-				ldelta + hdelta, prot,
+				- ldelta) >> 12,
+				((ldelta + hdelta) >> 12), prot,
 				(VM_WIMG_MASK & (int)object->wimg_bits), 0);
 		} else {
 			/* Set up a block mapped area */
@@ -3091,8 +3091,8 @@ FastPmapEnter:
 			(((vm_map_offset_t)
 			(entry->object.vm_object->shadow_offset))
 			+ entry->offset +
-			(laddr - entry->vme_start) - ldelta)>>12,
-			ldelta + hdelta, prot,
+			(laddr - entry->vme_start) - ldelta) >> 12,
+			((ldelta + hdelta) >> 12), prot,
 			(VM_WIMG_MASK & (int)object->wimg_bits), 0);
 		}
 	}
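Both pmap_map_block call sites touched by this patch (vm_fault.c above and vm_pageout.c below) reflect the same interface change: the size argument is now a count of 4K pages rather than a byte length. Schematically, with argument names paraphrased from the call sites rather than taken from a formal prototype:

    /* before: size in bytes */
    pmap_map_block(kernel_pmap, va, ppnum, ldelta + hdelta,         prot, wimg, 0);

    /* after: size in 4K pages */
    pmap_map_block(kernel_pmap, va, ppnum, (ldelta + hdelta) >> 12, prot, wimg, 0);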
diff --git a/osfmk/vm/vm_pageout.c b/osfmk/vm/vm_pageout.c
index d75ec79de..0ccb3e1ac 100644
--- a/osfmk/vm/vm_pageout.c
+++ b/osfmk/vm/vm_pageout.c
@@ -5305,7 +5305,7 @@ vm_paging_map_object(
 		pmap_map_block(kernel_pmap,
 				page_map_offset,
 				page->phys_page,
-				PAGE_SIZE,
+				1,	/* Size is number of 4k pages */
 				VM_PROT_DEFAULT,
 				((int) page->object->wimg_bits &
					VM_WIMG_MASK),
diff --git a/pexpert/ppc/pe_init.c b/pexpert/ppc/pe_init.c
index 02b5251d6..8c6176347 100644
--- a/pexpert/ppc/pe_init.c
+++ b/pexpert/ppc/pe_init.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
@@ -32,6 +32,7 @@
 #include
 #include
+
 /* extern references */
 void pe_identify_machine(void);
@@ -148,7 +149,13 @@ void PE_init_iokit(void)
 
 void PE_init_platform(boolean_t vm_initialized, void *_args)
 {
-	boot_args *args = (boot_args *)_args;
+	DTEntry dsouth, dnorth, root, dcpu;
+	char *model;
+	int msize, size;
+	uint32_t *south, *north, *pdata, *ddata;
+	int i;
+
+	boot_args *args = (boot_args *)_args;
 
 	if (PE_state.initialized == FALSE)
 	{
@@ -179,6 +186,7 @@ void PE_init_platform(boolean_t vm_initialized, void *_args)
 	else
 	{
 		pe_init_debug();
+
 	}
 }
-- 
2.45.2