]> git.saurik.com Git - apple/xnu.git/commitdiff
xnu-792.6.22.tar.gz mac-os-x-1043 v792.6.22
authorApple <opensource@apple.com>
Tue, 25 Oct 2005 00:01:58 +0000 (00:01 +0000)
committerApple <opensource@apple.com>
Tue, 25 Oct 2005 00:01:58 +0000 (00:01 +0000)
136 files changed:
bsd/crypto/aes/aescrypt.c
bsd/hfs/hfs.h
bsd/hfs/hfs_btreeio.c
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_catalog.h
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_endian.c
bsd/hfs/hfs_endian.h
bsd/hfs/hfs_format.h
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfscommon/BTree/BTree.c
bsd/hfs/hfscommon/BTree/BTreeNodeOps.c
bsd/hfs/hfscommon/BTree/BTreeScanner.c
bsd/hfs/hfscommon/headers/BTreesInternal.h
bsd/hfs/hfscommon/headers/BTreesPrivate.h
bsd/kern/kdebug.c
bsd/kern/kern_control.c
bsd/kern/kern_core.c
bsd/kern/kern_descrip.c
bsd/kern/kern_event.c
bsd/kern/kern_symfile.c
bsd/kern/kpi_mbuf.c
bsd/kern/kpi_socketfilter.c
bsd/kern/sysctl_init.c
bsd/kern/sysv_sem.c
bsd/kern/uipc_mbuf.c
bsd/kern/uipc_mbuf2.c
bsd/kern/uipc_socket.c
bsd/kern/uipc_usrreq.c
bsd/net/dlil.c
bsd/net/if.c
bsd/net/if_mib.c
bsd/net/kext_net.h
bsd/netat/atp_write.c
bsd/netinet/ip_divert.c
bsd/netinet/ip_fw2.c
bsd/netinet/ip_input.c
bsd/netinet/ip_output.c
bsd/netinet/raw_ip.c
bsd/netinet/tcp_input.c
bsd/netinet/tcp_subr.c
bsd/netinet/tcp_usrreq.c
bsd/netinet/udp_usrreq.c
bsd/netkey/key.c
bsd/nfs/nfs.h
bsd/nfs/nfs_bio.c
bsd/nfs/nfs_serv.c
bsd/nfs/nfs_subs.c
bsd/nfs/nfs_vnops.c
bsd/sys/fcntl.h
bsd/sys/socketvar.h
bsd/sys/vnode.h
bsd/vfs/vfs_attrlist.c
bsd/vfs/vfs_subr.c
config/MasterVersion
config/System6.0.ppc.exports
config/Unsupported.ppc.exports
iokit/IOKit/IOHibernatePrivate.h [new file with mode: 0644]
iokit/IOKit/Makefile
iokit/Kernel/IODeviceTreeSupport.cpp
iokit/Kernel/IOHibernateIO.cpp [new file with mode: 0644]
iokit/Kernel/IOHibernateInternal.h [new file with mode: 0644]
iokit/Kernel/IOHibernateRestoreKernel.c [new file with mode: 0644]
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/WKdm.h [new file with mode: 0644]
iokit/Kernel/WKdmCompress.c [new file with mode: 0644]
iokit/Kernel/WKdmDecompress.c [new file with mode: 0644]
iokit/conf/Makefile.template
iokit/conf/files
libkern/libkern/Makefile
libkern/libkern/OSCrossEndian.h [new file with mode: 0644]
osfmk/conf/Makefile.i386
osfmk/conf/Makefile.ppc
osfmk/conf/files
osfmk/conf/files.i386
osfmk/conf/files.ppc
osfmk/device/iokit_rpc.c
osfmk/i386/acpi.c
osfmk/i386/cpu.c
osfmk/i386/cpu_data.h
osfmk/i386/hibernate_i386.c [new file with mode: 0644]
osfmk/i386/hibernate_restore.s [new file with mode: 0644]
osfmk/i386/i386_vm_init.c
osfmk/i386/start.s
osfmk/ipc/ipc_kmsg.c
osfmk/kern/hibernate.c [new file with mode: 0644]
osfmk/kern/machine.c
osfmk/kern/sched_prim.c
osfmk/mach/ppc/syscall_sw.h
osfmk/ppc/Firmware.s
osfmk/ppc/Makefile
osfmk/ppc/PPCcalls.h
osfmk/ppc/aligned_data.s
osfmk/ppc/asm.h
osfmk/ppc/cpu.c
osfmk/ppc/db_low_trace.c
osfmk/ppc/exception.h
osfmk/ppc/genassym.c
osfmk/ppc/hibernate_ppc.c [new file with mode: 0644]
osfmk/ppc/hibernate_restore.s [new file with mode: 0644]
osfmk/ppc/hw_exception.s
osfmk/ppc/hw_lock.s
osfmk/ppc/hw_vm.s
osfmk/ppc/interrupt.c
osfmk/ppc/io_map.c
osfmk/ppc/lowglobals.h
osfmk/ppc/lowmem_vectors.s
osfmk/ppc/machine_cpu.h
osfmk/ppc/machine_routines.c
osfmk/ppc/machine_routines_asm.s
osfmk/ppc/mappings.c
osfmk/ppc/mappings.h
osfmk/ppc/misc_asm.s
osfmk/ppc/model_dep.c
osfmk/ppc/pmap.c
osfmk/ppc/pmap.h
osfmk/ppc/pms.c [new file with mode: 0644]
osfmk/ppc/pms.h [new file with mode: 0644]
osfmk/ppc/pmsCPU.c [new file with mode: 0644]
osfmk/ppc/ppc_init.c
osfmk/ppc/ppc_vm_init.c
osfmk/ppc/rtclock.c
osfmk/ppc/rtclock.h [new file with mode: 0644]
osfmk/ppc/savearea.h
osfmk/ppc/serial_io.c
osfmk/ppc/skiplists.s
osfmk/ppc/start.s
osfmk/ppc/vmachmon_asm.s
osfmk/vm/vm_fault.c
osfmk/vm/vm_pageout.c
pexpert/ppc/pe_init.c

index 141cd3fbfb09eb9ece2f0e58cb83dac297fcc40e..f23e9131c54e0251de680772ab0cdec4efc31012 100644 (file)
@@ -123,7 +123,8 @@ extern "C"
 aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,\r
                                         unsigned char *out, const aes_encrypt_ctx cx[1])\r
 {   aes_32t         locals(b0, b1);\r
-    const aes_32t   *kp = cx->ks;\r
+    const aes_32t   *kp;\r
+    const aes_32t   *kptr = cx->ks;\r
 #if defined(ENC_ROUND_CACHE_TABLES)\r
        dtables(t_fn);\r
 #endif\r
@@ -145,6 +146,7 @@ aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, un
 \r
        for (;num_blk; in += AES_BLOCK_SIZE, out += AES_BLOCK_SIZE, --num_blk)\r
        {\r
+               kp = kptr;\r
 #if 0\r
                // Read the plaintext into b1\r
                state_in(b1, in);\r
@@ -289,7 +291,8 @@ aes_rval aes_encrypt_cbc(const unsigned char *in, const unsigned char *in_iv, un
 aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, unsigned int num_blk,\r
                                         unsigned char *out, const aes_decrypt_ctx cx[1])\r
 {   aes_32t        locals(b0, b1);\r
-    const aes_32t *kp = cx->ks + cx->rn * N_COLS;\r
+    const aes_32t *kptr = cx->ks + cx->rn * N_COLS;\r
+       const aes_32t *kp;\r
 #if defined(DEC_ROUND_CACHE_TABLES)\r
        dtables(t_in);\r
 #endif\r
@@ -317,6 +320,7 @@ aes_rval aes_decrypt_cbc(const unsigned char *in, const unsigned char *in_iv, un
 \r
        for (;num_blk; out -= AES_BLOCK_SIZE, --num_blk)\r
        {\r
+               kp = kptr;\r
                // Do the xor part of state_in, where b1 is the previous block's ciphertext.\r
                key_in(b0, b1, kp);\r
 \r
index e9b96c239b18ddb5eabf0798bbf2f9a1681743fd..e238dbb089d8d2d6398436a754fa3c3c78829275 100644 (file)
@@ -247,6 +247,7 @@ typedef struct hfsmount {
 
        lck_mtx_t      hfs_mutex;      /* protects access to hfsmount data */
        void          *hfs_freezing_proc;  /* who froze the fs */
+       lck_rw_t       hfs_insync;     /* protects sync/freeze interaction */
 } hfsmount_t;
 
 typedef hfsmount_t  ExtendedVCB;
index 6889834193e70c4eba700013d110880dce5b95a3..5035285535f9ab683fcd813e881b3f0dc8cf13f1 100644 (file)
@@ -85,36 +85,57 @@ OSStatus GetBTreeBlock(FileReference vp, UInt32 blockNum, GetBlockOptions option
     if (retval == E_NONE) {
         block->blockHeader = bp;
         block->buffer = (char *)buf_dataptr(bp);
+       block->blockNum = buf_lblkno(bp);
         block->blockReadFromDisk = (buf_fromcache(bp) == 0);   /* not found in cache ==> came from disk */
 
                // XXXdbg 
                block->isModified = 0;
 
-#if BYTE_ORDER == LITTLE_ENDIAN
-        /* Endian swap B-Tree node (only if it's a valid block) */
+        /* Check and endian swap B-Tree node (only if it's a valid block) */
         if (!(options & kGetEmptyBlock)) {
             /* This happens when we first open the b-tree, we might not have all the node data on hand */
             if ((((BTNodeDescriptor *)block->buffer)->kind == kBTHeaderNode) &&
                 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize != buf_count(bp)) &&
                 (SWAP_BE16 (((BTHeaderRec *)((char *)block->buffer + 14))->nodeSize) != buf_count(bp))) {
 
-                /* Don't swap the descriptors at all, we don't care (this block will be invalidated) */
-                SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 3);
-
-            /* The node needs swapping */
+                /*
+                 * Don't swap the node descriptor, record offsets, or other records.
+                 * This record will be invalidated and re-read with the correct node
+                 * size once the B-tree control block is set up with the node size
+                 * from the header record.
+                 */
+                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly);
+
+                       } else if (block->blockReadFromDisk) {
+               /*
+                * The node was just read from disk, so always swap/check it.
+                * This is necessary on big endian since the test below won't trigger.
+                */
+                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost);
             } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) == 0x0e00) {
-                SWAP_BT_NODE (block, ISHFSPLUS(VTOVCB(vp)), VTOC(vp)->c_fileid, 0);
-#if 0
-            /* The node is not already in native byte order, hence corrupt */
-            } else if (*((UInt16 *)((char *)block->buffer + (block->blockSize - sizeof (UInt16)))) != 0x000e) {
-                panic ("%s Corrupt B-Tree node detected!\n", "GetBTreeBlock:");
-#endif
+                               /*
+                                * The node was left in the cache in non-native order, so swap it.
+                                * This only happens on little endian, after the node is written
+                                * back to disk.
+                                */
+                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost);
             }
+            
+               /*
+                * If we got an error, then the node is only partially swapped.
+                * We mark the buffer invalid so that the next attempt to get the
+                * node will read it and attempt to swap again, and will notice
+                * the error again.  If we didn't do this, the next attempt to get
+                * the node might use the partially swapped node as-is.
+                */
+            if (retval)
+                               buf_markinvalid(bp);
         }
-#endif
-    } else {
+    }
+    
+    if (retval) {
        if (bp)
-               buf_brelse(bp);
+                       buf_brelse(bp);
         block->blockHeader = NULL;
         block->buffer = NULL;
     }
@@ -146,20 +167,22 @@ void ModifyBlockStart(FileReference vp, BlockDescPtr blockPtr)
 static int
 btree_journal_modify_block_end(struct hfsmount *hfsmp, struct buf *bp)
 {
-#if BYTE_ORDER == LITTLE_ENDIAN
+       int retval;
     struct vnode *vp = buf_vnode(bp);
     BlockDescriptor block;
                                    
     /* Prepare the block pointer */
     block.blockHeader = bp;
     block.buffer = (char *)buf_dataptr(bp);
+    block.blockNum = buf_lblkno(bp);
     /* not found in cache ==> came from disk */
     block.blockReadFromDisk = (buf_fromcache(bp) == 0);
     block.blockSize = buf_count(bp);
 
     // XXXdbg have to swap the data before it goes in the journal
-    SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
-#endif
+    retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
+    if (retval)
+       panic("btree_journal_modify_block_end: about to write corrupt node!\n");
 
     return journal_modify_block_end(hfsmp->jnl, bp);
 }
index 1b05373d5b78d48bc1ea7cbad1dc8c0aa6681eb0..d21b4c4e0b1a51334732cdfe0009683e94400fdf 100644 (file)
@@ -1784,7 +1784,12 @@ struct packdirentry_state {
        linkinfo_t *   cbs_linkinfo;
        struct cat_desc * cbs_desc;
 //     struct dirent  * cbs_stdentry;
+       // followign fields are only used for NFS readdir, which uses the next file id as the seek offset of each entry
        struct direntry * cbs_direntry;
+       struct direntry * cbs_prevdirentry;
+       u_int32_t      cbs_previlinkref;
+       Boolean        cbs_hasprevdirentry;
+       Boolean        cbs_eof;
 };
 
 static int
@@ -1798,7 +1803,8 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp,
        struct dirent catent;
        struct direntry * entry = NULL;
        time_t itime;
-       u_long ilinkref = 0;
+       u_int32_t ilinkref = 0;
+       u_int32_t curlinkref = 0;
        cnid_t  cnid;
        int hide = 0;
        u_int8_t type;
@@ -1809,6 +1815,7 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp,
        size_t maxnamelen;
        size_t uiosize = 0;
        caddr_t uioaddr;
+       Boolean stop_after_pack = false;
        
        hfsmp = state->cbs_hfsmp;
 
@@ -1819,8 +1826,18 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp,
 
        /* We're done when parent directory changes */
        if (state->cbs_parentID != curID) {
-               state->cbs_result = ENOENT;
-               return (0);     /* stop */
+               if (state->cbs_extended) {
+                       if (state->cbs_hasprevdirentry) { /* the last record haven't been returned yet, so we want to stop after
+                                                                                          * packing the last item */
+                               stop_after_pack = true;
+                       } else {
+                               state->cbs_result = ENOENT;
+                               return (0);     /* stop */
+                       }                               
+               } else {
+                       state->cbs_result = ENOENT;
+                       return (0);     /* stop */
+               }
        }
 
        if (state->cbs_extended) {
@@ -1832,95 +1849,93 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp,
                maxnamelen = NAME_MAX;
        }
 
-       if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
-               switch(crp->recordType) {
-               case kHFSPlusFolderRecord:
-                       type = DT_DIR;
-                       cnid = crp->hfsPlusFolder.folderID;
-                       /* Hide our private meta data directory */
-                       if ((curID == kHFSRootFolderID) &&
-                           (cnid == hfsmp->hfs_privdir_desc.cd_cnid)) {
-                               hide = 1;
+       if (state->cbs_extended && stop_after_pack) {
+               cnid = INT_MAX;                 /* the last item returns a non-zero invalid cookie */
+       } else {
+               if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
+                       switch(crp->recordType) {
+                       case kHFSPlusFolderRecord:
+                               type = DT_DIR;
+                               cnid = crp->hfsPlusFolder.folderID;
+                               /* Hide our private meta data directory */
+                               if ((curID == kHFSRootFolderID) &&
+                                       (cnid == hfsmp->hfs_privdir_desc.cd_cnid)) {
+                                       hide = 1;
+                               }
+
+                               break;
+                       case kHFSPlusFileRecord:
+                               itime = to_bsd_time(crp->hfsPlusFile.createDate);
+                               /*
+                                * When a hardlink link is encountered save its link ref.
+                                */
+                               if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) &&
+                                       (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) &&
+                                       ((itime == (time_t)hfsmp->hfs_itime) ||
+                                        (itime == (time_t)hfsmp->hfs_metadata_createdate))) {
+                                       ilinkref = crp->hfsPlusFile.bsdInfo.special.iNodeNum;
+                               }
+                               type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode);
+                               cnid = crp->hfsPlusFile.fileID;
+                               /* Hide the journal files */
+                               if ((curID == kHFSRootFolderID) &&
+                                       (hfsmp->jnl) &&
+                                       ((cnid == hfsmp->hfs_jnlfileid) ||
+                                        (cnid == hfsmp->hfs_jnlinfoblkid))) {
+                                       hide = 1;
+                               }
+                               break;
+                       default:
+                               return (0);     /* stop */
+                       };
+
+                       cnp = (CatalogName*) &ckp->hfsPlus.nodeName;
+                       result = utf8_encodestr(cnp->ustr.unicode, cnp->ustr.length * sizeof(UniChar),
+                                                                       nameptr, &namelen, maxnamelen + 1, ':', 0);
+                       if (result == ENAMETOOLONG) {
+                               result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar),
+                                                                                                        cnp->ustr.unicode, maxnamelen + 1,
+                                                                                                        (ByteCount*)&namelen, nameptr,
+                                                                                                        cnid);         
+                               is_mangled = 1;
                        }
+               } else { /* hfs */
+                       switch(crp->recordType) {
+                       case kHFSFolderRecord:
+                               type = DT_DIR;
+                               cnid = crp->hfsFolder.folderID;
+                               break;
+                       case kHFSFileRecord:
+                               type = DT_REG;
+                               cnid = crp->hfsFile.fileID;
+                               break;
+                       default:
+                               return (0);     /* stop */
+                       };
 
-                       break;
-               case kHFSPlusFileRecord:
-                       itime = to_bsd_time(crp->hfsPlusFile.createDate);
+                       cnp = (CatalogName*) ckp->hfs.nodeName;
+                       result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen + 1,
+                                                                (ByteCount *)&namelen, nameptr);
                        /*
-                        * When a hardlink link is encountered save its link ref.
+                        * When an HFS name cannot be encoded with the current
+                        * volume encoding we use MacRoman as a fallback.
                         */
-                       if ((SWAP_BE32(crp->hfsPlusFile.userInfo.fdType) == kHardLinkFileType) &&
-                           (SWAP_BE32(crp->hfsPlusFile.userInfo.fdCreator) == kHFSPlusCreator) &&
-                           ((itime == (time_t)hfsmp->hfs_itime) ||
-                            (itime == (time_t)hfsmp->hfs_metadata_createdate))) {
-                               ilinkref = crp->hfsPlusFile.bsdInfo.special.iNodeNum;
-                       }
-                       type = MODE_TO_DT(crp->hfsPlusFile.bsdInfo.fileMode);
-                       cnid = crp->hfsPlusFile.fileID;
-                       /* Hide the journal files */
-                       if ((curID == kHFSRootFolderID) &&
-                           (hfsmp->jnl) &&
-                           ((cnid == hfsmp->hfs_jnlfileid) ||
-                            (cnid == hfsmp->hfs_jnlinfoblkid))) {
-                               hide = 1;
-                       }
-                       break;
-               default:
-                       return (0);     /* stop */
-               };
-
-               cnp = (CatalogName*) &ckp->hfsPlus.nodeName;
-               result = utf8_encodestr(cnp->ustr.unicode, cnp->ustr.length * sizeof(UniChar),
-                                       nameptr, &namelen, maxnamelen + 1, ':', 0);
-               if (result == ENAMETOOLONG) {
-                       result = ConvertUnicodeToUTF8Mangled(cnp->ustr.length * sizeof(UniChar),
-                                                            cnp->ustr.unicode, maxnamelen + 1,
-                                                            (ByteCount*)&namelen, nameptr,
-                                                            cnid);             
-                       is_mangled = 1;
+                       if (result)
+                               result = mac_roman_to_utf8(cnp->pstr, maxnamelen + 1,
+                                                                                  (ByteCount *)&namelen, nameptr);
                }
-       } else { /* hfs */
-               switch(crp->recordType) {
-               case kHFSFolderRecord:
-                       type = DT_DIR;
-                       cnid = crp->hfsFolder.folderID;
-                       break;
-               case kHFSFileRecord:
-                       type = DT_REG;
-                       cnid = crp->hfsFile.fileID;
-                       break;
-               default:
-                       return (0);     /* stop */
-               };
-
-               cnp = (CatalogName*) ckp->hfs.nodeName;
-               result = hfs_to_utf8(hfsmp, cnp->pstr, maxnamelen + 1,
-                                   (ByteCount *)&namelen, nameptr);
-               /*
-                * When an HFS name cannot be encoded with the current
-                * volume encoding we use MacRoman as a fallback.
-                */
-               if (result)
-                       result = mac_roman_to_utf8(cnp->pstr, maxnamelen + 1,
-                                   (ByteCount *)&namelen, nameptr);
        }
 
        if (state->cbs_extended) {
-               entry->d_type = type;
-               entry->d_namlen = namelen;
-               entry->d_reclen = uiosize = EXT_DIRENT_LEN(namelen);
-               if (hide)
-                       entry->d_fileno = 0;  /* file number = 0 means skip entry */
-               else
-                       entry->d_fileno = cnid;
-
                /*
                 * The index is 1 relative and includes "." and ".."
                 *
-                * Also stuff the cnid in the upper 32 bits of the cookie.
+                * Also stuff the cnid in the upper 32 bits of the cookie.  The cookie is stored to the previous entry, which
+                * will be packed and copied this time
                 */
-               entry->d_seekoff = (state->cbs_index + 3) | ((u_int64_t)cnid << 32);
-               uioaddr = (caddr_t) entry;
+               state->cbs_prevdirentry->d_seekoff = (state->cbs_index + 3) | ((u_int64_t)cnid << 32);
+               uiosize = state->cbs_prevdirentry->d_reclen;
+               uioaddr = (caddr_t) state->cbs_prevdirentry;
        } else {
                catent.d_type = type;
                catent.d_namlen = namelen;
@@ -1941,58 +1956,89 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp,
                return (0);     /* stop */
        }
 
-       state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio);
-       if (state->cbs_result == 0) {
-               ++state->cbs_index;
+       if (!state->cbs_extended || state->cbs_hasprevdirentry) {
+               state->cbs_result = uiomove(uioaddr, uiosize, state->cbs_uio);
+               if (state->cbs_result == 0) {
+                       ++state->cbs_index;
 
-               /* Remember previous entry */
-               state->cbs_desc->cd_cnid = cnid;
-               if (type == DT_DIR) {
-                       state->cbs_desc->cd_flags |= CD_ISDIR;
-               } else {
-                       state->cbs_desc->cd_flags &= ~CD_ISDIR;
-               }
-               if (state->cbs_desc->cd_nameptr != NULL) {
-                       vfs_removename(state->cbs_desc->cd_nameptr);
-               }
+                       /* Remember previous entry */
+                       state->cbs_desc->cd_cnid = cnid;
+                       if (type == DT_DIR) {
+                               state->cbs_desc->cd_flags |= CD_ISDIR;
+                       } else {
+                               state->cbs_desc->cd_flags &= ~CD_ISDIR;
+                       }
+                       if (state->cbs_desc->cd_nameptr != NULL) {
+                               vfs_removename(state->cbs_desc->cd_nameptr);
+                       }
 #if 0
-               state->cbs_desc->cd_encoding = xxxx;
+                       state->cbs_desc->cd_encoding = xxxx;
 #endif
-               if (!is_mangled) {
-                       state->cbs_desc->cd_namelen = namelen;
-                       state->cbs_desc->cd_nameptr = vfs_addname(nameptr, namelen, 0, 0);
-               } else {
-                       /* Store unmangled name for the directory hint else it will 
-                        * restart readdir at the last location again 
-                        */
-                       char *new_nameptr;
-                       size_t bufsize;
+                       if (!is_mangled) {
+                               state->cbs_desc->cd_namelen = namelen;
+                               state->cbs_desc->cd_nameptr = vfs_addname(nameptr, namelen, 0, 0);
+                       } else {
+                               /* Store unmangled name for the directory hint else it will 
+                                * restart readdir at the last location again 
+                                */
+                               char *new_nameptr;
+                               size_t bufsize;
+                               size_t tmp_namelen = 0;
                        
-                       cnp = (CatalogName *)&ckp->hfsPlus.nodeName;
-                       bufsize = 1 + utf8_encodelen(cnp->ustr.unicode,
-                                                    cnp->ustr.length * sizeof(UniChar),
-                                                    ':', 0);
-                       MALLOC(new_nameptr, char *, bufsize, M_TEMP, M_WAITOK);
-                       result = utf8_encodestr(cnp->ustr.unicode,
-                                           cnp->ustr.length * sizeof(UniChar),
-                                               new_nameptr, &namelen,
-                                           bufsize, ':', 0);
+                               cnp = (CatalogName *)&ckp->hfsPlus.nodeName;
+                               bufsize = 1 + utf8_encodelen(cnp->ustr.unicode,
+                                                                                        cnp->ustr.length * sizeof(UniChar),
+                                                                                        ':', 0);
+                               MALLOC(new_nameptr, char *, bufsize, M_TEMP, M_WAITOK);
+                               result = utf8_encodestr(cnp->ustr.unicode,
+                                                                               cnp->ustr.length * sizeof(UniChar),
+                                                                               new_nameptr, &tmp_namelen,
+                                                                               bufsize, ':', 0);
                        
-                       state->cbs_desc->cd_namelen = namelen;
-                       state->cbs_desc->cd_nameptr = vfs_addname(new_nameptr, namelen, 0, 0);
+                               state->cbs_desc->cd_namelen = tmp_namelen;
+                               state->cbs_desc->cd_nameptr = vfs_addname(new_nameptr, tmp_namelen, 0, 0);
                        
-                       FREE(new_nameptr, M_TEMP);
-               } 
+                               FREE(new_nameptr, M_TEMP);
+                       } 
+               }
+               if (state->cbs_hasprevdirentry) {
+                       curlinkref = ilinkref;               /* save current */
+                       ilinkref = state->cbs_previlinkref;  /* use previous */
+               }
+               /*
+                * Record any hard links for post processing.
+                */
+               if ((ilinkref != 0) &&
+                       (state->cbs_result == 0) &&
+                       (state->cbs_nlinks < state->cbs_maxlinks)) {
+                       state->cbs_linkinfo[state->cbs_nlinks].dirent_addr = uiobase;
+                       state->cbs_linkinfo[state->cbs_nlinks].link_ref = ilinkref;
+                       state->cbs_nlinks++;
+               }
+               if (state->cbs_hasprevdirentry) {
+                       ilinkref = curlinkref;   /* restore current */
+               }
        }
-       /*
-        * Record any hard links for post processing.
-        */
-       if ((ilinkref != 0) &&
-           (state->cbs_result == 0) &&
-           (state->cbs_nlinks < state->cbs_maxlinks)) {
-               state->cbs_linkinfo[state->cbs_nlinks].dirent_addr = uiobase;
-               state->cbs_linkinfo[state->cbs_nlinks].link_ref = ilinkref;
-               state->cbs_nlinks++;
+
+       if (state->cbs_extended) {      /* fill the direntry to be used the next time */
+               if (stop_after_pack) {
+                       state->cbs_eof = true;
+                       return (0);     /* stop */
+               }
+               entry->d_type = type;
+               entry->d_namlen = namelen;
+               entry->d_reclen = EXT_DIRENT_LEN(namelen);
+               if (hide)
+                       entry->d_fileno = 0;  /* file number = 0 means skip entry */
+               else
+                       entry->d_fileno = cnid;
+               /* swap the current and previous entry */
+               struct direntry * tmp;
+               tmp = state->cbs_direntry;
+               state->cbs_direntry = state->cbs_prevdirentry;
+               state->cbs_prevdirentry = tmp;
+               state->cbs_hasprevdirentry = true;
+               state->cbs_previlinkref = ilinkref;
        }
 
        /* Continue iteration if there's room */
@@ -2007,7 +2053,7 @@ cat_packdirentry(const CatalogKey *ckp, const CatalogRecord *crp,
 __private_extern__
 int
 cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint,
-               uio_t uio, int extended, int * items)
+                                 uio_t uio, int extended, int * items, int * eofflag)
 {
        FCB* fcb;
        BTreeIterator * iterator;
@@ -2022,16 +2068,20 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
        
        fcb = GetFileControlBlock(hfsmp->hfs_catalog_vp);
 
-       /* Get a buffer for collecting link info and for a btree iterator */
+       /*
+        * Get a buffer for link info array, btree iterator and a direntry:
+        */
        maxlinks = MIN(entrycnt, uio_resid(uio) / SMALL_DIRENTRY_SIZE);
        bufsize = (maxlinks * sizeof(linkinfo_t)) + sizeof(*iterator);
        if (extended) {
-               bufsize += sizeof(struct direntry);
+               bufsize += 2*sizeof(struct direntry);
        }
        MALLOC(buffer, void *, bufsize, M_TEMP, M_WAITOK);
        bzero(buffer, bufsize);
 
        state.cbs_extended = extended;
+       state.cbs_hasprevdirentry = false;
+       state.cbs_previlinkref = 0;
        state.cbs_nlinks = 0;
        state.cbs_maxlinks = maxlinks;
        state.cbs_linkinfo = (linkinfo_t *) buffer;
@@ -2041,7 +2091,9 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
        have_key = 0;
        index = dirhint->dh_index + 1;
        if (extended) {
-               state.cbs_direntry = (struct direntry *)((char *)buffer + sizeof(BTreeIterator));
+               state.cbs_direntry = (struct direntry *)((char *)iterator + sizeof(BTreeIterator));
+               state.cbs_prevdirentry = state.cbs_direntry + 1;
+               state.cbs_eof = false;
        }
        /*
         * Attempt to build a key from cached filename
@@ -2100,15 +2152,25 @@ cat_getdirentries(struct hfsmount *hfsmp, int entrycnt, directoryhint_t *dirhint
        state.cbs_result = 0;
        state.cbs_parentID = dirhint->dh_desc.cd_parentcnid;
 
+       enum BTreeIterationOperations op;
+       if (extended && index != 0 && have_key)
+               op = kBTreeCurrentRecord;
+       else
+               op = kBTreeNextRecord;
+
        /*
         * Process as many entries as possible starting at iterator->key.
         */
-       result = BTIterateRecords(fcb, kBTreeNextRecord, iterator,
+       result = BTIterateRecords(fcb, op, iterator,
                                  (IterateCallBackProcPtr)cat_packdirentry, &state);
 
        /* Note that state.cbs_index is still valid on errors */
        *items = state.cbs_index - index;
        index = state.cbs_index;
+
+       if (state.cbs_eof) {
+               *eofflag = 1;
+       }
        
        /* Finish updating the catalog iterator. */
        dirhint->dh_desc.cd_hint = iterator->hint.nodeNum;
index 63c2fe994850440e8f3d1cda8868edc0f5bd51f9..2f91eae900a1309bad1e6c919761eb70c5ce17c8 100644 (file)
@@ -269,7 +269,8 @@ extern int cat_getdirentries(
                        directoryhint_t *dirhint,
                        uio_t uio,
                        int extended,
-                       int * items);
+                       int * items,
+                       int * eofflag);
 
 extern int cat_insertfilethread (
                        struct hfsmount *hfsmp,
index 8351989ed54824ed1cb75340f768dd6a1146ddcd..132b17cc2e2c7ba3cc0afd73514496e9048f1d96 100644 (file)
@@ -85,9 +85,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
        v_type = vnode_vtype(vp);
        cp = VTOC(vp);
 
-       if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp)) {
+       if ((hfsmp->hfs_flags & HFS_READ_ONLY) || vnode_issystem(vp) ||
+           (hfsmp->hfs_freezing_proc == p)) {
                return (0);
        }
+
        /*
         * Ignore nodes related to stale file handles.
         */
@@ -142,11 +144,11 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
                    // them in the catalog entry and then double
                    // free them later.
                    //
-                   if (hfs_start_transaction(hfsmp) != 0) {
-                       error = EINVAL;
-                       goto out;
-                   }
-                   started_tr = 1;
+//                 if (hfs_start_transaction(hfsmp) != 0) {
+//                     error = EINVAL;
+//                     goto out;
+//                 }
+//                 started_tr = 1;
                    
                        /*
                         * Since we're already inside a transaction,
index 0341f15db049c4a559f7e8ab734030e6236661f0..304f27e839f28f2f5a906ee25eb37b0951fb3a2f 100644 (file)
 
 #include "hfs_endian.h"
 #include "hfs_dbg.h"
+#include "hfscommon/headers/BTreesPrivate.h"
 
 #undef ENDIAN_DEBUG
 
-/* Private swapping routines */
-int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, int unswap);
-int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, int unswap);
+/*
+ * Internal swapping routines
+ *
+ * These routines handle swapping the records of leaf and index nodes.  The
+ * layout of the keys and records varies depending on the kind of B-tree
+ * (determined by fileID).
+ *
+ * The direction parameter must be kSwapBTNodeBigToHost or kSwapBTNodeHostToBig.
+ * The kSwapBTNodeHeaderRecordOnly "direction" is not valid for these routines.
+ */
+static int hfs_swap_HFSPlusBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction);
+static int hfs_swap_HFSBTInternalNode (BlockDescriptor *src, HFSCatalogNodeID fileID, enum HFSBTSwapDirection direction);
 
 /*
  * hfs_swap_HFSPlusForkData
- *
- *  There's still a few spots where we still need to swap the fork data.
  */
-void
+static void
 hfs_swap_HFSPlusForkData (
     HFSPlusForkData *src
 )
@@ -70,79 +78,142 @@ hfs_swap_HFSPlusForkData (
 int
 hfs_swap_BTNode (
     BlockDescriptor *src,
-    int isHFSPlus,
-    HFSCatalogNodeID fileID,
-    int unswap
+    vnode_t vp,
+    enum HFSBTSwapDirection direction
 )
 {
     BTNodeDescriptor *srcDesc = src->buffer;
     UInt16 *srcOffs = NULL;
-
+       BTreeControlBlockPtr btcb = (BTreeControlBlockPtr)VTOF(vp)->fcbBTCBPtr;
     UInt32 i;
     int error = 0;
 
-
 #ifdef ENDIAN_DEBUG
-    if (unswap == 0) {
-        printf ("BE -> LE Swap\n");
-    } else if (unswap == 1) {
-        printf ("LE -> BE Swap\n");
-    } else if (unswap == 3) {
+    if (direction == kSwapBTNodeBigToHost) {
+        printf ("BE -> Native Swap\n");
+    } else if (direction == kSwapBTNodeHostToBig) {
+        printf ("Native -> BE Swap\n");
+    } else if (direction == kSwapBTNodeHeaderRecordOnly) {
         printf ("Not swapping descriptors\n");
     } else {
-        panic ("%s This is impossible", "hfs_swap_BTNode:");
+        panic ("hfs_swap_BTNode: This is impossible");
     }
 #endif
 
-    /* If we are doing a swap */
-    if (unswap == 0) {
-        /* Swap the node descriptor */
+    /*
+     * If we are doing a swap from on-disk to in-memory, then swap the node
+     * descriptor and record offsets before we need to use them.
+     */
+    if (direction == kSwapBTNodeBigToHost) {
         srcDesc->fLink         = SWAP_BE32 (srcDesc->fLink);
         srcDesc->bLink         = SWAP_BE32 (srcDesc->bLink);
     
-        /* Don't swap srcDesc->kind */
-        /* Don't swap srcDesc->height */
+       /*
+        * When first opening a BTree, we have to read the header node before the
+        * control block is initialized.  In this case, totalNodes will be zero,
+        * so skip the bounds checking.
+        */
+       if (btcb->totalNodes != 0) {
+                       if (srcDesc->fLink >= btcb->totalNodes) {
+                               printf("hfs_swap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink);
+                               error = fsBTInvalidHeaderErr;
+                               goto fail;
+                       }
+                       if (srcDesc->bLink >= btcb->totalNodes) {
+                               printf("hfs_swap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink);
+                               error = fsBTInvalidHeaderErr;
+                               goto fail;
+                       }
+               }
+               
+               /* 
+                * Check srcDesc->kind.  Don't swap it because it's only one byte.
+                */
+               if (srcDesc->kind < kBTLeafNode || srcDesc->kind > kBTMapNode) {
+                       printf("hfs_swap_BTNode: invalid node kind (%d)\n", srcDesc->kind);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+               
+               /*
+                * Check srcDesc->height.  Don't swap it because it's only one byte.
+                */
+               if (srcDesc->height > btcb->treeDepth) {
+                       printf("hfs_swap_BTNode: invalid node height (%d)\n", srcDesc->height);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+        
         /* Don't swap srcDesc->reserved */
     
         srcDesc->numRecords    = SWAP_BE16 (srcDesc->numRecords);
         
-        /* Swap the node offsets (including the free space one!) */
+        /*
+         * Swap the node offsets (including the free space one!).
+         */
         srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - ((srcDesc->numRecords + 1) * sizeof (UInt16))));
 
-        /* Sanity check */
-        if ((char *)srcOffs > ((char *)src->buffer + src->blockSize)) {
-            panic ("%s Too many records in the B-Tree node", "hfs_swap_BTNode:");
+        /*
+         * Sanity check that the record offsets are within the node itself.
+         */
+        if ((char *)srcOffs > ((char *)src->buffer + src->blockSize) ||
+            (char *)srcOffs < ((char *)src->buffer + sizeof(BTNodeDescriptor))) {
+            printf("hfs_swap_BTNode: invalid record count (0x%04X)\n", srcDesc->numRecords);
+            error = fsBTInvalidHeaderErr;
+            goto fail;
         }
 
-        for (i = 0; i < srcDesc->numRecords + 1; i++) {
+               /*
+                * Swap and sanity check each of the record offsets.
+                */
+        for (i = 0; i <= srcDesc->numRecords; i++) {
             srcOffs[i] = SWAP_BE16 (srcOffs[i]);
 
-            /* Sanity check */
-            if (srcOffs[i] >= src->blockSize) {
-                panic ("%s B-Tree node offset out of range", "hfs_swap_BTNode:");
+            /*
+             * Sanity check: must be even, and within the node itself.
+             *
+             * We may be called to swap an unused node, which contains all zeroes.
+             * This is why we allow the record offset to be zero.
+             */
+            if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) {
+               printf("hfs_swap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+               error = fsBTInvalidHeaderErr;
+               goto fail;
+            }
+
+            /*
+             * Make sure the offsets are strictly increasing.  Note that we're looping over
+             * them backwards, hence the order in the comparison.
+             */
+            if ((i != 0) && (srcOffs[i] >= srcOffs[i-1])) {
+               printf("hfs_swap_BTNode: offsets %d and %d out of order (0x%04X, 0x%04X)\n",
+                   srcDesc->numRecords-i-1, srcDesc->numRecords-i, srcOffs[i], srcOffs[i-1]);
+               error = fsBTInvalidHeaderErr;
+               goto fail;
             }
         }
     }
     
-    /* Swap the records (ordered by frequency of access) */
-    /* Swap a B-Tree internal node */
+    /*
+     * Swap the records (ordered by frequency of access)
+     */
     if ((srcDesc->kind == kBTIndexNode) ||
         (srcDesc-> kind == kBTLeafNode)) {
 
-        if (isHFSPlus) {
-            error = hfs_swap_HFSPlusBTInternalNode (src, fileID, unswap);
+        if (VTOVCB(vp)->vcbSigWord == kHFSPlusSigWord) {
+            error = hfs_swap_HFSPlusBTInternalNode (src, VTOC(vp)->c_fileid, direction);
         } else {
-            error = hfs_swap_HFSBTInternalNode (src, fileID, unswap);
+            error = hfs_swap_HFSBTInternalNode (src, VTOC(vp)->c_fileid, direction);
         }
         
-    /* Swap a B-Tree map node */
+        if (error) goto fail;
+        
     } else if (srcDesc-> kind == kBTMapNode) {
         /* Don't swap the bitmaps, they'll be done in the bitmap routines */
     
-    /* Swap a B-Tree header node */
     } else if (srcDesc-> kind == kBTHeaderNode) {
-        /* The header's offset is hard-wired because we cannot trust the offset pointers */
-        BTHeaderRec *srcHead = (BTHeaderRec *)((char *)src->buffer + 14);
+        /* The header's offset is hard-wired because we cannot trust the offset pointers. */
+        BTHeaderRec *srcHead = (BTHeaderRec *)((char *)src->buffer + sizeof(BTNodeDescriptor));
         
         srcHead->treeDepth             =       SWAP_BE16 (srcHead->treeDepth);
         
@@ -161,34 +232,93 @@ hfs_swap_BTNode (
         srcHead->attributes            =       SWAP_BE32 (srcHead->attributes);
 
         /* Don't swap srcHead->reserved1 */
-        /* Don't swap srcHead->btreeType */
+        /* Don't swap srcHead->btreeType; it's only one byte */
         /* Don't swap srcHead->reserved2 */
         /* Don't swap srcHead->reserved3 */
         /* Don't swap bitmap */
     }
     
-    /* If we are doing an unswap */
-    if (unswap == 1) {
-        /* Swap the node descriptor */
+    /*
+     * If we are doing a swap from in-memory to on-disk, then swap the node
+     * descriptor and record offsets after we're done using them.
+     */
+    if (direction == kSwapBTNodeHostToBig) {
+               /*
+                * Sanity check and swap the forward and backward links.
+                */
+               if (srcDesc->fLink >= btcb->totalNodes) {
+                       printf("hfs_UNswap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+               if (srcDesc->bLink >= btcb->totalNodes) {
+                       printf("hfs_UNswap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
         srcDesc->fLink         = SWAP_BE32 (srcDesc->fLink);
         srcDesc->bLink         = SWAP_BE32 (srcDesc->bLink);
     
-        /* Don't swap srcDesc->kind */
-        /* Don't swap srcDesc->height */
+               /* 
+                * Check srcDesc->kind.  Don't swap it because it's only one byte.
+                */
+               if (srcDesc->kind < kBTLeafNode || srcDesc->kind > kBTMapNode) {
+                       printf("hfs_UNswap_BTNode: invalid node kind (%d)\n", srcDesc->kind);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+
+               /* 
+                * Check srcDesc->height.  Don't swap it because it's only one byte.
+                */
+               if (srcDesc->height > btcb->treeDepth) {
+                       printf("hfs_UNswap_BTNode: invalid node height (%d)\n", srcDesc->height);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+
         /* Don't swap srcDesc->reserved */
     
-        /* Swap the node offsets (including the free space one!) */
+        /*
+         * Swap the node offsets (including the free space one!).
+         */
         srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - ((srcDesc->numRecords + 1) * sizeof (UInt16))));
 
-        /* Sanity check */
-        if ((char *)srcOffs > ((char *)src->buffer + src->blockSize)) {
-            panic ("%s Too many records in the B-Tree node", "hfs_swap_BTNode:");
+        /*
+         * Sanity check that the record offsets are within the node itself.
+         */
+        if ((char *)srcOffs > ((char *)src->buffer + src->blockSize) ||
+               (char *)srcOffs < ((char *)src->buffer + sizeof(BTNodeDescriptor))) {
+            printf("hfs_UNswap_BTNode: invalid record count (0x%04X)\n", srcDesc->numRecords);
+            error = fsBTInvalidHeaderErr;
+            goto fail;
         }
 
-        for (i = 0; i < srcDesc->numRecords + 1; i++) {
-            /* Sanity check */
-            if (srcOffs[i] >= src->blockSize) {
-                panic ("%s B-Tree node offset out of range", "hfs_swap_BTNode:");
+               /*
+                * Swap and sanity check each of the record offsets.
+                */
+        for (i = 0; i <= srcDesc->numRecords; i++) {
+            /*
+             * Sanity check: must be even, and within the node itself.
+             *
+             * We may be called to swap an unused node, which contains all zeroes.
+             * This is why we allow the record offset to be zero.
+             */
+            if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) {
+               printf("hfs_UNswap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+               error = fsBTInvalidHeaderErr;
+               goto fail;
+            }
+
+            /*
+             * Make sure the offsets are strictly increasing.  Note that we're looping over
+             * them backwards, hence the order in the comparison.
+             */
+            if ((i < srcDesc->numRecords) && (srcOffs[i+1] >= srcOffs[i])) {
+               printf("hfs_UNswap_BTNode: offsets %d and %d out of order (0x%04X, 0x%04X)\n",
+                   srcDesc->numRecords-i-2, srcDesc->numRecords-i-1, srcOffs[i+1], srcOffs[i]);
+               error = fsBTInvalidHeaderErr;
+               goto fail;
             }
 
             srcOffs[i] = SWAP_BE16 (srcOffs[i]);
@@ -196,86 +326,182 @@ hfs_swap_BTNode (
         
         srcDesc->numRecords    = SWAP_BE16 (srcDesc->numRecords);
     }
-    
+
+fail:
+       if (error) {
+               /*
+                * Log some useful information about where the corrupt node is.
+                */
+               printf("node=%lld fileID=%u volume=%s device=%s\n", src->blockNum, VTOC(vp)->c_fileid,
+                       VTOVCB(vp)->vcbVN, vfs_statfs(vnode_mount(vp))->f_mntfromname);
+               VTOVCB(vp)->vcbFlags |= kHFS_DamagedVolume;
+       }
+       
     return (error);
 }
 
-int
+static int
 hfs_swap_HFSPlusBTInternalNode (
     BlockDescriptor *src,
     HFSCatalogNodeID fileID,
-    int unswap
+    enum HFSBTSwapDirection direction
 )
 {
     BTNodeDescriptor *srcDesc = src->buffer;
     UInt16 *srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - (srcDesc->numRecords * sizeof (UInt16))));
-
+       char *nextRecord;       /*  Points to start of record following current one */
     UInt32 i;
     UInt32 j;
 
     if (fileID == kHFSExtentsFileID) {
         HFSPlusExtentKey *srcKey;
         HFSPlusExtentDescriptor *srcRec;
+               size_t recordSize;      /* Size of the data part of the record, or node number for index nodes */
         
+        if (srcDesc->kind == kBTIndexNode)
+               recordSize = sizeof(UInt32);
+        else
+               recordSize = sizeof(HFSPlusExtentDescriptor);
+
         for (i = 0; i < srcDesc->numRecords; i++) {
+               /* Point to the start of the record we're currently checking. */
             srcKey = (HFSPlusExtentKey *)((char *)src->buffer + srcOffs[i]);
+            
+            /*
+             * Point to start of next (larger offset) record.  We'll use this
+             * to be sure the current record doesn't overflow into the next
+             * record.
+             */
+                       nextRecord = (char *)src->buffer + srcOffs[i-1];
 
-            if (!unswap) srcKey->keyLength             = SWAP_BE16 (srcKey->keyLength);
-            srcRec = (HFSPlusExtentDescriptor *)((char *)srcKey + srcKey->keyLength + 2);
-            if (unswap) srcKey->keyLength              = SWAP_BE16 (srcKey->keyLength);
+                       /*
+                        * Make sure the key and data are within the buffer.  Since both key
+                        * and data are fixed size, this is relatively easy.  Note that this
+                        * relies on the keyLength being a constant; we verify the keyLength
+                        * below.
+                        */
+                       if ((char *)srcKey + sizeof(HFSPlusExtentKey) + recordSize > nextRecord) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+                               return fsBTInvalidNodeErr;
+                       }
+                       
+            if (direction == kSwapBTNodeBigToHost) 
+               srcKey->keyLength = SWAP_BE16 (srcKey->keyLength);
+            if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength);
+                               return fsBTInvalidNodeErr;
+            }
+            srcRec = (HFSPlusExtentDescriptor *)((char *)srcKey + srcKey->keyLength + sizeof(srcKey->keyLength));
+            if (direction == kSwapBTNodeHostToBig)
+               srcKey->keyLength = SWAP_BE16 (srcKey->keyLength);
 
-            /* Don't swap srcKey->forkType */
+            /* Don't swap srcKey->forkType; it's only one byte */
             /* Don't swap srcKey->pad */
 
             srcKey->fileID                     = SWAP_BE32 (srcKey->fileID);
             srcKey->startBlock         = SWAP_BE32 (srcKey->startBlock);
             
-            /* Stop if this is just an index node */
             if (srcDesc->kind == kBTIndexNode) {
+               /* For index nodes, the record data is just a child node number. */
                 *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec));
-                continue;
-            }
-
-            /* Swap the extent data */
-            
-            /* Swap each extent */
-            for (j = 0; j < kHFSPlusExtentDensity; j++) {
-                srcRec[j].startBlock   = SWAP_BE32 (srcRec[j].startBlock);
-                srcRec[j].blockCount   = SWAP_BE32 (srcRec[j].blockCount);
+            } else {
+                               /* Swap the extent data */
+                               for (j = 0; j < kHFSPlusExtentDensity; j++) {
+                                       srcRec[j].startBlock    = SWAP_BE32 (srcRec[j].startBlock);
+                                       srcRec[j].blockCount    = SWAP_BE32 (srcRec[j].blockCount);
+                               }
             }
         }
 
     } else if (fileID == kHFSCatalogFileID) {
         HFSPlusCatalogKey *srcKey;
         SInt16 *srcPtr;
-        
+        u_int16_t keyLength;
+
         for (i = 0; i < srcDesc->numRecords; i++) {
+               /* Point to the start of the record we're currently checking. */
             srcKey = (HFSPlusCatalogKey *)((char *)src->buffer + srcOffs[i]);
 
-            if (!unswap) srcKey->keyLength                     = SWAP_BE16 (srcKey->keyLength);
-            srcPtr = (SInt16 *)((char *)srcKey + srcKey->keyLength + 2);
-            if (unswap) srcKey->keyLength                      = SWAP_BE16 (srcKey->keyLength);
+            /*
+             * Point to start of next (larger offset) record.  We'll use this
+             * to be sure the current record doesn't overflow into the next
+             * record.
+             */
+                       nextRecord = (char *)src->buffer + srcOffs[i-1];
+
+                       /*
+                        * Make sure we can safely dereference the keyLength and parentID fields. */
+                       if ((char *)srcKey + offsetof(HFSPlusCatalogKey, nodeName.unicode[0]) > nextRecord) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+                               return fsBTInvalidNodeErr;
+                       }
+
+                       /*
+                        * Swap and sanity check the key length
+                        */
+            if (direction == kSwapBTNodeBigToHost)
+               srcKey->keyLength = SWAP_BE16 (srcKey->keyLength);
+            keyLength = srcKey->keyLength;     /* Put it in a local (native order) because we use it several times */
+            if (direction == kSwapBTNodeHostToBig)
+               srcKey->keyLength = SWAP_BE16 (keyLength);
             
+            /* Sanity check the key length */
+            if (keyLength < kHFSPlusCatalogKeyMinimumLength || keyLength > kHFSPlusCatalogKeyMaximumLength) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, keyLength);
+                               return fsBTInvalidNodeErr;
+            }
+
+            /*
+             * Make sure that we can safely dereference the record's type field or
+             * an index node's child node number.
+             */
+            srcPtr = (SInt16 *)((char *)srcKey + keyLength + sizeof(srcKey->keyLength));
+            if ((char *)srcPtr + sizeof(UInt32) > nextRecord) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1);
+                               return fsBTInvalidNodeErr;
+            }
+
             srcKey->parentID                                           = SWAP_BE32 (srcKey->parentID);
 
-            if (!unswap) srcKey->nodeName.length       = SWAP_BE16 (srcKey->nodeName.length);
+                       /*
+                        * Swap and sanity check the key's node name
+                        */
+            if (direction == kSwapBTNodeBigToHost)
+               srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length);
+            /* Make sure name length is consistent with key length */
+            if (keyLength < sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) +
+                srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0])) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: catalog record #%d keyLength=%d expected=%d\n",
+                                       srcDesc->numRecords-i, keyLength, sizeof(srcKey->parentID) + sizeof(srcKey->nodeName.length) +
+                    srcKey->nodeName.length*sizeof(srcKey->nodeName.unicode[0]));
+                               return fsBTInvalidNodeErr;
+            }
             for (j = 0; j < srcKey->nodeName.length; j++) {
                 srcKey->nodeName.unicode[j]    = SWAP_BE16 (srcKey->nodeName.unicode[j]);
             }
-            if (unswap) srcKey->nodeName.length        = SWAP_BE16 (srcKey->nodeName.length);
+            if (direction == kSwapBTNodeHostToBig)
+               srcKey->nodeName.length = SWAP_BE16 (srcKey->nodeName.length);
  
-            /* Stop if this is just an index node */
+            /* 
+             * For index nodes, the record data is just the child's node number.
+             * Skip over swapping the various types of catalog record.
+             */
             if (srcDesc->kind == kBTIndexNode) {
                 *((UInt32 *)srcPtr) = SWAP_BE32 (*((UInt32 *)srcPtr));
                 continue;
             }
             
-            /* Swap the recordType field, if unswapping, leave to later */
-            if (!unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]);
+            /* Make sure the recordType is in native order before using it. */
+            if (direction == kSwapBTNodeBigToHost)
+               srcPtr[0] = SWAP_BE16 (srcPtr[0]);
             
             if (srcPtr[0] == kHFSPlusFolderRecord) {
                 HFSPlusCatalogFolder *srcRec = (HFSPlusCatalogFolder *)srcPtr;
-                
+                if ((char *)srcRec + sizeof(*srcRec) > nextRecord) {
+                                       printf("hfs_swap_HFSPlusBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                }
+
                 srcRec->flags                          = SWAP_BE16 (srcRec->flags);
                 srcRec->valence                                = SWAP_BE32 (srcRec->valence);
                 srcRec->folderID                       = SWAP_BE32 (srcRec->folderID);
@@ -288,8 +514,8 @@ hfs_swap_HFSPlusBTInternalNode (
                 srcRec->bsdInfo.ownerID                = SWAP_BE32 (srcRec->bsdInfo.ownerID);
                 srcRec->bsdInfo.groupID                = SWAP_BE32 (srcRec->bsdInfo.groupID);
     
-                /* Don't swap srcRec->bsdInfo.adminFlags */
-                /* Don't swap srcRec->bsdInfo.ownerFlags */
+                /* Don't swap srcRec->bsdInfo.adminFlags; it's only one byte */
+                /* Don't swap srcRec->bsdInfo.ownerFlags; it's only one byte */
     
                 srcRec->bsdInfo.fileMode                       = SWAP_BE16 (srcRec->bsdInfo.fileMode);
                 srcRec->bsdInfo.special.iNodeNum       = SWAP_BE32 (srcRec->bsdInfo.special.iNodeNum);
@@ -302,6 +528,10 @@ hfs_swap_HFSPlusBTInternalNode (
     
             } else if (srcPtr[0] == kHFSPlusFileRecord) {
                 HFSPlusCatalogFile *srcRec = (HFSPlusCatalogFile *)srcPtr;
+                if ((char *)srcRec + sizeof(*srcRec) > nextRecord) {
+                                       printf("hfs_swap_HFSPlusBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                }
                 
                 srcRec->flags                          = SWAP_BE16 (srcRec->flags);
     
@@ -316,8 +546,8 @@ hfs_swap_HFSPlusBTInternalNode (
                 srcRec->bsdInfo.ownerID                = SWAP_BE32 (srcRec->bsdInfo.ownerID);
                 srcRec->bsdInfo.groupID                = SWAP_BE32 (srcRec->bsdInfo.groupID);
     
-                /* Don't swap srcRec->bsdInfo.adminFlags */
-                /* Don't swap srcRec->bsdInfo.ownerFlags */
+                /* Don't swap srcRec->bsdInfo.adminFlags; it's only one byte */
+                /* Don't swap srcRec->bsdInfo.ownerFlags; it's only one byte */
     
                 srcRec->bsdInfo.fileMode                       = SWAP_BE16 (srcRec->bsdInfo.fileMode);
                 srcRec->bsdInfo.special.iNodeNum       = SWAP_BE32 (srcRec->bsdInfo.special.iNodeNum);
@@ -335,65 +565,164 @@ hfs_swap_HFSPlusBTInternalNode (
             } else if ((srcPtr[0] == kHFSPlusFolderThreadRecord) ||
                        (srcPtr[0] == kHFSPlusFileThreadRecord)) {
     
+                               /*
+                                * Make sure there is room for parentID and name length.
+                                */
                 HFSPlusCatalogThread *srcRec = (HFSPlusCatalogThread *)srcPtr;
-    
+                               if ((char *) &srcRec->nodeName.unicode[0] > nextRecord) {
+                                       printf("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                               }
+
                 /* Don't swap srcRec->reserved */
                 
                 srcRec->parentID                                               = SWAP_BE32 (srcRec->parentID);
                 
-                if (!unswap) srcRec->nodeName.length   = SWAP_BE16 (srcRec->nodeName.length);
+                if (direction == kSwapBTNodeBigToHost)
+                       srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length);
+
+                /* 
+                 * Make sure there is room for the name in the buffer.
+                 * Then swap the characters of the name itself.
+                 */
+                               if ((char *) &srcRec->nodeName.unicode[srcRec->nodeName.length] > nextRecord) {
+                                       printf("hfs_swap_HFSPlusBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                               }
                 for (j = 0; j < srcRec->nodeName.length; j++) {
                     srcRec->nodeName.unicode[j]        = SWAP_BE16 (srcRec->nodeName.unicode[j]);
                 }
-                if (unswap) srcRec->nodeName.length            = SWAP_BE16 (srcRec->nodeName.length);
+                
+                if (direction == kSwapBTNodeHostToBig)
+                       srcRec->nodeName.length = SWAP_BE16 (srcRec->nodeName.length);
 
             } else {
-                panic ("%s unrecognized catalog record type", "hfs_swap_BTNode:");
+               printf("hfs_swap_HFSPlusBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1);
+                               return fsBTInvalidNodeErr;
             }
     
-            /* If unswapping, we can safely unswap type field now */
-            if (unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]);
+            /* We can swap the record type now that we're done using it. */
+            if (direction == kSwapBTNodeHostToBig)
+               srcPtr[0] = SWAP_BE16 (srcPtr[0]);
         }
         
     } else if (fileID == kHFSAttributesFileID) {
        HFSPlusAttrKey *srcKey;
        HFSPlusAttrRecord *srcRec;
-       
+       u_int16_t keyLength;
+               u_int32_t attrSize = 0;
+
        for (i = 0; i < srcDesc->numRecords; i++) {
+               /* Point to the start of the record we're currently checking. */
                srcKey = (HFSPlusAttrKey *)((char *)src->buffer + srcOffs[i]);
+
+            /*
+             * Point to start of next (larger offset) record.  We'll use this
+             * to be sure the current record doesn't overflow into the next
+             * record.
+             */
+                       nextRecord = (char *)src->buffer + srcOffs[i-1];
+
+               /* Make sure there is room in the buffer for a minimal key */
+               if ((char *) &srcKey->attrName[1] > nextRecord) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+                               return fsBTInvalidNodeErr;
+               }
                
-               if (!unswap) srcKey->keyLength = SWAP_BE16(srcKey->keyLength);
-               srcRec = (HFSPlusAttrRecord *)((char *)srcKey + srcKey->keyLength + 2);
-               if (unswap) srcKey->keyLength = SWAP_BE16(srcKey->keyLength);
+               /* Swap the key length field */
+               if (direction == kSwapBTNodeBigToHost)
+                       srcKey->keyLength = SWAP_BE16(srcKey->keyLength);
+               keyLength = srcKey->keyLength;  /* Keep a copy in native order */
+               if (direction == kSwapBTNodeHostToBig)
+                       srcKey->keyLength = SWAP_BE16(srcKey->keyLength);
+
+            /*
+             * Make sure that we can safely dereference the record's type field or
+             * an index node's child node number.
+             */
+               srcRec = (HFSPlusAttrRecord *)((char *)srcKey + keyLength + sizeof(srcKey->keyLength));
+               if ((char *)srcRec + sizeof(u_int32_t) > nextRecord) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d too big (%d)\n", srcDesc->numRecords-i-1, keyLength);
+                               return fsBTInvalidNodeErr;
+               }
                
                srcKey->fileID = SWAP_BE32(srcKey->fileID);
                srcKey->startBlock = SWAP_BE32(srcKey->startBlock);
-               
-               if (!unswap) srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen);
+
+                       /* 
+                        * Swap and check the attribute name
+                        */
+               if (direction == kSwapBTNodeBigToHost)
+                       srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen);
+               /* Sanity check the attribute name length */
+               if (srcKey->attrNameLen > kHFSMaxAttrNameLen || keyLength < (kHFSPlusAttrKeyMinimumLength + sizeof(u_int16_t)*srcKey->attrNameLen)) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: attr key #%d keyLength=%d attrNameLen=%d\n", srcDesc->numRecords-i-1, keyLength, srcKey->attrNameLen);
+                               return fsBTInvalidNodeErr;
+               }
                for (j = 0; j < srcKey->attrNameLen; j++)
                        srcKey->attrName[j] = SWAP_BE16(srcKey->attrName[j]);
-               if (unswap) srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen);
+               if (direction == kSwapBTNodeHostToBig)
+                       srcKey->attrNameLen = SWAP_BE16(srcKey->attrNameLen);
                
-               /* If this is an index node, just swap the child node number */
+            /* 
+             * For index nodes, the record data is just the child's node number.
+             * Skip over swapping the various types of attribute record.
+             */
             if (srcDesc->kind == kBTIndexNode) {
                 *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec));
                 continue;
             }
             
-            /* Swap the data record */
-            if (!unswap) srcRec->recordType = SWAP_BE32(srcRec->recordType);
+            /* Swap the record data */
+            if (direction == kSwapBTNodeBigToHost)
+               srcRec->recordType = SWAP_BE32(srcRec->recordType);
             switch (srcRec->recordType) {
                case kHFSPlusAttrInlineData:
+                       /* Is there room for the inline data header? */
+                       if ((char *) &srcRec->attrData.attrData[0]  > nextRecord) {
+                                               printf("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big\n", srcDesc->numRecords-i-1);
+                                               return fsBTInvalidNodeErr;
+                       }
+                       
                        /* We're not swapping the reserved fields */
+                       
+                       /* Swap the attribute size */
+                       if (direction == kSwapBTNodeHostToBig)
+                               attrSize = srcRec->attrData.attrSize;
                        srcRec->attrData.attrSize = SWAP_BE32(srcRec->attrData.attrSize);
-                       /* Not swapping the attrData */
+                       if (direction == kSwapBTNodeBigToHost)
+                               attrSize = srcRec->attrData.attrSize;
+                               
+                       /* Is there room for the inline attribute data? */
+                       if ((char *) &srcRec->attrData.attrData[attrSize] > nextRecord) {
+                                               printf("hfs_swap_HFSPlusBTInternalNode: attr inline #%d too big (attrSize=%u)\n", srcDesc->numRecords-i-1, attrSize);
+                                               return fsBTInvalidNodeErr;
+                       }
+                       
+                       /* Not swapping the attribute data itself */
                        break;
+                       
                case kHFSPlusAttrForkData:
+                       /* Is there room for the fork data record? */
+                       if ((char *)srcRec + sizeof(HFSPlusAttrForkData) > nextRecord) {
+                                               printf("hfs_swap_HFSPlusBTInternalNode: attr fork data #%d too big\n", srcDesc->numRecords-i-1);
+                                               return fsBTInvalidNodeErr;
+                       }
+                       
                        /* We're not swapping the reserved field */
+                       
                        hfs_swap_HFSPlusForkData(&srcRec->forkData.theFork);
                        break;
+                       
                case kHFSPlusAttrExtents:
+                       /* Is there room for an extent record? */
+                       if ((char *)srcRec + sizeof(HFSPlusAttrExtents) > nextRecord) {
+                                               printf("hfs_swap_HFSPlusBTInternalNode: attr extents #%d too big\n", srcDesc->numRecords-i-1);
+                                               return fsBTInvalidNodeErr;
+                       }
+                       
                        /* We're not swapping the reserved field */
+                       
                        for (j = 0; j < kHFSPlusExtentDensity; j++) {
                                srcRec->overflowExtents.extents[j].startBlock =
                                        SWAP_BE32(srcRec->overflowExtents.extents[j].startBlock);
@@ -402,19 +731,40 @@ hfs_swap_HFSPlusBTInternalNode (
                        }
                        break;
             }
-            if (unswap) srcRec->recordType = SWAP_BE32(srcRec->recordType);
+            if (direction == kSwapBTNodeHostToBig)
+               srcRec->recordType = SWAP_BE32(srcRec->recordType);
        }
     } else if (fileID > kHFSFirstUserCatalogNodeID) {
+       /* The only B-tree with a non-system CNID that we use is the hotfile B-tree */
                HotFileKey *srcKey;
                UInt32 *srcRec;
         
                for (i = 0; i < srcDesc->numRecords; i++) {
+               /* Point to the start of the record we're currently checking. */
                        srcKey = (HotFileKey *)((char *)src->buffer + srcOffs[i]);
 
-                       if (!unswap)
+            /*
+             * Point to start of next (larger offset) record.  We'll use this
+             * to be sure the current record doesn't overflow into the next
+             * record.
+             */
+                       nextRecord = (char *)src->buffer + srcOffs[i-1];
+
+                       /* Make sure there is room for the key (HotFileKey) and data (UInt32) */
+                       if ((char *)srcKey + sizeof(HotFileKey) + sizeof(UInt32) > nextRecord) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: hotfile #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+                               return fsBTInvalidNodeErr;
+                       }
+                       
+                       /* Swap and sanity check the key length field */
+                       if (direction == kSwapBTNodeBigToHost)
                                srcKey->keyLength = SWAP_BE16 (srcKey->keyLength);
-                       srcRec = (u_int32_t *)((char *)srcKey + srcKey->keyLength + 2);
-                       if (unswap)
+                       if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) {
+                               printf("hfs_swap_HFSPlusBTInternalNode: hotfile #%d incorrect keyLength %d\n", srcDesc->numRecords-i-1, srcKey->keyLength);
+                               return fsBTInvalidNodeErr;
+                       }
+                       srcRec = (u_int32_t *)((char *)srcKey + srcKey->keyLength + sizeof(srcKey->keyLength));
+                       if (direction == kSwapBTNodeHostToBig)
                                srcKey->keyLength = SWAP_BE16 (srcKey->keyLength);
 
                        /* Don't swap srcKey->forkType */
@@ -426,22 +776,23 @@ hfs_swap_HFSPlusBTInternalNode (
                        *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec));
                }
     } else {
-        panic ("%s unrecognized B-Tree type", "hfs_swap_BTNode:");
+        panic ("hfs_swap_HFSPlusBTInternalNode: fileID %u is not a system B-tree\n", fileID);
     }
 
 
     return (0);
 }
 
-int
+static int
 hfs_swap_HFSBTInternalNode (
     BlockDescriptor *src,
     HFSCatalogNodeID fileID,
-    int unswap
+    enum HFSBTSwapDirection direction
 )
 {
     BTNodeDescriptor *srcDesc = src->buffer;
     UInt16 *srcOffs = (UInt16 *)((char *)src->buffer + (src->blockSize - (srcDesc->numRecords * sizeof (UInt16))));
+       char *nextRecord;       /*  Points to start of record following current one */
 
     UInt32 i;
     UInt32 j;
@@ -449,12 +800,42 @@ hfs_swap_HFSBTInternalNode (
     if (fileID == kHFSExtentsFileID) {
         HFSExtentKey *srcKey;
         HFSExtentDescriptor *srcRec;
+               size_t recordSize;      /* Size of the data part of the record, or node number for index nodes */
         
+        if (srcDesc->kind == kBTIndexNode)
+               recordSize = sizeof(UInt32);
+        else
+               recordSize = sizeof(HFSExtentDescriptor);
+
         for (i = 0; i < srcDesc->numRecords; i++) {
+               /* Point to the start of the record we're currently checking. */
             srcKey = (HFSExtentKey *)((char *)src->buffer + srcOffs[i]);
 
-            /* Don't swap srcKey->keyLength */
-            /* Don't swap srcKey->forkType */
+            /*
+             * Point to start of next (larger offset) record.  We'll use this
+             * to be sure the current record doesn't overflow into the next
+             * record.
+             */
+                       nextRecord = (char *)src->buffer + srcOffs[i-1];
+
+                       /*
+                        * Make sure the key and data are within the buffer.  Since both key
+                        * and data are fixed size, this is relatively easy.  Note that this
+                        * relies on the keyLength being a constant; we verify the keyLength
+                        * below.
+                        */
+                       if ((char *)srcKey + sizeof(HFSExtentKey) + recordSize > nextRecord) {
+                               printf("hfs_swap_HFSBTInternalNode: extents key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+                               return fsBTInvalidNodeErr;
+                       }
+                       
+            /* Don't swap srcKey->keyLength (it's only one byte), but do sanity check it */
+            if (srcKey->keyLength != sizeof(*srcKey) - sizeof(srcKey->keyLength)) {
+                               printf("hfs_swap_HFSBTInternalNode: extents key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength);
+                               return fsBTInvalidNodeErr;
+            }
+
+            /* Don't swap srcKey->forkType; it's only one byte */
 
             srcKey->fileID                     = SWAP_BE32 (srcKey->fileID);
             srcKey->startBlock         = SWAP_BE16 (srcKey->startBlock);
@@ -462,47 +843,99 @@ hfs_swap_HFSBTInternalNode (
             /* Point to record data (round up to even byte boundary) */
             srcRec = (HFSExtentDescriptor *)((char *)srcKey + ((srcKey->keyLength + 2) & ~1));
     
-            /* Stop if this is just an index node */
             if (srcDesc->kind == kBTIndexNode) {
+               /* For index nodes, the record data is just a child node number. */
                 *((UInt32 *)srcRec) = SWAP_BE32 (*((UInt32 *)srcRec));
-                continue;
-            }
-            
-            /* Swap each extent */
-            for (j = 0; j < kHFSExtentDensity; j++) {
-                srcRec[j].startBlock   = SWAP_BE16 (srcRec[j].startBlock);
-                srcRec[j].blockCount   = SWAP_BE16 (srcRec[j].blockCount);
+            } else {
+                               /* Swap the extent data */
+                               for (j = 0; j < kHFSExtentDensity; j++) {
+                                       srcRec[j].startBlock    = SWAP_BE16 (srcRec[j].startBlock);
+                                       srcRec[j].blockCount    = SWAP_BE16 (srcRec[j].blockCount);
+                               }
             }
         }
         
     } else if (fileID == kHFSCatalogFileID) {
         HFSCatalogKey *srcKey;
         SInt16 *srcPtr;
-        
+        unsigned expectedKeyLength;
+
         for (i = 0; i < srcDesc->numRecords; i++) {
+               /* Point to the start of the record we're currently checking. */
             srcKey = (HFSCatalogKey *)((char *)src->buffer + srcOffs[i]);
 
-            /* Don't swap srcKey->keyLength */
+            /*
+             * Point to start of next (larger offset) record.  We'll use this
+             * to be sure the current record doesn't overflow into the next
+             * record.
+             */
+                       nextRecord = (char *)src->buffer + srcOffs[i-1];
+
+                       /*
+                        * Make sure we can safely dereference the keyLength and parentID fields.
+                        * The value 8 below is 1 byte for keyLength + 1 byte reserved + 4 bytes
+                        * for parentID + 1 byte for nodeName's length + 1 byte to round up the
+                        * record start to an even offset, which forms a minimal key.
+                        */
+                       if ((char *)srcKey + 8 > nextRecord) {
+                               printf("hfs_swap_HFSBTInternalNode: catalog key #%d offset too big (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
+                               return fsBTInvalidNodeErr;
+                       }
+                       
+            /* Don't swap srcKey->keyLength (it's only one byte), but do sanity check it */
+            if (srcKey->keyLength < kHFSCatalogKeyMinimumLength || srcKey->keyLength > kHFSCatalogKeyMaximumLength) {
+                               printf("hfs_swap_HFSBTInternalNode: catalog key #%d invalid length (%d)\n", srcDesc->numRecords-i-1, srcKey->keyLength);
+                               return fsBTInvalidNodeErr;
+            }
+            
             /* Don't swap srcKey->reserved */
 
             srcKey->parentID                   = SWAP_BE32 (srcKey->parentID);
 
             /* Don't swap srcKey->nodeName */
+            
+                       /* Make sure the keyLength is big enough for the key's content */
+                       if (srcDesc->kind == kBTIndexNode)
+                               expectedKeyLength = sizeof(*srcKey) - sizeof(srcKey->keyLength);
+                       else
+                               expectedKeyLength = srcKey->nodeName[0] + kHFSCatalogKeyMinimumLength;
+            if (srcKey->keyLength < expectedKeyLength) {
+                               printf("hfs_swap_HFSBTInternalNode: catalog record #%d keyLength=%u expected=%u\n",
+                                       srcDesc->numRecords-i, srcKey->keyLength, expectedKeyLength);
+                               return fsBTInvalidNodeErr;
+            }
 
             /* Point to record data (round up to even byte boundary) */
             srcPtr = (SInt16 *)((char *)srcKey + ((srcKey->keyLength + 2) & ~1));
             
-            /* Stop if this is just an index node */
+            /*
+             * Make sure that we can safely dereference the record's type field or
+             * an index node's child node number.
+             */
+            if ((char *)srcPtr + sizeof(UInt32) > nextRecord) {
+                               printf("hfs_swap_HFSBTInternalNode: catalog key #%d too big\n", srcDesc->numRecords-i-1);
+                               return fsBTInvalidNodeErr;
+            }
+            
+            /* 
+             * For index nodes, the record data is just the child's node number.
+             * Skip over swapping the various types of catalog record.
+             */
             if (srcDesc->kind == kBTIndexNode) {
                 *((UInt32 *)srcPtr) = SWAP_BE32 (*((UInt32 *)srcPtr));
                 continue;
             }
     
-            /* Swap the recordType field, if unswapping, leave to later */
-            if (!unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]);
+            /* Make sure the recordType is in native order before using it. */
+            if (direction == kSwapBTNodeBigToHost)
+               srcPtr[0] = SWAP_BE16 (srcPtr[0]);
             
             if (srcPtr[0] == kHFSFolderRecord) {
                 HFSCatalogFolder *srcRec = (HFSCatalogFolder *)srcPtr;
+                if ((char *)srcRec + sizeof(*srcRec) > nextRecord) {
+                                       printf("hfs_swap_HFSBTInternalNode: catalog folder record #%d too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                }
                 
                 srcRec->flags                          = SWAP_BE16 (srcRec->flags);
                 srcRec->valence                                = SWAP_BE16 (srcRec->valence);
@@ -518,6 +951,10 @@ hfs_swap_HFSBTInternalNode (
     
             } else if (srcPtr[0] == kHFSFileRecord) {
                 HFSCatalogFile *srcRec = (HFSCatalogFile *)srcPtr;
+                if ((char *)srcRec + sizeof(*srcRec) > nextRecord) {
+                                       printf("hfs_swap_HFSBTInternalNode: catalog file record #%d too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                }
                 
                 srcRec->flags                          = srcRec->flags;
                 srcRec->fileType                       = srcRec->fileType;
@@ -552,25 +989,37 @@ hfs_swap_HFSBTInternalNode (
                 
             } else if ((srcPtr[0] == kHFSFolderThreadRecord) ||
                     (srcPtr[0] == kHFSFileThreadRecord)) {
-    
                 HFSCatalogThread *srcRec = (HFSCatalogThread *)srcPtr;
+                
+                /* Make sure there is room for parentID and name length */
+                if ((char *) &srcRec->nodeName[1] > nextRecord) {
+                                       printf("hfs_swap_HFSBTInternalNode: catalog thread record #%d too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                }
     
                 /* Don't swap srcRec->reserved array */
     
                 srcRec->parentID                       = SWAP_BE32 (srcRec->parentID);
     
                 /* Don't swap srcRec->nodeName */
-    
+                
+                       /* Make sure there is room for the name in the buffer */
+                if ((char *) &srcRec->nodeName[srcRec->nodeName[0]] > nextRecord) {
+                                       printf("hfs_swap_HFSBTInternalNode: catalog thread record #%d name too big\n", srcDesc->numRecords-i-1);
+                                       return fsBTInvalidNodeErr;
+                }
             } else {
-                panic ("%s unrecognized catalog record type", "hfs_swap_BTNode:");
+               printf("hfs_swap_HFSBTInternalNode: unrecognized catalog record type (0x%04X; record #%d)\n", srcPtr[0], srcDesc->numRecords-i-1);
+                               return fsBTInvalidNodeErr;
             }
     
-            /* If unswapping, we can safely swap type now */
-            if (unswap) srcPtr[0] = SWAP_BE16 (srcPtr[0]);
+            /* We can swap the record type now that we're done using it */
+            if (direction == kSwapBTNodeHostToBig)
+               srcPtr[0] = SWAP_BE16 (srcPtr[0]);
         }
         
     } else {
-        panic ("%s unrecognized B-Tree type", "hfs_swap_BTNode:");
+        panic ("hfs_swap_HFSBTInternalNode: fileID %u is not a system B-tree\n", fileID);
     }
 
     return (0);
index 06801dc693bdfabfaf8f88163cd88a98b40de9a2..330839d2924b80ef2214c65a334bbd679a2bb6ff 100644 (file)
@@ -48,7 +48,6 @@
     
     /* HFS is always big endian, no swapping needed */
     #define SWAP_HFS_PLUS_FORK_DATA(__a)
-    #define SWAP_BT_NODE(__a, __b, __c, __d)
 
 /************************/
 /* LITTLE ENDIAN Macros */
@@ -61,7 +60,6 @@
     #define SWAP_BE64(__a)                                                     NXSwapBigLongLongToHost (__a)
     
     #define SWAP_HFS_PLUS_FORK_DATA(__a)                       hfs_swap_HFSPlusForkData ((__a))
-    #define SWAP_BT_NODE(__a, __b, __c, __d)   hfs_swap_BTNode ((__a), (__b), (__c), (__d))
 
 #else
 #warning Unknown byte order
 extern "C" {
 #endif
 
-void hfs_swap_HFSPlusForkData (HFSPlusForkData *src);
-int  hfs_swap_BTNode (BlockDescriptor *src, int isHFSPlus, HFSCatalogNodeID fileID, int unswap);
+/*
+ * Constants for the "direction" argument to hfs_swap_BTNode:
+ */
+enum HFSBTSwapDirection {
+       kSwapBTNodeBigToHost            =       0,
+       kSwapBTNodeHostToBig            =       1,
+
+       /*
+        * kSwapBTNodeHeaderRecordOnly is used to swap just the header record
+        * of a header node from big endian (on disk) to host endian (in memory).
+        * It does not swap the node descriptor (forward/backward links, record
+        * count, etc.).  It assumes the header record is at offset 0x000E.
+        *
+        * Since HFS Plus doesn't have fixed B-tree node sizes, we have to read
+        * the header record to determine the actual node size for that tree
+        * before we can set up the B-tree control block.  We read it initially
+        * as 512 bytes, then re-read it once we know the correct node size.  Since
+        * we may not have read the entire header node the first time, we can't
+        * swap the record offsets, other records, or do most sanity checks.
+        */
+       kSwapBTNodeHeaderRecordOnly     =       3
+};
+
+int  hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction);
 
 #ifdef __cplusplus
 }
index 001206d450d69fe32a718f3bb590e6b712f0a59f..a285f0da8f2eadb5e12a90ca5b15ffe9f648790b 100644 (file)
@@ -458,18 +458,19 @@ union HFSPlusAttrRecord {
 typedef union HFSPlusAttrRecord HFSPlusAttrRecord;
 
 /* Attribute key */
+enum { kHFSMaxAttrNameLen = 127 };
 struct HFSPlusAttrKey {
        u_int16_t     keyLength;       /* key length (in bytes) */
        u_int16_t     pad;             /* set to zero */
        u_int32_t     fileID;          /* file associated with attribute */
        u_int32_t     startBlock;      /* first attribue allocation block number for extents */
        u_int16_t     attrNameLen;     /* number of unicode characters */
-       u_int16_t     attrName[127];   /* attribute name (Unicode) */
+       u_int16_t     attrName[kHFSMaxAttrNameLen];   /* attribute name (Unicode) */
 };
 typedef struct HFSPlusAttrKey HFSPlusAttrKey;
 
 #define kHFSPlusAttrKeyMaximumLength   (sizeof(HFSPlusAttrKey) - sizeof(u_int16_t))
-#define kHFSPlusAttrKeyMinimumLength   (kHFSPlusAttrKeyMaximumLength - (127 * sizeof(u_int16_t)))
+#define kHFSPlusAttrKeyMinimumLength   (kHFSPlusAttrKeyMaximumLength - kHFSMaxAttrNameLen*sizeof(u_int16_t))
 
 #endif /* __APPLE_API_UNSTABLE */
 
index 46f8e54e5e4837324710ccae3c6891070ed2ddf6..76cd198d35554abe56c6dc95033df678bae680fe 100644 (file)
@@ -990,6 +990,8 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 
                if (!(hfsmp->jnl))
                        return (ENOTSUP);
+
+               lck_rw_lock_exclusive(&hfsmp->hfs_insync);
  
                task = current_task();
                task_working_set_disable(task);
@@ -1001,9 +1003,9 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
                hfs_global_exclusive_lock_acquire(hfsmp);
                journal_flush(hfsmp->jnl);
+
                // don't need to iterate on all vnodes, we just need to
                // wait for writes to the system files and the device vnode
-               // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
                if (HFSTOVCB(hfsmp)->extentsRefNum)
                    vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
                if (HFSTOVCB(hfsmp)->catalogRefNum)
@@ -1026,7 +1028,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                // if we're not the one who froze the fs then we
                // can't thaw it.
                if (hfsmp->hfs_freezing_proc != current_proc()) {
-                   return EINVAL;
+                   return EPERM;
                }
 
                // NOTE: if you add code here, also go check the
@@ -1034,6 +1036,7 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                //
                hfsmp->hfs_freezing_proc = NULL;
                hfs_global_exclusive_lock_release(hfsmp);
+               lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
 
                return (0);
        }
@@ -1262,13 +1265,18 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        case HFS_SETACLSTATE: {
                int state;
 
-               if (!is_suser()) {
-                       return (EPERM);
-               }
                if (ap->a_data == NULL) {
                        return (EINVAL);
                }
+
+               vfsp = vfs_statfs(HFSTOVFS(hfsmp));
                state = *(int *)ap->a_data;
+
+               // super-user can enable or disable acl's on a volume.
+               // the volume owner can only enable acl's
+               if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
+                       return (EPERM);
+               }
                if (state == 0 || state == 1)
                        return hfs_setextendedsecurity(hfsmp, state);
                else
@@ -1605,6 +1613,11 @@ hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
        int started_tr = 0;
        int tooklock = 0;
 
+       /* Do not allow blockmap operation on a directory */
+       if (vnode_isdir(vp)) {
+               return (ENOTSUP);
+       }
+
        /*
         * Check for underlying vnode requests and ensure that logical
         * to physical mapping is requested.
@@ -2106,6 +2119,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
        off_t filebytes;
        u_long fileblocks;
        int blksize, error = 0;
+       struct cnode *cp = VTOC(vp);
 
        if (vnode_isdir(vp))
                return (EISDIR);        /* cannot truncate an HFS directory! */
@@ -2125,6 +2139,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
                        } else {
                                filebytes = length;
                        }
+                       cp->c_flag |= C_FORCEUPDATE;
                        error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
                        if (error)
                                break;
@@ -2136,6 +2151,7 @@ hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
                        } else {
                                filebytes = length;
                        }
+                       cp->c_flag |= C_FORCEUPDATE;
                        error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
                        if (error)
                                break;
@@ -2516,7 +2532,6 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
        int retval = 0;
        register struct buf *bp = ap->a_bp;
        register struct vnode *vp = buf_vnode(bp);
-#if BYTE_ORDER == LITTLE_ENDIAN
        BlockDescriptor block;
 
        /* Trap B-Tree writes */
@@ -2524,22 +2539,29 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
            (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
            (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
 
-               /* Swap if the B-Tree node is in native byte order */
+               /* 
+                * Swap and validate the node if it is in native byte order.
+                * This is always true on big endian, so we always validate
+                * before writing here.  On little endian, the node typically has
+                * been swapped and validated when it was written to the journal,
+                * so we won't do anything here.
+                */
                if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
                        /* Prepare the block pointer */
                        block.blockHeader = bp;
                        block.buffer = (char *)buf_dataptr(bp);
+                       block.blockNum = buf_lblkno(bp);
                        /* not found in cache ==> came from disk */
                        block.blockReadFromDisk = (buf_fromcache(bp) == 0);
                        block.blockSize = buf_count(bp);
     
                        /* Endian un-swap B-Tree node */
-                       SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
+                       retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
+                       if (retval)
+                               panic("hfs_vnop_bwrite: about to write corrupt node!\n");
                }
-
-               /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
        }
-#endif
+
        /* This buffer shouldn't be locked anymore but if it is clear it */
        if ((buf_flags(bp) & B_LOCKED)) {
                // XXXdbg
index f6569bf7191c3ee993257477efff010d27024dd8..7ebe8aff727050771fff87363d7dc9037f378960 100644 (file)
@@ -860,6 +860,7 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
        lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
        lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
+       lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
 
        vfs_setfsprivate(mp, hfsmp);
        hfsmp->hfs_mp = mp;                     /* Make VFSTOHFS work */
@@ -1655,6 +1656,10 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
        if (hfsmp->hfs_flags & HFS_READ_ONLY)
                return (EROFS);
 
+       /* skip over frozen volumes */
+       if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
+               return 0;
+
        args.cred = vfs_context_proc(context);
        args.waitfor = waitfor;
        args.p = p;
@@ -1734,7 +1739,8 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
        if (hfsmp->jnl) {
            journal_flush(hfsmp->jnl);
        }
-       
+
+       lck_rw_unlock_shared(&hfsmp->hfs_insync);       
        return (allerror);
 }
 
index 3dfe383b68c519b6a2981df330235ac5c4b82dc2..4eed699c9223f227dbd622a98f4b71a0cdc0c1c8 100644 (file)
@@ -1492,7 +1492,7 @@ short MacToVFSError(OSErr err)
                return EOVERFLOW;
        
        case btBadNode:                 /* -32731 */
-               return EBADF;
+               return EIO;
        
        case memFullErr:                /*  -108 */
                return ENOMEM;          /*   +12 */
index 5c0cc83ed9cb7c55c1cca837d64a9985d2487ee0..59c278f109475b27909707c3cb8d18f292bb6c1c 100644 (file)
@@ -43,6 +43,7 @@
 #include <machine/spl.h>
 
 #include <sys/kdebug.h>
+#include <sys/sysctl.h>
 
 #include "hfs.h"
 #include "hfs_catalog.h"
@@ -65,6 +66,9 @@
 
 /* Global vfs data structures for hfs */
 
+/* Always F_FULLFSYNC? 1=yes,0=no (default due to "various" reasons is 'no') */
+int always_do_fullfsync = 0;
+SYSCTL_INT (_kern, OID_AUTO, always_do_fullfsync, CTLFLAG_RW, &always_do_fullfsync, 0, "always F_FULLFSYNC when fsync is called");
 
 extern unsigned long strtoul(const char *, char **, int);
 
@@ -236,6 +240,7 @@ hfs_vnop_close(ap)
        if (hfsmp->hfs_freezing_proc == p && proc_exiting(p)) {
            hfsmp->hfs_freezing_proc = NULL;
            hfs_global_exclusive_lock_release(hfsmp);
+           lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
        }
 
        busy = vnode_isinuse(vp, 1);
@@ -962,6 +967,7 @@ hfs_vnop_exchange(ap)
        from_cp->c_uid = to_cp->c_uid;
        from_cp->c_flags = to_cp->c_flags;
        from_cp->c_mode = to_cp->c_mode;
+       from_cp->c_attr.ca_recflags = to_cp->c_attr.ca_recflags;
        bcopy(to_cp->c_finderinfo, from_cp->c_finderinfo, 32);
 
        bcopy(&tempdesc, &to_cp->c_desc, sizeof(struct cat_desc));
@@ -975,6 +981,7 @@ hfs_vnop_exchange(ap)
        to_cp->c_uid = tempattr.ca_uid;
        to_cp->c_flags = tempattr.ca_flags;
        to_cp->c_mode = tempattr.ca_mode;
+       to_cp->c_attr.ca_recflags = tempattr.ca_recflags;
        bcopy(tempattr.ca_finderinfo, to_cp->c_finderinfo, 32);
 
        /* Rehash the cnodes using their new file IDs */
@@ -1137,7 +1144,7 @@ metasync:
                cp->c_touch_acctime = FALSE;
                cp->c_touch_chgtime = FALSE;
                cp->c_touch_modtime = FALSE;
-       } else /* User file */ {
+       } else if ( !(vp->v_flag & VSWAP) ) /* User file */ {
                retval = hfs_update(vp, wait);
 
                /* When MNT_WAIT is requested push out any delayed meta data */
@@ -1150,7 +1157,7 @@ metasync:
                // fsync() and if so push out any pending transactions 
                // that this file might be a part of (and get them on
                // stable storage).
-               if (fullsync) {
+               if (fullsync || always_do_fullfsync) {
                    if (hfsmp->jnl) {
                        journal_flush(hfsmp->jnl);
                    } else {
@@ -2441,6 +2448,10 @@ hfs_vnop_readdir(ap)
        if (nfs_cookies) {
                cnid_hint = (cnid_t)(uio_offset(uio) >> 32);
                uio_setoffset(uio, uio_offset(uio) & 0x00000000ffffffffLL);
+               if (cnid_hint == INT_MAX) { /* searched past the last item */
+                       eofflag = 1;
+                       goto out;
+               }
        }
        /*
         * Synthesize entries for "." and ".."
@@ -2565,7 +2576,7 @@ hfs_vnop_readdir(ap)
        }
        
        /* Pack the buffer with dirent entries. */
-       error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items);
+       error = cat_getdirentries(hfsmp, cp->c_entries, dirhint, uio, extended, &items, &eofflag);
 
        hfs_systemfile_unlock(hfsmp, lockflags);
 
index f11af332afca5e58f0b6370b45c36881c771476c..9920c87422c2c7a5d2ee18e5fd398fcf8fbd1437 100644 (file)
@@ -315,15 +315,12 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
                }
        }
 
-       // if nodeSize Matches then we don't need to release, just CheckNode
-       if ( btreePtr->nodeSize == nodeRec.blockSize )
-       {
-               err = CheckNode (btreePtr, nodeRec.buffer);
-               if (err)
-                       VTOVCB(btreePtr->fileRefNum)->vcbFlags |= kHFS_DamagedVolume;
-               M_ExitOnError (err);
-       }
-       else
+       /*
+        * If the actual node size is different than the amount we read,
+        * then release and trash this block, and re-read with the correct
+        * node size.
+        */
+       if ( btreePtr->nodeSize != nodeRec.blockSize )
        {
                err = SetBTreeBlockSize (btreePtr->fileRefNum, btreePtr->nodeSize, 32);
                M_ExitOnError (err);
@@ -336,7 +333,7 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
                ++btreePtr->numReleaseNodes;
                M_ExitOnError (err);
 
-               err = GetNode (btreePtr, kHeaderNodeNum, &nodeRec );            // calls CheckNode...
+               err = GetNode (btreePtr, kHeaderNodeNum, &nodeRec );
                M_ExitOnError (err);
        }
 
@@ -1286,15 +1283,19 @@ OSStatus        BTInsertRecord          (FCB                                            *filePtr,
                                                                        goto ErrorExit;
                                                                }
 
-                                                               err = UpdateNode (btreePtr, &nodeRec, 0, kLockTransaction);
-                                                               M_ExitOnError (err);
-
-                                                               // update BTreeControlBlock
+                                                               /*
+                                                                * Update the B-tree control block.  Do this before
+                                                                * calling UpdateNode since it will compare the node's
+                                                                * height with treeDepth.
+                                                                */
                                                                btreePtr->treeDepth                     = 1;
                                                                btreePtr->rootNode                      = insertNodeNum;
                                                                btreePtr->firstLeafNode         = insertNodeNum;
                                                                btreePtr->lastLeafNode          = insertNodeNum;
 
+                                                               err = UpdateNode (btreePtr, &nodeRec, 0, kLockTransaction);
+                                                               M_ExitOnError (err);
+
                                                                M_BTreeHeaderDirty (btreePtr);
 
                                                                goto Success;
index fe9b141dfb989a348f2ca85c19d8ec93fcafb6f9..590dfecc5398c29ae53afa2e28509caff23639f3 100644 (file)
 //     ReleaseNode                     - Call FS Agent to release node obtained by GetNode.
 //     UpdateNode                      - Mark a node as dirty and call FS Agent to release it.
 //
-//     CheckNode                       - Checks the validity of a node.
 //     ClearNode                       - Clear a node to all zeroes.
 //
 //     InsertRecord            - Inserts a record into a BTree node.
@@ -215,58 +214,6 @@ OSStatus   GetNode         (BTreeControlBlockPtr    btreePtr,
                goto ErrorExit;
        }
        ++btreePtr->numGetNodes;
-       
-       //
-       // Optimization
-       // Only call CheckNode if the node came from disk.
-       // If it was in the cache, we'll assume its already a valid node.
-       //
-       
-       if ( nodePtr->blockReadFromDisk )       // if we read it from disk then check it
-       {
-               err = CheckNode (btreePtr, nodePtr->buffer);
-
-               if (err != noErr)
-               {       
-               
-               VTOVCB(btreePtr->fileRefNum)->vcbFlags |= kHFS_DamagedVolume;
-
-                 #if HFS_DIAGNOSTIC
-                       if (((NodeDescPtr)nodePtr->buffer)->numRecords != 0)
-                               PrintNode(nodePtr->buffer, btreePtr->nodeSize, nodeNum);
-                 #endif
-
-                       if (DEBUG_BUILD)
-                       {
-                               // With the removal of bounds checking in IsItAHint(), it's possible that
-                               // GetNode() will be called to fetch a clear (all zeroes) node. We want
-                               // CheckNode() to fail in this case (it does), however we don't want to assert
-                               // this case because it is not really an "error". Returning an error from GetNode()
-                               // in this case will cause the hint checking code to ignore the hint and revert to
-                               // the full search mode.
-                               
-                               {
-                                       UInt32  *cur;
-                                       UInt32  *lastPlusOne;
-                                       
-                                       cur             = nodePtr->buffer;
-                                       lastPlusOne = (UInt32 *) ((UInt8 *) cur + btreePtr->nodeSize);
-                                       
-                                       while( cur < lastPlusOne )
-                                       {
-                                               if( *cur++ != 0 )
-                                               {
-                                                       Panic ("\pGetNode: CheckNode returned error.");
-                                                       break;
-                                               }
-                                       }
-                               }
-                       }
-                       
-                       (void) TrashNode (btreePtr, nodePtr);                                   // ignore error
-                       goto ErrorExit;
-               }
-       }
 
        return noErr;
 
@@ -427,9 +374,6 @@ Routine:    UpdateNode      -       Mark a node as dirty and call FS Agent to release it.
 
 Function:      Marks a BTree node dirty and informs the FS Agent that it may be released.
 
-                       //\80\80 have another routine that clears & writes a node, so we can call
-                       CheckNode from this routine.
-
 Input:         btreePtr                - pointer to BTree control block
                        nodeNum                 - number of node to release
                        transactionID   - ID of transaction this node update is a part of
@@ -450,14 +394,8 @@ OSStatus   UpdateNode      (BTreeControlBlockPtr    btreePtr,
        
        err = noErr;
                
-       if (nodePtr->buffer != nil)                     //\80\80 why call UpdateNode if nil ?!?
+       if (nodePtr->buffer != nil)                     // Why call UpdateNode if nil ?!?
        {
-               if (DEBUG_BUILD)
-               {
-                       if ( btreePtr->attributes & kBTVariableIndexKeysMask )
-                               (void) CheckNode (btreePtr, nodePtr->buffer);
-               }
-
                releaseNodeProc = btreePtr->releaseBlockProc;
                err = releaseNodeProc (btreePtr->fileRefNum,
                                                           nodePtr,
@@ -478,90 +416,6 @@ ErrorExit:
 
 
 
-/*-------------------------------------------------------------------------------
-
-Routine:       CheckNode       -       Checks the validity of a node.
-
-Function:      Checks the validity of a node by verifying that the fLink and bLink fields
-                       are within the forks EOF. The node type must be one of the four known
-                       types. The node height must be less than or equal to the tree height. The
-                       node must not have more than the maximum number of records, and the record
-                       offsets must make sense.
-
-Input:         btreePtr                - pointer to BTree control block
-                       node                    - pointer to node to check
-                                               
-Result:                noErr           - success
-                       fsBTInvalidNodeErr              - failure
--------------------------------------------------------------------------------*/
-
-OSStatus       CheckNode       (BTreeControlBlockPtr    btreePtr, NodeDescPtr   node )
-{
-       SInt32          index;
-       SInt32          maxRecords;
-       UInt32          maxNode;
-       UInt16          nodeSize;
-       UInt16          offset;
-       UInt16          prevOffset;
-
-       nodeSize = btreePtr->nodeSize;
-
-       ///////////////////// are fLink and bLink within EOF ////////////////////////
-
-       maxNode = (GetFileControlBlock(btreePtr->fileRefNum)->fcbEOF / nodeSize) - 1;
-
-       if ( (node->fLink > maxNode) || (node->bLink > maxNode) )
-               return fsBTInvalidNodeErr;
-
-       /////////////// check node type (leaf, index, header, map) //////////////////
-
-       if ( (node->kind < kBTLeafNode) || (node->kind > kBTMapNode) )
-               return fsBTInvalidNodeErr;
-
-       ///////////////////// is node height > tree depth? //////////////////////////
-
-       if ( node->height > btreePtr->treeDepth )
-               return fsBTInvalidNodeErr;
-
-       //////////////////////// check number of records ////////////////////////////
-               
-       //XXX can we calculate a more accurate minimum record size?
-       maxRecords = ( nodeSize - sizeof (BTNodeDescriptor) ) >> 3;
-
-       if (node->numRecords == 0 || node->numRecords > maxRecords)
-               return fsBTInvalidNodeErr;
-
-       ////////////////////////// check record offsets /////////////////////////////
-
-       index = node->numRecords;               /* start index at free space */
-       prevOffset = nodeSize - (index << 1);   /* use 2 bytes past end of free space */
-
-       do {
-               offset = GetRecordOffset (btreePtr, node, index);
-                       
-               if (offset & 1)                                                         // offset is odd
-                       return fsBTInvalidNodeErr;
-               
-               if (offset >= prevOffset)                                       // offset >= previous offset
-                       return fsBTInvalidNodeErr;
-
-               /* reject keys that overflow record slot */
-               if ((node->kind == kBTLeafNode) &&
-                   (index < node->numRecords) &&       /* ignore free space record */
-                   (CalcKeySize(btreePtr, (KeyPtr) ((Ptr)node + offset)) > (prevOffset - offset))) {
-                       return fsBTInvalidNodeErr;
-               }
-               
-               prevOffset = offset;
-       } while ( --index >= 0 );
-
-       if (offset < sizeof (BTNodeDescriptor) )        // first offset < minimum ?
-               return fsBTInvalidNodeErr;
-       
-       return noErr;
-}
-
-
 #if HFS_DIAGNOSTIC
 static void PrintNode(const NodeDescPtr node, UInt16 nodeSize, UInt32 nodeNumber)
 {
index 66521dbbd247fd5cba9b763f75dc73a835db9ae6..1139f641562e8ad87eb2ed06a05ec95ef9580b06 100644 (file)
@@ -140,7 +140,9 @@ int BTScanNextRecord(       BTScanState *   scanState,
 
 static int FindNextLeafNode(   BTScanState *scanState, Boolean avoidIO )
 {
-       int             err;
+       int err;
+       BlockDescriptor block;
+       FileReference fref;
        
        err = noErr;            // Assume everything will be OK
        
@@ -180,29 +182,23 @@ static int FindNextLeafNode(      BTScanState *scanState, Boolean avoidIO )
                        (u_int8_t *) scanState->currentNodePtr += scanState->btcb->nodeSize;
                }
                
-#if BYTE_ORDER == LITTLE_ENDIAN
-               {
-               BlockDescriptor block;
-               FileReference fref;
-
                /* Fake a BlockDescriptor */
+               block.blockHeader = NULL;       /* No buffer cache buffer */
                block.buffer = scanState->currentNodePtr;
+               block.blockNum = scanState->nodeNum;
                block.blockSize = scanState->btcb->nodeSize;
                block.blockReadFromDisk = 1;
                block.isModified = 0;
                
                fref = scanState->btcb->fileRefNum;
                
-               SWAP_BT_NODE(&block, ISHFSPLUS(VTOVCB(fref)), VTOC(fref)->c_fileid, 0);
-               }
-#endif
-
-               // Make sure this is a valid node
-               if ( CheckNode( scanState->btcb, scanState->currentNodePtr ) != noErr )
-               {
+               /* This node was read from disk, so it must be swapped/checked. */
+               err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost);
+               if ( err != noErr ) {
+                       printf("FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum);
                        continue;
                }
-               
+
                if ( scanState->currentNodePtr->kind == kBTLeafNode )
                        break;
        }
index 0cce7eb236eb524853b803766f39941a592933e3..5747e31aae6e9eb84b251b2319b57c04c82ddc57 100644 (file)
@@ -113,6 +113,7 @@ enum {
 struct BlockDescriptor{
        void            *buffer;
        void            *blockHeader;
+       daddr64_t        blockNum;      /* logical block number (used by hfs_swap_BTNode) */
        ByteCount        blockSize;
        Boolean          blockReadFromDisk;
        Byte         isModified;             // XXXdbg - for journaling
index 35c5830a5ae6b270a1f546285f87e487792ba4fb..852942dd069c29230fc7636765ac2a63d2ea0047 100644 (file)
@@ -402,9 +402,6 @@ OSStatus    GetMapNode                              (BTreeControlBlockPtr    btreePtr,
 
 //// Node Buffer Operations
 
-OSStatus       CheckNode                               (BTreeControlBlockPtr    btreePtr,
-                                                                        NodeDescPtr                     node );
-
 void           ClearNode                               (BTreeControlBlockPtr    btreePtr,
                                                                         NodeDescPtr                     node );
 
index 381b74fe2f83a531e2ce374837b57d1e6b1f062b..c72e598fba8fda96dece91412318ee61801ffb15 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @Apple_LICENSE_HEADER_START@
  * 
@@ -95,10 +95,6 @@ pid_t global_state_pid = -1;       /* Used to control exclusive use of kd_buffer
 
 #define DBG_FUNC_MASK 0xfffffffc
 
-#ifdef ppc
-extern natural_t rtclock_decrementer_min;
-#endif /* ppc */
-
 /* task to string structure */
 struct tts
 {
@@ -774,7 +770,7 @@ kdbg_setpidex(kd_regtype *kdr)
   return(ret);
 }
 
-/* This is for setting a minimum decrementer value */
+/* This is for setting a maximum decrementer value */
 kdbg_setrtcdec(kd_regtype *kdr)
 {
   int ret=0;
@@ -783,13 +779,17 @@ kdbg_setrtcdec(kd_regtype *kdr)
   decval = (natural_t)kdr->value1;
 
   if (decval && decval < KDBG_MINRTCDEC)
-      ret = EINVAL;
+       ret = EINVAL;
 #ifdef ppc
-  else
-      rtclock_decrementer_min = decval;
+       else {
+
+               extern uint32_t maxDec;
+
+               maxDec = decval ? decval : 0x7FFFFFFF;  /* Set or reset the max decrementer */
+       }
 #else
-  else
-    ret = ENOTSUP;
+       else
+               ret = ENOTSUP;
 #endif /* ppc */
 
   return(ret);
index fa4e4c2167badcf6ae6814febb0eece8b54eb231..95c9cbe2eb5f0697ba2b96f2373cc6c96809b28a 100644 (file)
@@ -772,7 +772,7 @@ ctl_register(struct kern_ctl_reg *userkctl, kern_ctl_ref *kctlref)
                userkctl->ctl_sendsize = CTL_SENDSIZE;
        kctl->sendbufsize = userkctl->ctl_sendsize;
 
-       if (kctl->recvbufsize == 0)
+       if (userkctl->ctl_recvsize == 0)
                userkctl->ctl_recvsize = CTL_RECVSIZE;
        kctl->recvbufsize = userkctl->ctl_recvsize;
 
index d17444fd62c7b2c58d353586eeb41ffdbc38aef5..105181c1aa0fa499e236dcf208342545969bfc1d 100644 (file)
@@ -222,7 +222,7 @@ coredump(struct proc *p)
        context.vc_proc = p;
        context.vc_ucred = cred;
 
-       if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, 0, &vp, &context)))
+       if ((error = vnode_open(name, (O_CREAT | FWRITE | O_NOFOLLOW), S_IRUSR, VNODE_LOOKUP_NOFOLLOW, &vp, &context)))
                return (error);
 
        VATTR_INIT(&va);
index 331e24761ca4aba1960fa880552d3ee99127e684..1ec789877ebb6bda029ed592e256c7b7b20662c1 100644 (file)
@@ -701,6 +701,27 @@ fcntl(p, uap, retval)
                }
                goto outdrop;
 
+       case F_GLOBAL_NOCACHE:
+               if (fp->f_type != DTYPE_VNODE) {
+                       error = EBADF;
+                       goto out;
+               }
+               vp = (struct vnode *)fp->f_data;
+               proc_fdunlock(p);
+
+               if ( (error = vnode_getwithref(vp)) == 0 ) {
+
+                       *retval = vnode_isnocache(vp);
+
+                       if (uap->arg)
+                               vnode_setnocache(vp);
+                       else
+                               vnode_clearnocache(vp);
+
+                       (void)vnode_put(vp);
+               }
+               goto outdrop;
+
        case F_RDADVISE: {
                struct radvisory ra_struct;
 
index 1bf948822845e820ff8c1deed02cf8bfaa30fead..d9392549b0b510d1e4a0d71515b031164ab89667 100644 (file)
@@ -1055,21 +1055,21 @@ kevent(struct proc *p, struct kevent_args *uap, register_t *retval)
 
        /* register all the change requests the user provided... */
        noutputs = 0;
-       while (nchanges > 0) {
+       while (nchanges > 0 && error == 0) {
                error = kevent_copyin(&changelist, &kev, p);
                if (error)
                        break;
                                
                kev.flags &= ~EV_SYSFLAGS;
                error = kevent_register(kq, &kev, p);
-               if (error) {
-                       if (nevents == 0)
-                               break;
+               if (error && nevents > 0) {
                        kev.flags = EV_ERROR;
                        kev.data = error;
-                       (void) kevent_copyout(&kev, &ueventlist, p);
-                       nevents--;
-                       noutputs++;
+                       error = kevent_copyout(&kev, &ueventlist, p);
+                       if (error == 0) {
+                               nevents--;
+                               noutputs++;
+                       }
                }
                nchanges--;
        }
index ed56bd1cddf50ff18cb82d942ac60cb0a7d72ab3..4dc099a0dfb35062a3bcc0e53444b713a40d706c 100644 (file)
@@ -71,6 +71,7 @@
 #include <kern/kalloc.h>
 #include <vm/vm_kern.h>
 #include <pexpert/pexpert.h>
+#include <IOKit/IOHibernatePrivate.h>
 
 extern unsigned char   rootdevice[];
 extern struct mach_header _mh_execute_header;
@@ -341,3 +342,230 @@ int get_kernel_symfile(struct proc *p, char **symfile)
     return error_code;
 }
 
+struct kern_direct_file_io_ref_t
+{
+    struct vfs_context         context;
+    struct vnode               *vp;
+};
+
+
+static int file_ioctl(void * p1, void * p2, int theIoctl, caddr_t result)
+{
+    dev_t device = (dev_t) p1;
+
+    return ((*bdevsw[major(device)].d_ioctl)
+                   (device, theIoctl, result, S_IFBLK, p2));
+}
+
+static int device_ioctl(void * p1, __unused void * p2, int theIoctl, caddr_t result)
+{
+    return (VNOP_IOCTL(p1, theIoctl, result, 0, p2));
+}
+
+struct kern_direct_file_io_ref_t *
+kern_open_file_for_direct_io(const char * name, 
+                            kern_get_file_extents_callback_t callback, 
+                            void * callback_ref,
+                            dev_t * device_result,
+                             uint64_t * partitionbase_result,
+                             uint64_t * maxiocount_result)
+{
+    struct kern_direct_file_io_ref_t * ref;
+
+    struct proc                *p;
+    struct ucred               *cred;
+    struct vnode_attr          va;
+    int                                error;
+    off_t                      f_offset;
+    uint32_t                   blksize;
+    uint64_t                   size;
+    dev_t                      device;
+    off_t                      maxiocount, count;
+
+    int (*do_ioctl)(void * p1, void * p2, int theIoctl, caddr_t result);
+    void * p1;
+    void * p2;
+
+    error = EFAULT;
+
+    ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t));
+    if (!ref)
+    {
+       error = EFAULT;
+       goto out;
+    }
+
+    ref->vp = NULL;
+    p = current_proc();                // kernproc;
+    cred = p->p_ucred;
+    ref->context.vc_proc = p;
+    ref->context.vc_ucred = cred;
+
+    if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, &ref->context)))
+        goto out;
+
+    VATTR_INIT(&va);
+    VATTR_WANTED(&va, va_rdev);
+    VATTR_WANTED(&va, va_fsid);
+    VATTR_WANTED(&va, va_data_size);
+    VATTR_WANTED(&va, va_nlink);
+    error = EFAULT;
+    if (vnode_getattr(ref->vp, &va, &ref->context))
+       goto out;
+
+    kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev));
+    kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid));
+    kprintf("vp size %qd\n", va.va_data_size);
+
+    if (ref->vp->v_type == VREG)
+    {
+       /* Don't dump files with links. */
+       if (va.va_nlink != 1)
+           goto out;
+
+        device = va.va_fsid;
+        p1 = (void *) device;
+        p2 = p;
+        do_ioctl = &file_ioctl;
+    }
+    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
+    {
+       /* Partition. */
+        device = va.va_rdev;
+
+        p1 = ref->vp;
+        p2 = &ref->context;
+        do_ioctl = &device_ioctl;
+    }
+    else
+    {
+       /* Don't dump to non-regular files. */
+       error = EFAULT;
+        goto out;
+    }
+
+    // get partition base
+
+    error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result);
+    if (error)
+        goto out;
+
+    // get block size & constraints
+
+    error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize);
+    if (error)
+        goto out;
+
+    maxiocount = 1*1024*1024*1024;
+
+    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count);
+    if (error)
+        count = 0;
+    count *= blksize;
+    if (count && (count < maxiocount))
+        maxiocount = count;
+
+    error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count);
+    if (error)
+        count = 0;
+    count *= blksize;
+    if (count && (count < maxiocount))
+        maxiocount = count;
+
+    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count);
+    if (error)
+        count = 0;
+    if (count && (count < maxiocount))
+        maxiocount = count;
+
+    error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count);
+    if (error)
+        count = 0;
+    if (count && (count < maxiocount))
+        maxiocount = count;
+
+    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count);
+    if (error)
+        count = 0;
+    if (count && (count < maxiocount))
+        maxiocount = count;
+
+    error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count);
+    if (error)
+        count = 0;
+    if (count && (count < maxiocount))
+        maxiocount = count;
+
+    kprintf("max io 0x%qx bytes\n", maxiocount);
+    if (maxiocount_result)
+        *maxiocount_result = maxiocount;
+
+    // generate the block list
+
+    error = 0;
+    if (ref->vp->v_type == VREG)
+    {
+       f_offset = 0;
+       while(f_offset < (off_t) va.va_data_size) 
+       {
+           size_t io_size = 1*1024*1024*1024;
+           daddr64_t blkno;
+
+           error = VNOP_BLOCKMAP(ref->vp, f_offset, io_size, &blkno, (size_t *)&io_size, NULL, 0, NULL);
+           if (error)
+               goto out;
+           callback(callback_ref, ((uint64_t) blkno) * blksize, (uint64_t) io_size);
+           f_offset += io_size;
+       }
+       callback(callback_ref, 0ULL, 0ULL);
+    }
+    else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR))
+    {
+        error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &size);
+        if (error)
+            goto out;
+       size *= blksize;
+       callback(callback_ref, 0ULL, size);
+       callback(callback_ref, size, 0ULL);
+    }
+
+    if (device_result)
+        *device_result = device;
+
+out:
+    kprintf("kern_open_file_for_direct_io(%d)\n", error);
+
+    if (error && ref) {
+       if (ref->vp)
+           vnode_close(ref->vp, FWRITE, &ref->context);
+
+       kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
+    }
+
+    return(ref);
+}
+
+int
+kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len)
+{
+    return (vn_rdwr(UIO_WRITE, ref->vp,
+                       addr, len, offset,
+                       UIO_SYSSPACE32, IO_SYNC|IO_NODELOCKED|IO_UNIT, 
+                        ref->context.vc_ucred, (int *) 0, ref->context.vc_proc));
+}
+
+void
+kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref)
+{
+    kprintf("kern_close_file_for_direct_io\n");
+
+    if (ref) {
+       int                error;
+
+       if (ref->vp) {
+           error = vnode_close(ref->vp, FWRITE, &ref->context);
+           kprintf("vnode_close(%d)\n", error);
+       }
+       kfree(ref, sizeof(struct kern_direct_file_io_ref_t));
+    }
+}
index 15c290aab77d26bf6424c5c5da8dee2c1a3b162b..c4b070b42e7a535ebeedad5bcbf47709fb73579f 100644 (file)
@@ -462,12 +462,26 @@ extern void in_delayed_cksum_offset(struct mbuf *m, int ip_offset);
 void
 mbuf_outbound_finalize(mbuf_t mbuf, u_long protocol_family, size_t protocol_offset)
 {
-       if ((mbuf->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_IP)) == 0)
+       if ((mbuf->m_pkthdr.csum_flags &
+                (CSUM_DELAY_DATA | CSUM_DELAY_IP | CSUM_TCP_SUM16)) == 0)
                return;
        
        /* Generate the packet in software, client needs it */
        switch (protocol_family) {
                case PF_INET:
+                       if (mbuf->m_pkthdr.csum_flags & CSUM_TCP_SUM16) {
+                               /*
+                                * If you're wondering where this lovely code comes
+                                * from, we're trying to undo what happens in ip_output.
+                                * Look for CSUM_TCP_SUM16 in ip_output.
+                                */
+                               u_int16_t       first, second;
+                               mbuf->m_pkthdr.csum_flags &= ~CSUM_TCP_SUM16;
+                               mbuf->m_pkthdr.csum_flags |= CSUM_TCP;
+                               first = mbuf->m_pkthdr.csum_data >> 16;
+                               second = mbuf->m_pkthdr.csum_data & 0xffff;
+                               mbuf->m_pkthdr.csum_data = first - second;
+                       }
                        if (mbuf->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
                                in_delayed_cksum_offset(mbuf, protocol_offset);
                        }
index 68ea45cc43d6945b5e63878c5a272b3aa9c6e3e5..86b3e9c60aa0199ba300613fc8472db17b6bc483 100644 (file)
@@ -33,6 +33,8 @@
 static struct socket_filter_list       sock_filter_head;
 static lck_mtx_t                                       *sock_filter_lock = 0;
 
+static void    sflt_detach_private(struct socket_filter_entry *entry, int unregistering);
+
 __private_extern__ void
 sflt_init(void)
 {
@@ -82,6 +84,7 @@ sflt_termsock(
                filter_next = filter->sfe_next_onsocket;
                sflt_detach_private(filter, 0);
        }
+       so->so_filt = NULL;
 }
 
 __private_extern__ void
@@ -103,7 +106,7 @@ sflt_unuse(
                for (filter = so->so_filt; filter; filter = next_filter) {
                        next_filter = filter->sfe_next_onsocket;
                        
-                       if (filter->sfe_flags & SFEF_DETACHING) {
+                       if (filter->sfe_flags & SFEF_DETACHUSEZERO) {
                                sflt_detach_private(filter, 0);
                        }
                }
@@ -219,6 +222,7 @@ sflt_attach_private(
                entry->sfe_filter = filter;
                entry->sfe_socket = so;
                entry->sfe_cookie = NULL;
+               entry->sfe_flags = 0;
                if (entry->sfe_filter->sf_filter.sf_attach) {
                        filter->sf_usecount++;
                
@@ -247,9 +251,6 @@ sflt_attach_private(
                entry->sfe_next_onfilter = filter->sf_entry_head;
                filter->sf_entry_head = entry;
                
-               /* Increment the socket's usecount */
-               so->so_usecount++;
-               
                /* Incremenet the parent filter's usecount */
                filter->sf_usecount++;
        }
@@ -270,17 +271,17 @@ sflt_attach_private(
  * list and the socket lock is not held.
  */
 
-__private_extern__ void
+static void
 sflt_detach_private(
        struct socket_filter_entry *entry,
-       int     filter_detached)
+       int     unregistering)
 {
        struct socket *so = entry->sfe_socket;
        struct socket_filter_entry **next_ptr;
        int                             detached = 0;
        int                             found = 0;
        
-       if (filter_detached) {
+       if (unregistering) {
                socket_lock(entry->sfe_socket, 0);
        }
        
@@ -290,7 +291,16 @@ sflt_detach_private(
         * same time from attempting to remove the same entry.
         */
        lck_mtx_lock(sock_filter_lock);
-       if (!filter_detached) {
+       if (!unregistering) {
+               if ((entry->sfe_flags & SFEF_UNREGISTERING) != 0) {
+                       /*
+                        * Another thread is unregistering the filter, we need to
+                        * avoid detaching the filter here so the socket won't go
+                        * away.
+                        */
+                       lck_mtx_unlock(sock_filter_lock);
+                       return;
+               }
                for (next_ptr = &entry->sfe_filter->sf_entry_head; *next_ptr;
                         next_ptr = &((*next_ptr)->sfe_next_onfilter)) {
                        if (*next_ptr == entry) {
@@ -299,24 +309,30 @@ sflt_detach_private(
                                break;
                        }
                }
+               
+               if (!found && (entry->sfe_flags & SFEF_DETACHUSEZERO) == 0) {
+                       lck_mtx_unlock(sock_filter_lock);
+                       return;
+               }
        }
-       
-       if (!filter_detached && !found && (entry->sfe_flags & SFEF_DETACHING) == 0) {
-               lck_mtx_unlock(sock_filter_lock);
-               return;
+       else {
+               /*
+                * Clear the removing flag. We will perform the detach here or
+                * request a delayed detach.
+                */
+               entry->sfe_flags &= ~SFEF_UNREGISTERING;
        }
 
        if (entry->sfe_socket->so_filteruse != 0) {
+               entry->sfe_flags |= SFEF_DETACHUSEZERO;
                lck_mtx_unlock(sock_filter_lock);
-               entry->sfe_flags |= SFEF_DETACHING;
                return;
        }
-       
-       /*
-        * Check if we are removing the last attached filter and
-        * the parent filter is being unregistered.
-        */
-       if (entry->sfe_socket->so_filteruse == 0) {
+       else {
+               /*
+                * Check if we are removing the last attached filter and
+                * the parent filter is being unregistered.
+                */
                entry->sfe_filter->sf_usecount--;
                if ((entry->sfe_filter->sf_usecount == 0) &&
                        (entry->sfe_filter->sf_flags & SFF_DETACHING) != 0)
@@ -340,14 +356,10 @@ sflt_detach_private(
                entry->sfe_filter->sf_filter.sf_unregistered(entry->sfe_filter->sf_filter.sf_handle);
                FREE(entry->sfe_filter, M_IFADDR);
        }
-       
-       if (filter_detached) {
+
+       if (unregistering) 
                socket_unlock(entry->sfe_socket, 1);
-       }
-       else {
-               // We need some better way to decrement the usecount
-               so->so_usecount--;
-       }
+
        FREE(entry, M_IFADDR);
 }
 
@@ -385,6 +397,7 @@ sflt_detach(
                sflt_detach_private(filter, 0);
        }
        else {
+               socket->so_filt = NULL;
                result = ENOENT;
        }
        
@@ -453,6 +466,7 @@ sflt_unregister(
 {
        struct socket_filter *filter;
        struct socket_filter_entry *entry_head = NULL;
+       struct socket_filter_entry *next_entry = NULL;
        
        /* Find the entry and remove it from the global and protosw lists */
        lck_mtx_lock(sock_filter_lock);
@@ -469,6 +483,13 @@ sflt_unregister(
                entry_head = filter->sf_entry_head;
                filter->sf_entry_head = NULL;
                filter->sf_flags |= SFF_DETACHING;
+       
+               for (next_entry = entry_head; next_entry;
+                        next_entry = next_entry->sfe_next_onfilter) {
+                       socket_lock(next_entry->sfe_socket, 1);
+                       next_entry->sfe_flags |= SFEF_UNREGISTERING;
+                       socket_unlock(next_entry->sfe_socket, 0);       /* Radar 4201550: prevents the socket from being deleted while being unregistered */
+               }
        }
        
        lck_mtx_unlock(sock_filter_lock);
@@ -482,7 +503,6 @@ sflt_unregister(
                        filter->sf_filter.sf_unregistered(filter->sf_filter.sf_handle);
        } else {
                while (entry_head) {
-                       struct socket_filter_entry *next_entry;
                        next_entry = entry_head->sfe_next_onfilter;
                        sflt_detach_private(entry_head, 1);
                        entry_head = next_entry;
index e50013d3882c485eac7b25a1e43690f72fff1da3..42ac8142c5c0eda8b6bff190d188958598a28e30 100644 (file)
@@ -113,6 +113,7 @@ extern struct sysctl_oid sysctl__kern_posix;
 extern struct sysctl_oid sysctl__kern_posix_sem;
 extern struct sysctl_oid sysctl__kern_posix_sem_max;
 extern struct sysctl_oid sysctl__kern_sugid_scripts;
+extern struct sysctl_oid sysctl__kern_always_do_fullfsync;
 extern struct sysctl_oid sysctl__net_inet_icmp_icmplim;
 extern struct sysctl_oid sysctl__net_inet_icmp_maskrepl;
 extern struct sysctl_oid sysctl__net_inet_icmp_timestamp;
@@ -523,6 +524,7 @@ struct sysctl_oid *newsysctl_list[] =
     ,&sysctl__kern_ipc_maxsockets
 
     ,&sysctl__kern_sugid_scripts
+    ,&sysctl__kern_always_do_fullfsync
 
     ,&sysctl__hw_machine
     ,&sysctl__hw_model
index 8f7b26537c25e1b34e3638dd26fd71ba171af894..24bc5cfada9665983a4ee330fb5d6e456f9c1dc3 100644 (file)
@@ -248,12 +248,6 @@ grow_semu_array(int newSize)
 {
        register int i;
        register struct sem_undo *newSemu;
-       static boolean_t grow_semu_array_in_progress = FALSE;
-
-       while (grow_semu_array_in_progress) {
-               msleep(&grow_semu_array_in_progress, &sysv_sem_subsys_mutex,
-                      PPAUSE, "grow_semu_array", NULL);
-       }
 
        if (newSize <= seminfo.semmnu)
                return 1;
@@ -271,13 +265,8 @@ grow_semu_array(int newSize)
 #ifdef SEM_DEBUG
        printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
 #endif
-       grow_semu_array_in_progress = TRUE;
-       SYSV_SEM_SUBSYS_UNLOCK();
-       MALLOC(newSemu, struct sem_undo*, sizeof(struct sem_undo) * newSize,
-               M_SYSVSEM, M_WAITOK);
-       SYSV_SEM_SUBSYS_LOCK();
-       grow_semu_array_in_progress = FALSE;
-       wakeup((caddr_t) &grow_semu_array_in_progress);
+       MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize,
+              M_SYSVSEM, M_WAITOK | M_ZERO);
        if (NULL == newSemu)
        {
 #ifdef SEM_DEBUG
@@ -286,15 +275,16 @@ grow_semu_array(int newSize)
                return 0;
        }
 
-               /* Initialize our structure.  */
+               /* copy the old data to the new array */
        for (i = 0; i < seminfo.semmnu; i++)
        {
                newSemu[i] = semu[i];
        }
-               for (i = seminfo.semmnu; i < newSize; i++)
-        {
-                       newSemu[i].un_proc = NULL;
-        }
+       /*
+        * The new elements (from newSemu[i] to newSemu[newSize-1]) have their
+        * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
+        * so they're already marked as "not in use".
+        */
 
        /* Clean up the old array */
        if (semu)
@@ -336,8 +326,9 @@ grow_sema_array(int newSize)
 #ifdef SEM_DEBUG
        printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
 #endif
-       MALLOC(newSema, struct user_semid_ds *, sizeof(struct user_semid_ds) * newSize,
-               M_SYSVSEM, M_WAITOK);
+       MALLOC(newSema, struct user_semid_ds *,
+              sizeof (struct user_semid_ds) * newSize,
+              M_SYSVSEM, M_WAITOK | M_ZERO);
        if (NULL == newSema)
        {
 #ifdef SEM_DEBUG
@@ -346,7 +337,7 @@ grow_sema_array(int newSize)
                return 0;
        }
 
-       /* Initialize our new ids, and copy over the old ones */
+       /* copy over the old ids */
        for (i = 0; i < seminfo.semmni; i++)
        {
                newSema[i] = sema[i];
@@ -361,12 +352,11 @@ grow_sema_array(int newSize)
                if (sema[i].sem_perm.mode & SEM_ALLOC)
                        wakeup((caddr_t)&sema[i]);
        }
-
-       for (i = seminfo.semmni; i < newSize; i++)
-       {
-               newSema[i].sem_base = NULL;
-               newSema[i].sem_perm.mode = 0;
-       }
+       /*
+        * The new elements (from newSema[i] to newSema[newSize-1]) have their
+        * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
+        * flag to MALLOC() above, so they're already marked as "not in use".
+        */
 
        /* Clean up the old array */
        if (sema)
@@ -410,8 +400,8 @@ grow_sem_pool(int new_pool_size)
 #ifdef SEM_DEBUG
        printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
 #endif
-       MALLOC(new_sem_pool, struct sem *, sizeof(struct sem) * new_pool_size,
-               M_SYSVSEM, M_WAITOK);
+       MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size,
+              M_SYSVSEM, M_WAITOK | M_ZERO);
        if (NULL == new_sem_pool) {
 #ifdef SEM_DEBUG
                printf("allocation failed.  no changes made.\n");
@@ -535,8 +525,9 @@ semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid,
        register struct undo *sueptr, **suepptr, *new_sueptr;
        int i;
 
-       /* Look for and remember the sem_undo if the caller doesn't provide
-          it */
+       /*
+        * Look for and remember the sem_undo if the caller doesn't provide it
+        */
 
        suptr = *supptr;
        if (suptr == NULL) {
@@ -562,7 +553,6 @@ semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid,
         * 0).
         */
        new_sueptr = NULL;
-lookup:
        for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
             i < suptr->un_cnt;
             i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
@@ -578,61 +568,36 @@ lookup:
                        FREE(sueptr, M_SYSVSEM);
                        sueptr = NULL;
                }
-               if (new_sueptr != NULL) {
-                       /*
-                        * We lost the race: free the "undo" entry we allocated
-                        * and use the one that won.
-                        */
-                       FREE(new_sueptr, M_SYSVSEM);
-                       new_sueptr = NULL;
-               }
-               return(0);
+               return 0;
        }
 
        /* Didn't find the right entry - create it */
        if (adjval == 0) {
-               if (new_sueptr != NULL) {
-                       FREE(new_sueptr, M_SYSVSEM);
-                       new_sueptr = NULL;
-               }
-               return(0);
+               /* no adjustment: no need for a new entry */
+               return 0;
        }
 
-       if (new_sueptr != NULL) {
-               /*
-                * Use the new "undo" entry we allocated in the previous pass
-                */
-               new_sueptr->une_next = suptr->un_ent;
-               suptr->un_ent = new_sueptr;
-               suptr->un_cnt++;
-               new_sueptr->une_adjval = adjval;
-               new_sueptr->une_id = semid;
-               new_sueptr->une_num = semnum;
-               return 0;
+       if (suptr->un_cnt == limitseminfo.semume) {
+               /* reached the limit number of semaphore undo entries */
+               return EINVAL;
        }
 
-       if (suptr->un_cnt != limitseminfo.semume) {
-               SYSV_SEM_SUBSYS_UNLOCK();
-               /*
-                * Unlocking opens the door to race conditions.  Someone else
-                * could be trying to allocate the same thing at this point,
-                * so we'll have to check if we lost the race.
-                */
-               MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
-                      M_SYSVSEM, M_WAITOK);
-               SYSV_SEM_SUBSYS_LOCK();
-               if (new_sueptr == NULL) {
-                       return ENOMEM;
-               }
-               /*
-                * There might be other threads doing the same thing for this
-                * process, so check again if an "undo" entry exists for that
-                * semaphore.
-                */
-               goto lookup;
-       } else
-               return(EINVAL);
-       return(0);
+       /* allocate a new semaphore undo entry */
+       MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
+              M_SYSVSEM, M_WAITOK);
+       if (new_sueptr == NULL) {
+               return ENOMEM;
+       }
+
+       /* fill in the new semaphore undo entry */
+       new_sueptr->une_next = suptr->un_ent;
+       suptr->un_ent = new_sueptr;
+       suptr->un_cnt++;
+       new_sueptr->une_adjval = adjval;
+       new_sueptr->une_id = semid;
+       new_sueptr->une_num = semnum;
+
+       return 0;
 }
 
 /* Assumes we already hold the subsystem lock.
@@ -742,8 +707,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
                if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
                                goto semctlout;
 
-               SYSV_SEM_SUBSYS_UNLOCK();
-
                if (IS_64BIT_PROCESS(p)) {
                        eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds));
                } else {
@@ -752,10 +715,9 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
                        semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf);
                }
                
-               if (eval != 0)
-                       return(eval);
-
-               SYSV_SEM_SUBSYS_LOCK();
+               if (eval != 0) {
+                       goto semctlout;
+               }
 
                semaptr->sem_perm.uid = sbuf.sem_perm.uid;
                semaptr->sem_perm.gid = sbuf.sem_perm.gid;
@@ -768,7 +730,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
                if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
                                goto semctlout;
                bcopy(semaptr, &uds, sizeof(struct user_semid_ds));
-               SYSV_SEM_SUBSYS_UNLOCK();
                if (IS_64BIT_PROCESS(p)) {
                        eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds));
                } else {
@@ -776,7 +737,6 @@ semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
                        semid_ds_64to32(&uds, &semid_ds32);
                        eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds));
                }
-               SYSV_SEM_SUBSYS_LOCK();
                break;
 
        case GETNCNT:
@@ -904,7 +864,7 @@ semget(__unused struct proc *p, struct semget_args *uap, register_t *retval)
 #endif
 
 
-               SYSV_SEM_SUBSYS_LOCK();
+       SYSV_SEM_SUBSYS_LOCK();
 
     
        if (key != IPC_PRIVATE) {
@@ -1190,42 +1150,44 @@ semop(struct proc *p, struct semop_args *uap, register_t *retval)
 #ifdef SEM_DEBUG
                printf("semop:  good morning (eval=%d)!\n", eval);
 #endif
-               /* we need the lock here due to mods on semptr */
                if (eval != 0) {
-                       if (sopptr->sem_op == 0)
-                               semptr->semzcnt--;
-                       else
-                               semptr->semncnt--;
-
                        eval = EINTR;
-                       goto semopout;
                }
 
+               /*
+                * IMPORTANT: while we were asleep, the semaphore array might
+                * have been reallocated somewhere else (see grow_sema_array()).
+                * When we wake up, we have to re-lookup the semaphore 
+                * structures and re-validate them.
+                */
+
                suptr = NULL;   /* sem_undo may have been reallocated */
                semaptr = &sema[semid];    /* sema may have been reallocated */
 
-
-#ifdef SEM_DEBUG
-               printf("semop:  good morning!\n");
-#endif
-
                /*
                 * Make sure that the semaphore still exists
                 */
                if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
-                   semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
-                       /* The man page says to return EIDRM. */
-                       /* Unfortunately, BSD doesn't define that code! */
-                       if (sopptr->sem_op == 0)
-                               semptr->semzcnt--;
-                       else
-                               semptr->semncnt--;
+                   semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid) ||
+                   sopptr->sem_num >= semaptr->sem_nsems) {
+                       if (eval == EINTR) {
+                               /*
+                                * EINTR takes precedence over the fact that
+                                * the semaphore disappeared while we were
+                                * sleeping...
+                                */
+                       } else {
+                               /*
+                                * The man page says to return EIDRM.
+                                * Unfortunately, BSD doesn't define that code!
+                                */
 #ifdef EIDRM
-               eval = EIDRM;
+                               eval = EIDRM;
 #else
-               eval = EINVAL;
+                               eval = EINVAL;
 #endif
-               goto semopout;
+                       }
+                       goto semopout;
                }
 
                /*
@@ -1239,6 +1201,10 @@ semop(struct proc *p, struct semop_args *uap, register_t *retval)
                        semptr->semzcnt--;
                else
                        semptr->semncnt--;
+
+               if (eval != 0) { /* EINTR */
+                       goto semopout;
+               }
        }
 
 done:
@@ -1553,9 +1519,7 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                        error = EINVAL;
                        break;
                }
-               SYSV_SEM_SUBSYS_UNLOCK();
                error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
-               SYSV_SEM_SUBSYS_LOCK();
                break;
 
        case IPCS_SEM_ITER:     /* Iterate over existing segments */
@@ -1588,14 +1552,12 @@ IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
                        semid_ds_64to32(semid_dsp, &semid_ds32);
                        semid_dsp = &semid_ds32;
                }
-               SYSV_SEM_SUBSYS_UNLOCK();
                error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
                if (!error) {
                        /* update cursor */
                        ipcs.u64.ipcs_cursor = cursor + 1;
                        error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
                }
-               SYSV_SEM_SUBSYS_LOCK();
                break;
 
        default:
index 84100312acfca1837141b8a8f1c156f123a3ed20..ec648d6757843b1282c70bb80b73d4b9108fe953 100644 (file)
@@ -812,8 +812,7 @@ m_move_pkthdr(struct mbuf *to, struct mbuf *from)
  * "from" must have M_PKTHDR set, and "to" must be empty.
  * In particular, this does a deep copy of the packet tags.
  */
-#ifndef __APPLE__
-int
+static int
 m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
 {
         to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
@@ -823,7 +822,6 @@ m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how)
         SLIST_INIT(&to->m_pkthdr.tags);
         return (m_tag_copy_chain(to, from, how));
 }
-#endif
 
 /*
  * return a list of mbuf hdrs that point to clusters...
@@ -2146,17 +2144,8 @@ m_dup(struct mbuf *m, int how)
                        {       if ((n = m_gethdr(how, m->m_type)) == NULL)
                                        return(NULL);
                                n->m_len = m->m_len;
-                               n->m_flags |= (m->m_flags & M_COPYFLAGS);
-                               n->m_pkthdr.len = m->m_pkthdr.len;
-                               n->m_pkthdr.rcvif = m->m_pkthdr.rcvif;
-                               n->m_pkthdr.header = NULL;
-                               n->m_pkthdr.csum_flags = 0;
-                               n->m_pkthdr.csum_data = 0;
-                               n->m_pkthdr.aux = NULL;
-                               n->m_pkthdr.vlan_tag = 0;
-                               n->m_pkthdr.socket_id = 0;
-                               SLIST_INIT(&n->m_pkthdr.tags);
-                                bcopy(m->m_data, n->m_data, m->m_pkthdr.len);
+                               m_dup_pkthdr(n, m, how);
+                               bcopy(m->m_data, n->m_data, m->m_len);
                                return(n);
                        }
                } else if (m->m_len <= MLEN)
@@ -2187,8 +2176,7 @@ m_dup(struct mbuf *m, int how)
                *np = n;
                if (copyhdr)
                {       /* Don't use M_COPY_PKTHDR: preserve m_data */
-                       n->m_pkthdr = m->m_pkthdr;
-                       n->m_flags |= (m->m_flags & M_COPYFLAGS);
+                       m_dup_pkthdr(n, m, how);
                        copyhdr = 0;
                        if ((n->m_flags & M_EXT) == 0)
                                n->m_data = n->m_pktdat;
index a8c8652b2309f61ce953efab95c288d15d8de3ab..3dd9060de6aaeb71a8c3a261aaf01f00358256d8 100644 (file)
@@ -552,7 +552,7 @@ m_tag_copy(struct m_tag *t, int how)
        struct m_tag *p;
 
        KASSERT(t, ("m_tag_copy: null tag"));
-       p = m_tag_alloc(t->m_tag_type, t->m_tag_id, t->m_tag_len, how);
+       p = m_tag_alloc(t->m_tag_id, t->m_tag_type, t->m_tag_len, how);
        if (p == NULL)
                return (NULL);
        bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */
index b0068b4c86a6fe5b88b029c39de8e86a268d95ee..29380a399f39a913f3c8ba0ce146fa6ea0f9e1b8 100644 (file)
@@ -642,6 +642,9 @@ sofreelastref(so, dealloc)
 
        /*### Assume socket is locked */
 
+       /* Remove any filters - may be called more than once */
+       sflt_termsock(so);
+       
        if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) {
 #ifdef __APPLE__
                selthreadclear(&so->so_snd.sb_sel);
@@ -1029,13 +1032,28 @@ sosendcheck(
 {
        int error = 0;
        long space;
+       int     assumelock = 0;
 
 restart:
        if (*sblocked == 0) {
-               error = sblock(&so->so_snd, SBLOCKWAIT(flags));
-               if (error)
-                       return error;
-               *sblocked = 1;
+               if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
+                       so->so_send_filt_thread != 0 &&
+                       so->so_send_filt_thread == current_thread()) {
+                       /*
+                        * We're being called recursively from a filter,
+                        * allow this to continue. Radar 4150520.
+                        * Don't set sblocked because we don't want
+                        * to perform an unlock later.
+                        */
+                       assumelock = 1;
+               }
+               else {
+                       error = sblock(&so->so_snd, SBLOCKWAIT(flags));
+                       if (error) {
+                               return error;
+                       }
+                       *sblocked = 1;
+               }
        }
        
        if (so->so_state & SS_CANTSENDMORE) 
@@ -1070,8 +1088,9 @@ restart:
                return EMSGSIZE;
        if (space < resid + clen && 
                (atomic || space < so->so_snd.sb_lowat || space < clen)) {
-               if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO))
+               if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
                        return EWOULDBLOCK;
+               }
                sbunlock(&so->so_snd, 1);
                error = sbwait(&so->so_snd);
                if (error) {
@@ -1164,12 +1183,7 @@ sosend(so, addr, uio, top, control, flags)
        do {
                error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
                if (error) {
-                       if (sblocked)
-                               goto release;
-                       else {
-                               socket_unlock(so, 1);
-                               goto out;
-                       }
+                       goto release;
                }
                mp = &top;
                space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);
@@ -1237,12 +1251,7 @@ sosend(so, addr, uio, top, control, flags)
                                                if (freelist == NULL) {
                                                        error = ENOBUFS;
                                                        socket_lock(so, 0);
-                                                       if (sblocked) {
-                                                               goto release;
-                                                       } else {
-                                                               socket_unlock(so, 1);
-                                                               goto out;
-                                                       }
+                                                       goto release;
                                                }
                                                /*
                                                 * For datagram protocols, leave room
@@ -1294,25 +1303,28 @@ sosend(so, addr, uio, top, control, flags)
                        }
             
                    if (flags & (MSG_HOLD|MSG_SEND))
-                   {   /* Enqueue for later, go away if HOLD */
-                       register struct mbuf *mb1;
-                       if (so->so_temp && (flags & MSG_FLUSH))
-                       {       m_freem(so->so_temp);
-                               so->so_temp = NULL;
-                       }
-                       if (so->so_temp)
-                               so->so_tail->m_next = top;
-                       else
-                               so->so_temp = top;
-                       mb1 = top;
-                       while (mb1->m_next)
-                               mb1 = mb1->m_next;
-                       so->so_tail = mb1;
-                       if (flags&MSG_HOLD)
-                       {       top = NULL;
-                               goto release;
-                       }
-                       top = so->so_temp;
+                   {
+                               /* Enqueue for later, go away if HOLD */
+                               register struct mbuf *mb1;
+                               if (so->so_temp && (flags & MSG_FLUSH))
+                               {
+                                       m_freem(so->so_temp);
+                                       so->so_temp = NULL;
+                               }
+                               if (so->so_temp)
+                                       so->so_tail->m_next = top;
+                               else
+                                       so->so_temp = top;
+                               mb1 = top;
+                               while (mb1->m_next)
+                                               mb1 = mb1->m_next;
+                               so->so_tail = mb1;
+                               if (flags & MSG_HOLD)
+                               {
+                                       top = NULL;
+                                       goto release;
+                               }
+                               top = so->so_temp;
                    }
                    if (dontroute)
                            so->so_options |= SO_DONTROUTE;
@@ -1345,12 +1357,7 @@ sosend(so, addr, uio, top, control, flags)
                                                int so_flags = 0;
                                                if (filtered == 0) {
                                                        filtered = 1;
-                                                       /*
-                                                        * We don't let sbunlock unlock the socket because
-                                                        * we don't want it to decrement the usecount.
-                                                        */
-                                                       sbunlock(&so->so_snd, 1);
-                                                       sblocked = 0;
+                                                       so->so_send_filt_thread = current_thread();
                                                        socket_unlock(so, 0);
                                                        so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
                                                }
@@ -1365,33 +1372,16 @@ sosend(so, addr, uio, top, control, flags)
                                         * The socket is unlocked as is the socket buffer.
                                         */
                                        socket_lock(so, 0);
-                                       if (error == EJUSTRETURN) {
-                                               error = 0;
-                                               clen = 0;
-                                               control = 0;
-                                               top = 0;
-                                               socket_unlock(so, 1);
-                                               goto out;
-                                       }
-                                       else if (error) {
-                                               socket_unlock(so, 1);
-                                               goto out;
-                                       }
-                                       
-                                       
-                                       /* Verify our state again, this will lock the socket buffer */
-                                       error = sosendcheck(so, addr, top->m_pkthdr.len,
-                                                               control ? control->m_pkthdr.len : 0,
-                                                               atomic, flags, &sblocked);
+                                       so->so_send_filt_thread = 0;
                                        if (error) {
-                                               if (sblocked) {
-                                                       /* sbunlock at release will unlock the socket */
-                                                       goto release;
-                                               }
-                                               else {
-                                                       socket_unlock(so, 1);
-                                                       goto out;
+                                               if (error == EJUSTRETURN) {
+                                                       error = 0;
+                                                       clen = 0;
+                                                       control = 0;
+                                                       top = 0;
                                                }
+                                               
+                                               goto release;
                                        }
                                }
                        }
@@ -1423,7 +1413,10 @@ sosend(so, addr, uio, top, control, flags)
        } while (resid);
 
 release:
-       sbunlock(&so->so_snd, 0);       /* will unlock socket */
+       if (sblocked)
+               sbunlock(&so->so_snd, 0);       /* will unlock socket */
+       else
+               socket_unlock(so, 1);
 out:
        if (top)
                m_freem(top);
@@ -2930,9 +2923,6 @@ sofree(so)
                mutex_held = so->so_proto->pr_domain->dom_mtx;
        lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
        
-       /* Remove the filters */
-       sflt_termsock(so);
-       
        sofreelastref(so, 0);
 }
 
index 60eb7747e71c5ce946cb556b9209ca618210b47c..78487fdcfcaa05dae0001c088a25916984a90790 100644 (file)
@@ -905,6 +905,7 @@ unp_pcblist SYSCTL_HANDLER_ARGS
        gencnt = unp_gencnt;
        n = unp_count;
 
+       bzero(&xug, sizeof(xug));
        xug.xug_len = sizeof xug;
        xug.xug_count = n;
        xug.xug_gen = gencnt;
@@ -941,6 +942,8 @@ unp_pcblist SYSCTL_HANDLER_ARGS
                unp = unp_list[i];
                if (unp->unp_gencnt <= gencnt) {
                        struct xunpcb xu;
+
+                       bzero(&xu, sizeof(xu));
                        xu.xu_len = sizeof xu;
                        xu.xu_unpp = (struct  unpcb_compat *)unp;
                        /*
@@ -967,6 +970,8 @@ unp_pcblist SYSCTL_HANDLER_ARGS
                 * while we were processing this request, and it
                 * might be necessary to retry.
                 */
+               bzero(&xug, sizeof(xug));
+               xug.xug_len = sizeof xug;
                xug.xug_gen = unp_gencnt;
                xug.xug_sogen = so_gencnt;
                xug.xug_count = unp_count;
index 5f766f6b7af4b45a3771552806493dfe4397fab3..1118b61426b8401be58eeddcac4a23fd27a27983 100644 (file)
@@ -78,8 +78,6 @@
 #define DLIL_PRINTF    kprintf
 #endif
 
-//#define DLIL_ALWAYS_DELAY_DETACH 1
-
 enum {
        kProtoKPI_DLIL  = 0,
        kProtoKPI_v1    = 1
@@ -632,34 +630,59 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached)
 {
        int retval = 0;
        
-       
-       /* Take the write lock */
-#if DLIL_ALWAYS_DELAY_DETACH
-       retval = EDEADLK;
-#else
-       if (detached == 0 && (retval = dlil_write_begin()) != 0)
-#endif
-        {
+       if (detached == 0) {
+               ifnet_t                         ifp = NULL;
+               interface_filter_t      entry = NULL;
+
+               /* Take the write lock */
+               retval = dlil_write_begin();
+               if (retval != 0 && retval != EDEADLK)
+                       return retval;
+               
+               /*
+                * At this point either we have the write lock (retval == 0)
+                * or we couldn't get it (retval == EDEADLK) because someone
+                * else up the stack is holding the read lock. It is safe to
+                * read, either the read or write is held. Verify the filter
+                * parameter before proceeding.
+                */
+               ifnet_head_lock_shared();
+               TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
+                       TAILQ_FOREACH(entry, &ifp->if_flt_head, filt_next) {
+                               if (entry == filter)
+                                       break;
+                       }
+                       if (entry == filter)
+                               break;
+               }
+               ifnet_head_done();
+               
+               if (entry != filter) {
+                       /* filter parameter is not a valid filter ref */
+                       if (retval == 0) {
+                               dlil_write_end();
+                       }
+                       return EINVAL;
+               }
+               
                if (retval == EDEADLK) {
                        /* Perform a delayed detach */
                        filter->filt_detaching = 1;
                        dlil_detach_waiting = 1;
                        wakeup(&dlil_detach_waiting);
-                       retval = 0;
+                       return 0;
                }
-               return retval;
-       }
-       
-       if (detached == 0)
-               TAILQ_REMOVE(&filter->filt_ifp->if_flt_head, filter, filt_next);
-       
-       /* release the write lock */
-       if (detached == 0)
+               
+               /* Remove the filter from the list */
+               TAILQ_REMOVE(&ifp->if_flt_head, filter, filt_next);
                dlil_write_end();
+       }
        
+       /* Call the detached funciton if there is one */
        if (filter->filt_detached)
                filter->filt_detached(filter->filt_cookie, filter->filt_ifp);
 
+       /* Free the filter */
        FREE(filter, M_NKE);
        
        return retval;
@@ -668,6 +691,8 @@ dlil_detach_filter_internal(interface_filter_t filter, int detached)
 void
 dlil_detach_filter(interface_filter_t filter)
 {
+       if (filter == NULL)
+               return;
        dlil_detach_filter_internal(filter, 0);
 }
 
@@ -972,6 +997,7 @@ dlil_event(struct ifnet *ifp, struct kern_event_msg *event)
        return result;
 }
 
+int
 dlil_output_list(
        struct ifnet* ifp,
        u_long proto_family,
@@ -1964,12 +1990,7 @@ dlil_detach_protocol(struct ifnet *ifp, u_long proto_family)
        int use_reached_zero = 0;
        
 
-#if DLIL_ALWAYS_DELAY_DETACH
-       {
-               retval = EDEADLK;
-#else
        if ((retval = dlil_write_begin()) != 0) {
-#endif
                if (retval == EDEADLK) {
                        retval = 0;
                        dlil_read_begin();
index 3f6d1157d7f710a03c1f0c6b19f426e01b4e06ac..d5e1a5eaf165fcdd8fda512230e8ad544d4593be 100644 (file)
@@ -1454,6 +1454,12 @@ ifconf(u_long cmd, user_addr_t ifrp, int * ret_space)
        int error = 0;
        size_t space;
        
+       /*
+        * Zero the ifr buffer to make sure we don't
+        * disclose the contents of the stack.
+        */
+       bzero(&ifr, sizeof(struct ifreq));
+
        space = *ret_space;
        ifnet_head_lock_shared();
        for (ifp = ifnet_head.tqh_first; space > sizeof(ifr) && ifp; ifp = ifp->if_link.tqe_next) {
@@ -1932,10 +1938,10 @@ if_rtdel(
  */
 void if_rtproto_del(struct ifnet *ifp, int protocol)
 {
-       
-        struct radix_node_head  *rnh;
+       struct radix_node_head  *rnh;
 
-       if ((protocol <= AF_MAX) && ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) {
+       if ((protocol <= AF_MAX) && (protocol >= 0) &&
+               ((rnh = rt_tables[protocol]) != NULL) && (ifp != NULL)) {
                lck_mtx_lock(rt_mtx);
                (void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
                lck_mtx_unlock(rt_mtx);
index faa462f441c3111a751a280db132b29b8b203be7..d78dabb5bba86123e7187ad584d8123eaca55896 100644 (file)
@@ -117,6 +117,7 @@ make_ifmibdata(struct ifnet *ifp, int *name, struct sysctl_req *req)
 
        case IFDATA_GENERAL:
                
+               bzero(&ifmd, sizeof(ifmd));
                snprintf(ifmd.ifmd_name, sizeof(ifmd.ifmd_name), "%s%d",
                        ifp->if_name, ifp->if_unit);
 
index 33d783bb28654c9a75df55545d8fad81dc0cd28a..e56930a55e84085f8ff7d9863231d636ac67566f 100644 (file)
@@ -42,7 +42,8 @@
 
 struct socket_filter;
 
-#define SFEF_DETACHING         0x1
+#define        SFEF_DETACHUSEZERO      0x1     // Detach when use reaches zero
+#define        SFEF_UNREGISTERING      0x2     // Remove due to unregister
 
 struct socket_filter_entry {
        struct socket_filter_entry      *sfe_next_onsocket;
@@ -80,7 +81,6 @@ void  sflt_notify(struct socket *so, sflt_event_t event, void *param);
 int            sflt_data_in(struct socket *so, const struct sockaddr *from, mbuf_t *data,
                                         mbuf_t *control, sflt_data_flag_t flags, int *filtered);
 int            sflt_attach_private(struct socket *so, struct socket_filter *filter, sflt_handle handle, int locked);
-void   sflt_detach_private(struct socket_filter_entry *entry, int filter_detached);
 
 #endif /* BSD_KERNEL_PRIVATE */
 
index 865962ef6191bd94cac060d522aba137e66ee523..d84ecb6ff7c291e666e7728af8789220c3d605ab 100644 (file)
@@ -65,7 +65,7 @@ static int loop_cnt; /* for debugging loops */
   } \
 }
 
-static void atp_pack_bdsp(struct atp_trans *, struct atpBDS *);
+static int atp_pack_bdsp(struct atp_trans *, struct atpBDS *);
 static int atp_unpack_bdsp(struct atp_state *, gbuf_t *, struct atp_rcb *, 
                           int, int);
 void atp_trp_clock(), asp_clock(), asp_clock_locked(), atp_trp_clock_locked();;
@@ -604,7 +604,7 @@ nothing_to_send:
 } /* atp_send_replies */
 
 
-static void
+static int
 atp_pack_bdsp(trp, bdsp)
      register struct atp_trans *trp;
      register struct atpBDS *bdsp;
@@ -612,12 +612,13 @@ atp_pack_bdsp(trp, bdsp)
        register gbuf_t *m = NULL;
        register int i, datsize = 0;
        struct atpBDS *bdsbase = bdsp;
+       int error = 0;
 
        dPrintf(D_M_ATP, D_L_INFO, ("atp_pack_bdsp: socket=%d\n",
                trp->tr_queue->atp_socket_no));
 
        for (i = 0; i < ATP_TRESP_MAX; i++, bdsp++) {
-               short bufsize = UAS_VALUE(bdsp->bdsBuffSz);
+               unsigned short bufsize = UAS_VALUE(bdsp->bdsBuffSz);
                long bufaddr = UAL_VALUE(bdsp->bdsBuffAddr);
 
                if ((m = trp->tr_rcv[i]) == NULL)
@@ -639,13 +640,15 @@ atp_pack_bdsp(trp, bdsp)
                        register char *buf = (char *)bufaddr;
 
                        while (m) {
-                               short len = (short)(gbuf_len(m));
+                               unsigned short len = (unsigned short)(gbuf_len(m));
                                if (len) {
                                        if (len > bufsize)
                                                len = bufsize;
-                                       copyout((caddr_t)gbuf_rptr(m), 
+                                       if ((error = copyout((caddr_t)gbuf_rptr(m), 
                                                CAST_USER_ADDR_T(&buf[tmp]),
-                                               len);
+                                               len)) != 0) {
+                                               return error;
+                                       }
                                        bufsize -= len;
                                        tmp += len;
                                }
@@ -664,6 +667,8 @@ atp_pack_bdsp(trp, bdsp)
 
        dPrintf(D_M_ATP, D_L_INFO, ("             : size=%d\n",
                datsize));
+       
+       return 0;
 } /* atp_pack_bdsp */
 
 
@@ -1635,12 +1640,20 @@ _ATPsndreq(fd, buf, len, nowait, err, proc)
        /*
         * copy out the recv data
         */
-       atp_pack_bdsp(trp, (struct atpBDS *)bds);
+       if ((*err = atp_pack_bdsp(trp, (struct atpBDS *)bds)) != 0) {
+               atp_free(trp);
+               file_drop(fd);
+               return -1;
+       }
 
        /*
         * copyout the result info
         */
-       copyout((caddr_t)bds, CAST_USER_ADDR_T(buf), atpBDSsize);
+       if ((*err = copyout((caddr_t)bds, CAST_USER_ADDR_T(buf), atpBDSsize)) != 0) {
+               atp_free(trp);
+               file_drop(fd);
+               return -1;
+       }
 
        atp_free(trp);
        file_drop(fd);
@@ -1885,13 +1898,21 @@ _ATPgetrsp(fd, bdsp, err, proc)
                ATENABLE(s, atp->atp_lock);
                        if ((*err = copyin(CAST_USER_ADDR_T(bdsp),
                                        (caddr_t)bds, sizeof(bds))) != 0) {
+                               atp_free(trp);
+                               file_drop(fd);
+                               return -1;
+                       }
+                       if ((*err = atp_pack_bdsp(trp, (struct atpBDS *)bds)) != 0) {
+                               atp_free(trp);
                                file_drop(fd);
                                return -1;
                        }
-                       atp_pack_bdsp(trp, (struct atpBDS *)bds);
                        tid = (int)trp->tr_tid;
                        atp_free(trp);
-                       copyout((caddr_t)bds, CAST_USER_ADDR_T(bdsp), sizeof(bds));
+                       if ((*err = copyout((caddr_t)bds, CAST_USER_ADDR_T(bdsp), sizeof(bds))) != 0) {
+                               file_drop(fd);
+                               return -1;
+                       }
                        file_drop(fd);
                        return tid;
 
index e92d7e6a35bb9b61b1d09cbdf0357987563d1f98..5d4ffb3f2e8c9ba33192fe96d1f7955129ee8baf 100644 (file)
@@ -567,6 +567,7 @@ div_pcblist SYSCTL_HANDLER_ARGS
        gencnt = divcbinfo.ipi_gencnt;
        n = divcbinfo.ipi_count;
 
+       bzero(&xig, sizeof(xig));
        xig.xig_len = sizeof xig;
        xig.xig_count = n;
        xig.xig_gen = gencnt;
@@ -599,6 +600,8 @@ div_pcblist SYSCTL_HANDLER_ARGS
                inp = inp_list[i];
                if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
                        struct xinpcb xi;
+
+                       bzero(&xi, sizeof(xi));
                        xi.xi_len = sizeof xi;
                        /* XXX should avoid extra copy */
                        inpcb_to_compat(inp, &xi.xi_inp);
@@ -615,6 +618,8 @@ div_pcblist SYSCTL_HANDLER_ARGS
                 * while we were processing this request, and it
                 * might be necessary to retry.
                 */
+               bzero(&xig, sizeof(xig));
+               xig.xig_len = sizeof xig;
                xig.xig_gen = divcbinfo.ipi_gencnt;
                xig.xig_sogen = so_gencnt;
                xig.xig_count = divcbinfo.ipi_count;
index 5f45949f1b8f3ec5dfe90ba8e1d12572b7113dbc..b3a716cec2726f6b15ac72c9376f5074d85f64fe 100644 (file)
@@ -238,7 +238,6 @@ SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW,
 #endif /* SYSCTL_NODE */
 
 
-extern lck_mtx_t *ip_mutex;
 static ip_fw_chk_t     ipfw_chk;
 
 /* firewall lock */
@@ -1305,18 +1304,14 @@ send_reject(struct ip_fw_args *args, int code, int offset, int ip_len)
                        ip->ip_len = ntohs(ip->ip_len);
                        ip->ip_off = ntohs(ip->ip_off);
                }
-               lck_mtx_unlock(ip_mutex);
                icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
-               lck_mtx_lock(ip_mutex);
        } else if (offset == 0 && args->f_id.proto == IPPROTO_TCP) {
                struct tcphdr *const tcp =
                    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
                if ( (tcp->th_flags & TH_RST) == 0) {
-                       lck_mtx_unlock(ip_mutex);
                        send_pkt(&(args->f_id), ntohl(tcp->th_seq),
                                ntohl(tcp->th_ack),
                                tcp->th_flags | TH_RST);
-                       lck_mtx_lock(ip_mutex);
                }
                m_freem(args->m);
        } else
index 89f607747655b1e15cb41b0d86adf810e1522719..96ad70ed7a6a1542331a0601f25c3fc24ba1c967 100644 (file)
@@ -685,8 +685,10 @@ iphack:
        if (fr_checkp) {
                struct  mbuf    *m1 = m;
 
-               if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1)
+               if (fr_checkp(ip, hlen, m->m_pkthdr.rcvif, 0, &m1) || !m1) {
+                       lck_mtx_unlock(ip_mutex);
                        return;
+               }
                ip = mtod(m = m1, struct ip *);
        }
        if (fw_enable && IPFW_LOADED) {
@@ -700,22 +702,24 @@ iphack:
 #endif /* IPFIREWALL_FORWARD */
 
                args.m = m;
+               lck_mtx_unlock(ip_mutex);
+
                i = ip_fw_chk_ptr(&args);
                m = args.m;
 
                if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
                        if (m)
-              m_freem(m);
-                       lck_mtx_unlock(ip_mutex);
+                               m_freem(m);
                        return;
                }
                ip = mtod(m, struct ip *); /* just in case m changed */
-               if (i == 0 && args.next_hop == NULL)    /* common case */
+               if (i == 0 && args.next_hop == NULL) {  /* common case */
+                       lck_mtx_lock(ip_mutex);
                        goto pass;
+               }
 #if DUMMYNET
                 if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
                        /* Send packet to the appropriate pipe */
-                       lck_mtx_unlock(ip_mutex);
                        ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
                        return;
                }
@@ -723,19 +727,21 @@ iphack:
 #if IPDIVERT
                if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
                        /* Divert or tee packet */
+                       lck_mtx_lock(ip_mutex);
                        div_info = i;
                        goto ours;
                }
 #endif
 #if IPFIREWALL_FORWARD
-               if (i == 0 && args.next_hop != NULL)
+               if (i == 0 && args.next_hop != NULL) {
+                       lck_mtx_lock(ip_mutex);
                        goto pass;
+               }
 #endif
                /*
                 * if we get here, the packet must be dropped
                 */
                m_freem(m);
-               lck_mtx_unlock(ip_mutex);
                return;
        }
 pass:
index 9fd7a09a1b8ed476e419eb547f71990ab7a67cd0..259e5dd54e6cf5fc754f2e513067820dad35c6a1 100644 (file)
@@ -850,6 +850,7 @@ skip_ipsec:
                args.m = m;
                args.next_hop = dst;
                args.oif = ifp;
+               lck_mtx_unlock(ip_mutex);
                off = ip_fw_chk_ptr(&args);
                m = args.m;
                dst = args.next_hop;
@@ -873,12 +874,13 @@ skip_ipsec:
                        if (m)
                                m_freem(m);
                        error = EACCES ;
-                       lck_mtx_unlock(ip_mutex);
                        goto done ;
                }
                ip = mtod(m, struct ip *);
-               if (off == 0 && dst == old) /* common case */
+               if (off == 0 && dst == old) {/* common case */
+                       lck_mtx_lock(ip_mutex);
                        goto pass ;
+               }
 #if DUMMYNET
                 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
                     /*
@@ -894,12 +896,12 @@ skip_ipsec:
                    args.dst = dst;
                    args.flags = flags;
 
-                   lck_mtx_unlock(ip_mutex);
                    error = ip_dn_io_ptr(m, off & 0xffff, DN_TO_IP_OUT,
                                &args);
                    goto done;
                }
 #endif /* DUMMYNET */
+               lck_mtx_lock(ip_mutex);
 #if IPDIVERT
                if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
                        struct mbuf *clone = NULL;
index f361be89261cee55748fafe97a7463191ae6c5c7..5332bf70807fe6d3c239061f96e049e02366c1fc 100644 (file)
@@ -794,7 +794,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS
         */
        gencnt = ripcbinfo.ipi_gencnt;
        n = ripcbinfo.ipi_count;
-
+       
+       bzero(&xig, sizeof(xig));
        xig.xig_len = sizeof xig;
        xig.xig_count = n;
        xig.xig_gen = gencnt;
@@ -830,6 +831,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS
                inp = inp_list[i];
                if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
                        struct xinpcb xi;
+
+                       bzero(&xi, sizeof(xi));
                        xi.xi_len = sizeof xi;
                        /* XXX should avoid extra copy */
                        inpcb_to_compat(inp, &xi.xi_inp);
@@ -846,6 +849,8 @@ rip_pcblist SYSCTL_HANDLER_ARGS
                 * while we were processing this request, and it
                 * might be necessary to retry.
                 */
+               bzero(&xig, sizeof(xig));
+               xig.xig_len = sizeof xig;
                xig.xig_gen = ripcbinfo.ipi_gencnt;
                xig.xig_sogen = so_gencnt;
                xig.xig_count = ripcbinfo.ipi_count;
index b931b642df36ea5481eab35827bd71d00d9ffd2f..e85e6aed5b5d150ba86adc7f11c4a4d2e70211a5 100644 (file)
@@ -3420,6 +3420,7 @@ tcpdropdropablreq(struct socket *head)
        static unsigned int cur_cnt, old_cnt;
        struct timeval tv;
        struct inpcb *inp = NULL;
+       struct tcpcb *tp;
        
        microtime(&tv);
        if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
@@ -3459,16 +3460,29 @@ tcpdropdropablreq(struct socket *head)
                tcp_unlock(so, 1, 0);
                return 0;
        }
-       sototcpcb(so)->t_flags |= TF_LQ_OVERFLOW;
        head->so_incqlen--;
        head->so_qlen--;
-       so->so_head = NULL;
        TAILQ_REMOVE(&head->so_incomp, so, so_list);
+       tcp_unlock(head, 0, 0);
+       
+       so->so_head = NULL;
        so->so_usecount--;      /* No more held by so_head */
 
-       tcp_drop(sototcpcb(so), ETIMEDOUT);
-
+       /* 
+        * We do not want to lose track of the PCB right away in case we receive 
+        * more segments from the peer
+        */
+       tp = sototcpcb(so);
+       tp->t_flags |= TF_LQ_OVERFLOW;
+       tp->t_state = TCPS_CLOSED;
+       (void) tcp_output(tp);
+       tcpstat.tcps_drops++;
+       soisdisconnected(so);
+       tcp_canceltimers(tp);
+       add_to_time_wait(tp);
+       
        tcp_unlock(so, 1, 0);
+       tcp_lock(head, 0, 0);
        
        return 1;
        
index 0d8a168671fbc441aeac73865b65d95ed277c365..0e72a2325626e894a2b119273fec59e59366f813 100644 (file)
@@ -1058,6 +1058,7 @@ tcp_pcblist SYSCTL_HANDLER_ARGS
        gencnt = tcbinfo.ipi_gencnt;
        n = tcbinfo.ipi_count;
 
+       bzero(&xig, sizeof(xig));
        xig.xig_len = sizeof xig;
        xig.xig_count = n;
        xig.xig_gen = gencnt;
@@ -1098,6 +1099,8 @@ tcp_pcblist SYSCTL_HANDLER_ARGS
                if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
                        struct xtcpcb xt;
                        caddr_t inp_ppcb;
+
+                       bzero(&xt, sizeof(xt));
                        xt.xt_len = sizeof xt;
                        /* XXX should avoid extra copy */
                        inpcb_to_compat(inp, &xt.xt_inp);
@@ -1120,6 +1123,8 @@ tcp_pcblist SYSCTL_HANDLER_ARGS
                 * while we were processing this request, and it
                 * might be necessary to retry.
                 */
+               bzero(&xig, sizeof(xig));
+               xig.xig_len = sizeof xig;
                xig.xig_gen = tcbinfo.ipi_gencnt;
                xig.xig_sogen = so_gencnt;
                xig.xig_count = tcbinfo.ipi_count;
index d29331e289be53d6734058c868803e3b5ca86e7d..2a5ab698857a1d0a7cacb55717c64ed597e2bd5f 100644 (file)
@@ -754,9 +754,11 @@ tcp_connect(tp, nam, p)
 
        tcp_lock(inp->inp_socket, 0, 0);
        if (oinp) {
-               tcp_lock(oinp->inp_socket, 1, 0);
+               if (oinp != inp) /* 4143933: avoid deadlock if inp == oinp */
+                       tcp_lock(oinp->inp_socket, 1, 0);
                if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
-                       tcp_unlock(oinp->inp_socket, 1, 0);
+                       if (oinp != inp)
+                               tcp_unlock(oinp->inp_socket, 1, 0);
                        goto skip_oinp;
                }
 
@@ -767,10 +769,12 @@ tcp_connect(tp, nam, p)
                        otp = tcp_close(otp);
                else {
                        printf("tcp_connect: inp=%x err=EADDRINUSE\n", inp);
-                       tcp_unlock(oinp->inp_socket, 1, 0);
+                       if (oinp != inp)
+                               tcp_unlock(oinp->inp_socket, 1, 0);
                        return EADDRINUSE;
                }
-               tcp_unlock(oinp->inp_socket, 1, 0);
+               if (oinp != inp)
+                       tcp_unlock(oinp->inp_socket, 1, 0);
        }
 skip_oinp:
        if ((inp->inp_laddr.s_addr == INADDR_ANY ? ifaddr->sin_addr.s_addr :
index 4eafd6c8ffb4fda8f0c61f5fb2b7cd194a37c4b5..3ecfaee90a435a7d2503312ebcceb567aeffa461 100644 (file)
@@ -809,6 +809,7 @@ udp_pcblist SYSCTL_HANDLER_ARGS
        gencnt = udbinfo.ipi_gencnt;
        n = udbinfo.ipi_count;
 
+       bzero(&xig, sizeof(xig));
        xig.xig_len = sizeof xig;
        xig.xig_count = n;
        xig.xig_gen = gencnt;
@@ -844,6 +845,8 @@ udp_pcblist SYSCTL_HANDLER_ARGS
                inp = inp_list[i];
                if (inp->inp_gencnt <= gencnt && inp->inp_state != INPCB_STATE_DEAD) {
                        struct xinpcb xi;
+
+                       bzero(&xi, sizeof(xi));
                        xi.xi_len = sizeof xi;
                        /* XXX should avoid extra copy */
                        inpcb_to_compat(inp, &xi.xi_inp);
@@ -860,6 +863,8 @@ udp_pcblist SYSCTL_HANDLER_ARGS
                 * while we were processing this request, and it
                 * might be necessary to retry.
                 */
+               bzero(&xig, sizeof(xig));
+               xig.xig_len = sizeof xig;
                xig.xig_gen = udbinfo.ipi_gencnt;
                xig.xig_sogen = so_gencnt;
                xig.xig_count = udbinfo.ipi_count;
index 9576d7afe2a43cbeef7e5002e25fbf05df3aa6f1..2c635da4c53ba3d723e5c78728054bf34a6982d3 100644 (file)
@@ -2850,6 +2850,8 @@ key_newsav(m, mhp, sah, errp)
        if (mhp->msg->sadb_msg_type != SADB_GETSPI) {
                *errp = key_setsaval(newsav, m, mhp);
                if (*errp) {
+                       if (newsav->spihash.le_prev || newsav->spihash.le_next)
+                               LIST_REMOVE(newsav, spihash);
                        KFREE(newsav);
                        return NULL;
                }
@@ -3094,6 +3096,7 @@ key_setsaval(sav, m, mhp)
 
                sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA];
                if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: invalid message size.\n"));
                        error = EINVAL;
                        goto fail;
                }
@@ -3109,6 +3112,7 @@ key_setsaval(sav, m, mhp)
                if ((sav->flags & SADB_X_EXT_NATT) != 0) {
                        if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa_2) ||
                                 ((struct sadb_sa_2*)(sa0))->sadb_sa_natt_port == 0) {
+                               ipseclog((LOG_DEBUG, "key_setsaval: natt port not set.\n"));
                                error = EINVAL;
                                goto fail;
                        }
@@ -3136,6 +3140,7 @@ key_setsaval(sav, m, mhp)
 
                error = 0;
                if (len < sizeof(*key0)) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: invalid auth key ext len. len = %d\n", len));
                        error = EINVAL;
                        goto fail;
                }
@@ -3174,6 +3179,7 @@ key_setsaval(sav, m, mhp)
 
                error = 0;
                if (len < sizeof(*key0)) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: invalid encryption key ext len. len = %d\n", len));
                        error = EINVAL;
                        goto fail;
                }
@@ -3181,6 +3187,7 @@ key_setsaval(sav, m, mhp)
                case SADB_SATYPE_ESP:
                        if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) &&
                            sav->alg_enc != SADB_EALG_NULL) {
+                           ipseclog((LOG_DEBUG, "key_setsaval: invalid ESP algorithm.\n"));
                                error = EINVAL;
                                break;
                        }
@@ -3202,7 +3209,7 @@ key_setsaval(sav, m, mhp)
                        break;
                }
                if (error) {
-                       ipseclog((LOG_DEBUG, "key_setsatval: invalid key_enc value.\n"));
+                       ipseclog((LOG_DEBUG, "key_setsaval: invalid key_enc value.\n"));
                        goto fail;
                }
        }
@@ -3268,6 +3275,7 @@ key_setsaval(sav, m, mhp)
        lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD];
        if (lft0 != NULL) {
                if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: invalid hard lifetime ext len.\n"));
                        error = EINVAL;
                        goto fail;
                }
@@ -3284,6 +3292,7 @@ key_setsaval(sav, m, mhp)
        lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT];
        if (lft0 != NULL) {
                if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) {
+                       ipseclog((LOG_DEBUG, "key_setsaval: invalid soft lifetime ext len.\n"));
                        error = EINVAL;
                        goto fail;
                }
index b955a56e8b39afc5584f37cfb54edff908548f18..673240a29078dcf51d82f5a0b072aa91db5ef96f 100644 (file)
@@ -457,6 +457,7 @@ struct user_nfs_export_args {
 #define NXA_DELETE             0x0001  /* delete the specified export(s) */
 #define NXA_ADD                        0x0002  /* add the specified export(s) */
 #define NXA_REPLACE            0x0003  /* delete and add the specified export(s) */
+#define NXA_DELETE_ALL         0x0004  /* delete all exports */
 
 /* export option flags */
 #define NX_READONLY            0x0001  /* exported read-only */
@@ -464,6 +465,7 @@ struct user_nfs_export_args {
 #define NX_MAPROOT             0x0004  /* map root access to anon credential */
 #define NX_MAPALL              0x0008  /* map all access to anon credential */
 #define NX_KERB                        0x0010  /* exported with Kerberos uid mapping */
+#define NX_32BITCLIENTS                0x0020  /* restrict directory cookies to 32 bits */
 
 #ifdef KERNEL
 struct nfs_exportfs;
index 966cf72f5906c0dab7f33f313d120e6b5da6047e..b4a0836a258cd4a20f27aff271b94d82cb024e03 100644 (file)
@@ -974,7 +974,7 @@ nfs_buf_release(struct nfsbuf *bp, int freeup)
                        bp->nb_data = NULL;
                }
                if (bp->nb_flags & (NB_ERROR | NB_INVAL | NB_NOCACHE)) {
-                       if (bp->nb_flags & (NB_READ | NB_INVAL))
+                       if (bp->nb_flags & (NB_READ | NB_INVAL | NB_NOCACHE))
                                upl_flags = UPL_ABORT_DUMP_PAGES;
                        else
                                upl_flags = 0;
index 1d11d4243fe180e97ff460a33e1e84282d0682b3..f6fc25fd4884de609610b04a5b2ddb2a8e338f7e 100644 (file)
@@ -3486,6 +3486,8 @@ nfsrv_readdir(nfsd, slp, procp, mrq)
        }
        context.vc_proc = procp;
        context.vc_ucred = nfsd->nd_cr;
+       if (!v3 || (nxo->nxo_flags & NX_32BITCLIENTS))
+               vnopflag |= VNODE_READDIR_SEEKOFF32;
        if (v3) {
                nfsm_srv_vattr_init(&at, v3);
                error = getret = vnode_getattr(vp, &at, &context);
@@ -3655,6 +3657,8 @@ again:
                        /* Finish off the record with the cookie */
                        nfsm_clget;
                        if (v3) {
+                               if (vnopflag & VNODE_READDIR_SEEKOFF32)
+                                       dp->d_seekoff &= 0x00000000ffffffffULL;
                                txdr_hyper(&dp->d_seekoff, &tquad);
                                *tl = tquad.nfsuquad[0];
                                bp += NFSX_UNSIGNED;
@@ -3762,6 +3766,8 @@ nfsrv_readdirplus(nfsd, slp, procp, mrq)
        }
        context.vc_proc = procp;
        context.vc_ucred = nfsd->nd_cr;
+       if (nxo->nxo_flags & NX_32BITCLIENTS)
+               vnopflag |= VNODE_READDIR_SEEKOFF32;
        nfsm_srv_vattr_init(&at, 1);
        error = getret = vnode_getattr(vp, &at, &context);
        if (!error && toff && verf && verf != at.va_filerev)
@@ -3932,6 +3938,8 @@ again:
                        fl.fl_fhsize = txdr_unsigned(nfhp->nfh_len);
                        fl.fl_fhok = nfs_true;
                        fl.fl_postopok = nfs_true;
+                       if (vnopflag & VNODE_READDIR_SEEKOFF32)
+                               dp->d_seekoff &= 0x00000000ffffffffULL;
                        txdr_hyper(&dp->d_seekoff, &fl.fl_off);
 
                        nfsm_clget;
index d0c97001832417b132263a0d3f0322d1de26e02e..556db3712e9cb51d9f12eb5e886063604da42d81 100644 (file)
@@ -2298,6 +2298,35 @@ nfsrv_export(struct user_nfs_export_args *unxa, struct vfs_context *ctx)
        char path[MAXPATHLEN];
        int expisroot;
 
+       if (unxa->nxa_flags & NXA_DELETE_ALL) {
+               /* delete all exports on all file systems */
+               lck_rw_lock_exclusive(&nfs_export_rwlock);
+               while ((nxfs = LIST_FIRST(&nfs_exports))) {
+                       mp = vfs_getvfs_by_mntonname(nxfs->nxfs_path);
+                       if (mp)
+                               mp->mnt_flag &= ~MNT_EXPORTED;
+                       /* delete all exports on this file system */
+                       while ((nx = LIST_FIRST(&nxfs->nxfs_exports))) {
+                               LIST_REMOVE(nx, nx_next);
+                               LIST_REMOVE(nx, nx_hash);
+                               /* delete all netopts for this export */
+                               nfsrv_free_addrlist(nx);
+                               nx->nx_flags &= ~NX_DEFAULTEXPORT;
+                               if (nx->nx_defopt.nxo_cred) {
+                                       kauth_cred_rele(nx->nx_defopt.nxo_cred);
+                                       nx->nx_defopt.nxo_cred = NULL;
+                               }
+                               FREE(nx->nx_path, M_TEMP);
+                               FREE(nx, M_TEMP);
+                       }
+                       LIST_REMOVE(nxfs, nxfs_next);
+                       FREE(nxfs->nxfs_path, M_TEMP);
+                       FREE(nxfs, M_TEMP);
+               }
+               lck_rw_done(&nfs_export_rwlock);
+               return (0);
+       }
+
        error = copyinstr(unxa->nxa_fspath, path, MAXPATHLEN, (size_t *)&pathlen);
        if (error)
                return (error);
index c858df06191c8751534a4a5f842bd11645fb90cc..b5d0b9c6fa3039004e8d9810cf3b16cb6753b02c 100644 (file)
@@ -4148,6 +4148,9 @@ again:
        if ((waitfor == MNT_WAIT) && !LIST_EMPTY(&np->n_dirtyblkhd)) {
                goto again;
        }
+       /* if we have no dirty blocks, we can clear the modified flag */
+       if (LIST_EMPTY(&np->n_dirtyblkhd))
+               np->n_flag &= ~NMODIFIED;
 
        FSDBG(526, np->n_flag, np->n_error, 0, 0);
        if (!ignore_writeerr && (np->n_flag & NWRITEERR)) {
index 0519d9522b65b2aa59c62f6b06e7c4c99b0594aa..f5f7ba1ee985c756496350e735979b98cb04866d 100644 (file)
@@ -213,6 +213,7 @@ typedef __darwin_pid_t      pid_t;
 #define F_PATHPKG_CHECK 52              /* find which component (if any) is a package */
 #define F_FREEZE_FS     53              /* "freeze" all fs operations */
 #define F_THAW_FS       54              /* "thaw" all fs operations */
+#define F_GLOBAL_NOCACHE 55            /* turn data caching off/on (globally) for this file */
 
 // FS-specific fcntl()'s numbers begin at 0x00010000 and go up
 #define FCNTL_FS_SPECIFIC_BASE  0x00010000
index f069bf4ac6179ea7c09756b33a06bc3dede696d6..a8db652c3f80a7d16e056675132a4c617c9d3b29 100644 (file)
@@ -206,7 +206,7 @@ struct socket {
        u_int32_t       so_filteruse; /* usecount for the socket filters */
        void    *reserved3;             /* Temporarily in use/debug: last socket lock LR */
        void    *reserved4;             /* Temporarily in use/debug: last socket unlock LR */
-
+       thread_t        so_send_filt_thread;
 #endif
 };
 #endif /* KERNEL_PRIVATE */
index 9bac1aec0240a7a2d46f2c13230f40d62fb12ae9..125d020d2dba27063fcb6c6d37e90df28a9e1e49 100644 (file)
@@ -415,6 +415,7 @@ extern int          vttoif_tab[];
 /* VNOP_READDIR flags: */
 #define VNODE_READDIR_EXTENDED    0x0001   /* use extended directory entries */
 #define VNODE_READDIR_REQSEEKOFF  0x0002   /* requires seek offset (cookies) */
+#define VNODE_READDIR_SEEKOFF32   0x0004   /* seek offset values should fit in 32 bits */
 
 
 #define        NULLVP  ((struct vnode *)NULL)
index 7716e41e25fc47bce84366fa2816db6d6f661e0d..96c731a0616c0112767b26a1461705403c0ede00 100644 (file)
@@ -970,10 +970,12 @@ getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *r
        
        /*
         * Allocate a target buffer for attribute results.
-        * Note that since we won't ever copy out more than the caller requested,
-        * we never need to allocate more than they offer.
+        *
+        * Note that we won't ever copy out more than the caller requested, even though
+        * we might have to allocate more than they offer so that the diagnostic checks
+        * don't result in a panic if the caller's buffer is too small.
         */
-       ab.allocated = imin(uap->bufferSize, fixedsize + varsize);
+       ab.allocated = fixedsize + varsize;
        if (ab.allocated > ATTR_MAX_BUFFER) {
                error = ENOMEM;
                VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER);
@@ -991,7 +993,7 @@ getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *r
         */
        ab.fixedcursor = ab.base + sizeof(uint32_t);
        ab.varcursor = ab.base + fixedsize;
-       ab.needed = fixedsize + varsize;
+       ab.needed = ab.allocated;
 
        /* common attributes **************************************************/
        if (al.commonattr & ATTR_CMN_NAME)
@@ -1248,7 +1250,8 @@ getattrlist(struct proc *p, struct getattrlist_args *uap, __unused register_t *r
         */
        *(uint32_t *)ab.base = (uap->options & FSOPT_REPORT_FULLSIZE) ? ab.needed : imin(ab.allocated, ab.needed);
        
-       error = copyout(ab.base, uap->attributeBuffer, ab.allocated);
+       /* Only copy out as much as the user buffer can hold */
+       error = copyout(ab.base, uap->attributeBuffer, imin(uap->bufferSize, ab.allocated));
        
 out:
        if (va.va_name)
index a01ac6c4565bbdccaecedba720ef834def36a7e9..fd2250cc76881571d2220615704a7e33f282822c 100644 (file)
@@ -1023,7 +1023,7 @@ insmntque(vnode_t vp, mount_t mp)
        /*
         * Delete from old mount point vnode list, if on one.
         */
-       if ( (lmp = vp->v_mount) != NULL) {
+       if ( (lmp = vp->v_mount) != NULL && lmp != dead_mountp) {
                if ((vp->v_lflag & VNAMED_MOUNT) == 0)
                        panic("insmntque: vp not in mount vnode list");
                vp->v_lflag &= ~VNAMED_MOUNT;
@@ -1619,10 +1619,8 @@ loop:
                                vnode_unlock(vp);
                        } else {
                                vclean(vp, 0, p);
-                               vp->v_mount = 0;        /*override any dead_mountp */
                                vp->v_lflag &= ~VL_DEAD;
                                vp->v_op = spec_vnodeop_p;
-                               insmntque(vp, (struct mount *)0);
                                vnode_unlock(vp);
                        }
                        mount_lock(mp);
@@ -3915,6 +3913,7 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_
        struct vfs_context context2;
        vfs_context_t ctx = context;
        u_long ndflags = 0;
+       int lflags = flags;
 
        if (context == NULL) {          /* XXX technically an error */
                context2.vc_proc = current_proc();
@@ -3922,14 +3921,17 @@ vnode_open(const char *path, int fmode, int cmode, int flags, vnode_t *vpp, vfs_
                ctx = &context2;
        }
 
-       if (flags & VNODE_LOOKUP_NOFOLLOW)
+       if (fmode & O_NOFOLLOW)
+               lflags |= VNODE_LOOKUP_NOFOLLOW;
+
+       if (lflags & VNODE_LOOKUP_NOFOLLOW)
                ndflags = NOFOLLOW;
        else
                ndflags = FOLLOW;
 
-       if (flags & VNODE_LOOKUP_NOCROSSMOUNT)
+       if (lflags & VNODE_LOOKUP_NOCROSSMOUNT)
                ndflags |= NOCROSSMOUNT;
-       if (flags & VNODE_LOOKUP_DOWHITEOUT)
+       if (lflags & VNODE_LOOKUP_DOWHITEOUT)
                ndflags |= DOWHITEOUT;
        
        /* XXX AUDITVNPATH1 needed ? */
index 4dd4f2f0c72a20764c51e2a9493288d99a5ef055..1aa675b29cb707d66b21b741a5cff5fae95dfd54 100644 (file)
@@ -1,4 +1,4 @@
-8.2.0
+8.3.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 9aeba05d66272da3078f412b89f0bce4309c48cf..16e230227efc187524065abb19548f118ee4bd69 100644 (file)
@@ -208,3 +208,8 @@ _ml_mem_backoff
 _pe_do_clock_test
 _pe_run_clock_test
 _scc
+_pmsStart
+_pmsPark
+_pmsRun
+_pmsRunLocal
+_pmsBuild
index da87c45ee2cbf2185ee60be6bd81586f660e10dd..1f785c1eed0893697a5c4b08772c12cf8a483394 100644 (file)
@@ -23,3 +23,9 @@ _ml_set_processor_voltage
 _ml_throttle
 _temp_patch_ptrace
 _temp_unpatch_ptrace
+_pmsStart
+_pmsPark
+_pmsRun
+_pmsRunLocal
+_pmsBuild
+_ml_mem_backoff
diff --git a/iokit/IOKit/IOHibernatePrivate.h b/iokit/IOKit/IOHibernatePrivate.h
new file mode 100644 (file)
index 0000000..01e3b86
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef KERNEL
+#include <crypto/aes.h>
+#endif
+
+struct IOPolledFileExtent
+{
+    uint64_t   start;
+    uint64_t   length;
+};
+typedef struct IOPolledFileExtent IOPolledFileExtent;
+
+struct IOHibernateImageHeader
+{
+    uint64_t   imageSize;
+    uint64_t   image1Size;
+    
+    uint32_t   restore1CodePage;
+    uint32_t   restore1PageCount;
+    uint32_t   restore1CodeOffset;
+    uint32_t   restore1StackOffset;
+    
+    uint32_t   pageCount;
+    uint32_t   bitmapSize;
+
+    uint32_t   restore1Sum;
+    uint32_t   image1Sum;
+    uint32_t   image2Sum;
+
+    uint32_t   actualRestore1Sum;
+    uint32_t   actualImage1Sum;
+    uint32_t   actualImage2Sum;
+
+    uint32_t   actualUncompressedPages;
+    uint32_t   conflictCount;
+    uint32_t   nextFree;
+
+    uint32_t   signature;
+    uint32_t   processorFlags;
+
+    uint8_t    reserved2[24];
+    
+    uint64_t   encryptStart;
+    uint64_t   machineSignature;
+
+    uint32_t    previewSize;
+    uint32_t    previewPageListSize;
+
+    uint32_t   diag[4];
+
+    uint32_t   reserved[82];           // make sizeof == 512
+
+    uint32_t           fileExtentMapSize;
+    IOPolledFileExtent fileExtentMap[2];
+};
+typedef struct IOHibernateImageHeader IOHibernateImageHeader;
+
+
+struct hibernate_bitmap_t
+{
+    uint32_t   first_page;
+    uint32_t   last_page;
+    uint32_t   bitmapwords;
+    uint32_t   bitmap[0];
+};
+typedef struct hibernate_bitmap_t hibernate_bitmap_t;
+
+struct hibernate_page_list_t
+{
+    uint32_t             list_size;
+    uint32_t             page_count;
+    uint32_t             bank_count;
+    hibernate_bitmap_t    bank_bitmap[0];
+};
+typedef struct hibernate_page_list_t hibernate_page_list_t;
+
+struct hibernate_cryptwakevars_t
+{
+#ifdef _AES_H
+    uint8_t aes_iv[AES_BLOCK_SIZE];
+#else
+#warning undef _AES_H
+#endif
+};
+typedef struct hibernate_cryptwakevars_t hibernate_cryptwakevars_t;
+
+struct hibernate_cryptvars_t
+{
+#ifdef _AES_H
+    uint8_t aes_iv[AES_BLOCK_SIZE];
+    aes_ctx ctx;
+#else
+#warning undef _AES_H
+#endif
+};
+typedef struct hibernate_cryptvars_t hibernate_cryptvars_t;
+
+
+enum 
+{
+    kIOHibernateProgressCount         = 19,
+    kIOHibernateProgressWidth         = 7,
+    kIOHibernateProgressHeight        = 16,
+    kIOHibernateProgressSpacing       = 3,
+    kIOHibernateProgressOriginY       = 81,
+
+    kIOHibernateProgressSaveUnderSize = 2*5+14*2,
+
+    kIOHibernateProgressLightGray     = 230,
+    kIOHibernateProgressMidGray       = 174,
+    kIOHibernateProgressDarkGray      = 92
+};
+
+struct hibernate_graphics_t
+{
+    unsigned long physicalAddress;     // Base address of video memory
+    unsigned long mode;                // 
+    unsigned long rowBytes;            // Number of bytes per pixel row
+    unsigned long width;               // Width
+    unsigned long height;              // Height
+    unsigned long depth;               // Pixel Depth
+
+    uint8_t      progressSaveUnder[kIOHibernateProgressCount][kIOHibernateProgressSaveUnderSize];
+};
+typedef struct hibernate_graphics_t hibernate_graphics_t;
+
+#define DECLARE_IOHIBERNATEPROGRESSALPHA                               \
+static const uint8_t gIOHibernateProgressAlpha                 \
+[kIOHibernateProgressHeight][kIOHibernateProgressWidth] =      \
+{                                                              \
+    { 0x00,0x63,0xd8,0xf0,0xd8,0x63,0x00 },                    \
+    { 0x51,0xff,0xff,0xff,0xff,0xff,0x51 },                    \
+    { 0xae,0xff,0xff,0xff,0xff,0xff,0xae },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xc3,0xff,0xff,0xff,0xff,0xff,0xc3 },                    \
+    { 0xae,0xff,0xff,0xff,0xff,0xff,0xae },                    \
+    { 0x54,0xff,0xff,0xff,0xff,0xff,0x54 },                    \
+    { 0x00,0x66,0xdb,0xf3,0xdb,0x66,0x00 }                     \
+};
+
+#ifdef KERNEL
+
+#ifdef __cplusplus
+
+void     IOHibernateSystemInit(IOPMrootDomain * rootDomain);
+
+IOReturn IOHibernateSystemSleep(void);
+IOReturn IOHibernateSystemHasSlept(void);
+IOReturn IOHibernateSystemWake(void);
+IOReturn IOHibernateSystemPostWake(void);
+
+#endif /* __cplusplus */
+
+#ifdef _SYS_CONF_H_
+typedef void (*kern_get_file_extents_callback_t)(void * ref, uint64_t start, uint64_t size);
+
+struct kern_direct_file_io_ref_t *
+kern_open_file_for_direct_io(const char * name, 
+                            kern_get_file_extents_callback_t callback, 
+                            void * callback_ref,
+                            dev_t * device,
+                             uint64_t * partitionbase_result,
+                             uint64_t * maxiocount_result);
+void
+kern_close_file_for_direct_io(struct kern_direct_file_io_ref_t * ref);
+int
+kern_write_file(struct kern_direct_file_io_ref_t * ref, off_t offset, caddr_t addr, vm_size_t len);
+int get_kernel_symfile(struct proc *p, char **symfile);
+#endif /* _SYS_CONF_H_ */
+
+hibernate_page_list_t *
+hibernate_page_list_allocate(void);
+
+kern_return_t 
+hibernate_setup(IOHibernateImageHeader * header,
+                        uint32_t free_page_ratio,
+                        uint32_t free_page_time,
+                       hibernate_page_list_t ** page_list_ret,
+                       hibernate_page_list_t ** page_list_wired_ret,
+                        boolean_t * encryptedswap);
+kern_return_t 
+hibernate_teardown(hibernate_page_list_t * page_list,
+                    hibernate_page_list_t * page_list_wired);
+
+kern_return_t 
+hibernate_processor_setup(IOHibernateImageHeader * header);
+
+void
+hibernate_vm_lock(void);
+void
+hibernate_vm_unlock(void);
+
+void
+hibernate_page_list_setall(hibernate_page_list_t * page_list,
+                          hibernate_page_list_t * page_list_wired,
+                          uint32_t * pagesOut);
+
+void
+hibernate_page_list_setall_machine(hibernate_page_list_t * page_list,
+                                    hibernate_page_list_t * page_list_wired,
+                                    uint32_t * pagesOut);
+void
+hibernate_page_list_discard(hibernate_page_list_t * page_list);
+
+void
+hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired,
+                               vm_offset_t ppnum, vm_offset_t count, uint32_t kind);
+
+void 
+hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page);
+boolean_t 
+hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page);
+
+uint32_t
+hibernate_page_list_count(hibernate_page_list_t *list, uint32_t set, uint32_t page);
+
+void 
+hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags);
+
+void
+hibernate_machine_init(void);
+boolean_t
+hibernate_write_image(void);
+
+long
+hibernate_machine_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4);
+long
+hibernate_kernel_entrypoint(IOHibernateImageHeader * header, void * p2, void * p3, void * p4);
+
+extern uint32_t    gIOHibernateState;
+extern uint32_t    gIOHibernateMode;
+extern uint32_t    gIOHibernateFreeTime;       // max time to spend freeing pages (ms)
+extern uint8_t     gIOHibernateRestoreStack[];
+extern uint8_t     gIOHibernateRestoreStackEnd[];
+extern IOHibernateImageHeader *    gIOHibernateCurrentHeader;
+extern hibernate_graphics_t *      gIOHibernateGraphicsInfo;
+extern hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars;
+
+#define HIBLOG(fmt, args...)   \
+    { kprintf(fmt, ## args); printf(fmt, ## args); }
+
+#define HIBPRINT(fmt, args...) \
+    { kprintf(fmt, ## args); }
+
+#endif /* KERNEL */
+
+// gIOHibernateState, kIOHibernateStateKey
+enum
+{
+    kIOHibernateStateInactive            = 0,
+    kIOHibernateStateHibernating        = 1,   /* writing image */
+    kIOHibernateStateWakingFromHibernate = 2   /* booted and restored image */
+};
+
+// gIOHibernateMode, kIOHibernateModeKey
+enum
+{
+    kIOHibernateModeOn      = 0x00000001,
+    kIOHibernateModeSleep   = 0x00000002,
+    kIOHibernateModeEncrypt = 0x00000004,
+
+    kIOHibernateModeDiscardCleanInactive = 0x00000008,
+    kIOHibernateModeDiscardCleanActive   = 0x00000010
+};
+
+// IOHibernateImageHeader.signature
+enum
+{
+    kIOHibernateHeaderSignature        = 0x73696d65,
+    kIOHibernateHeaderInvalidSignature = 0x7a7a7a7a
+};
+
+// kind for hibernate_set_page_state()
+enum
+{
+    kIOHibernatePageStateFree        = 0,
+    kIOHibernatePageStateWiredSave   = 1,
+    kIOHibernatePageStateUnwiredSave = 2
+};
+
+#define kIOHibernateModeKey            "Hibernate Mode"
+#define kIOHibernateFileKey            "Hibernate File"
+#define kIOHibernateFreeRatioKey       "Hibernate Free Ratio"
+#define kIOHibernateFreeTimeKey                "Hibernate Free Time"
+
+#define kIOHibernateStateKey           "IOHibernateState"
+#define kIOHibernateFeatureKey         "Hibernation"
+#define kIOHibernatePreviewBufferKey   "IOPreviewBuffer"
+
+#define kIOHibernateBootImageKey       "boot-image"
+#define kIOHibernateBootImageKeyKey    "boot-image-key"
+#define kIOHibernateBootSignatureKey   "boot-signature"
+
+#define kIOHibernateMemorySignatureKey   "memory-signature"
+#define kIOHibernateMemorySignatureEnvKey "mem-sig"
+#define kIOHibernateMachineSignatureKey          "machine-signature"
+
+#ifdef __cplusplus
+}
+#endif
index fdcf6ccb78047e00f845024ec26b0530f58df5f7..963483625025cc8f3d4261e0e689215fd1d7bf42 100644 (file)
@@ -30,7 +30,7 @@ EXPINC_SUBDIRS_I386 = ${INSTINC_SUBDIRS_I386}
 
 NOT_EXPORT_HEADERS = 
 
-NOT_KF_MI_HEADERS  = $(NOT_EXPORT_HEADERS) IOKitKeysPrivate.h IOCPU.h IOPolledInterface.h
+NOT_KF_MI_HEADERS  = $(NOT_EXPORT_HEADERS) IOKitKeysPrivate.h IOCPU.h IOHibernatePrivate.h IOPolledInterface.h
 
 NOT_LOCAL_HEADERS = 
 
@@ -40,7 +40,7 @@ INSTALL_MI_LIST       = IOBSD.h IOKitKeys.h IOKitServer.h IOReturn.h\
                  IOSharedLock.h IOTypes.h OSMessageNotification.h\
                  IODataQueueShared.h IOMessage.h
                   
-INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h
+INSTALL_MI_LCL_LIST = IOKitKeysPrivate.h IOHibernatePrivate.h
 
 INSTALL_MI_DIR = .
 
index fd695d3bc0a676630a6bfe65e2aefd094541dc39..a238b773c547bbec7e635b5e2fb73ef4e932280f 100644 (file)
@@ -912,7 +912,6 @@ static SInt32 DefaultCompare( UInt32 cellCount, UInt32 left[], UInt32 right[] )
     return( left[ cellCount ] - right[ cellCount ] );
 }
 
-
 void IODTGetCellCounts( IORegistryEntry * regEntry,
                            UInt32 * sizeCount, UInt32 * addressCount)
 {
@@ -941,10 +940,13 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
     UInt32             childSizeCells, childAddressCells;
     UInt32             childCells;
     UInt32             cell[ 5 ], offset = 0, length;
+    UInt32             endCell[ 5 ];
     UInt32             *range;
+    UInt32             *lookRange;
+    UInt32             *startRange;
     UInt32             *endRanges;
     bool               ok = true;
-    SInt32             diff;
+    SInt32             diff, endDiff;
 
     IODTPersistent     *persist;
     IODTCompareAddressCellFunc compare;
@@ -959,60 +961,81 @@ bool IODTResolveAddressCell( IORegistryEntry * regEntry,
     else
         *len = IOPhysical32( 0, cellsIn[ childAddressCells ] );
 
-    do {
-        prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey ));
-        if( 0 == prop) {
-            /* end of the road */
-            *phys = IOPhysical32( 0,  cell[ childAddressCells - 1 ] + offset);
-            break;
-        }
-
-        parent = regEntry->getParentEntry( gIODTPlane );
-        IODTGetCellCounts( parent, &sizeCells, &addressCells );
-
-        if( (length = prop->getLength())) {
-            // search
-            range = (UInt32 *) prop->getBytesNoCopy();
-            endRanges = range + (length / 4);
-
-            prop = (OSData *) regEntry->getProperty( gIODTPersistKey );
-            if( prop) {
-                persist = (IODTPersistent *) prop->getBytesNoCopy();
-                compare = persist->compareFunc;
-            } else
-                compare = DefaultCompare;
-
-            for( ok = false;
-                 range < endRanges;
-                 range += (childCells + addressCells) ) {
-
-                // is cell >= range start?
-                diff = (*compare)( childAddressCells, cell, range );
-                if( diff < 0)
-                    continue;
-                    
-                // is cell + size <= range end?
-                if( (diff + cell[ childCells - 1 ])
-                        > range[ childCells + addressCells - 1 ])
-                    continue;
+    do
+    {
+       prop = OSDynamicCast( OSData, regEntry->getProperty( gIODTRangeKey ));
+       if( 0 == prop) {
+           /* end of the road */
+           *phys = IOPhysical32( 0,  cell[ childAddressCells - 1 ] + offset);
+           break;
+       }
 
-                offset += diff;
-                ok = true;
-                break;
-            }
+       parent = regEntry->getParentEntry( gIODTPlane );
+       IODTGetCellCounts( parent, &sizeCells, &addressCells );
+
+       if( (length = prop->getLength())) {
+           // search
+           startRange = (UInt32 *) prop->getBytesNoCopy();
+           range = startRange;
+           endRanges = range + (length / 4);
+
+           prop = (OSData *) regEntry->getProperty( gIODTPersistKey );
+           if( prop) {
+               persist = (IODTPersistent *) prop->getBytesNoCopy();
+               compare = persist->compareFunc;
+           } else
+               compare = DefaultCompare;
+
+           for( ok = false;
+                range < endRanges;
+                range += (childCells + addressCells) ) {
+
+               // is cell start >= range start?
+               diff = (*compare)( childAddressCells, cell, range );
+               if( diff < 0)
+                   continue;
+
+               ok = (0 == cell[childCells - 1]);
+               if (!ok)
+               {
+                   // search for cell end
+                   bcopy(cell, endCell, childAddressCells * sizeof(UInt32));
+                   endCell[childAddressCells - 1] += cell[childCells - 1] - 1;
+                   lookRange = startRange;
+                   for( ;
+                        lookRange < endRanges;
+                        lookRange += (childCells + addressCells) )
+                    {
+                       // is cell >= range start?
+                       endDiff = (*compare)( childAddressCells, endCell, lookRange );
+                       if( endDiff < 0)
+                           continue;
+                       if ((endDiff - cell[childCells - 1] + 1 + lookRange[childAddressCells + addressCells - 1])
+                           == (diff + range[childAddressCells + addressCells - 1]))
+                       {
+                           ok = true;
+                           break;
+                       }
+                   }
+                   if (!ok)
+                       continue;
+               }
+               offset += diff;
+               break;
+           }
 
-            // Get the physical start of the range from our parent
-            bcopy( range + childAddressCells, cell, 4 * addressCells );
-            bzero( cell + addressCells, 4 * sizeCells );
+           // Get the physical start of the range from our parent
+           bcopy( range + childAddressCells, cell, 4 * addressCells );
+           bzero( cell + addressCells, 4 * sizeCells );
 
-        } /* else zero length range => pass thru to parent */
+       } /* else zero length range => pass thru to parent */
 
        regEntry                = parent;
        childSizeCells          = sizeCells;
        childAddressCells       = addressCells;
        childCells              = childAddressCells + childSizeCells;
-
-    while( ok && regEntry);
+    }
+    while( ok && regEntry);
 
     return( ok);
 }
diff --git a/iokit/Kernel/IOHibernateIO.cpp b/iokit/Kernel/IOHibernateIO.cpp
new file mode 100644 (file)
index 0000000..0d5fdba
--- /dev/null
@@ -0,0 +1,2071 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+
+/*
+
+Sleep:
+
+- PMRootDomain calls IOHibernateSystemSleep() before system sleep
+(devices awake, normal execution context)
+- IOHibernateSystemSleep opens the hibernation file (or partition) at the bsd level, 
+  grabs its extents and searches for a polling driver willing to work with that IOMedia.
+  The BSD code makes an ioctl to the storage driver to get the partition base offset to
+  the disk, and other ioctls to get the transfer constraints 
+  If successful, the file is written to make sure its initially not bootable (in case of
+  later failure) and nvram set to point to the first block of the file. (Has to be done
+  here so blocking is possible in nvram support).
+  hibernate_setup() in osfmk is called to allocate page bitmaps for all dram, and
+  page out any pages it wants to (currently zero, but probably some percentage of memory).
+  Its assumed just allocating pages will cause the VM system to naturally select the best
+  pages for eviction. It also copies processor flags needed for the restore path and sets
+  a flag in the boot processor proc info.
+  gIOHibernateState = kIOHibernateStateHibernating.
+- Regular sleep progresses - some drivers may inspect the root domain property 
+  kIOHibernateStateKey to modify behavior. The platform driver saves state to memory
+  as usual but leaves motherboard I/O on.
+- Eventually the platform calls ml_ppc_sleep() in the shutdown context on the last cpu,
+  at which point memory is ready to be saved. mapping_hibernate_flush() is called to get
+  all ppc RC bits out of the hash table and caches into the mapping structures.
+- hibernate_write_image() is called (still in shutdown context, no blocking or preemption).
+  hibernate_page_list_setall() is called to get a bitmap of dram pages that need to be saved.
+  All pages are assumed to be saved (as part of the wired image) unless explicitly subtracted
+  by hibernate_page_list_setall(), avoiding having to find arch dependent low level bits.
+  The image header and block list are written. The header includes the second file extent so
+  only the header block is needed to read the file, regardless of filesystem.
+  The kernel section "__HIB" is written uncompressed to the image. This section of code and data 
+  (only) is used to decompress the image during wake/boot.
+  Some additional pages are removed from the bitmaps - the buffers used for hibernation.
+  The bitmaps are written to the image.
+  More areas are removed from the bitmaps (after they have been written to the image) - the 
+  section "__HIB" pages and interrupt stack.
+  Each wired page is compressed and written and then each non-wired page. Compression and 
+  disk writes are in parallel.
+  The image header is written to the start of the file and the polling driver closed.
+  The machine powers down (or sleeps).
+  
+Boot/Wake:
+
+- BootX sees the boot-image nvram variable containing the device and block number of the image,
+  reads the header and if the signature is correct proceeds. The boot-image variable is cleared.
+- BootX reads the portion of the image used for wired pages, to memory. Its assumed this will fit
+  in the OF memory environment, and the image is decrypted. There is no decompression in BootX,
+  that is in the kernel's __HIB section.
+- BootX copies the "__HIB" section to its correct position in memory, quiesces and calls its entry
+  hibernate_kernel_entrypoint(), passing the location of the image in memory. Translation is off, 
+  only code & data in that section is safe to call since all the other wired pages are still 
+  compressed in the image.
+- hibernate_kernel_entrypoint() removes pages occupied by the raw image from the page bitmaps.
+  It uses the bitmaps to work out which pages can be uncompressed from the image to their final
+  location directly, and copies those that can't to interim free pages. When the image has been
+  completed, the copies are uncompressed, overwriting the wired image pages.
+  hibernate_restore_phys_page() (in osfmk since its arch dependent, but part of the "__HIB" section)
+  is used to get pages into place for 64bit.
+- the reset vector is called (at least on ppc), the kernel proceeds on a normal wake, with some
+  changes conditional on the per proc flag - before VM is turned on the boot cpu, all mappings
+  are removed from the software strutures, and the hash table is reinitialized. 
+- After the platform CPU init code is called, hibernate_machine_init() is called to restore the rest
+  of memory, using the polled mode driver, before other threads can run or any devices are turned on.
+  This reduces the memory usage for BootX and allows decompression in parallel with disk reads,
+  for the remaining non wired pages. 
+- The polling driver is closed down and regular wake proceeds. When the kernel calls iokit to wake
+  (normal execution context) hibernate_teardown() in osmfk is called to release any memory, the file
+  is closed via bsd.
+
+Polled Mode I/O:
+
+IOHibernateSystemSleep() finds a polled mode interface to the ATA controller via a property in the
+registry, specifying an object of calls IOPolledInterface.
+
+Before the system goes to sleep it searches from the IOMedia object (could be a filesystem or
+partition) that the image is going to live, looking for polled interface properties. If it finds
+one the IOMedia object is passed to a "probe" call for the interface to accept or reject. All the
+interfaces found are kept in an ordered list.
+
+There is an Open/Close pair of calls made to each of the interfaces at various stages since there are 
+few different contexts things happen in:
+
+- there is an Open/Close (Preflight) made before any part of the system has slept (I/O is all
+up and running) and after wake - this is safe to allocate memory and do anything. The device
+ignores sleep requests from that point since its a waste of time if it goes to sleep and
+immediately wakes back up for the image write.
+
+- there is an Open/Close (BeforeSleep) pair made around the image write operations that happen
+immediately before sleep. These can't block or allocate memory - the I/O system is asleep apart
+from the low level bits (motherboard I/O etc). There is only one thread running. The close can be 
+used to flush and set the disk to sleep.
+
+- there is an Open/Close (AfterSleep) pair made around the image read operations that happen
+immediately after sleep. These can't block or allocate memory. This is happening after the platform
+expert has woken the low level bits of the system, but most of the I/O system has not. There is only
+one thread running.
+
+For the actual I/O, all the ops are with respect to a single IOMemoryDescriptor that was passed
+(prepared) to the Preflight Open() call. There is a read/write op, buffer offset to the IOMD for
+the data, an offset to the disk and length (block aligned 64 bit numbers), and completion callback.
+Each I/O is async but only one is ever outstanding. The polled interface has a checkForWork call
+that is called for the hardware to check for events, and complete the I/O via the callback.
+The hibernate path uses the same transfer constraints the regular cluster I/O path in BSD uses
+to restrict I/O ops.
+*/
+
+#include <sys/systm.h>
+
+#include <IOKit/IOWorkLoop.h>
+#include <IOKit/IOCommandGate.h>
+#include <IOKit/IOTimerEventSource.h>
+#include <IOKit/IOPlatformExpert.h>
+#include <IOKit/IOKitDebug.h>
+#include <IOKit/IOTimeStamp.h>
+#include <IOKit/pwr_mgt/RootDomain.h>
+#include <IOKit/pwr_mgt/IOPMPrivate.h>
+#include <IOKit/IOMessage.h>
+#include <IOKit/IODeviceTreeSupport.h>
+#include <IOKit/IOBSD.h>
+#include "RootDomainUserClient.h"
+#include <IOKit/pwr_mgt/IOPowerConnection.h>
+#include "IOPMPowerStateQueue.h"
+#include <IOKit/IOBufferMemoryDescriptor.h>
+#include <crypto/aes.h>
+
+#include <sys/uio.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>                       // (FWRITE, ...)
+extern "C" {
+#include <sys/sysctl.h>
+}
+
+#include <IOKit/IOHibernatePrivate.h>
+#include <IOKit/IOPolledInterface.h>
+#include <IOKit/IONVRAM.h>
+#include "IOHibernateInternal.h"
+#include "WKdm.h"
+#include "IOKitKernelInternal.h"
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+OSDefineMetaClassAndAbstractStructors(IOPolledInterface, OSObject);
+
+OSMetaClassDefineReservedUnused(IOPolledInterface, 0);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 1);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 2);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 3);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 4);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 5);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 6);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 7);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 8);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 9);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 10);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 11);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 12);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 13);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 14);
+OSMetaClassDefineReservedUnused(IOPolledInterface, 15);
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+extern uint32_t                gIOHibernateState;
+uint32_t                       gIOHibernateMode;
+static char                    gIOHibernateBootSignature[256+1];
+static char                    gIOHibernateFilename[MAXPATHLEN+1];
+static uint32_t                        gIOHibernateFreeRatio = 0;              // free page target (percent)
+uint32_t                       gIOHibernateFreeTime  = 0*1000; // max time to spend freeing pages (ms)
+
+static IODTNVRAM *             gIOOptionsEntry;
+static IORegistryEntry *       gIOChosenEntry;
+
+static IOPolledFileIOVars                gFileVars;
+static IOHibernateVars                   gIOHibernateVars;
+static struct kern_direct_file_io_ref_t * gIOHibernateFileRef;
+static hibernate_cryptvars_t             gIOHibernateCryptWakeContext;
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+enum { kXPRamAudioVolume = 8 };
+enum { kDefaultIOSize = 128 * 1024 };
+enum { kVideoMapSize  = 32 * 1024 * 1024 };
+
+#ifndef kIOMediaPreferredBlockSizeKey
+#define kIOMediaPreferredBlockSizeKey  "Preferred Block Size"
+#endif
+
+#ifndef kIOBootPathKey 
+#define kIOBootPathKey                 "bootpath"
+#endif
+#ifndef kIOSelectedBootDeviceKey       
+#define kIOSelectedBootDeviceKey       "boot-device"
+#endif
+
+
+enum { kIOHibernateMinPollersNeeded = 2 };
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// copy from phys addr to MD
+
+static IOReturn
+IOMemoryDescriptorWriteFromPhysical(IOMemoryDescriptor * md,
+                                   IOByteCount offset, addr64_t bytes, IOByteCount length)
+{
+    addr64_t srcAddr = bytes;
+    IOByteCount remaining;
+
+    remaining = length = min(length, md->getLength() - offset);
+    while (remaining) {        // (process another target segment?)
+        addr64_t    dstAddr64;
+        IOByteCount dstLen;
+
+        dstAddr64 = md->getPhysicalSegment64(offset, &dstLen);
+        if (!dstAddr64)
+            break;
+
+        // Clip segment length to remaining
+        if (dstLen > remaining)
+            dstLen = remaining;
+
+#if 1
+       bcopy_phys(srcAddr, dstAddr64, dstLen);
+#else
+        copypv(srcAddr, dstAddr64, dstLen,
+                            cppvPsnk | cppvFsnk | cppvNoRefSrc | cppvNoModSnk | cppvKmap);
+#endif
+        srcAddr   += dstLen;
+        offset    += dstLen;
+        remaining -= dstLen;
+    }
+
+    assert(!remaining);
+
+    return remaining ? kIOReturnUnderrun : kIOReturnSuccess;
+}
+
+// copy from MD to phys addr
+
+static IOReturn
+IOMemoryDescriptorReadToPhysical(IOMemoryDescriptor * md,
+                                IOByteCount offset, addr64_t bytes, IOByteCount length)
+{
+    addr64_t dstAddr = bytes;
+    IOByteCount remaining;
+
+    remaining = length = min(length, md->getLength() - offset);
+    while (remaining) {        // (process another target segment?)
+        addr64_t    srcAddr64;
+        IOByteCount dstLen;
+
+        srcAddr64 = md->getPhysicalSegment64(offset, &dstLen);
+        if (!srcAddr64)
+            break;
+
+        // Clip segment length to remaining
+        if (dstLen > remaining)
+            dstLen = remaining;
+
+#if 1
+       bcopy_phys(srcAddr64, dstAddr, dstLen);
+#else
+        copypv(srcAddr, dstAddr64, dstLen,
+                            cppvPsnk | cppvFsnk | cppvNoRefSrc | cppvNoModSnk | cppvKmap);
+#endif
+        dstAddr    += dstLen;
+        offset     += dstLen;
+        remaining  -= dstLen;
+    }
+
+    assert(!remaining);
+
+    return remaining ? kIOReturnUnderrun : kIOReturnSuccess;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+void
+hibernate_set_page_state(hibernate_page_list_t * page_list, hibernate_page_list_t * page_list_wired,
+                               vm_offset_t ppnum, vm_offset_t count, uint32_t kind)
+{
+    count += ppnum;
+    switch (kind)
+    {
+      case kIOHibernatePageStateUnwiredSave:
+       // unwired save
+       for (; ppnum < count; ppnum++)
+       {
+           hibernate_page_bitset(page_list,       FALSE, ppnum);
+           hibernate_page_bitset(page_list_wired, TRUE,  ppnum);
+       }
+       break;
+      case kIOHibernatePageStateWiredSave:
+       // wired save
+       for (; ppnum < count; ppnum++)
+       {
+           hibernate_page_bitset(page_list,       FALSE, ppnum);
+           hibernate_page_bitset(page_list_wired, FALSE, ppnum);
+       }
+       break;
+      case kIOHibernatePageStateFree:
+       // free page
+       for (; ppnum < count; ppnum++)
+       {
+           hibernate_page_bitset(page_list,       TRUE, ppnum);
+           hibernate_page_bitset(page_list_wired, TRUE, ppnum);
+       }
+       break;
+      default:
+       panic("hibernate_set_page_state");
+    }
+}
+
+static vm_offset_t
+hibernate_page_list_iterate(hibernate_page_list_t * list, 
+                               void ** iterator, vm_offset_t * ppnum)
+{
+    uint32_t count, idx;
+
+    idx = (uint32_t) *iterator;
+
+    if (!idx)
+       idx = hibernate_page_list_count(list, TRUE, idx);
+
+    *ppnum = idx;
+    count  = hibernate_page_list_count(list, FALSE, idx);
+    idx   += count;
+    idx   += hibernate_page_list_count(list, TRUE, idx);
+    *iterator  = (void *) idx;
+
+    return (count);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static IOReturn
+IOHibernatePollerProbe(IOPolledFileIOVars * vars, IOService * target)
+{
+    IOReturn            err = kIOReturnError;
+    int32_t            idx;
+    IOPolledInterface * poller;
+
+    for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
+    {
+        poller = (IOPolledInterface *) vars->pollers->getObject(idx);
+        err = poller->probe(target);
+        if (err)
+        {
+            HIBLOG("IOPolledInterface::probe[%d] 0x%x\n", idx, err);
+            break;
+        }
+    }
+
+    return (err);
+}
+
+static IOReturn
+IOHibernatePollerOpen(IOPolledFileIOVars * vars, uint32_t state, IOMemoryDescriptor * md)
+{
+    IOReturn            err = kIOReturnError;
+    int32_t            idx;
+    IOPolledInterface * poller;
+
+    for (idx = vars->pollers->getCount() - 1; idx >= 0; idx--)
+    {
+        poller = (IOPolledInterface *) vars->pollers->getObject(idx);
+        err = poller->open(state, md);
+        if (err)
+        {
+            HIBLOG("IOPolledInterface::open[%d] 0x%x\n", idx, err);
+            break;
+        }
+    }
+
+    return (err);
+}
+
+static IOReturn
+IOHibernatePollerClose(IOPolledFileIOVars * vars, uint32_t state)
+{
+    IOReturn            err = kIOReturnError;
+    int32_t            idx;
+    IOPolledInterface * poller;
+
+    for (idx = 0;
+         (poller = (IOPolledInterface *) vars->pollers->getObject(idx));
+         idx++)
+    {
+        err = poller->close(state);
+        if (err)
+            HIBLOG("IOPolledInterface::close[%d] 0x%x\n", idx, err);
+    }
+
+    return (err);
+}
+
+static void
+IOHibernatePollerIOComplete(void *   target,
+                            void *   parameter,
+                            IOReturn status,
+                            UInt64   actualByteCount)
+{
+    IOPolledFileIOVars * vars = (IOPolledFileIOVars *) parameter;
+
+    vars->ioStatus = status;
+}
+
+static IOReturn
+IOHibernatePollerIO(IOPolledFileIOVars * vars, 
+                    uint32_t operation, uint32_t bufferOffset, 
+                   uint64_t deviceOffset, uint64_t length)
+{
+
+    IOReturn            err = kIOReturnError;
+    IOPolledInterface * poller;
+    IOPolledCompletion  completion;
+
+    completion.target    = 0;
+    completion.action    = &IOHibernatePollerIOComplete;
+    completion.parameter = vars;
+
+    vars->ioStatus = -1;
+
+    poller = (IOPolledInterface *) vars->pollers->getObject(0);
+    err = poller->startIO(operation, bufferOffset, deviceOffset + vars->block0, length, completion);
+    if (err)
+        HIBLOG("IOPolledInterface::startIO[%d] 0x%x\n", 0, err);
+
+    return (err);
+}
+
+static IOReturn
+IOHibernatePollerIODone(IOPolledFileIOVars * vars)
+{
+    IOReturn            err = kIOReturnError;
+    int32_t            idx;
+    IOPolledInterface * poller;
+
+    while (-1 == vars->ioStatus)
+    {
+        for (idx = 0;
+             (poller = (IOPolledInterface *) vars->pollers->getObject(idx));
+             idx++)
+        {
+            err = poller->checkForWork();
+            if (err)
+                HIBLOG("IOPolledInterface::checkForWork[%d] 0x%x\n", idx, err);
+        }
+    }
+
+    if (kIOReturnSuccess != vars->ioStatus)
+        HIBLOG("IOPolledInterface::ioStatus 0x%x\n", vars->ioStatus);
+
+    return (vars->ioStatus);
+}
+
+IOReturn
+IOPolledInterface::checkAllForWork(void)
+{
+    IOReturn            err = kIOReturnNotReady;
+    int32_t            idx;
+    IOPolledInterface * poller;
+
+    IOHibernateVars * vars  = &gIOHibernateVars;
+
+    if (!vars->fileVars || !vars->fileVars->pollers)
+       return (err);
+
+    for (idx = 0;
+            (poller = (IOPolledInterface *) vars->fileVars->pollers->getObject(idx));
+            idx++)
+    {
+        err = poller->checkForWork();
+        if (err)
+            HIBLOG("IOPolledInterface::checkAllForWork[%d] 0x%x\n", idx, err);
+    }
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+struct _OpenFileContext
+{
+    OSData * extents;
+    uint64_t size;
+};
+
+static void
+file_extent_callback(void * ref, uint64_t start, uint64_t length)
+{
+    _OpenFileContext * ctx = (_OpenFileContext *) ref;
+    IOPolledFileExtent extent;
+
+    extent.start  = start;
+    extent.length = length;
+
+    ctx->extents->appendBytes(&extent, sizeof(extent));
+    ctx->size += length;
+}
+
+IOReturn
+IOPolledFileOpen( const char * filename, IOBufferMemoryDescriptor * ioBuffer,
+                           IOPolledFileIOVars ** fileVars, OSData ** fileExtents,
+                           OSData ** imagePath)
+{
+    IOReturn                   err = kIOReturnError;
+    IOPolledFileIOVars *       vars;
+    _OpenFileContext           ctx;
+    OSData *                   extentsData;
+    OSNumber *                 num;
+    IORegistryEntry *          part = 0;
+    OSDictionary *             matching;
+    OSIterator *               iter;
+    dev_t                      hibernate_image_dev;
+    uint64_t                   maxiobytes;
+
+    vars = &gFileVars;
+    do
+    {
+       HIBLOG("sizeof(IOHibernateImageHeader) == %ld\n", sizeof(IOHibernateImageHeader));
+       if (sizeof(IOHibernateImageHeader) != 512)
+           continue;
+    
+       vars->io           = false;
+       vars->buffer       = (uint8_t *) ioBuffer->getBytesNoCopy();
+       vars->bufferHalf   = 0;
+       vars->bufferOffset = 0;
+       vars->bufferSize   = ioBuffer->getLength() >> 1;
+    
+       extentsData = OSData::withCapacity(32);
+    
+       ctx.extents = extentsData;
+       ctx.size    = 0;
+       vars->fileRef = kern_open_file_for_direct_io(filename, 
+                                                   &file_extent_callback, &ctx, 
+                                                   &hibernate_image_dev,
+                                                    &vars->block0,
+                                                    &maxiobytes);
+       if (!vars->fileRef)
+       {
+           err = kIOReturnNoSpace;
+           break;
+       }
+       HIBLOG("Opened file %s, size %qd, partition base 0x%qx, maxio %qx\n", filename, ctx.size, 
+                    vars->block0, maxiobytes);
+       if (ctx.size < 1*1024*1024)             // check against image size estimate!
+       {
+           err = kIOReturnNoSpace;
+           break;
+       }
+
+        if (maxiobytes < vars->bufferSize)
+            vars->bufferSize = maxiobytes;
+    
+       vars->extentMap = (IOPolledFileExtent *) extentsData->getBytesNoCopy();
+    
+       matching = IOService::serviceMatching("IOMedia");
+       num = OSNumber::withNumber(major(hibernate_image_dev), 32);
+       matching->setObject(kIOBSDMajorKey, num);
+       num->release();
+       num = OSNumber::withNumber(minor(hibernate_image_dev), 32);
+       matching->setObject(kIOBSDMinorKey, num);
+       num->release();
+       iter = IOService::getMatchingServices(matching);
+       matching->release();
+       if (iter)
+       {
+           part = (IORegistryEntry *) iter->getNextObject();
+           part->retain();
+           iter->release();
+       }
+    
+       int minor, major;
+       IORegistryEntry * next;
+       IORegistryEntry * child;
+       OSData * data;
+
+       num = (OSNumber *) part->getProperty(kIOBSDMajorKey);
+       if (!num)
+           break;
+       major = num->unsigned32BitValue();
+       num = (OSNumber *) part->getProperty(kIOBSDMinorKey);
+       if (!num)
+           break;
+       minor = num->unsigned32BitValue();
+
+       hibernate_image_dev = makedev(major, minor);
+
+        vars->pollers = OSArray::withCapacity(4);
+       if (!vars->pollers)
+           break;
+
+       vars->blockSize = 512;
+       next = part;
+       do
+       {
+            IOPolledInterface * poller;
+            if ((poller = OSDynamicCast(IOPolledInterface, next->getProperty(kIOPolledInterfaceSupportKey))))
+                vars->pollers->setObject(poller);
+           if ((num = OSDynamicCast(OSNumber, next->getProperty(kIOMediaPreferredBlockSizeKey))))
+               vars->blockSize = num->unsigned32BitValue();
+            child = next;
+       }
+       while ((next = child->getParentEntry(gIOServicePlane)) 
+                && child->isParent(next, gIOServicePlane, true));
+
+       HIBLOG("hibernate image major %d, minor %d, blocksize %ld, pollers %d\n",
+                   major, minor, vars->blockSize, vars->pollers->getCount());
+       if (vars->pollers->getCount() < kIOHibernateMinPollersNeeded)
+           continue;
+
+       err = IOHibernatePollerProbe(vars, (IOService *) part);
+       if (kIOReturnSuccess != err)
+           break;
+
+       err = IOHibernatePollerOpen(vars, kIOPolledPreflightState, ioBuffer);
+       if (kIOReturnSuccess != err)
+           break;
+
+       *fileVars    = vars;
+       *fileExtents = extentsData;
+    
+       // make imagePath
+       char str1[256];
+       char str2[24];
+       int len = sizeof(str1);
+
+       if ((extentsData->getLength() >= sizeof(IOPolledFileExtent))
+           && part->getPath(str1, &len, gIODTPlane))
+       {
+           // (strip the plane name)
+           char * tail = strchr(str1, ':');
+           if (!tail)
+               tail = str1 - 1;
+           data = OSData::withBytes(tail + 1, strlen(tail + 1));
+           sprintf(str2, ",%qx", vars->extentMap[0]);
+           data->appendBytes(str2, strlen(str2));
+           *imagePath = data;
+       }
+    }
+    while (false);
+
+    if (kIOReturnSuccess != err)
+    {
+        HIBLOG("error 0x%x opening hibernation file\n", err);
+       if (vars->fileRef)
+           kern_close_file_for_direct_io(vars->fileRef);
+    }
+
+    if (part)
+       part->release();
+
+    return (err);
+}
+
+IOReturn
+IOPolledFileClose( IOPolledFileIOVars * vars )
+{
+    if (vars->pollers)
+    {
+       IOHibernatePollerClose(vars, kIOPolledPostflightState);
+        vars->pollers->release();
+    }
+
+    gIOHibernateFileRef = vars->fileRef;
+
+    bzero(vars, sizeof(IOPolledFileIOVars));
+
+    return (kIOReturnSuccess);
+}
+
+static IOReturn
+IOPolledFileSeek(IOPolledFileIOVars * vars, uint64_t position)
+{
+    IOPolledFileExtent * extentMap;
+
+    extentMap = vars->extentMap;
+
+    vars->position = position;
+
+    while (position >= extentMap->length)
+    {
+       position -= extentMap->length;
+       extentMap++;
+    }
+
+    vars->currentExtent   = extentMap;
+    vars->extentRemaining = extentMap->length - position;
+    vars->extentPosition  = vars->position - position;
+
+    if (vars->bufferSize <= vars->extentRemaining)
+       vars->bufferLimit = vars->bufferSize;
+    else
+       vars->bufferLimit = vars->extentRemaining;
+
+    return (kIOReturnSuccess);
+}
+
+static IOReturn
+IOPolledFileWrite(IOPolledFileIOVars * vars,
+                    const uint8_t * bytes, IOByteCount size,
+                    hibernate_cryptvars_t * cryptvars)
+{
+    IOReturn    err = kIOReturnSuccess;
+    IOByteCount copy;
+    bool       flush = false;
+
+    do
+    {
+       if (!bytes && !size)
+       {
+           // seek to end of block & flush
+           size = vars->position & (vars->blockSize - 1);
+           if (size)
+               size = vars->blockSize - size;
+           flush = true;
+            // use some garbage for the fill
+            bytes = vars->buffer + vars->bufferOffset;
+       }
+
+       copy = vars->bufferLimit - vars->bufferOffset;
+       if (copy > size)
+           copy = size;
+       else
+           flush = true;
+
+       if (bytes)
+       {
+           bcopy(bytes, vars->buffer + vars->bufferHalf + vars->bufferOffset, copy);
+           bytes += copy;
+       }
+        else
+           bzero(vars->buffer + vars->bufferHalf + vars->bufferOffset, copy);
+        
+       size -= copy;
+       vars->bufferOffset += copy;
+       vars->position += copy;
+
+       if (flush && vars->bufferOffset)
+       {
+           uint64_t offset = (vars->position - vars->bufferOffset 
+                               - vars->extentPosition + vars->currentExtent->start);
+           uint32_t length = (vars->bufferOffset);
+
+            if (cryptvars && vars->encryptStart && (vars->position > vars->encryptStart))
+            {
+                uint32_t encryptLen, encryptStart;
+                encryptLen = vars->position - vars->encryptStart;
+                if (encryptLen > length)
+                    encryptLen = length;
+                encryptStart = length - encryptLen;
+                
+                // encrypt the buffer
+                aes_encrypt_cbc(vars->buffer + vars->bufferHalf + encryptStart,
+                                &cryptvars->aes_iv[0],
+                                encryptLen / AES_BLOCK_SIZE,
+                                vars->buffer + vars->bufferHalf + encryptStart,
+                                &cryptvars->ctx.encrypt);
+                // save initial vector for following encrypts
+                bcopy(vars->buffer + vars->bufferHalf + encryptStart + encryptLen - AES_BLOCK_SIZE,
+                        &cryptvars->aes_iv[0],
+                        AES_BLOCK_SIZE);
+            }
+
+           if (vars->io)
+            {
+               err = IOHibernatePollerIODone(vars);
+                if (kIOReturnSuccess != err)
+                    break;
+            }
+
+if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
+//if (length != vars->bufferSize) HIBLOG("short write of %qx ends@ %qx\n", length, offset + length);
+
+           err = IOHibernatePollerIO(vars, kIOPolledWrite, vars->bufferHalf, offset, length);
+            if (kIOReturnSuccess != err)
+                break;
+           vars->io = true;
+
+           vars->extentRemaining -= vars->bufferOffset;
+           if (!vars->extentRemaining)
+           {
+               vars->currentExtent++;
+               vars->extentRemaining = vars->currentExtent->length;
+               vars->extentPosition  = vars->position;
+                if (!vars->extentRemaining)
+                {
+                    err = kIOReturnOverrun;
+                    break;
+                }
+           }
+
+           vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
+           vars->bufferOffset = 0;
+           if (vars->bufferSize <= vars->extentRemaining)
+               vars->bufferLimit = vars->bufferSize;
+           else
+               vars->bufferLimit = vars->extentRemaining;
+
+           flush = false;
+       }
+    }
+    while (size);
+
+    return (err);
+}
+
+// Read `size` bytes from the polled hibernation image file into `bytes`,
+// using the two halves of vars->buffer as a double buffer: while the caller
+// drains one half, the poller fills the other. When `cryptvars` is non-NULL
+// each freshly read half is decrypted in place with AES-CBC, chaining the IV
+// across buffers. `bytes` may be NULL to consume/skip data without copying.
+// Returns kIOReturnSuccess, a poller I/O error, or kIOReturnOverrun when a
+// read runs past the last file extent.
+static IOReturn
+IOPolledFileRead(IOPolledFileIOVars * vars,
+                    uint8_t * bytes, IOByteCount size,
+                    hibernate_cryptvars_t * cryptvars)
+{
+    IOReturn    err = kIOReturnSuccess;
+    IOByteCount copy;
+
+//    bytesWritten += size;
+
+    do
+    {
+       // Drain whatever is still available in the current buffer half.
+       copy = vars->bufferLimit - vars->bufferOffset;
+       if (copy > size)
+           copy = size;
+
+       if (bytes)
+       {
+           bcopy(vars->buffer + vars->bufferHalf + vars->bufferOffset, bytes, copy);
+           bytes += copy;
+       }
+       size -= copy;
+       vars->bufferOffset += copy;
+//     vars->position += copy;
+
+       // Current half exhausted: retire the outstanding I/O (if any),
+       // account for the completed read, and issue the next one.
+       if (vars->bufferOffset == vars->bufferLimit)
+       {
+           if (vars->io)
+            {
+               err = IOHibernatePollerIODone(vars);
+                if (kIOReturnSuccess != err)
+                    break;
+            }
+            else
+                // No read in flight yet — nothing to decrypt this pass.
+                cryptvars = 0;
+
+if (vars->position & (vars->blockSize - 1)) HIBLOG("misaligned file pos %qx\n", vars->position);
+
+           vars->position += vars->lastRead;
+           vars->extentRemaining -= vars->lastRead;
+           vars->bufferLimit = vars->lastRead;
+
+           // Advance to the next extent when the current one is consumed;
+           // a zero-length extent terminates the list.
+           if (!vars->extentRemaining)
+           {
+               vars->currentExtent++;
+               vars->extentRemaining = vars->currentExtent->length;
+               vars->extentPosition  = vars->position;
+                if (!vars->extentRemaining)
+                {
+                    err = kIOReturnOverrun;
+                    break;
+                }
+           }
+
+           // Next read is the smaller of one buffer half and the extent tail.
+           if (vars->extentRemaining <= vars->bufferSize)
+               vars->lastRead = vars->extentRemaining;
+           else
+               vars->lastRead = vars->bufferSize;
+
+           uint64_t offset = (vars->position 
+                               - vars->extentPosition + vars->currentExtent->start);
+           uint64_t length = (vars->lastRead);
+
+//if (length != vars->bufferSize) HIBLOG("short read of %qx ends@ %qx\n", length, offset + length);
+
+           // Kick off the read into the *other* half, then flip halves so
+           // the caller drains the half just completed.
+           err = IOHibernatePollerIO(vars, kIOPolledRead, vars->bufferHalf, offset, length);
+            if (kIOReturnSuccess != err)
+                break;
+           vars->io = true;
+
+           vars->bufferHalf = vars->bufferHalf ? 0 : vars->bufferSize;
+           vars->bufferOffset = 0;
+
+            if (cryptvars)
+            {
+                uint8_t thisVector[AES_BLOCK_SIZE];
+                // save initial vector for following decrypts
+                bcopy(&cryptvars->aes_iv[0], &thisVector[0], AES_BLOCK_SIZE);
+                // Last ciphertext block of this buffer becomes the IV for the
+                // next buffer (CBC chaining) — captured before decrypting.
+                bcopy(vars->buffer + vars->bufferHalf + vars->lastRead - AES_BLOCK_SIZE, 
+                        &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
+                // decrypt the buffer
+                aes_decrypt_cbc(vars->buffer + vars->bufferHalf,
+                                &thisVector[0],
+                                vars->lastRead / AES_BLOCK_SIZE,
+                                vars->buffer + vars->bufferHalf,
+                                &cryptvars->ctx.decrypt);
+            }
+       }
+    }
+    while (size);
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+               
+IOReturn
+IOHibernateSystemSleep(void)
+{
+    IOReturn   err;
+    OSData *   data;
+    OSObject * obj;
+    OSString * str;
+    OSNumber * num;
+
+    IOHibernateVars * vars  = &gIOHibernateVars;
+
+    if (vars->fileVars && vars->fileVars->fileRef)
+       // already on the way down
+       return (kIOReturnSuccess);
+
+    gIOHibernateState = kIOHibernateStateInactive;
+
+    if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateModeKey)))
+    {
+       if ((num = OSDynamicCast(OSNumber, obj)))
+           gIOHibernateMode = num->unsigned32BitValue();
+        if (kIOHibernateModeSleep & gIOHibernateMode)
+            // default to discard clean for safe sleep
+            gIOHibernateMode ^= (kIOHibernateModeDiscardCleanInactive 
+                                | kIOHibernateModeDiscardCleanActive);
+
+       obj->release();
+    }
+    if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFreeRatioKey)))
+    {
+       if ((num = OSDynamicCast(OSNumber, obj)))
+           gIOHibernateFreeRatio = num->unsigned32BitValue();
+       obj->release();
+    }
+    if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFreeTimeKey)))
+    {
+       if ((num = OSDynamicCast(OSNumber, obj)))
+           gIOHibernateFreeTime = num->unsigned32BitValue();
+       obj->release();
+    }
+    if ((obj = IOService::getPMRootDomain()->copyProperty(kIOHibernateFileKey)))
+    {
+       if ((str = OSDynamicCast(OSString, obj)))
+           strcpy(gIOHibernateFilename, str->getCStringNoCopy());
+       obj->release();
+    }
+
+    if (!gIOHibernateMode || !gIOHibernateFilename[0])
+       return (kIOReturnUnsupported);
+
+    HIBLOG("hibernate image path: %s\n", gIOHibernateFilename);
+
+    do
+    {
+        vars->srcBuffer = IOBufferMemoryDescriptor::withOptions(0, 4 * page_size, page_size);
+        vars->ioBuffer  = IOBufferMemoryDescriptor::withOptions(0, 2 * kDefaultIOSize, page_size);
+
+        if (!vars->srcBuffer || !vars->ioBuffer)
+        {
+            err = kIOReturnNoMemory;
+            break;
+        }
+
+        err = IOPolledFileOpen(gIOHibernateFilename, vars->ioBuffer,
+                                &vars->fileVars, &vars->fileExtents, &data);
+        if (KERN_SUCCESS != err)
+        {
+           HIBLOG("IOPolledFileOpen(%x)\n", err);
+            break;
+        }
+       if (vars->fileVars->fileRef)
+       {
+           // invalidate the image file
+           gIOHibernateCurrentHeader->signature = kIOHibernateHeaderInvalidSignature;
+           int err = kern_write_file(vars->fileVars->fileRef, 0,
+                                       (caddr_t) gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader));
+            if (KERN_SUCCESS != err)
+                HIBLOG("kern_write_file(%d)\n", err);
+       }
+
+       bzero(gIOHibernateCurrentHeader, sizeof(IOHibernateImageHeader));
+
+        boolean_t encryptedswap;
+        err = hibernate_setup(gIOHibernateCurrentHeader, 
+                                gIOHibernateFreeRatio, gIOHibernateFreeTime,
+                                &vars->page_list, &vars->page_list_wired, &encryptedswap);
+        if (KERN_SUCCESS != err)
+        {
+           HIBLOG("hibernate_setup(%d)\n", err);
+            break;
+        }
+
+        if (encryptedswap)
+            gIOHibernateMode ^= kIOHibernateModeEncrypt; 
+
+        vars->videoAllocSize = kVideoMapSize;
+        if (KERN_SUCCESS != kmem_alloc_pageable(kernel_map, &vars->videoMapping, vars->videoAllocSize))
+            vars->videoMapping = 0;
+
+       // generate crypt keys
+        for (uint32_t i = 0; i < sizeof(vars->wiredCryptKey); i++)
+            vars->wiredCryptKey[i] = random();
+        for (uint32_t i = 0; i < sizeof(vars->cryptKey); i++)
+            vars->cryptKey[i] = random();
+
+       // set nvram
+
+        IORegistryEntry * regEntry;
+        if (!gIOOptionsEntry)
+        {
+            regEntry = IORegistryEntry::fromPath("/options", gIODTPlane);
+            gIOOptionsEntry = OSDynamicCast(IODTNVRAM, regEntry);
+            if (regEntry && !gIOOptionsEntry)
+                regEntry->release();
+        }
+        if (!gIOChosenEntry)
+            gIOChosenEntry = IORegistryEntry::fromPath("/chosen", gIODTPlane);
+
+       if (gIOOptionsEntry)
+       {
+            const OSSymbol *  sym;
+            size_t           len;
+            char              valueString[16];
+
+            sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
+            if (sym)
+            {
+                gIOOptionsEntry->setProperty(sym, data);
+                sym->release();
+            }
+            data->release();
+
+           vars->saveBootDevice = gIOOptionsEntry->copyProperty(kIOSelectedBootDeviceKey);
+            if (gIOChosenEntry)
+            {
+               OSData * bootDevice = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOBootPathKey));
+               if (bootDevice)
+               {
+                   sym = OSSymbol::withCStringNoCopy(kIOSelectedBootDeviceKey);
+                   OSString * str2 = OSString::withCStringNoCopy((const char *) bootDevice->getBytesNoCopy());
+                   if (sym && str2)
+                       gIOOptionsEntry->setProperty(sym, str2);
+                   if (sym)
+                       sym->release();
+                   if (str2)
+                       str2->release();
+               }
+
+                data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMemorySignatureKey));
+                if (data)
+                {
+                    vars->haveFastBoot = true;
+
+                    len = sprintf(valueString, "0x%lx", *((UInt32 *)data->getBytesNoCopy()));
+                    data = OSData::withBytes(valueString, len + 1);
+                    sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey);
+                    if (sym && data)
+                        gIOOptionsEntry->setProperty(sym, data);
+                    if (sym)
+                        sym->release();
+                    if (data)
+                        data->release();
+                }
+                data = OSDynamicCast(OSData, gIOChosenEntry->getProperty(kIOHibernateMachineSignatureKey));
+                if (data)
+                    gIOHibernateCurrentHeader->machineSignature = *((UInt32 *)data->getBytesNoCopy());
+            }
+
+            if (kIOHibernateModeEncrypt & gIOHibernateMode)
+            {
+                data = OSData::withBytes(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey));
+                sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey);
+                if (sym && data)
+                    gIOOptionsEntry->setProperty(sym, data);
+                if (sym)
+                    sym->release();
+                if (data)
+                    data->release();
+                if (gIOHibernateBootSignature[0])
+                {
+                    data = OSData::withCapacity(16);
+                    sym = OSSymbol::withCStringNoCopy(kIOHibernateBootSignatureKey);
+                    if (sym && data)
+                    {
+                        char c;
+                        uint8_t value;
+                        for (uint32_t i = 0; (c = gIOHibernateBootSignature[i]); i++)
+                        {
+                            if (c >= 'a')
+                                c -= 'a' - 10;
+                            else if (c >= 'A')
+                                c -= 'A' - 10;
+                            else if (c >= '0')
+                                c -= '0';
+                            else
+                                continue;
+                            value = (value << 4) | c;
+                            if (i & 1)
+                                data->appendBytes(&value, sizeof(value));
+                        }
+                        gIOOptionsEntry->setProperty(sym, data);
+                    }
+                    if (sym)
+                        sym->release();
+                    if (data)
+                        data->release();
+                }
+            }
+
+            if (!vars->haveFastBoot)
+            {
+                // set boot volume to zero
+                IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform());
+                if (platform && (kIOReturnSuccess == platform->readXPRAM(kXPRamAudioVolume, 
+                                            &vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume))))
+                {
+                    uint8_t newVolume;
+                    newVolume = vars->saveBootAudioVolume & 0xf8;
+                    platform->writeXPRAM(kXPRamAudioVolume, 
+                                            &newVolume, sizeof(newVolume));
+                }
+            }
+       }
+       // --
+
+       gIOHibernateCurrentHeader->signature = kIOHibernateHeaderSignature;
+       gIOHibernateState = kIOHibernateStateHibernating;
+    }
+    while (false);
+
+    return (err);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Called after the system has slept (pre-image-write): wire down the
+// root domain's preview (progress/splash) buffer so it can be written into
+// the image, and flush nvram so the booter sees the properties set by
+// IOHibernateSystemSleep. Always returns kIOReturnSuccess; a failed preview
+// prepare simply drops the preview.
+IOReturn
+IOHibernateSystemHasSlept(void)
+{
+    IOHibernateVars * vars  = &gIOHibernateVars;
+
+    if ((vars->previewData = OSDynamicCast(OSData, 
+            IOService::getPMRootDomain()->getProperty(kIOHibernatePreviewBufferKey))))
+    {
+        // Describe the preview bytes so they can be wired and written.
+        vars->previewBuffer = IOMemoryDescriptor::withAddress(
+                                    (void *) vars->previewData->getBytesNoCopy(), 
+                                    vars->previewData->getLength(), 
+                                    kIODirectionInOut);
+
+        // prepare() wires the pages; if that fails the descriptor is useless.
+        if (vars->previewBuffer && (kIOReturnSuccess != vars->previewBuffer->prepare()))
+        {
+            vars->previewBuffer->release();
+            vars->previewBuffer = 0;
+        }
+        // Keep data and buffer consistent: no buffer means no preview at all.
+        if (!vars->previewBuffer)
+            vars->previewData = 0;
+    }
+    if (gIOOptionsEntry)
+        gIOOptionsEntry->sync();
+
+    return (kIOReturnSuccess);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Tear down hibernation state after wake: release page lists, video mapping,
+// preview buffer and file state, undo the nvram properties written at sleep
+// (boot image, boot device, crypt key, memory signature), restore the boot
+// audio volume, and zero gIOHibernateVars for the next cycle.
+IOReturn
+IOHibernateSystemWake(void)
+{
+    IOHibernateVars * vars  = &gIOHibernateVars;
+
+    hibernate_teardown(vars->page_list, vars->page_list_wired);
+
+    if (vars->videoMapping)
+    {
+        if (vars->videoMapSize)
+            // remove mappings
+            IOUnmapPages(kernel_map, vars->videoMapping, vars->videoMapSize);
+        if (vars->videoAllocSize)
+            // dealloc range
+            kmem_free(kernel_map, trunc_page_32(vars->videoMapping), vars->videoAllocSize);
+    }
+
+    if (vars->previewBuffer)
+    {
+        vars->previewBuffer->release();
+        vars->previewBuffer = 0;
+    }
+
+    if (vars->fileVars)
+    {
+       IOPolledFileClose(vars->fileVars);
+    }
+
+    // invalidate nvram properties - (gIOOptionsEntry != 0) => nvram was touched
+
+    // An empty OSData overwrites (rather than removes) the boot-image and
+    // crypt-key properties so the booter will not reuse a stale image.
+    OSData * data = OSData::withCapacity(4);
+    if (gIOOptionsEntry && data)
+    {
+        const OSSymbol * sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKey);
+        if (sym)
+        {
+            gIOOptionsEntry->setProperty(sym, data);
+            sym->release();
+        }
+        sym = OSSymbol::withCStringNoCopy(kIOSelectedBootDeviceKey);
+        if (sym)
+        {
+           // Restore the boot device saved in IOHibernateSystemSleep.
+           if (vars->saveBootDevice)
+           {
+               gIOOptionsEntry->setProperty(sym, vars->saveBootDevice);
+               vars->saveBootDevice->release();
+           }
+            sym->release();
+        }
+        sym = OSSymbol::withCStringNoCopy(kIOHibernateBootImageKeyKey);
+        if (sym)
+        {
+            gIOOptionsEntry->setProperty(sym, data);
+            sym->release();
+        }
+        sym = OSSymbol::withCStringNoCopy(kIOHibernateMemorySignatureEnvKey);
+        if (sym)
+        {
+            gIOOptionsEntry->removeProperty(sym);
+            sym->release();
+        }
+    }
+    if (data)
+        data->release();
+
+    if (gIOOptionsEntry)
+    {
+       if (!vars->haveFastBoot)
+       {
+           // reset boot audio volume
+           IODTPlatformExpert * platform = OSDynamicCast(IODTPlatformExpert, IOService::getPlatform());
+           if (platform)
+               platform->writeXPRAM(kXPRamAudioVolume, 
+                                       &vars->saveBootAudioVolume, sizeof(vars->saveBootAudioVolume));
+       }
+
+       // sync now to hardware if the booter has not
+       if (kIOHibernateStateInactive == gIOHibernateState)
+           gIOOptionsEntry->sync();
+       else
+           // just sync the variables in case a later panic syncs nvram (it won't sync variables)
+           gIOOptionsEntry->syncOFVariables();
+    }
+
+    if (vars->srcBuffer)
+       vars->srcBuffer->release();
+    if (vars->ioBuffer)
+       vars->ioBuffer->release();
+    if (vars->fileExtents)
+       vars->fileExtents->release();
+
+    // Wipe all per-cycle state (also clears stale pointers above).
+    bzero(vars, sizeof(*vars));
+
+//    gIOHibernateState = kIOHibernateStateInactive;       // leave it for post wake code to see
+
+    return (kIOReturnSuccess);
+}
+
+// Final post-wake step: release the hibernation image file reference held
+// open for direct I/O, if one is still outstanding. Idempotent; always
+// returns kIOReturnSuccess.
+IOReturn
+IOHibernateSystemPostWake(void)
+{
+    if (!gIOHibernateFileRef)
+        return (kIOReturnSuccess);
+
+    kern_close_file_for_direct_io(gIOHibernateFileRef);
+    gIOHibernateFileRef = 0;
+
+    return (kIOReturnSuccess);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// One-time init from the PM root domain: publish gIOHibernateState as a
+// root-domain property, honor the "hfile" boot-arg, and register the
+// kern.hibernatefile / kern.bootsignature / kern.hibernatemode sysctls.
+void
+IOHibernateSystemInit(IOPMrootDomain * rootDomain)
+{
+    // withBytesNoCopy: the property aliases gIOHibernateState, so readers of
+    // the property see live state updates without republishing.
+    OSData * data = OSData::withBytesNoCopy(&gIOHibernateState, sizeof(gIOHibernateState));
+    if (data)
+    {
+       rootDomain->setProperty(kIOHibernateStateKey, data);
+       data->release();
+    }
+
+    // "hfile=<path>" boot-arg both selects the image path and enables
+    // hibernation outright.
+    if (PE_parse_boot_arg("hfile", gIOHibernateFilename))
+       gIOHibernateMode = kIOHibernateModeOn;
+    else
+       gIOHibernateFilename[0] = 0;
+
+    // CTLFLAG_NOAUTO: these static oids are registered by hand below.
+    static SYSCTL_STRING(_kern, OID_AUTO, hibernatefile, 
+                               CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
+                               gIOHibernateFilename, sizeof(gIOHibernateFilename), "");
+    sysctl_register_oid(&sysctl__kern_hibernatefile);
+
+    static SYSCTL_STRING(_kern, OID_AUTO, bootsignature, 
+                               CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
+                               gIOHibernateBootSignature, sizeof(gIOHibernateBootSignature), "");
+    sysctl_register_oid(&sysctl__kern_bootsignature);
+
+    static SYSCTL_UINT(_kern, OID_AUTO, hibernatemode, 
+                               CTLFLAG_RW | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
+                               &gIOHibernateMode, 0, "");
+    sysctl_register_oid(&sysctl__kern_hibernatemode);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+// Per-architecture preparation immediately before the image is written.
+// On PowerPC, drop to the low processor speed so the saved state matches
+// what the wake path expects; a no-op elsewhere.
+static void
+hibernate_setup_for_wake(void)
+{
+#if __ppc__
+    // go slow (state needed for wake)
+    ml_set_processor_speed(1);
+#endif
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+extern "C" boolean_t
+hibernate_write_image(void)
+{
+    IOHibernateImageHeader * header = gIOHibernateCurrentHeader;
+    IOHibernateVars *        vars  = &gIOHibernateVars;
+    IOPolledFileExtent *     fileExtents;
+
+    uint32_t    pageCount, pagesDone;
+    IOReturn     err;
+    vm_offset_t  ppnum;
+    IOItemCount  page, count;
+    uint8_t *   src;
+    uint8_t *   data;
+    IOByteCount  pageCompressedSize;
+    uint64_t    compressedSize, uncompressedSize;
+    uint64_t    image1Size = 0;
+    uint32_t    bitmap_size;
+    bool        iterDone, pollerOpen, needEncryptStart;
+    uint32_t    restore1Sum, sum, sum1, sum2;
+    uint32_t    tag;
+    uint32_t    pageType;
+    uint32_t    pageAndCount[2];
+
+    AbsoluteTime startTime, endTime;
+    AbsoluteTime allTime, compTime, decoTime;
+    uint64_t     nsec;
+    uint32_t     lastProgressStamp = 0;
+    uint32_t     progressStamp;
+
+    hibernate_cryptvars_t _cryptvars;
+    hibernate_cryptvars_t * cryptvars = 0;
+
+    if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents)
+        return (false /* sleep */ );
+
+    restore1Sum = sum1 = sum2 = 0;
+
+    // encryption data. "iv" is the "initial vector".
+    if (kIOHibernateModeEncrypt & gIOHibernateMode)
+    {
+        static const unsigned char first_iv[AES_BLOCK_SIZE]
+        = {  0xa3, 0x63, 0x65, 0xa9, 0x0b, 0x71, 0x7b, 0x1c,
+             0xdf, 0x9e, 0x5f, 0x32, 0xd7, 0x61, 0x63, 0xda };
+    
+        cryptvars = &gIOHibernateCryptWakeContext;
+        bzero(cryptvars, sizeof(hibernate_cryptvars_t));
+        aes_encrypt_key(vars->cryptKey,
+                        kIOHibernateAESKeySize,
+                        &cryptvars->ctx.encrypt);
+        aes_decrypt_key(vars->cryptKey,
+                        kIOHibernateAESKeySize,
+                        &cryptvars->ctx.decrypt);
+
+        cryptvars = &_cryptvars;
+        bzero(cryptvars, sizeof(hibernate_cryptvars_t));
+        aes_encrypt_key(vars->wiredCryptKey,
+                        kIOHibernateAESKeySize,
+                        &cryptvars->ctx.encrypt);
+
+        bcopy(&first_iv[0], &cryptvars->aes_iv[0], AES_BLOCK_SIZE);
+        bzero(&vars->wiredCryptKey[0], sizeof(vars->wiredCryptKey));
+        bzero(&vars->cryptKey[0], sizeof(vars->cryptKey));
+        bzero(gIOHibernateCryptWakeVars, sizeof(hibernate_cryptwakevars_t));
+    }
+
+    hibernate_setup_for_wake();
+
+    hibernate_page_list_setall(vars->page_list,
+                               vars->page_list_wired,
+                               &pageCount);
+
+    HIBLOG("hibernate_page_list_setall found pageCount %d\n", pageCount);
+
+    fileExtents = (IOPolledFileExtent *) vars->fileExtents->getBytesNoCopy();
+
+#if 0
+    count = vars->fileExtents->getLength() / sizeof(IOPolledFileExtent);
+    for (page = 0; page < count; page++)
+    {
+       HIBLOG("fileExtents[%d] %qx, %qx (%qx)\n", page, 
+               fileExtents[page].start, fileExtents[page].length,
+               fileExtents[page].start + fileExtents[page].length);
+    }
+#endif
+
+    needEncryptStart = (0 != (kIOHibernateModeEncrypt & gIOHibernateMode));
+
+    AbsoluteTime_to_scalar(&compTime) = 0;
+    AbsoluteTime_to_scalar(&decoTime) = 0;
+
+    clock_get_uptime(&allTime);
+
+    do 
+    {
+        compressedSize   = 0;
+        uncompressedSize = 0;
+        iterDone         = false;
+        pageType         = 0;          // wired pages first
+
+        IOPolledFileSeek(vars->fileVars, sizeof(IOHibernateImageHeader));
+    
+        HIBLOG("IOHibernatePollerOpen, ml_get_interrupts_enabled %d\n", 
+                ml_get_interrupts_enabled());
+        err = IOHibernatePollerOpen(vars->fileVars, kIOPolledBeforeSleepState, vars->ioBuffer);
+        HIBLOG("IOHibernatePollerOpen(%x)\n", err);
+        pollerOpen = (kIOReturnSuccess == err);
+        if (!pollerOpen)
+            break;
+    
+        // copy file block extent list if larger than header
+    
+        count = vars->fileExtents->getLength();
+        if (count > sizeof(header->fileExtentMap))
+        {
+            count -= sizeof(header->fileExtentMap);
+            err = IOPolledFileWrite(vars->fileVars,
+                                    ((uint8_t *) &fileExtents[0]) + sizeof(header->fileExtentMap), count, cryptvars);
+            if (kIOReturnSuccess != err)
+                break;
+        }
+
+        // copy out restore1 code
+    
+        page = atop_32(sectHIBB);
+        count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page;
+        header->restore1CodePage = page;
+        header->restore1PageCount = count;
+        header->restore1CodeOffset = ((uint32_t) &hibernate_machine_entrypoint)      - sectHIBB;
+        header->restore1StackOffset = ((uint32_t) &gIOHibernateRestoreStackEnd[0]) - 64 - sectHIBB;
+
+        // sum __HIB sect, with zeros for the stack
+        src = (uint8_t *) trunc_page(sectHIBB);
+        for (page = 0; page < count; page++)
+        {
+            if ((src < &gIOHibernateRestoreStack[0]) || (src >= &gIOHibernateRestoreStackEnd[0]))
+                restore1Sum += hibernate_sum(src, page_size);
+            else
+                restore1Sum += 0x10000001;
+            src += page_size;
+        }
+        sum1 = restore1Sum;
+    
+        // write the __HIB sect, with zeros for the stack
+
+        src = (uint8_t *) trunc_page(sectHIBB);
+        count = ((uint32_t) &gIOHibernateRestoreStack[0]) - trunc_page(sectHIBB);
+        if (count)
+        {
+            err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars);
+            if (kIOReturnSuccess != err)
+                break;
+        }
+        err = IOPolledFileWrite(vars->fileVars, 
+                                        (uint8_t *) 0,
+                                        &gIOHibernateRestoreStackEnd[0] - &gIOHibernateRestoreStack[0],
+                                        cryptvars);
+        if (kIOReturnSuccess != err)
+            break;
+        src = &gIOHibernateRestoreStackEnd[0];
+        count = round_page(sectHIBB + sectSizeHIB) - ((uint32_t) src);
+        if (count)
+        {
+            err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars);
+            if (kIOReturnSuccess != err)
+                break;
+        }
+
+        // write the preview buffer
+
+        addr64_t phys64;
+        IOByteCount segLen;
+
+        if (vars->previewData)
+        {
+            ppnum = 0;
+            count = 0;
+            do
+            {
+                phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen);
+                pageAndCount[0] = atop_64(phys64);
+                pageAndCount[1] = atop_32(segLen);
+                err = IOPolledFileWrite(vars->fileVars, 
+                                        (const uint8_t *) &pageAndCount, sizeof(pageAndCount), 
+                                        cryptvars);
+                if (kIOReturnSuccess != err)
+                    break;
+                count += segLen;
+                ppnum += sizeof(pageAndCount);
+            }
+            while (phys64);
+            if (kIOReturnSuccess != err)
+                break;
+
+            src = (uint8_t *) vars->previewData->getBytesNoCopy();
+            count = vars->previewData->getLength();
+
+            header->previewPageListSize = ppnum;
+            header->previewSize = count + ppnum;
+
+            for (page = 0; page < count; page += page_size)
+                sum1 += hibernate_sum(src + page, page_size);
+
+            err = IOPolledFileWrite(vars->fileVars, src, count, cryptvars);
+            if (kIOReturnSuccess != err)
+                break;
+        }
+
+        // mark areas for no save
+    
+        for (count = 0;
+            (phys64 = vars->ioBuffer->getPhysicalSegment64(count, &segLen));
+            count += segLen)
+        {
+            hibernate_set_page_state(vars->page_list, vars->page_list_wired, 
+                                        atop_64(phys64), atop_32(segLen),
+                                        kIOHibernatePageStateFree);
+            pageCount -= atop_32(segLen);
+        }
+    
+        for (count = 0;
+            (phys64 = vars->srcBuffer->getPhysicalSegment64(count, &segLen));
+            count += segLen)
+        {
+            hibernate_set_page_state(vars->page_list, vars->page_list_wired, 
+                                        atop_64(phys64), atop_32(segLen),
+                                        kIOHibernatePageStateFree);
+            pageCount -= atop_32(segLen);
+        }
+
+        // copy out bitmap of pages available for trashing during restore
+    
+        bitmap_size = vars->page_list_wired->list_size;
+        src = (uint8_t *) vars->page_list_wired;
+        err = IOPolledFileWrite(vars->fileVars, src, bitmap_size, cryptvars);
+        if (kIOReturnSuccess != err)
+            break;
+
+        // mark more areas for no save, but these are not available 
+        // for trashing during restore
+    
+#if !__i386__
+        page = atop_32(sectHIBB);
+        count = atop_32(round_page(sectHIBB + sectSizeHIB)) - page;
+#else
+        // XXX
+        page = atop_32(sectHIBB & 0x3FFFFFFF);
+        count = atop_32(round_page((sectHIBB + sectSizeHIB) & 0x3FFFFFFF)) - page;
+#endif
+        hibernate_set_page_state(vars->page_list, vars->page_list_wired,
+                                        page, count,
+                                        kIOHibernatePageStateFree);
+        pageCount -= count;
+    
+
+
+        if (vars->previewBuffer) for (count = 0;
+                                        (phys64 = vars->previewBuffer->getPhysicalSegment64(count, &segLen));
+                                        count += segLen)
+        {
+            hibernate_set_page_state(vars->page_list, vars->page_list_wired, 
+                                        atop_64(phys64), atop_32(segLen),
+                                        kIOHibernatePageStateFree);
+            pageCount -= atop_32(segLen);
+        }
+
+        src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();
+    
+        void * iter = 0;
+        pagesDone   = 0;
+    
+        HIBLOG("writing %d pages\n", pageCount);
+
+        do
+        {
+            count = hibernate_page_list_iterate(pageType ? vars->page_list : vars->page_list_wired,
+                                                    &iter, &ppnum);
+//          kprintf("[%d](%x : %x)\n", pageType, ppnum, count);
+    
+            iterDone = !count;
+
+            pageAndCount[0] = ppnum;
+            pageAndCount[1] = count;
+            err = IOPolledFileWrite(vars->fileVars, 
+                                    (const uint8_t *) &pageAndCount, sizeof(pageAndCount), 
+                                    cryptvars);
+            if (kIOReturnSuccess != err)
+                break;
+
+            for (page = 0; page < count; page++)
+            {
+                err = IOMemoryDescriptorWriteFromPhysical(vars->srcBuffer, 0, ptoa_64(ppnum), page_size);
+                if (err)
+                {
+                    HIBLOG("IOMemoryDescriptorWriteFromPhysical %d [%d] %x\n", __LINE__, ppnum, err);
+                    break;
+                }
+    
+                sum = hibernate_sum(src, page_size);
+   
+                clock_get_uptime(&startTime);
+
+                pageCompressedSize = WKdm_compress ((WK_word*) src, (WK_word*) (src + page_size), PAGE_SIZE_IN_WORDS);
+    
+                clock_get_uptime(&endTime);
+                ADD_ABSOLUTETIME(&compTime, &endTime);
+                SUB_ABSOLUTETIME(&compTime, &startTime);
+    
+                if (kIOHibernateModeEncrypt & gIOHibernateMode)
+                    pageCompressedSize = (pageCompressedSize + AES_BLOCK_SIZE - 1) & ~(AES_BLOCK_SIZE - 1);
+
+                if (pageCompressedSize > page_size)
+                {
+//                  HIBLOG("------------lose: %d\n", pageCompressedSize);
+                    pageCompressedSize = page_size;
+                }
+
+                if (pageCompressedSize != page_size)
+                    data = (src + page_size);
+                else
+                    data = src;
+
+                tag = pageCompressedSize | kIOHibernateTagSignature;
+
+                if (pageType)
+                    sum2 += sum;
+                else
+                    sum1 += sum;
+
+                if (needEncryptStart && (ppnum >= atop_32(sectDATAB)))
+                {
+                    // start encrypting partway into the data about to be written
+                    vars->fileVars->encryptStart = (vars->fileVars->position + AES_BLOCK_SIZE - 1) 
+                                                    & ~(AES_BLOCK_SIZE - 1);
+                    needEncryptStart = false;
+                }
+
+                err = IOPolledFileWrite(vars->fileVars, (const uint8_t *) &tag, sizeof(tag), cryptvars);
+                if (kIOReturnSuccess != err)
+                    break;
+
+                err = IOPolledFileWrite(vars->fileVars, data, (pageCompressedSize + 3) & ~3, cryptvars);
+                if (kIOReturnSuccess != err)
+                    break;
+
+                compressedSize += pageCompressedSize;
+                if (pageCompressedSize)
+                    uncompressedSize += page_size;
+                ppnum++;
+                pagesDone++;
+    
+                if (0 == (8191 & pagesDone))
+                {
+                    clock_get_uptime(&endTime);
+                    SUB_ABSOLUTETIME(&endTime, &allTime);
+                    absolutetime_to_nanoseconds(endTime, &nsec);
+                    progressStamp = nsec / 750000000ULL;
+                    if (progressStamp != lastProgressStamp)
+                    {
+                        lastProgressStamp = progressStamp;
+                        HIBPRINT("pages %d (%d%%)\n", pagesDone, (100 * pagesDone) / pageCount);
+                    }
+                }
+            }
+            if (kIOReturnSuccess != err)
+                break;
+            if (iterDone && !pageType)
+            {
+                err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars);
+                if (kIOReturnSuccess != err)
+                    break;
+
+                iterDone = false;
+                pageType = 1;
+                iter = 0;
+                image1Size = vars->fileVars->position;
+                if (cryptvars)
+                {
+                    bcopy(&cryptvars->aes_iv[0], 
+                            &gIOHibernateCryptWakeContext.aes_iv[0], 
+                            sizeof(cryptvars->aes_iv));
+                    cryptvars = &gIOHibernateCryptWakeContext;
+                }
+                HIBLOG("image1Size %qd\n", image1Size);
+            }
+        }
+        while (!iterDone);
+        if (kIOReturnSuccess != err)
+            break;
+        err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars);
+        if (kIOReturnSuccess != err)
+            break;
+
+        // Header:
+    
+        header->imageSize    = vars->fileVars->position;
+        header->image1Size   = image1Size;
+        header->bitmapSize   = bitmap_size;
+        header->pageCount    = pageCount;
+        header->encryptStart = vars->fileVars->encryptStart;
+    
+        header->restore1Sum  = restore1Sum;
+        header->image1Sum    = sum1;
+        header->image2Sum    = sum2;
+    
+        count = vars->fileExtents->getLength();
+        if (count > sizeof(header->fileExtentMap))
+        {
+            header->fileExtentMapSize = count;
+            count = sizeof(header->fileExtentMap);
+        }
+        else
+            header->fileExtentMapSize = sizeof(header->fileExtentMap);
+        bcopy(&fileExtents[0], &header->fileExtentMap[0], count);
+    
+        IOPolledFileSeek(vars->fileVars, 0);
+        err = IOPolledFileWrite(vars->fileVars,
+                                    (uint8_t *) header, sizeof(IOHibernateImageHeader), 
+                                    cryptvars);
+        if (kIOReturnSuccess != err)
+            break;
+        err = IOPolledFileWrite(vars->fileVars, 0, 0, cryptvars);
+        if (kIOReturnSuccess != err)
+            break;
+        err = IOHibernatePollerIODone(vars->fileVars);
+        if (kIOReturnSuccess != err)
+            break;
+    }
+    while (false);
+    
+    clock_get_uptime(&endTime);
+    SUB_ABSOLUTETIME(&endTime, &allTime);
+    absolutetime_to_nanoseconds(endTime, &nsec);
+    HIBLOG("all time: %qd ms, ", 
+               nsec / 1000000ULL);
+
+    absolutetime_to_nanoseconds(compTime, &nsec);
+    HIBLOG("comp time: %qd ms, ", 
+               nsec / 1000000ULL);
+
+    absolutetime_to_nanoseconds(decoTime, &nsec);
+    HIBLOG("deco time: %qd ms, ", 
+               nsec / 1000000ULL);
+
+    HIBLOG("\nimage %qd, uncompressed %qd (%d), compressed %qd (%d%%), sum1 %x, sum2 %x\n", 
+               header->imageSize,
+               uncompressedSize, atop_32(uncompressedSize), compressedSize,
+               (int) ((compressedSize * 100ULL) / uncompressedSize),
+               sum1, sum2);
+
+    if (pollerOpen)
+        IOHibernatePollerClose(vars->fileVars, kIOPolledBeforeSleepState);
+
+    HIBLOG("hibernate_write_image done(%x)\n", err);
+
+    // should we come back via regular wake, set the state in memory.
+    gIOHibernateState = kIOHibernateStateInactive;
+
+    if ((kIOReturnSuccess == err) && !(kIOHibernateModeSleep & gIOHibernateMode))
+        return (true  /* power down */ );
+    else
+        return (false /* sleep */ );
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+DECLARE_IOHIBERNATEPROGRESSALPHA
+
/*
 * Redraw the row of hibernation progress "blobs" near the bottom of the
 * screen.  Blobs with index < select are drawn light gray (completed), the
 * rest mid gray.  Only blobs [firstBlob .. min(select, count-1)] are touched.
 * Supports 16bpp (x555) and 32bpp (888) framebuffers; partially transparent
 * pixels are alpha-blended with the original framebuffer contents saved in
 * display->progressSaveUnder.  Runs in the polled/no-interrupt context, so it
 * writes the mapped framebuffer directly.
 */
static void
ProgressUpdate(hibernate_graphics_t * display, uint8_t * screen, int32_t firstBlob, int32_t select)
{
    uint32_t  rowBytes, pixelShift;
    uint32_t  x, y;
    int32_t   blob, lastBlob;
    uint32_t  alpha, in, color, result;
    uint8_t * out;
    // Per-blob read cursor into the saved-under pixel arrays.
    uint32_t  saveindex[kIOHibernateProgressCount] = { 0 };

    // pixelShift: 1 for 16bpp, 2 for 32bpp; <1 means an unsupported depth.
    pixelShift = display->depth >> 4;
    if (pixelShift < 1)
        return;

    rowBytes = display->rowBytes;

    // Move to the top-left corner of the (horizontally centered) blob row.
    screen += ((display->width 
            - kIOHibernateProgressCount * (kIOHibernateProgressWidth + kIOHibernateProgressSpacing)) << (pixelShift - 1))
                + (display->height - kIOHibernateProgressOriginY - kIOHibernateProgressHeight) * rowBytes;

    lastBlob  = (select < kIOHibernateProgressCount) ? select : (kIOHibernateProgressCount - 1);

    screen += (firstBlob * (kIOHibernateProgressWidth + kIOHibernateProgressSpacing)) << pixelShift;

    for (y = 0; y < kIOHibernateProgressHeight; y++)
    {
        out = screen + y * rowBytes;
        for (blob = firstBlob; blob <= lastBlob; blob++)
        {
            color = (blob < select) ? kIOHibernateProgressLightGray : kIOHibernateProgressMidGray;
            for (x = 0; x < kIOHibernateProgressWidth; x++)
            {
                alpha  = gIOHibernateProgressAlpha[y][x];
                result = color;
                if (alpha)
                {
                    if (0xff != alpha)
                    {
                        // Blend the gray over the saved background pixel.
                        in = display->progressSaveUnder[blob][saveindex[blob]++];
                        result = ((255 - alpha) * in + alpha * result + 0xff) / 255;
                    }
                    if (1 == pixelShift)
                    {
                        result >>= 3;
                        *((uint16_t *)out) = (result << 10) | (result << 5) | result;  // 16
                    }
                    else
                        *((uint32_t *)out) = (result << 16) | (result << 8) | result;  // 32
                }
                out += (1 << pixelShift);
            }
            out += (kIOHibernateProgressSpacing << pixelShift);
        }
    }
}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+extern "C" void 
+hibernate_machine_init(void)
+{
+    IOReturn     err;
+    uint32_t     sum;
+    uint32_t     pagesDone;
+    AbsoluteTime allTime, endTime;
+    uint64_t     nsec;
+    uint32_t     lastProgressStamp = 0;
+    uint32_t     progressStamp;
+    uint64_t    progressZeroPosition = 0;
+    uint32_t    blob, lastBlob = (uint32_t) -1L;
+    hibernate_cryptvars_t * cryptvars = 0;
+
+    IOHibernateVars * vars  = &gIOHibernateVars;
+
+    if (!vars->fileVars || !vars->fileVars->pollers || !vars->fileExtents)
+       return;
+
+    if ((kIOHibernateModeDiscardCleanActive | kIOHibernateModeDiscardCleanInactive) & gIOHibernateMode)
+        hibernate_page_list_discard(vars->page_list);
+
+
+    sum = gIOHibernateCurrentHeader->actualImage1Sum;
+    pagesDone = gIOHibernateCurrentHeader->actualUncompressedPages;
+
+    HIBLOG("hibernate_machine_init: state %d, image pages %d, sum was %x, image1Size %qx, conflictCount %d, nextFree %x\n",
+           gIOHibernateState, pagesDone, sum, gIOHibernateCurrentHeader->image1Size,
+           gIOHibernateCurrentHeader->conflictCount, gIOHibernateCurrentHeader->nextFree);
+
+    if (kIOHibernateStateWakingFromHibernate != gIOHibernateState)
+    {
+       HIBLOG("regular wake\n");
+       return;
+    }
+
+    HIBPRINT("diag %x %x %x %x\n",
+           gIOHibernateCurrentHeader->diag[0], gIOHibernateCurrentHeader->diag[1], 
+           gIOHibernateCurrentHeader->diag[2], gIOHibernateCurrentHeader->diag[3]); 
+
+    HIBPRINT("video %x %d %d %d\n",
+           gIOHibernateGraphicsInfo->physicalAddress, gIOHibernateGraphicsInfo->depth, 
+           gIOHibernateGraphicsInfo->width, gIOHibernateGraphicsInfo->height); 
+
+    if (vars->videoMapping && gIOHibernateGraphicsInfo->physicalAddress)
+    {
+        vars->videoMapSize = round_page(gIOHibernateGraphicsInfo->height 
+                                        * gIOHibernateGraphicsInfo->rowBytes);
+        IOMapPages(kernel_map, 
+                    vars->videoMapping, gIOHibernateGraphicsInfo->physicalAddress,
+                    vars->videoMapSize, kIOMapInhibitCache );
+    }
+
+    uint8_t * src = (uint8_t *) vars->srcBuffer->getBytesNoCopy();;
+    uint32_t decoOffset;
+
+    clock_get_uptime(&allTime);
+
+    HIBLOG("IOHibernatePollerOpen(), ml_get_interrupts_enabled %d\n", ml_get_interrupts_enabled());
+    err = IOHibernatePollerOpen(vars->fileVars, kIOPolledAfterSleepState, 0);
+    HIBLOG("IOHibernatePollerOpen(%x)\n", err);
+
+    if (gIOHibernateCurrentHeader->previewSize)
+        progressZeroPosition = gIOHibernateCurrentHeader->previewSize 
+                             + gIOHibernateCurrentHeader->fileExtentMapSize 
+                             - sizeof(gIOHibernateCurrentHeader->fileExtentMap) 
+                             + ptoa_64(gIOHibernateCurrentHeader->restore1PageCount);
+
+    IOPolledFileSeek(vars->fileVars, gIOHibernateCurrentHeader->image1Size);
+
+    if (vars->videoMapping)
+    {
+        lastBlob = ((vars->fileVars->position - progressZeroPosition) * kIOHibernateProgressCount)
+                        / (gIOHibernateCurrentHeader->imageSize - progressZeroPosition);
+        ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, 0, lastBlob);
+    }
+
+    cryptvars = (kIOHibernateModeEncrypt & gIOHibernateMode) ? &gIOHibernateCryptWakeContext : 0;
+    if (kIOHibernateModeEncrypt & gIOHibernateMode)
+    {
+        cryptvars = &gIOHibernateCryptWakeContext;
+        bcopy(&gIOHibernateCryptWakeVars->aes_iv[0], 
+                &cryptvars->aes_iv[0], 
+                sizeof(cryptvars->aes_iv));
+    }
+
+    // kick off the read ahead
+    vars->fileVars->io          = false;
+    vars->fileVars->bufferHalf   = 0;
+    vars->fileVars->bufferLimit  = 0;
+    vars->fileVars->lastRead     = 0;
+    vars->fileVars->bufferOffset = vars->fileVars->bufferLimit;
+
+    IOPolledFileRead(vars->fileVars, 0, 0, cryptvars);
+    vars->fileVars->bufferOffset = vars->fileVars->bufferLimit;
+    // --
+
+    HIBLOG("hibernate_machine_init reading\n");
+
+    uint32_t * header = (uint32_t *) src;
+    sum = 0;
+
+    do
+    {
+       unsigned int count;
+       unsigned int page;
+        uint32_t     tag;
+       vm_offset_t  ppnum, compressedSize;
+
+       IOPolledFileRead(vars->fileVars, src, 8, cryptvars);
+
+       ppnum = header[0];
+       count = header[1];
+
+//     HIBPRINT("(%x, %x)\n", ppnum, count);
+
+       if (!count)
+           break;
+
+       for (page = 0; page < count; page++)
+       {
+           IOPolledFileRead(vars->fileVars, (uint8_t *) &tag, 4, cryptvars);
+
+           compressedSize = kIOHibernateTagLength & tag;
+           if (!compressedSize)
+           {
+               ppnum++;
+               pagesDone++;
+               continue;
+           }
+
+           IOPolledFileRead(vars->fileVars, src, (compressedSize + 3) & ~3, cryptvars);
+   
+           if (compressedSize != page_size)
+           {
+               decoOffset = page_size;
+               WKdm_decompress((WK_word*) src, (WK_word*) (src + decoOffset), PAGE_SIZE_IN_WORDS);
+           }
+           else
+               decoOffset = 0;
+
+           sum += hibernate_sum((src + decoOffset), page_size);
+
+           err = IOMemoryDescriptorReadToPhysical(vars->srcBuffer, decoOffset, ptoa_64(ppnum), page_size);
+           if (err)
+               HIBLOG("IOMemoryDescriptorReadToPhysical [%d] %x\n", ppnum, err);
+
+           ppnum++;
+           pagesDone++;
+
+            if (vars->videoMapping && (0 == (255 & pagesDone)))
+            {
+                blob = ((vars->fileVars->position - progressZeroPosition) * kIOHibernateProgressCount)
+                        / (gIOHibernateCurrentHeader->imageSize - progressZeroPosition);
+                if (blob != lastBlob)
+                {
+                    ProgressUpdate(gIOHibernateGraphicsInfo, (uint8_t *) vars->videoMapping, lastBlob, blob);
+                    lastBlob = blob;
+                }
+            }
+
+           if (0 == (8191 & pagesDone))
+           {
+               clock_get_uptime(&endTime);
+               SUB_ABSOLUTETIME(&endTime, &allTime);
+               absolutetime_to_nanoseconds(endTime, &nsec);
+               progressStamp = nsec / 750000000ULL;
+               if (progressStamp != lastProgressStamp)
+               {
+                   lastProgressStamp = progressStamp;
+                   HIBPRINT("pages %d (%d%%)\n", pagesDone, 
+                           (100 * pagesDone) / gIOHibernateCurrentHeader->pageCount);
+               }
+           }
+       }
+    }
+    while (true);
+
+    gIOHibernateCurrentHeader->actualImage2Sum = sum;
+
+    if (vars->fileVars->io)
+        (void) IOHibernatePollerIODone(vars->fileVars);
+
+    err = IOHibernatePollerClose(vars->fileVars, kIOPolledAfterSleepState);
+
+    if (vars->videoMapping)
+        ProgressUpdate(gIOHibernateGraphicsInfo, 
+                        (uint8_t *) vars->videoMapping, 0, kIOHibernateProgressCount);
+
+    clock_get_uptime(&endTime);
+    SUB_ABSOLUTETIME(&endTime, &allTime);
+    absolutetime_to_nanoseconds(endTime, &nsec);
+
+    HIBLOG("hibernate_machine_init pagesDone %d sum2 %x, time: %qd ms\n", 
+               pagesDone, sum, nsec / 1000000ULL);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
diff --git a/iokit/Kernel/IOHibernateInternal.h b/iokit/Kernel/IOHibernateInternal.h
new file mode 100644 (file)
index 0000000..5978373
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdint.h>
+
#ifdef __cplusplus

enum { kIOHibernateAESKeySize = 128 }; /* bits */

/*
 * Per-hibernation state gathered by IOHibernateSystemSleep() and consumed by
 * hibernate_write_image() / hibernate_machine_init().
 */
struct IOHibernateVars
{
    hibernate_page_list_t *            page_list;        /* all pages */
    hibernate_page_list_t *            page_list_wired;  /* wired-only pages */
    class IOBufferMemoryDescriptor *    ioBuffer;
    class IOBufferMemoryDescriptor *    srcBuffer;       /* compress/decompress staging */
    class IOMemoryDescriptor *          previewBuffer;
    OSData *                           previewData;
    OSData *                           fileExtents;      /* image file extent list */
    OSObject *                         saveBootDevice;

    struct IOPolledFileIOVars *                fileVars;
    vm_offset_t                                videoMapping;  /* progress framebuffer mapping */
    vm_size_t                          videoAllocSize;
    vm_size_t                          videoMapSize;
    uint8_t                            haveFastBoot;
    uint8_t                            saveBootAudioVolume;
    uint8_t                            wiredCryptKey[kIOHibernateAESKeySize / 8];
    uint8_t                            cryptKey[kIOHibernateAESKeySize / 8];
};
typedef struct IOHibernateVars IOHibernateVars;


/*
 * State for polled (interrupts-off) I/O to the image file: a double-buffered
 * window (bufferHalf/bufferOffset/bufferLimit) over the file's extent map.
 */
struct IOPolledFileIOVars
{
    struct kern_direct_file_io_ref_t * fileRef;
    class OSArray *                    pollers;
    IOByteCount                                blockSize;
    uint8_t *                                  buffer;
    IOByteCount                        bufferSize;
    IOByteCount                        bufferLimit;
    IOByteCount                        bufferOffset;
    IOByteCount                        bufferHalf;
    IOByteCount                                extentRemaining;
    IOByteCount                                lastRead;
    uint64_t                           block0;
    uint64_t                           position;        /* logical file position */
    uint64_t                           extentPosition;
    uint64_t                           encryptStart;    /* file offset where encryption begins */
    IOPolledFileExtent *               extentMap;
    IOPolledFileExtent *               currentExtent;
    bool                               io;              /* an I/O is outstanding */
    IOReturn                           ioStatus;
};
typedef struct IOPolledFileIOVars IOPolledFileIOVars;

#endif         /* __cplusplus */

/* Per-page tag in the image stream: signature bits plus compressed length. */
enum
{
    kIOHibernateTagSignature = 0x53000000,
    kIOHibernateTagLength    = 0x00001fff,
};

#ifdef __cplusplus
extern "C"
#endif         /* __cplusplus */
uint32_t
hibernate_sum(uint8_t *buf, int32_t len);

/* Linker-provided bounds of the __HIB and __DATA sections. */
extern vm_offset_t sectHIBB;
extern int         sectSizeHIB;
extern vm_offset_t sectDATAB;
extern int         sectSizeDATA;
+
diff --git a/iokit/Kernel/IOHibernateRestoreKernel.c b/iokit/Kernel/IOHibernateRestoreKernel.c
new file mode 100644 (file)
index 0000000..3c6f168
--- /dev/null
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <stdint.h>
+#include <mach/mach_types.h>
+#include <mach/vm_param.h>
+#include <IOKit/IOHibernatePrivate.h>
+#include <pexpert/boot.h>
+#include <crypto/aes.h>
+
+#include "WKdm.h"
+#include "IOHibernateInternal.h"
+
/*
This code is linked into the kernel but placed in the "__HIB" section, which means
it is used by code running in the special context of restoring the kernel text and data
from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
it calls or references needs to be careful to touch only memory that is also in the "__HIB" section.
*/
+
/* Set to kIOHibernateStateWakingFromHibernate by the restore path; examined
   by hibernate_machine_init() after wake. */
uint32_t gIOHibernateState;

/* Statically allocated (__HIB resident) copies of the image header, graphics
   state and crypt wake variables handed in by the booter — the restore code
   must not allocate. */
static IOHibernateImageHeader _hibernateHeader;
IOHibernateImageHeader * gIOHibernateCurrentHeader = &_hibernateHeader;

static hibernate_graphics_t _hibernateGraphics;
hibernate_graphics_t * gIOHibernateGraphicsInfo = &_hibernateGraphics;

static hibernate_cryptwakevars_t _cryptWakeVars;
hibernate_cryptwakevars_t * gIOHibernateCryptWakeVars = &_cryptWakeVars;

#if __i386__
extern void   acpi_wake_prot_entry(void);
#endif
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
/* Adler-32 checksum building blocks (zlib-style): s1 accumulates the byte
   sum, s2 the running sum of s1; modulo is deferred for NMAX bytes. */
#define BASE 65521L /* largest prime smaller than 65536 */
#define NMAX 5000  
// NMAX (was 5521) the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1

#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}
#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
#define DO16(buf)   DO8(buf,0); DO8(buf,8);
+
/*
 * Adler-32 checksum of buf[0..len) (zlib convention: low accumulator starts
 * at 1).  The modulo reduction is deferred for up to NMAX bytes, which is
 * small enough that the 32-bit accumulators cannot overflow.
 */
uint32_t
hibernate_sum(uint8_t *buf, int32_t len)
{
    enum { kAdlerBase = 65521, kAdlerNMax = 5000 };
    unsigned long lo = 1;
    unsigned long hi = 0;

    while (len > 0)
    {
        int32_t chunk = (len < kAdlerNMax) ? len : kAdlerNMax;

        len -= chunk;
        while (chunk-- > 0)
        {
            lo += *buf++;
            hi += lo;
        }
        lo %= kAdlerBase;
        hi %= kAdlerBase;
    }
    return (uint32_t) ((hi << 16) | lo);
}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
/* Count leading zero bits of a 32-bit value (cntlzw(0) == 32 on ppc; on
   i386 the cmov with 63 makes 31^63 yield 32 for a zero input as well).
   Implemented with inline asm because the restore context cannot rely on
   compiler builtins living outside the __HIB section. */
#if __ppc__
static __inline__ unsigned int cntlzw(unsigned int num)
{
  unsigned int result;
  __asm__ volatile("cntlzw %0, %1" : "=r" (result) : "r" (num));
  return result;
}
#elif __i386__
static __inline__ unsigned int cntlzw(unsigned int num)
{
    unsigned int result;
    /* bsrl finds the highest set bit; cmovel substitutes 63 when num == 0. */
    __asm__ volatile(  "bsrl   %1, %0\n\t"
                       "cmovel %2, %0"
                     : "=r" (result)
                     : "rm" (num), "r" (63));
    return 31 ^ result;
}
#else
#error arch
#endif
+
+void 
+hibernate_page_bitset(hibernate_page_list_t * list, boolean_t set, uint32_t page)
+{
+    uint32_t             bank;
+    hibernate_bitmap_t * bitmap = &list->bank_bitmap[0];
+
+    for (bank = 0; bank < list->bank_count; bank++)
+    {
+       if ((page >= bitmap->first_page) && (page <= bitmap->last_page))
+       {
+           page -= bitmap->first_page;
+           if (set)
+               bitmap->bitmap[page >> 5] |= (0x80000000 >> (page & 31));
+               //setbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]);
+           else
+               bitmap->bitmap[page >> 5] &= ~(0x80000000 >> (page & 31));
+               //clrbit(page - bitmap->first_page, (int *) &bitmap->bitmap[0]);
+           break;
+       }
+       bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
+    }
+}
+
+boolean_t 
+hibernate_page_bittst(hibernate_page_list_t * list, uint32_t page)
+{
+    boolean_t           result = TRUE;
+    uint32_t             bank;
+    hibernate_bitmap_t * bitmap = &list->bank_bitmap[0];
+
+    for (bank = 0; bank < list->bank_count; bank++)
+    {
+       if ((page >= bitmap->first_page) && (page <= bitmap->last_page))
+       {
+           page -= bitmap->first_page;
+            result = (0 != (bitmap->bitmap[page >> 5] & (0x80000000 >> (page & 31))));
+           break;
+       }
+       bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
+    }
+    return (result);
+}
+
// count bits clear or set (set == TRUE) starting at index page.
// Uses cntlzw on (possibly complemented) bitmap words to count the run
// length; gaps between banks are treated as "set" bits, so a run of set
// bits may span banks while a run of clear bits cannot.
uint32_t
hibernate_page_list_count(hibernate_page_list_t * list, uint32_t set, uint32_t page)
{
    uint32_t                   bank, count;
    hibernate_bitmap_t *       bitmap;

    bitmap = &list->bank_bitmap[0];
    count  = 0;

    for (bank = 0; bank < list->bank_count; bank++)
    {
       // bits between banks are "set"
       if (set && (page < bitmap->first_page))
       {
           count += bitmap->first_page - page;
           page  = bitmap->first_page;
       }
       if ((page >= bitmap->first_page) && (page <= bitmap->last_page))
       {
           uint32_t index, bit, bits;
       
           index = (page - bitmap->first_page) >> 5;
           bit = (page - bitmap->first_page) & 31;
       
           while (TRUE)
           {
               // Complement when counting set bits so a leading-zero count
               // always measures the run we are interested in.
               bits = bitmap->bitmap[index];
               if (set)
                   bits = ~bits;
               bits = (bits << bit);
               count += cntlzw(bits);
               if (bits)
                   break;        // run ended inside this word
               count -= bit;     // cntlzw counted the shifted-out bits too
           
               // Run covers the rest of the word; continue word by word.
               while (++index < bitmap->bitmapwords)
               {
                   bits = bitmap->bitmap[index];
                   if (set)
                       bits = ~bits;
                   count += cntlzw(bits);
                   if (bits)
                       break;
               }
               if (bits)
                   break;
               if (!set)
                   break;
               // bits between banks are "set"
               bank++;
               if (bank >= list->bank_count)
                   break;
               // Account for the inter-bank gap, then continue in next bank.
               count -= (bitmap->last_page + 1);
               bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
               count += bitmap->first_page;
               index = 0;
               bit = 0;                            
           }
           break;
       }
       bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }

    return (count);
}
+
+
+static uint32_t
+hibernate_page_list_grab(hibernate_page_list_t * map, uint32_t * _nextFree)
+{
+    uint32_t nextFree = *_nextFree;
+
+    if (!nextFree)
+       nextFree = hibernate_page_list_count(map, 0, 0);
+
+    *_nextFree = nextFree + 1 + hibernate_page_list_count(map, 0, nextFree + 1);
+
+    return (nextFree);
+}
+
/*
 * Decompress (if needed) and restore one page of image data to physical
 * page ppnum, returning its Adler checksum contribution.  `buffer` is a
 * scratch page used when the destination cannot be written directly.
 * NOTE(review): the ppnum thresholds below appear to select pages that are
 * directly addressable in this context (low memory on ppc, below the
 * 3 GB kernel window on i386) — confirm against pmap assumptions.
 */
static uint32_t
store_one_page(uint32_t procFlags, uint32_t * src, uint32_t compressedSize, 
               uint32_t * buffer, uint32_t ppnum)
{
    uint64_t dst;
    uint32_t sum;

    dst = ptoa_64(ppnum);
#if __ppc__
    if (ppnum < 0x00100000)
	buffer = (uint32_t *) (uint32_t) dst;   // decompress straight into place
#elif __i386__
    if (ppnum < atop_32(0xC0000000)) {
        buffer = (uint32_t *) (uint32_t) dst;   // decompress straight into place
    }
#endif

    if (compressedSize != PAGE_SIZE)
    {
	WKdm_decompress((WK_word*) src, (WK_word*) buffer, PAGE_SIZE >> 2);
	src = buffer;
    }

    sum = hibernate_sum((uint8_t *) src, PAGE_SIZE);

    // If the data is already at its destination, tell the restore primitive
    // (src == 0) so it does not copy over itself.
    if (((uint64_t) (uint32_t) src) == dst)
	src = 0;

    hibernate_restore_phys_page((uint64_t) (uint32_t) src, dst, PAGE_SIZE, procFlags);

    return (sum);
}
+
/*
 * Minimal forward byte copy.  The __HIB restore context cannot call the
 * regular kernel bcopy, so this self-contained loop is used instead.
 * Regions must not overlap destructively (copy is low-to-high).
 */
static void 
bcopy_internal(const void *src, void *dst, uint32_t len)
{
    const char *from = (const char *) src;
    char       *to   = (char *) dst;

    while (len--)
        *to++ = *from++;
}
+
/*
 * Entry point called by the booter after it has loaded the hibernation
 * image.  Restores image1 (the wired pages) to their physical homes,
 * staging any page whose destination conflicts with memory the restore
 * itself is using (the map, the image, the page index) into scratch pages
 * and copying those back afterwards.  Everything touched here must live in
 * the __HIB section.  p2/p3 optionally carry graphics and crypt-wake state.
 * Does not return normally: jumps to the architecture reset/wake vector.
 */
long 
hibernate_kernel_entrypoint(IOHibernateImageHeader * header, 
                            void * p2, void * p3, __unused void * p4)
{
    typedef void (*ResetProc)(void);
    uint32_t idx;
    uint32_t * src;
    uint32_t * buffer;
    uint32_t * pageIndexSource;
    hibernate_page_list_t * map;
    uint32_t count;
    uint32_t ppnum;
    uint32_t page;
    uint32_t conflictCount;
    uint32_t compressedSize;
    uint32_t uncompressedPages;
    uint32_t copyPageListHead;
    uint32_t * copyPageList;
    uint32_t copyPageIndex;
    uint32_t sum;
    uint32_t nextFree;
    uint32_t lastImagePage;
    uint32_t lastMapPage;
    uint32_t lastPageIndexPage;


    // Stash the booter-provided header/graphics/crypt state in static
    // __HIB storage before restoring overwrites the originals.
    bcopy_internal(header, 
                gIOHibernateCurrentHeader, 
                sizeof(IOHibernateImageHeader));

    if (p2) 
        bcopy_internal(p2, 
                gIOHibernateGraphicsInfo, 
                sizeof(hibernate_graphics_t));
    else
        gIOHibernateGraphicsInfo->physicalAddress = gIOHibernateGraphicsInfo->depth = 0;

    if (p3)
        bcopy_internal(p3, 
                gIOHibernateCryptWakeVars, 
                sizeof(hibernate_cryptvars_t));

    // Page data begins after the in-header extent map and the restore1 pages.
    src = (uint32_t *)
                (((uint32_t) &header->fileExtentMap[0]) 
                            + header->fileExtentMapSize 
                            + ptoa_32(header->restore1PageCount));

    if (header->previewSize)
    {
        // With a preview, the page index precedes the map/page data.
        pageIndexSource = src;
        map = (hibernate_page_list_t *)(((uint32_t) pageIndexSource) + header->previewSize);
        src = (uint32_t *) (((uint32_t) pageIndexSource) + header->previewPageListSize);
    }
    else
    {
        pageIndexSource = 0;
        map = (hibernate_page_list_t *) src;
        src = (uint32_t *) (((uint32_t) map) + header->bitmapSize);
    }

    lastPageIndexPage = atop_32(src);

    lastImagePage = atop_32(((uint32_t) header) + header->image1Size);

    lastMapPage = atop_32(((uint32_t) map) + header->bitmapSize);

    // knock all the image pages to be used out of free map
    for (ppnum = atop_32(header); ppnum <= lastImagePage; ppnum++)
    {
	hibernate_page_bitset(map, FALSE, ppnum);
    }

    nextFree = 0;
    buffer = (uint32_t *) ptoa_32(hibernate_page_list_grab(map, &nextFree));

    sum = gIOHibernateCurrentHeader->actualRestore1Sum;
    gIOHibernateCurrentHeader->diag[0] = (uint32_t) header;
    gIOHibernateCurrentHeader->diag[1] = sum;

    uncompressedPages = 0;
    conflictCount     = 0;
    copyPageListHead  = 0;
    copyPageList      = 0;
    copyPageIndex     = PAGE_SIZE >> 2;   // forces allocation of first list page

    compressedSize    = PAGE_SIZE;        // preview/index pages are stored raw

    // Main restore loop: drain the preview page index (if any), then the
    // tagged page stream; each source run is { ppnum, count } followed by
    // the page payloads.
    while (1)
    {
        if (pageIndexSource)
        {
            ppnum = pageIndexSource[0];
            count = pageIndexSource[1];
            pageIndexSource += 2;
            if (!count)
            {
                // Index exhausted; switch to the tagged page stream.
                pageIndexSource = 0;
                src =  (uint32_t *) (((uint32_t) map) + gIOHibernateCurrentHeader->bitmapSize);
                ppnum = src[0];
                count = src[1];
                src += 2;
            } 
        }
        else
        {
            ppnum = src[0];
            count = src[1];
            if (!count)
                break;
            src += 2;
	}

	for (page = 0; page < count; page++, ppnum++)
	{
            uint32_t tag;
	    int conflicts;

            if (!pageIndexSource)
            {
                tag = *src++;
                compressedSize = kIOHibernateTagLength & tag;
            }

	    // Destination conflicts with memory the restore still needs?
	    conflicts = (((ppnum >= atop_32(map)) && (ppnum <= lastMapPage))
		     || ((ppnum >= atop_32(src)) && (ppnum <= lastImagePage)));

            if (pageIndexSource)
                conflicts |= ((ppnum >= atop_32(pageIndexSource)) && (ppnum <= lastPageIndexPage));

	    if (!conflicts)
	    {
		if (compressedSize)
		    sum += store_one_page(gIOHibernateCurrentHeader->processorFlags,
					    src, compressedSize, buffer, ppnum);
		uncompressedPages++;
	    }
	    else
	    {
		// Defer this page: stash its data in a freshly grabbed page
		// and record (ppnum, bufferPage, size) in the copy list.
		uint32_t   bufferPage;
		uint32_t * dst;

		conflictCount++;

		// alloc new buffer page
		bufferPage = hibernate_page_list_grab(map, &nextFree);

		if (copyPageIndex > ((PAGE_SIZE >> 2) - 3))
		{
		    // alloc new copy list page
		    uint32_t pageListPage = hibernate_page_list_grab(map, &nextFree);
		    // link to current
		    if (copyPageList)
			copyPageList[1] = pageListPage;
		    else
			copyPageListHead = pageListPage;
		    copyPageList = (uint32_t *) ptoa_32(pageListPage);
		    copyPageList[1] = 0;
		    copyPageIndex = 2;
		}

		copyPageList[copyPageIndex++] = ppnum;
		copyPageList[copyPageIndex++] = bufferPage;
		copyPageList[copyPageIndex++] = compressedSize;
		copyPageList[0] = copyPageIndex;

		dst = (uint32_t *) ptoa_32(bufferPage);
		for (idx = 0; idx < ((compressedSize + 3) >> 2); idx++)
		    dst[idx] = src[idx];
	    }
	    src += ((compressedSize + 3) >> 2);
	}
    }

    // -- copy back conflicts

    copyPageList = (uint32_t *) ptoa_32(copyPageListHead);
    while (copyPageList)
    {
	for (copyPageIndex = 2; copyPageIndex < copyPageList[0]; copyPageIndex += 3)
	{
	    ppnum          =              copyPageList[copyPageIndex + 0];
	    src            = (uint32_t *) ptoa_32(copyPageList[copyPageIndex + 1]);
	    compressedSize =              copyPageList[copyPageIndex + 2];

	    sum += store_one_page(gIOHibernateCurrentHeader->processorFlags,
				    src, compressedSize, buffer, ppnum);
	    uncompressedPages++;
	}
	copyPageList = (uint32_t *) ptoa_32(copyPageList[1]);
    }

    // -- image has been destroyed...

    gIOHibernateCurrentHeader->actualImage1Sum         = sum;
    gIOHibernateCurrentHeader->actualUncompressedPages = uncompressedPages;
    gIOHibernateCurrentHeader->conflictCount           = conflictCount;
    gIOHibernateCurrentHeader->nextFree                = nextFree;

    gIOHibernateState = kIOHibernateStateWakingFromHibernate;

    // Jump to the architecture wake vector; does not return.
#if __ppc__
    ResetProc proc;
    proc = (ResetProc) 0x100;
    __asm__ volatile("ori 0, 0, 0" : : );
    proc();
#elif __i386__
    ResetProc proc;
    proc = (ResetProc) acpi_wake_prot_entry;

    proc();
#endif
  
    return -1;
}
+
index b811ffbf8e797f16c06dadc0496786bd74704ef7..7c95249a5c488e1fcb3141916e1768d7406c7f14 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1998-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
 #include "IOKit/pwr_mgt/IOPowerConnection.h"
 #include "IOPMPowerStateQueue.h"
 #include <IOKit/IOCatalogue.h>
+#include <IOKit/IOHibernatePrivate.h>
 
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
-
-#include <IOKit/IOPolledInterface.h>
-
-OSDefineMetaClassAndAbstractStructors(IOPolledInterface, OSObject);
-
-OSMetaClassDefineReservedUnused(IOPolledInterface, 0);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 1);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 2);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 3);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 4);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 5);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 6);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 7);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 8);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 9);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 10);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 11);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 12);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 13);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 14);
-OSMetaClassDefineReservedUnused(IOPolledInterface, 15);
-
-IOReturn
-IOPolledInterface::checkAllForWork(void)
-{
-    return (kIOReturnSuccess);
-}
-
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-
+#ifdef __ppc__
+#include <ppc/pms.h>
+#endif
 
 extern "C" void kprintf(const char *, ...);
 
@@ -217,6 +188,7 @@ static void disk_sync_callout(thread_call_param_t p0, thread_call_param_t p1)
     IOService                               *rootDomain = (IOService *) p0;
     unsigned long                           pmRef = (unsigned long) p1;
 
+    IOHibernateSystemSleep();
     sync_internal();
     rootDomain->allowPowerChange(pmRef);
 }
@@ -314,6 +286,7 @@ bool IOPMrootDomain::start ( IOService * nub )
         temp_entry->release();
     }
 
+    IOHibernateSystemInit(this);
 
     registerService();                                         // let clients find us
 
@@ -353,6 +326,10 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj)
     const OSSymbol                            *fileserver_string = OSSymbol::withCString("AutoRestartOnPowerLoss");
     const OSSymbol                            *wakeonlid_string = OSSymbol::withCString("WakeOnLid");
     const OSSymbol                            *wakeonac_string = OSSymbol::withCString("WakeOnACChange");
+    const OSSymbol                            *hibernatemode_string = OSSymbol::withCString(kIOHibernateModeKey);
+    const OSSymbol                            *hibernatefile_string = OSSymbol::withCString(kIOHibernateFileKey);
+    const OSSymbol                            *hibernatefreeratio_string = OSSymbol::withCString(kIOHibernateFreeRatioKey);
+    const OSSymbol                            *hibernatefreetime_string = OSSymbol::withCString(kIOHibernateFreeTimeKey);
     const OSSymbol                            *timezone_string = OSSymbol::withCString("TimeZoneOffsetSeconds");
     
     if(!dict) 
@@ -381,6 +358,26 @@ IOReturn IOPMrootDomain::setProperties ( OSObject *props_obj)
         setProperty(stall_halt_string, b);
     }
 
+    if ( hibernatemode_string
+       && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatemode_string))))
+    {
+       setProperty(hibernatemode_string, n);
+    }
+    if ( hibernatefreeratio_string
+       && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreeratio_string))))
+    {
+       setProperty(hibernatefreeratio_string, n);
+    }
+    if ( hibernatefreetime_string
+       && (n = OSDynamicCast(OSNumber, dict->getObject(hibernatefreetime_string))))
+    {
+       setProperty(hibernatefreetime_string, n);
+    }
+    if ( hibernatefile_string
+       && (str = OSDynamicCast(OSString, dict->getObject(hibernatefile_string))))
+    {
+       setProperty(hibernatefile_string, str);
+    }
 
     // Relay AutoWake setting to its controller
     if( auto_wake_string
@@ -582,8 +579,24 @@ void IOPMrootDomain::stopIgnoringClamshellEventsDuringWakeup(void)
 // same thread.
 //*********************************************************************************
 
+static int pmsallsetup = 0;
+
 IOReturn IOPMrootDomain::setAggressiveness ( unsigned long type, unsigned long newLevel )
 {
+#ifdef __ppc__
+       if(pmsExperimental & 3) kprintf("setAggressiveness: type = %08X, newlevel = %08X\n", type, newLevel);
+       if(pmsExperimental & 1) {                                               /* Is experimental mode enabled? */
+               if(pmsInstalled && (type == kPMSetProcessorSpeed)) {    /* We want to look at all processor speed changes if stepper is installed */
+                       if(pmsallsetup) return kIOReturnSuccess;        /* If already running, just eat this */
+                       kprintf("setAggressiveness: starting stepper...\n");
+                       pmsallsetup = 1;                                                /* Remember we did this */
+                       pmsPark();
+                       pmsStart();                                                             /* Get it all started up... */
+                       return kIOReturnSuccess;                                /* Leave now... */
+               }
+       }
+#endif
+
     if ( pm_vars->PMcommandGate ) {
         pm_vars->PMcommandGate->runAction(broadcast_aggressiveness,(void *)type,(void *)newLevel);
     }
@@ -659,13 +672,17 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState )
                 // re-enable this timer for next sleep
                 idleSleepPending = false;                      
 
-                IOLog("System Sleep\n");
+                IOLog("System %sSleep\n", gIOHibernateState ? "Safe" : "");
+
+                IOHibernateSystemHasSlept();
+
                 pm_vars->thePlatform->sleepKernel();
 
                 // The CPU(s) are off at this point. When they're awakened by CPU interrupt,
                 // code will resume execution here.
 
                 // Now we're waking...
+               IOHibernateSystemWake();
 
                 // stay awake for at least 30 seconds
                 clock_interval_to_deadline(30, kSecondScale, &deadline);       
@@ -690,7 +707,7 @@ void IOPMrootDomain::powerChangeDone ( unsigned long previousState )
                 tellClients(kIOMessageSystemWillPowerOn);
 
                 // tell the tree we're waking
-                IOLog("System Wake\n");
+                IOLog("System %sWake\n", gIOHibernateState ? "SafeSleep " : "");
                 systemWake();
                 
                 // Allow drivers to request extra processing time before clamshell
@@ -1118,6 +1135,7 @@ void IOPMrootDomain::tellChangeUp ( unsigned long stateNum)
 {
     if ( stateNum == ON_STATE ) 
     {
+       IOHibernateSystemPostWake();
         return tellClients(kIOMessageSystemHasPoweredOn);
     }
 }
@@ -1217,6 +1235,8 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon,
 
             // We will ack within 20 seconds
             params->returnValue = 20 * 1000 * 1000;
+            if (gIOHibernateState)
+                params->returnValue += gIOHibernateFreeTime * 1000;    //add in time we could spend freeing pages
 
             if ( ! OSCompareAndSwap( 0, 1, &gSleepOrShutdownPending ) )
             {
index a5b07dd86e148a3fbd3ee7cf9239d349a9dff64f..1fa9a6ec770db69951dc3da6083e0d6928525372 100644 (file)
@@ -89,25 +89,17 @@ bool IOPlatformExpert::start( IOService * provider )
 {
     IORangeAllocator * physicalRanges;
     OSData *           busFrequency;
+    uint32_t           debugFlags;
     
     if (!super::start(provider))
       return false;
-
+    
+    // Override the mapper present flag if requested by boot arguments.
+    if (PE_parse_boot_arg("dart", &debugFlags) && (debugFlags == 0))
+      removeProperty(kIOPlatformMapperPresentKey);
+    
     // Register the presence or lack thereof a system 
     // PCI address mapper with the IOMapper class
-
-#if 1
-    IORegistryEntry * regEntry = IORegistryEntry::fromPath("/u3/dart", gIODTPlane);
-    if (!regEntry)
-       regEntry = IORegistryEntry::fromPath("/dart", gIODTPlane);
-    if (regEntry) {
-       int debugFlags;
-       if (!PE_parse_boot_arg("dart", &debugFlags) || debugFlags)
-           setProperty(kIOPlatformMapperPresentKey, kOSBooleanTrue);
-       regEntry->release();
-    }
-#endif
-
     IOMapper::setMapperRequired(0 != getProperty(kIOPlatformMapperPresentKey));
     
     gIOInterruptControllers = OSDictionary::withCapacity(1);
index af7e11dd009aca592cb08001c238d21b3aa6813d..a20e0a6a091464505a5b068db938829c413ebb05 100644 (file)
@@ -2264,15 +2264,22 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
 {
     IOReturn                            k = IOPMAckImplied;
     unsigned long                       childPower;
-    IOService                           *theChild = (IOService *)(theNub->copyChildEntry(gIOPowerPlane));
-
-    theNub->setAwaitingAck(true);                                      // in case they don't ack
+    IOService                           *theChild;
     
-    if ( ! theChild ) 
+    theChild = (IOService *)(theNub->copyChildEntry(gIOPowerPlane));
+    if(!theChild) 
     {
+        // The child has been detached since we grabbed the child iterator.
+        // Decrement pending_acks, already incremented in notifyAll,
+        // to account for this unexpected departure.
+        priv->head_note_pendingAcks--;
         return true;
     }
     
+    // Unless the child handles the notification immediately and returns
+    // kIOPMAckImplied, we'll be awaiting their acknowledgement later.
+    theNub->setAwaitingAck(true);
+    
     if ( is_prechange ) 
     {
         k = theChild->powerDomainWillChangeTo(priv->head_note_outputFlags,theNub);
@@ -2284,7 +2291,7 @@ bool IOService::notifyChild ( IOPowerConnection * theNub, bool is_prechange )
     if ( k == IOPMAckImplied ) 
     {
         // yes
-        priv->head_note_pendingAcks -=1;
+        priv->head_note_pendingAcks--;
         theNub->setAwaitingAck(false);
         childPower = theChild->currentPowerConsumption();
         if ( childPower == kIOPMUnknown ) 
diff --git a/iokit/Kernel/WKdm.h b/iokit/Kernel/WKdm.h
new file mode 100644 (file)
index 0000000..be3ca2d
--- /dev/null
@@ -0,0 +1,227 @@
+/* direct-mapped partial matching compressor with simple 22/10 split
+ *
+ *  Compresses buffers using a dictionary based match and partial match
+ *  (high bits only or full match) scheme.
+ *
+ *  Paul Wilson -- wilson@cs.utexas.edu
+ *  Scott F. Kaplan -- sfkaplan@cs.utexas.edu
+ *  September 1997
+ */
+
+/* compressed output format, in memory order
+ *  1. a four-word HEADER containing four one-word values:
+ *     i.   a one-word code saying what algorithm compressed the data
+ *     ii.  an integer WORD offset into the page saying
+ *          where the queue position area starts
+ *     iii. an integer WORD offset into the page saying where
+ *          the low-bits area starts
+ *     iv.  an integer WORD offset into the page saying where the
+ *          low-bits area ends
+ *
+ *  2. a 64-word TAGS AREA holding one two-bit tag for each word in 
+ *     the original (1024-word) page, packed 16 per word
+ *
+ *  3. a variable-sized FULL WORDS AREA (always word aligned and an
+ *     integral number of words) holding full-word patterns that
+ *     were not in the dictionary when encoded (i.e., dictionary misses)
+ *
+ *  4. a variable-sized QUEUE POSITIONS AREA (always word aligned and
+ *     an integral number of words) holding four-bit queue positions,
+ *     packed eight per word.
+ *
+ *  5. a variable-sized LOW BITS AREA (always word aligned and an
+ *     integral number of words) holding ten-bit low-bit patterns
+ *     (from partial matches), packed three per word. 
+ */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ============================================================ */
+/* Included files */
+
+//#include <stdio.h>
+//#include <unistd.h>
+//#include <math.h>
+//#include <strings.h>
+
+typedef unsigned long WK_word;
+
+/* at the moment we have dependencies on the page size.  That should
+ * be changed to work for any power-of-two size that's at least 16
+ * words, or something like that
+ */
+
+#define PAGE_SIZE_IN_WORDS 1024
+#define PAGE_SIZE_IN_BYTES 4096
+
+#define DICTIONARY_SIZE 16
+
+/*
+ * macros defining the basic layout of stuff in a page
+ */
+#define HEADER_SIZE_IN_WORDS 4
+#define TAGS_AREA_OFFSET 4
+#define TAGS_AREA_SIZE 64
+
+/* the next few are used during compression to write the header */
+#define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr)  \
+        (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf)
+#define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \
+        (compr_dest_buf[2] = lb_start_addr - compr_dest_buf)
+#define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \
+        (compr_dest_buf[3] = lb_end_addr - compr_dest_buf)
+
+/* the next few are only used during decompression to read the header */
+#define TAGS_AREA_START(decomp_src_buf)       \
+        (decomp_src_buf + TAGS_AREA_OFFSET)
+#define TAGS_AREA_END(decomp_src_buf)         \
+        (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE)
+#define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf)
+#define QPOS_AREA_START(decomp_src_buf)       \
+        (decomp_src_buf + decomp_src_buf[1])   
+#define LOW_BITS_AREA_START(decomp_src_buf)   \
+        (decomp_src_buf + (decomp_src_buf[2]))
+#define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf)
+#define LOW_BITS_AREA_END(decomp_src_buf)     \
+        (decomp_src_buf + (decomp_src_buf[3]))
+
+/* ============================================================ */
+/* Types and structures */
+
+/* A structure to store each element of the dictionary. */
+typedef WK_word DictionaryElement;
+
+/* ============================================================ */
+/* Misc constants */
+
+#define BITS_PER_WORD 32
+#define BYTES_PER_WORD 4
+#define NUM_LOW_BITS 10
+#define LOW_BITS_MASK 0x3FF
+#define ALL_ONES_MASK 0xFFFFFFFF
+
+#define TWO_BITS_PACKING_MASK 0x03030303
+#define FOUR_BITS_PACKING_MASK 0x0F0F0F0F
+#define TEN_LOW_BITS_MASK 0x000003FF
+#define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00
+
+/* Tag values.  NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED.
+ * Check for conditionals doing arithmetic on these things
+ * before changing them
+ */
+#define ZERO_TAG 0x0
+#define PARTIAL_TAG 0x1
+#define MISS_TAG 0x2
+#define EXACT_TAG 0x3
+
+#define BITS_PER_BYTE 8
+
+/* ============================================================ */
+/* Global macros */
+
+/* Shift out the low bits of a pattern to give the high bits pattern.
+   The stripped patterns are used for initial tests of partial
+   matches. */
+#define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS)
+
+/* Strip the high bits of a pattern so the low order bits can
+   be included in an encoding of a partial match. */
+#define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK)
+
+#if defined DEBUG_WK
+#define DEBUG_PRINT_1(string) printf (string)
+#define DEBUG_PRINT_2(string,value) printf(string, value)
+#else
+#define DEBUG_PRINT_1(string)
+#define DEBUG_PRINT_2(string, value)
+#endif
+
+/* Set up the dictionary before performing compression or
+   decompression.  Each element is loaded with some value, the
+   high-bits version of that value, and a next pointer. */
+#define PRELOAD_DICTIONARY { \
+  dictionary[0] = 1; \
+  dictionary[1] = 1; \
+  dictionary[2] = 1; \
+  dictionary[3] = 1; \
+  dictionary[4] = 1; \
+  dictionary[5] = 1; \
+  dictionary[6] = 1; \
+  dictionary[7] = 1; \
+  dictionary[8] = 1; \
+  dictionary[9] = 1; \
+  dictionary[10] = 1; \
+  dictionary[11] = 1; \
+  dictionary[12] = 1; \
+  dictionary[13] = 1; \
+  dictionary[14] = 1; \
+  dictionary[15] = 1; \
+}
+
+/* these are the constants for the hash function lookup table.
+ * Only zero maps to zero.  The rest of the table is the result
+ * of appending 17 randomizations of the multiples of 4 from
+ * 4 to 56.  Generated by a Scheme script in hash.scm. 
+ */
+#define HASH_LOOKUP_TABLE_CONTENTS { \
+   0, 52,  8, 56, 16, 12, 28, 20,  4, 36, 48, 24, 44, 40, 32, 60, \
+   8, 12, 28, 20,  4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \
+   8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44,  4,  8, 40, \
+  60, 32, 20, 44,  4, 36, 52, 24, 16, 56, 48, 12, 28, 16,  8, 40, \
+  36, 28, 32, 12,  4, 44, 52, 20, 24, 48, 60, 56, 40, 48,  8, 32, \
+  28, 36,  4, 44, 20, 56, 60, 24, 52, 16, 12, 12,  4, 48, 20,  8, \
+  52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \
+  52, 16, 32,  4, 48,  8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \
+  48,  8,  4, 60, 24, 56, 44, 12,  8, 36, 24, 28, 16, 60, 20, 56, \
+  32, 40, 48, 12,  4, 44, 52, 44, 40, 12, 56,  8, 36, 24, 60, 28, \
+  48,  4, 32, 20, 16, 52, 60, 12, 24, 36,  8,  4, 16, 56, 48, 44, \
+  40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44,  4, \
+  20, 60,  8, 48, 48, 52, 12, 20, 32, 44, 36, 28,  4, 40, 24,  8, \
+  56, 60, 16, 36, 32,  8, 40,  4, 52, 24, 44, 20, 12, 28, 48, 56, \
+  16, 60,  4, 52, 60, 48, 20, 16, 56, 44, 24,  8, 40, 12, 32, 28, \
+  36, 24, 32, 12,  4, 20, 16, 60, 36, 28,  8, 52, 40, 48, 44, 56  \
+}
+
+#define HASH_TO_DICT_BYTE_OFFSET(pattern) \
+        (hashLookupTable[((pattern) >> 10) & 0xFF])
+
+extern const char hashLookupTable[];
+
+/* EMIT... macros emit bytes or words into the intermediate arrays
+ */
+
+#define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr = byte_value; fill_ptr++;}
+#define EMIT_WORD(fill_ptr,word_value) {*fill_ptr = word_value; fill_ptr++;}
+
+/* RECORD... macros record the results of modeling in the intermediate
+ * arrays
+ */
+
+#define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); }
+
+#define RECORD_EXACT(queue_posn)  EMIT_BYTE(next_tag,EXACT_TAG);  \
+                                  EMIT_BYTE(next_qp,(queue_posn)); 
+
+#define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \
+   EMIT_BYTE(next_tag,PARTIAL_TAG);                   \
+   EMIT_BYTE(next_qp,(queue_posn));                   \
+   EMIT_WORD(next_low_bits,(low_bits_pattern))  }
+
+#define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \
+                                  EMIT_WORD(next_full_patt,(word_pattern)); 
+                                 
+void
+WKdm_decompress (WK_word* src_buf,
+                WK_word* dest_buf,
+                unsigned int words);
+unsigned int
+WKdm_compress (WK_word* src_buf,
+               WK_word* dest_buf,
+              unsigned int num_input_words);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
diff --git a/iokit/Kernel/WKdmCompress.c b/iokit/Kernel/WKdmCompress.c
new file mode 100644 (file)
index 0000000..aa9d1b5
--- /dev/null
@@ -0,0 +1,328 @@
+#include "WKdm.h"
+
+/***********************************************************************
+ *                   THE PACKING ROUTINES
+ */
+
+/* WK_pack_2bits()
+ * Pack some multiple of four words holding two-bit tags (in the low
+ * two bits of each byte) into an integral number of words, i.e.,
+ * one fourth as many.  
+ * NOTE: Pad the input out with zeroes to a multiple of four words!
+ */
+static WK_word*
+WK_pack_2bits(WK_word* source_buf,
+              WK_word* source_end,
+             WK_word* dest_buf) {
+
+   register WK_word* src_next = source_buf;
+   WK_word* dest_next = dest_buf;
+  
+   while (src_next < source_end) {
+      register WK_word temp = src_next[0];
+      temp |= (src_next[1] << 2);
+      temp |= (src_next[2] << 4);
+      temp |= (src_next[3] << 6);
+    
+      dest_next[0] = temp;
+      dest_next++;     
+      src_next += 4;
+   }
+  
+   return dest_next;
+
+}
+
+/* WK_pack_4bits()
+ * Pack an even number of words holding 4-bit patterns in the low bits
+ * of each byte into half as many words.
+ * note: pad out the input with zeroes to an even number of words!
+ */
+
+static WK_word*
+WK_pack_4bits(WK_word* source_buf,
+             WK_word* source_end,
+             WK_word* dest_buf) {
+   register WK_word* src_next = source_buf;
+   WK_word* dest_next = dest_buf;
+  
+   /* this loop should probably be unrolled */
+   while (src_next < source_end) {
+     register WK_word temp = src_next[0];
+     temp |= (src_next[1] << 4);
+    
+     dest_next[0] = temp;
+     dest_next++;     
+     src_next += 2;
+   }
+
+   return dest_next;
+
+}
+
+/* pack_3_tenbits()
+ * Pack a sequence of three ten bit items into one word.
+ * note: pad out the input with zeroes to a multiple of three words!
+ */
+static WK_word*
+WK_pack_3_tenbits(WK_word* source_buf,
+                 WK_word* source_end,
+                 WK_word* dest_buf) {
+
+   register WK_word* src_next = source_buf;
+   WK_word* dest_next = dest_buf;
+  
+   /* this loop should probably be unrolled */
+   while (src_next < source_end) {
+      register WK_word temp = src_next[0];
+      temp |= (src_next[1] << 10);
+      temp |= (src_next[2] << 20);
+    
+      dest_next[0] = temp;
+      dest_next++;     
+      src_next += 3;
+   }
+
+   return dest_next;
+
+}
+
+/***************************************************************************
+ *  WKdm_compress()---THE COMPRESSOR
+ */
+
+unsigned int
+WKdm_compress (WK_word* src_buf,
+               WK_word* dest_buf,
+              unsigned int num_input_words)
+{
+  DictionaryElement dictionary[DICTIONARY_SIZE];
+
+  /* arrays that hold output data in intermediate form during modeling */
+  /* and whose contents are packed into the actual output after modeling */
+
+  /* sizes of these arrays should be increased if you want to compress
+   * pages larger than 4KB
+   */
+  WK_word tempTagsArray[300];         /* tags for everything          */
+  WK_word tempQPosArray[300];         /* queue positions for matches  */
+  WK_word tempLowBitsArray[1200];     /* low bits for partial matches */
+
+  /* boundary_tmp will be used for keeping track of what's where in
+   * the compressed page during packing
+   */
+  WK_word* boundary_tmp;
+
+  /* Fill pointers for filling intermediate arrays (of queue positions
+   * and low bits) during encoding.
+   * Full words go straight to the destination buffer area reserved
+   * for them.  (Right after where the tags go.)
+   */
+  WK_word* next_full_patt;
+  char* next_tag = (char *) tempTagsArray;
+  char* next_qp = (char *) tempQPosArray;
+  WK_word* next_low_bits = tempLowBitsArray;
+
+  WK_word* next_input_word = src_buf;
+  WK_word* end_of_input = src_buf + num_input_words;
+
+  PRELOAD_DICTIONARY;
+
+  next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16);
+
+#ifdef WK_DEBUG
+  printf("\nIn WKdm_compress\n");
+  printf("About to actually compress, src_buf is %u\n", src_buf);
+  printf("dictionary is at %u\n", dictionary);
+  printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt);
+  fflush(stdout);
+#endif
+
+  while (next_input_word < end_of_input)
+  {
+     WK_word *dict_location;
+     WK_word dict_word;
+     WK_word input_word = *next_input_word;
+
+     /* compute hash value, which is a byte offset into the dictionary,
+      * and add it to the base address of the dictionary. Cast back and
+      * forth to/from char * so no shifts are needed
+      */
+     dict_location =
+       (WK_word *)
+       (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word));
+
+     dict_word = *dict_location;
+
+     if (input_word == dict_word)
+     {
+        RECORD_EXACT(dict_location - dictionary); 
+     }
+     else if (input_word == 0) {
+        RECORD_ZERO;
+     }
+     else
+     {
+        WK_word input_high_bits = HIGH_BITS(input_word);
+        if (input_high_bits == HIGH_BITS(dict_word)) {
+         RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word));
+          *dict_location = input_word;
+        }
+        else {
+         RECORD_MISS(input_word);
+            *dict_location = input_word;
+        }
+     }
+     next_input_word++;
+  }
+
+#ifdef WK_DEBUG
+  printf("AFTER MODELING in WKdm_compress()\n");  fflush(stdout);
+  printf("tempTagsArray holds %u bytes\n",
+         next_tag - (char *) tempTagsArray);
+  printf("tempQPosArray holds %u bytes\n",
+         next_qp - (char *) tempQPosArray);
+  printf("tempLowBitsArray holds %u bytes\n",
+         (char *) next_low_bits - (char *) tempLowBitsArray);
+
+  printf("next_full_patt is %u\n",
+         (unsigned long) next_full_patt);
+
+  printf(" i.e., there are %u full patterns\n",
+     next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)));
+  fflush(stdout);
+
+  { int i;
+    WK_word *arr =(dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16));
+
+    printf("  first 20 full patterns are: \n");
+    for (i = 0; i < 20; i++) {
+      printf(" %d", arr[i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  /* Record (into the header) where we stopped writing full words,
+   * which is where we will pack the queue positions.  (Recall
+   * that we wrote the full words directly into the dest buffer
+   * during modeling.
+   */
+
+  SET_QPOS_AREA_START(dest_buf,next_full_patt);
+
+  /* Pack the tags into the tags area, between the page header
+   * and the full words area.  We don't pad for the packer
+   * because we assume that the page size is a multiple of 16.
+   */     
+
+#ifdef WK_DEBUG
+  printf("about to pack %u bytes holding tags\n", 
+         next_tag - (char *) tempTagsArray);
+
+  { int i;
+    char* arr = (char *) tempTagsArray;
+
+    printf("  first 200 tags are: \n");
+    for (i = 0; i < 200; i++) {
+      printf(" %d", arr[i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  boundary_tmp = WK_pack_2bits(tempTagsArray,
+                              (WK_word *) next_tag,
+                              dest_buf + HEADER_SIZE_IN_WORDS);
+
+#ifdef WK_DEBUG  
+    printf("packing tags stopped at %u\n", boundary_tmp);
+#endif
+  
+  /* Pack the queue positions into the area just after
+   * the full words.  We have to round up the source
+   * region to a multiple of two words.
+   */
+
+  {
+    unsigned int num_bytes_to_pack = next_qp - (char *) tempQPosArray;
+    unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8);
+    unsigned int num_source_words = num_packed_words * 2;
+    WK_word* endQPosArray = tempQPosArray + num_source_words;
+
+    /* Pad out the array with zeros to avoid corrupting real packed
+       values. */
+    for (; /* next_qp is already set as desired */
+        next_qp < (char*)endQPosArray;
+        next_qp++) {
+      *next_qp = 0;
+    }
+
+#ifdef WK_DEBUG    
+    printf("about to pack %u (bytes holding) queue posns.\n",
+           num_bytes_to_pack);
+    printf("packing them from %u words into %u words\n",
+           num_source_words, num_packed_words);
+    printf("dest is range %u to %u\n",
+           next_full_patt, next_full_patt + num_packed_words);
+    { int i;
+      char *arr = (char *) tempQPosArray;
+      printf("  first 200 queue positions are: \n");
+      for (i = 0; i < 200; i++) {
+        printf(" %d", arr[i]);
+      }
+      printf("\n");
+    }
+#endif
+    
+    boundary_tmp = WK_pack_4bits(tempQPosArray,
+                                endQPosArray,
+                                next_full_patt);
+#ifdef WK_DEBUG
+     printf("Packing of queue positions stopped at %u\n", boundary_tmp);
+#endif /* WK_DEBUG */
+
+    /* Record (into the header) where we stopped packing queue positions,
+     * which is where we will start packing low bits.
+     */
+    SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp);
+
+  }
+
+  /* Pack the low bit patterns into the area just after
+   * the queue positions.  We have to round up the source
+   * region to a multiple of three words.
+   */
+
+  {
+    unsigned int num_tenbits_to_pack =
+      next_low_bits - tempLowBitsArray;
+    unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3);
+    unsigned int num_source_words = num_packed_words * 3;
+    WK_word* endLowBitsArray = tempLowBitsArray + num_source_words;
+
+    /* Pad out the array with zeros to avoid corrupting real packed
+       values. */
+
+    for (; /* next_low_bits is already set as desired */
+        next_low_bits < endLowBitsArray;
+        next_low_bits++) {
+      *next_low_bits = 0;
+    }
+
+#ifdef WK_DEBUG
+         printf("about to pack low bits\n");
+          printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack);
+          printf("endLowBitsArray is %u\n", endLowBitsArray);
+#endif
+    
+    boundary_tmp = WK_pack_3_tenbits (tempLowBitsArray,
+                                     endLowBitsArray,
+                                     boundary_tmp);
+
+    SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp);
+
+  }
+
+  return ((char *) boundary_tmp - (char *) dest_buf);
+} 
diff --git a/iokit/Kernel/WKdmDecompress.c b/iokit/Kernel/WKdmDecompress.c
new file mode 100644 (file)
index 0000000..062f2d3
--- /dev/null
@@ -0,0 +1,283 @@
+#include <sys/cdefs.h>
+#include "WKdm.h"
+
+/***************************************************************************
+ *          THE UNPACKING ROUTINES should GO HERE
+ */
+
+const char hashLookupTable [] = HASH_LOOKUP_TABLE_CONTENTS;
+
+#if 0
+#define GET_NEXT_TAG tags[tagsIndex++]
+#define GET_NEXT_FULL_PATTERN fullPatterns[fullPatternsIndex++]
+#define GET_NEXT_LOW_BITS lowBits[lowBitsIndex++]
+#define GET_NEXT_DICTIONARY_INDEX dictionaryIndices[dictionaryIndicesIndex++]
+#endif
+
+/*  WK_unpack_2bits takes any number of words containing 16 two-bit values
+ *  and unpacks them into four times as many words containing those
+ *  two-bit values as bytes (with the low two bits of each byte holding
+ *  the actual value).
+ */
+static WK_word*
+WK_unpack_2bits(WK_word *input_buf,
+                WK_word *input_end,
+                WK_word *output_buf) {
+
+  register WK_word *input_next = input_buf;
+  register WK_word *output_next = output_buf;
+  register WK_word packing_mask = TWO_BITS_PACKING_MASK;
+
+  /* loop to repeatedly grab one input word and unpack it into
+   * 4 output words.  This loop could be unrolled a little---it's
+   * designed to be easy to do that.
+   */   
+  while (input_next < input_end) {
+    register WK_word temp = input_next[0];
+    DEBUG_PRINT_2("Unpacked tags word: %.8x\n", temp);
+    output_next[0] = temp & packing_mask;
+    output_next[1] = (temp >> 2) & packing_mask;
+    output_next[2] = (temp >> 4) & packing_mask;
+    output_next[3] = (temp >> 6) & packing_mask;
+    
+    output_next += 4;
+    input_next++;
+  }
+
+  return output_next;
+
+}
+
+/* unpack four bits consumes any number of words (between input_buf
+ * and input_end) holding 8 4-bit values per word, and unpacks them
+ * into twice as many words, with each value in a separate byte.
+ * (The four-bit values occupy the low halves of the bytes in the
+ * result).
+ */
+static WK_word*
+WK_unpack_4bits(WK_word *input_buf,
+                WK_word *input_end,
+                WK_word *output_buf) {
+
+  register WK_word *input_next = input_buf;
+  register WK_word *output_next = output_buf;
+  register WK_word packing_mask = FOUR_BITS_PACKING_MASK;
+  
+  
+  /* loop to repeatedly grab one input word and unpack it into
+   * 4 output words.  This loop should probably be unrolled
+   * a little---it's designed to be easy to do that.
+   */   
+  while (input_next < input_end) {
+    register WK_word temp = input_next[0];
+    DEBUG_PRINT_2("Unpacked dictionary indices word: %.8x\n", temp);
+    output_next[0] = temp & packing_mask;
+    output_next[1] = (temp >> 4) & packing_mask;
+    
+    output_next += 2;
+    input_next++;
+  }
+  
+  return output_next;
+
+}
+
+/* unpack_3_tenbits unpacks three 10-bit items from (the low 30 bits of)
+ * a 32-bit word
+ */
+static WK_word*
+WK_unpack_3_tenbits(WK_word *input_buf,
+                    WK_word *input_end,
+                    WK_word *output_buf) {
+
+  register WK_word *input_next = input_buf;
+  register WK_word *output_next = output_buf;
+  register WK_word packing_mask = LOW_BITS_MASK;
+  
+  /* loop to fetch words of input, splitting each into three
+   * words of output with 10 meaningful low bits.  This loop
+   * probably ought to be unrolled and maybe coiled
+   */
+  while (input_next < input_end) {
+    register WK_word temp = input_next[0];
+    
+    output_next[0] = temp & packing_mask;
+    output_next[1] = (temp >> 10) & packing_mask;
+    output_next[2] = temp >> 20;
+    
+    input_next++;
+    output_next += 3;
+  }
+  
+  return output_next;
+
+}
+
+/*********************************************************************
+ * WKdm_decompress --- THE DECOMPRESSOR                                 
+ * Expects WORD pointers to the source and destination buffers
+ * and a page size in words.  The page size had better be 1024 unless     
+ * somebody finds the places that are dependent on the page size and 
+ * fixes them
+ */
+
+void
+WKdm_decompress (WK_word* src_buf,
+                WK_word* dest_buf,
+                __unused unsigned int words)
+{
+
+  DictionaryElement dictionary[DICTIONARY_SIZE];
+
+  /* arrays that hold output data in intermediate form during modeling */
+  /* and whose contents are packed into the actual output after modeling */
+
+  /* sizes of these arrays should be increased if you want to compress
+   * pages larger than 4KB
+   */
+  WK_word tempTagsArray[300];        /* tags for everything          */
+  WK_word tempQPosArray[300];        /* queue positions for matches  */
+  WK_word tempLowBitsArray[1200];    /* low bits for partial matches */
+
+  PRELOAD_DICTIONARY;
+
+#ifdef WK_DEBUG
+  printf("\nIn DECOMPRESSOR\n");
+  printf("tempTagsArray is at %u\n", (unsigned long int) tempTagsArray);
+  printf("tempQPosArray is at %u\n", (unsigned long int) tempQPosArray);
+  printf("tempLowBitsArray is at %u\n", (unsigned long int) tempLowBitsArray);
+
+  printf(" first four words of source buffer are:\n");
+  printf("   %u\n   %u\n   %u\n   %u\n",
+         src_buf[0], src_buf[1], src_buf[2], src_buf[3]);
+  
+  { int i;
+    WK_word *arr =(src_buf + TAGS_AREA_OFFSET + (PAGE_SIZE_IN_WORDS / 16));
+
+    printf("  first 20 full patterns are: \n");
+    for (i = 0; i < 20; i++) {
+      printf(" %d", arr[i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  WK_unpack_2bits(TAGS_AREA_START(src_buf),
+                  TAGS_AREA_END(src_buf),
+                  tempTagsArray);
+
+#ifdef WK_DEBUG
+  { int i;
+    char* arr = (char *) tempTagsArray;
+
+    printf("  first 200 tags are: \n");
+    for (i = 0; i < 200; i++) {
+      printf(" %d", arr[i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  WK_unpack_4bits(QPOS_AREA_START(src_buf),
+                  QPOS_AREA_END(src_buf),
+                  tempQPosArray);
+
+#ifdef WK_DEBUG
+  { int i;
+    char* arr = (char *) tempQPosArray;
+
+    printf("  first 200 queue positions are: \n");
+    for (i = 0; i < 200; i++) {
+      printf(" %d", arr[i]);
+    }
+    printf("\n");
+  }
+#endif
+
+  WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf),
+                      LOW_BITS_AREA_END(src_buf),
+                      tempLowBitsArray);
+
+#ifdef WK_DEBUG
+  printf("AFTER UNPACKING, about to enter main block \n");
+#endif
+
+  {
+    register char *next_tag = (char *) tempTagsArray;
+    char *tags_area_end =
+       ((char *) tempTagsArray) + PAGE_SIZE_IN_WORDS;
+    char *next_q_pos = (char *) tempQPosArray;
+    WK_word *next_low_bits = tempLowBitsArray;
+    WK_word *next_full_word = FULL_WORD_AREA_START(src_buf);
+
+    WK_word *next_output = dest_buf;
+
+#ifdef WK_DEBUG
+    printf("next_output is %u\n", next_output);
+
+    printf("next_tag is %u \n", next_tag);
+    printf("tags_area_end is %u\n", tags_area_end);
+    printf("next_q_pos is %u\n", next_q_pos);
+    printf("next_low_bits is %u\n", next_low_bits);
+    printf("next_full_word is %u\n", next_full_word);
+#endif 
+
+    /* this loop should probably be unrolled. Maybe we should unpack
+     * as 4 bit values, giving two consecutive tags, and switch on
+     * that 16 ways to decompress 2 words at a whack
+     */
+    while (next_tag < tags_area_end) {
+
+       char tag = next_tag[0];
+
+       switch(tag) {
+
+         case ZERO_TAG: {
+            *next_output = 0;
+            break;
+         }
+         case EXACT_TAG: {
+            WK_word *dict_location = dictionary + *(next_q_pos++);
+            /* no need to replace dict. entry if matched exactly */
+            *next_output = *dict_location;
+            break;
+         }
+         case PARTIAL_TAG: {
+            WK_word *dict_location = dictionary + *(next_q_pos++);
+            {
+               WK_word temp = *dict_location;
+
+               /* strip out low bits */
+               temp = ((temp >> NUM_LOW_BITS) << NUM_LOW_BITS);
+
+               /* add in stored low bits from temp array */
+               temp = temp | *(next_low_bits++);
+
+               *dict_location = temp;      /* replace old value in dict. */
+               *next_output = temp;    /* and echo it to output */
+            }
+            break;
+         }
+         case MISS_TAG: {
+            WK_word missed_word = *(next_full_word++);
+            WK_word *dict_location = 
+              (WK_word *)
+              (((char *) dictionary) + HASH_TO_DICT_BYTE_OFFSET(missed_word));
+            *dict_location = missed_word;
+            *next_output = missed_word;
+            break;
+         }
+       }
+       next_tag++;
+       next_output++;
+    }
+
+#ifdef WK_DEBUG        
+    printf("AFTER DECOMPRESSING\n");
+    printf("next_output is %u\n", (unsigned long int) next_output);
+    printf("next_tag is %u\n", (unsigned long int) next_tag);
+    printf("next_full_word is %u\n", (unsigned long int) next_full_word);
+    printf("next_q_pos is %u\n", (unsigned long int) next_q_pos);
+#endif
+  }
+}
index b24d19114a1c048082a76a461f38c27c18903c04..52571ea9d8ef0c59cdfeef0496a5996699cb5e0c 100644 (file)
@@ -87,6 +87,11 @@ LDOBJS = $(OBJS)
 $(COMPONENT).o: $(LDOBJS)
        @echo "creating $(COMPONENT).o"
 
+       $(SEG_HACK) __HIB IOHibernateRestoreKernel.o -o _IOHibernateRestoreKernel.o
+       mv _IOHibernateRestoreKernel.o IOHibernateRestoreKernel.o
+       $(SEG_HACK) __HIB WKdmDecompress.o -o _WKdmDecompress.o
+       mv _WKdmDecompress.o WKdmDecompress.o
+
        @echo [ updating $(COMPONENT).o ${IOKIT_KERNEL_CONFIG} ]
        $(LD)  $(LDFLAGS_COMPONENT) -o $(COMPONENT).o ${LDOBJS} 
 
index f4aa60ae1a4307737954fdad9d459ffbd0e39be8..ab719eb343dee9e7c836c959a768d8557fdc1347 100644 (file)
@@ -6,6 +6,11 @@ OPTIONS/kdebug                                         optional kdebug
 # libIOKit
 
 
+iokit/Kernel/WKdmCompress.c                            optional iokitcpp
+iokit/Kernel/WKdmDecompress.c                          optional iokitcpp
+iokit/Kernel/IOHibernateIO.cpp                         optional iokitcpp
+iokit/Kernel/IOHibernateRestoreKernel.c                        optional iokitcpp
+
 iokit/Kernel/IOLib.c                                   optional iokitcpp
 iokit/Kernel/IOLocks.cpp                               optional iokitcpp
 iokit/Kernel/IOConditionLock.cpp                       optional iokitcpp
index cf3af2a630a679ca82a92e0c50d5043e8c621aac..41c7ce91f265aeb970577f616fa0baa899ba05aa 100644 (file)
@@ -36,10 +36,11 @@ INSTALL_MI_LIST     = OSByteOrder.h OSDebug.h OSReturn.h OSTypes.h
 
 INSTALL_MI_DIR = libkern
 
+INSTALL_MI_LCL_GEN_LIST = OSCrossEndian.h
+
 EXPORT_MI_LIST = ${DATAFILES}
 
-EXPORT_MI_GEN_LIST = \
-        version.h
+EXPORT_MI_GEN_LIST = version.h
 
 EXPORT_MI_DIR = libkern
 
diff --git a/libkern/libkern/OSCrossEndian.h b/libkern/libkern/OSCrossEndian.h
new file mode 100644 (file)
index 0000000..0131455
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+/*
+ * This private header exports 3 APIs.
+ *     _OSRosettaCheck() -     An inline function that returns true if we are
+ *                             currently running under Rosetta.
+ *     IF_ROSETTA() -          Which is used to as a regular conditional
+ *                             expression that is true only if the current
+ *                             code is executing in the Rosetta
+ *                             translation space.
+ *     ROSETTA_ONLY(exprs) -   Which is used to create a block code that only
+ *                             executes if we are running in Rosetta.
+ *
+ * for example
+ *
+ * IF_ROSETTA() {
+ *     // Do Cross endian swapping of input data
+ *     outdata = OSSwap??(indata);
+ * }
+ * else {
+ *     // Do straight through 
+ *     outdata = indata;
+ * }
+ *
+ * outdata = indata;
+ * ROSETTA_ONLY(
+ *     // Do Cross endian swapping of input data
+ *     outdata = OSSwap??(outdata);
+ * );
+ */
+
+#ifndef _LIBKERN_OSCROSSENDIAN_H
+#define _LIBKERN_OSCROSSENDIAN_H
+
+#if __ppc__
+
+static __inline__ int _OSRosettaCheck(void)
+{
+    int isCrossEndian;
+
+    __asm__ (  "b 0f\n"
+           "   .long 0x14400004\n"
+           "   li %0,1\n"
+           "0:"
+       : "=r" (isCrossEndian) : "0" (0)
+    );
+
+    return isCrossEndian;
+}
+
+#else
+
+static __inline__ int _OSRosettaCheck(void) { return 0; }
+
+#endif
+
+#define IF_ROSETTA() if (__builtin_expect(_OSRosettaCheck(), 0) )
+
+#define ROSETTA_ONLY(exprs)    \
+do {                           \
+    IF_ROSETTA() {             \
+       exprs                   \
+    }                          \
+} while(0)
+
+#endif /*  _LIBKERN_OSCROSSENDIAN_H */
index ae239e230ab87da5b6d369715bd0213a984dc886..e7e996b6207f72e8855f00605af0e243f0f0986a 100644 (file)
@@ -34,10 +34,9 @@ OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS))
 
 $(OBJS_WERROR):                WERROR=-Werror
 
-
-
 # Files that must go in the __HIB segment:
 HIB_FILES=                              \
+           hibernate_restore.o          \
            gdt.o                        \
            idt.o
 
index 60251be9be021beac2ef26e3aaa60a37a60e57ac..80f3bd54241e9a3baf83a6f5e02a72856cf5ff61 100644 (file)
@@ -19,7 +19,7 @@ db_disasm.o : ppc_disasm.h
 
 # Files that must go in the __HIB segment:
 HIB_FILES=                              \
-
+           hibernate_restore.o
 
 ######################################################################
 #END   Machine dependent Makefile fragment for ppc
index 7514d7843e44223e1124da4a265707b1f1d5f9a8..3a2b613e0f0ad6ea1de5d47fbaaa46021796f717 100644 (file)
@@ -175,6 +175,7 @@ osfmk/kern/wait_queue.c             standard
 osfmk/kern/xpr.c                       optional xpr_debug
 osfmk/kern/zalloc.c                    standard
 osfmk/kern/bsd_kern.c          optional mach_bsd
+osfmk/kern/hibernate.c         standard
 ./mach/clock_server.c                  standard
 ./mach/clock_priv_server.c             standard
 ./mach/clock_reply_user.c              standard
index abd7886569f6236fb95c23e69333b3ce32642aa5..1121b7b74b8f98e92f6f8ba1256ce9c8c9b02b65 100644 (file)
@@ -111,6 +111,8 @@ osfmk/kdp/ml/i386/kdp_machdep.c     optional        mach_kdp
 
 osfmk/kdp/ml/i386/kdp_vm.c     optional        mach_kdp
 
+osfmk/i386/hibernate_i386.c            standard
+osfmk/i386/hibernate_restore.s         standard
 
 # DUMMIES TO FORCE GENERATION OF .h FILES
 osfmk/OPTIONS/ln               optional ln
index d985923dedad03fb0c96926a2d0595f7a9bfb947..e03ce61b94a574c3a3ea30663ab9220789b96dad 100644 (file)
@@ -64,6 +64,8 @@ osfmk/ppc/Diagnostics.c               standard
 osfmk/ppc/PPCcalls.c           standard
 osfmk/ppc/vmachmon.c           standard
 osfmk/ppc/vmachmon_asm.s       standard
+osfmk/ppc/pms.c                                standard
+osfmk/ppc/pmsCPU.c                     standard
 
 osfmk/ppc/Firmware.s           standard
 osfmk/ppc/FirmwareC.c          standard
@@ -110,6 +112,8 @@ osfmk/console/panic_dialog.c        optional        vc device-driver
 osfmk/console/video_console.c  optional        vc device-driver
 osfmk/console/ppc/video_scroll.s       optional        vc device-driver
 
+osfmk/ppc/hibernate_ppc.c              standard
+osfmk/ppc/hibernate_restore.s          standard
 
 # DUMMIES TO FORCE GENERATION OF .h FILES
 OPTIONS/bm                     optional bm
index d6fb27e2eb14e5be077e77f12d6fa48060acbe6d..c1e4c2d519405d04bb9385cf6980b64f5c5ab78f 100644 (file)
@@ -457,7 +457,7 @@ kern_return_t IOMapPages(vm_map_t map, vm_offset_t va, vm_offset_t pa,
 #if __ppc__
 
     // Set up a block mapped area
-    pmap_map_block(pmap, (addr64_t)va, (ppnum_t)(pa >> 12), length, prot, flags, 0);
+    pmap_map_block(pmap, (addr64_t)va, (ppnum_t)(pa >> 12), (uint32_t)(length >> 12), prot, flags, 0);
 
 #else
 //  enter each page's physical address in the target map
index 51623e41c7cf1627ed35c644dd6a73387662c3b5..e748f6161db8c3f08352fb342e973933085d4b92 100644 (file)
@@ -28,6 +28,8 @@
 #include <i386/mp.h>
 
 #include <kern/cpu_data.h>
+
+#include <IOKit/IOHibernatePrivate.h>
 #include <IOKit/IOPlatformExpert.h>
 
 extern void    acpi_sleep_cpu(acpi_sleep_callback, void * refcon);
@@ -54,16 +56,21 @@ acpi_install_wake_handler(void)
        return ACPI_WAKE_ADDR;
 }
 
-typedef struct acpi_sleep_callback_data {
+typedef struct acpi_hibernate_callback_data {
     acpi_sleep_callback func;
     void *refcon;
-} acpi_sleep_callback_data;
+} acpi_hibernate_callback_data;
 
 static void
-acpi_sleep_do_callback(void *refcon)
+acpi_hibernate(void *refcon)
 {
-    acpi_sleep_callback_data *data = (acpi_sleep_callback_data *)refcon;
+    boolean_t hib;
+
+    acpi_hibernate_callback_data *data = (acpi_hibernate_callback_data *)refcon;
 
+    if (current_cpu_datap()->cpu_hibernate) {
+        hib = hibernate_write_image();
+    }
 
     (data->func)(data->refcon);
 
@@ -73,7 +80,8 @@ acpi_sleep_do_callback(void *refcon)
 void
 acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 {
-    acpi_sleep_callback_data data;
+    acpi_hibernate_callback_data data;
+    boolean_t did_hibernate;
 
        /* shutdown local APIC before passing control to BIOS */
        lapic_shutdown();
@@ -86,12 +94,23 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
         * Will not return until platform is woken up,
         * or if sleep failed.
         */
-    acpi_sleep_cpu(acpi_sleep_do_callback, &data);
+    acpi_sleep_cpu(acpi_hibernate, &data);
 
        /* reset UART if kprintf is enabled */
        if (FALSE == disableSerialOuput)
                serial_init();
 
+    if (current_cpu_datap()->cpu_hibernate) {
+        * (int *) CM1 = 0;
+        * (int *) CM2 = 0;
+        * (int *) CM3 = 0;
+
+        current_cpu_datap()->cpu_hibernate = 0;
+
+        did_hibernate = TRUE;
+    } else {
+        did_hibernate = FALSE;
+    }
 
        /* restore MTRR settings */
        mtrr_update_cpu();
@@ -99,6 +118,10 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        /* set up PAT following boot processor power up */
        pat_init();
 
+    if (did_hibernate) {
+        hibernate_machine_init();
+    }
+        
        /* re-enable and re-init local apic */
        if (lapic_probe())
                lapic_init();
@@ -106,4 +129,7 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        /* let the realtime clock reset */
        rtc_sleep_wakeup();
 
+    if (did_hibernate) {
+        enable_preemption();
+    }
 }
index 4578329c756ad3c9e2a098e907d4d3df0c9132eb..f1698995283847f3896a4165db1eb78a6bc938d6 100644 (file)
@@ -134,7 +134,13 @@ cpu_machine_init(
 
        cpu = get_cpu_number();
        PE_cpu_machine_init(cpu_datap(cpu)->cpu_id, TRUE);
-
+#if 0
+       if (cpu_datap(cpu)->hibernate)
+       {
+           cpu_datap(cpu)->hibernate = 0;
+           hibernate_machine_init();
+       }
+#endif
        ml_init_interrupt();
 }
 
index 7d455b4a197913feb49cd46b26446c33c30ecac6..172738bc94eb8c06d11922581fcc298d7b05f0c8 100644 (file)
@@ -128,7 +128,7 @@ typedef struct cpu_data
        int                     cpu_kdb_is_slave;
        int                     cpu_kdb_active;
 #endif /* MACH_KDB */
-        int                     cpu_reserved1;
+        int                     cpu_hibernate;
 } cpu_data_t;
 
 extern cpu_data_t      *cpu_data_ptr[];  
diff --git a/osfmk/i386/hibernate_i386.c b/osfmk/i386/hibernate_i386.c
new file mode 100644 (file)
index 0000000..d243fcd
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/thread.h>
+#include <kern/processor.h>
+#include <kern/kalloc.h>
+#include <mach/machine.h>
+#include <mach/processor_info.h>
+#include <mach/mach_types.h>
+#include <i386/pmap.h>
+#include <kern/cpu_data.h>
+#include <IOKit/IOPlatformExpert.h>
+#define KERNEL
+
+#include <IOKit/IOHibernatePrivate.h>
+#include <vm/vm_page.h>
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* This assumes that
+ * - we never will want to read or write memory below the start of kernel text
+ * - kernel text and data isn't included in pmap memory regions
+ */
+
+extern void *sectTEXTB;
+extern char            *first_avail;
+
+hibernate_page_list_t *
+hibernate_page_list_allocate(void)
+{
+    vm_offset_t             base;
+    vm_size_t               size;
+    uint32_t                bank;
+    uint32_t               pages, page_count;
+    hibernate_page_list_t * list;
+    hibernate_bitmap_t *    bitmap;
+    pmap_memory_region_t *  regions;
+    pmap_memory_region_t *  rp;
+    uint32_t                num_regions, num_alloc_regions;
+
+    page_count = 0;
+
+    /* Make a list of the maximum number of regions needed */
+    num_alloc_regions = 1 + pmap_memory_region_count;
+
+    /* Allocate our own list of memory regions so we can sort them in order. */
+    regions = (pmap_memory_region_t *)kalloc(sizeof(pmap_memory_region_t) * num_alloc_regions);
+    if (!regions)
+        return (0);
+
+    /* Fill in the actual regions we will be returning. */
+    rp = regions;
+
+    /* XXX should check for non-volatile memory region below kernel space. */
+    /* Kernel region is first. */
+    base = (vm_offset_t)(sectTEXTB) & 0x3FFFFFFF;
+    rp->base = atop_32(base);
+    rp->end = atop_32((vm_offset_t)first_avail) - 1;
+    rp->alloc = 0;
+    num_regions = 1;
+
+    /* Remaining memory regions.  Consolidate adjacent regions. */
+    for (bank = 0; bank < (uint32_t) pmap_memory_region_count; bank++)
+    {
+        if ((rp->end + 1) == pmap_memory_regions[bank].base) {
+            rp->end = pmap_memory_regions[bank].end;
+        } else {
+            ++rp;
+            ++num_regions;
+            rp->base = pmap_memory_regions[bank].base;
+            rp->end = pmap_memory_regions[bank].end;
+            rp->alloc = 0;
+        }
+    }
+
+    /* Size the hibernation bitmap */
+    size = sizeof(hibernate_page_list_t);
+    page_count = 0;
+    for (bank = 0, rp = regions; bank < num_regions; bank++, rp++) {
+       pages = rp->end + 1 - rp->base;
+       page_count += pages;
+        size += sizeof(hibernate_bitmap_t) + ((pages + 31) >> 5) * sizeof(uint32_t);
+    }
+
+    list = (hibernate_page_list_t *)kalloc(size);
+    if (!list)
+       return (list);
+       
+    list->list_size  = size;
+    list->page_count = page_count;
+    list->bank_count = num_regions;
+
+    /* Convert to hibernation bitmap. */
+    /* This assumes that ranges are in order and do not overlap. */
+    bitmap = &list->bank_bitmap[0];
+    for (bank = 0, rp = regions; bank < num_regions; bank++, rp++) {
+        bitmap->first_page = rp->base;
+        bitmap->last_page = rp->end;
+        bitmap->bitmapwords = (bitmap->last_page + 1
+                               - bitmap->first_page + 31) >> 5;
+        kprintf("HIB: Bank %d: 0x%x end 0x%x\n", bank,
+                ptoa_32(bitmap->first_page),
+                ptoa_32(bitmap->last_page));
+       bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
+    }
+
+    kfree((void *)regions, sizeof(pmap_memory_region_t) * num_alloc_regions);
+    return (list);
+}
+
+void
+hibernate_page_list_setall_machine(hibernate_page_list_t * page_list,
+                                   hibernate_page_list_t * page_list_wired,
+                                   uint32_t * pagesOut)
+{
+    KernelBootArgs_t *      bootArgs = (KernelBootArgs_t *)PE_state.bootArgs;
+    MemoryRange *           mptr;
+    uint32_t                bank;
+    uint32_t                page, count;
+
+    for (bank = 0, mptr = bootArgs->memoryMap; bank < bootArgs->memoryMapCount; bank++, mptr++) {
+
+        if (kMemoryRangeNVS != mptr->type) continue;
+        kprintf("Base NVS region 0x%x + 0x%x\n", (vm_offset_t)mptr->base, (vm_size_t)mptr->length);
+        /* Round to page size.  Hopefully this does not overlap any reserved areas. */
+        page = atop_32(trunc_page((vm_offset_t)mptr->base));
+        count = atop_32(round_page((vm_offset_t)mptr->base + (vm_size_t)mptr->length)) - page;
+        kprintf("Rounded NVS region 0x%x size 0x%x\n", page, count);
+
+        hibernate_set_page_state(page_list, page_list_wired, page, count, 1);
+        *pagesOut -= count;
+    }
+}
+
+kern_return_t 
+hibernate_processor_setup(IOHibernateImageHeader * header)
+{
+    current_cpu_datap()->cpu_hibernate = 1;
+    header->processorFlags = 0;
+    return (KERN_SUCCESS);
+}
+
+void
+hibernate_vm_lock(void)
+{
+    if (FALSE /* getPerProc()->hibernate */)
+    {
+        vm_page_lock_queues();
+        mutex_lock(&vm_page_queue_free_lock);
+    }
+}
+
+void
+hibernate_vm_unlock(void)
+{
+    if (FALSE /* getPerProc()->hibernate */)
+    {
+        mutex_unlock(&vm_page_queue_free_lock);
+        vm_page_unlock_queues();
+    }
+}
diff --git a/osfmk/i386/hibernate_restore.s b/osfmk/i386/hibernate_restore.s
new file mode 100644 (file)
index 0000000..c01f23c
--- /dev/null
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <i386/asm.h>
+#include <i386/proc_reg.h>
+       
+#include <i386/postcode.h>
+#include <assym.s>
+
+/*
+This code is linked into the kernel but part of the "__HIB" section, which means
+its used by code running in the special context of restoring the kernel text and data
+from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
+it calls or references (ie. hibernate_restore_phys_page())
+needs to be careful to only touch memory also in the "__HIB" section.
+*/
+
+/*
+ * GAS won't handle an intersegment jump with a relocatable offset.
+ */
+#define        LJMP(segment,address)   \
+       .byte   0xea            ;\
+       .long   address         ;\
+       .word   segment
+       
+#define KVTOPHYS       (-KERNELBASE)
+#define        KVTOLINEAR      LINEAR_KERNELBASE
+
+#define        PA(addr)        ((addr)+KVTOPHYS)
+#define        VA(addr)        ((addr)-KVTOPHYS)
+
+/* Location of temporary page tables */
+#define HPTD        0x80000
+       
+#define KERNEL_MAP_SIZE (  4 * 1024 * 1024)
+
+/*
+ * fillkpt
+ *     eax = page frame address
+ *     ebx = index into page table
+ *     ecx = how many pages to map
+ *     base = base address of page dir/table
+ *     prot = protection bits
+ */
+#define        fillkpt(base, prot)               \
+       shll    $2,%ebx                 ; \
+       addl    base,%ebx               ; \
+       orl     $(PTE_V), %eax          ; \
+       orl     prot,%eax               ; \
+1:     movl    %eax,(%ebx)             ; \
+       addl    $(PAGE_SIZE),%eax       ; /* increment physical address */ \
+       addl    $4,%ebx                 ; /* next pte */ \
+       loop    1b
+
+/*
+ * fillpse
+ *     eax = physical page address
+ *     ebx = index into page table
+ *     ecx = how many pages to map
+ *     base = base address of page dir/table
+ *     prot = protection bits
+ */
+#define        fillpse(base, prot)               \
+       shll    $2,%ebx                 ; \
+       addl    base,%ebx               ; \
+       orl     $(PTE_V|PTE_PS), %eax   ; \
+       orl     prot,%eax               ; \
+1:     movl    %eax,(%ebx)             ; \
+       addl    $(1 << PDESHIFT),%eax   ; /* increment physical address 4Mb */ \
+       addl    $4,%ebx                 ; /* next entry */ \
+       loop    1b
+       
+/*
+ * fillkptphys(base, prot)
+ *     eax = physical address
+ *     ecx = how many pages to map
+ *      base = base of page table
+ *     prot = protection bits
+ */
+#define        fillkptphys(base, prot)           \
+       movl    %eax, %ebx              ; \
+       shrl    $(PAGE_SHIFT), %ebx     ; \
+       fillkpt(base, prot)
+
+/*
+ * Hibernation code restarts here.  Steal some pages from 0x10000
+ * to 0x90000 for pages tables and directories etc to temporarily
+ * map the hibernation code (put at 0x100000 (phys) by the booter
+ * and linked to 0xC0100000 by the linker) to 0xC0100000 so it can
+ * execute.  It's self-contained and won't make any references outside
+ * of itself.
+ *
+ * On the way down it has to save IdlePTD (and if PAE also IdlePDPT)
+ * and after it runs it has to restore those and load IdlePTD (or
+ * IdlePDPT if PAE) into %cr3 to re-establish the original mappings
+ */
+
+       .align  ALIGN
+       .globl  EXT(hibernate_machine_entrypoint)
+LEXT(hibernate_machine_entrypoint)
+       cli
+
+        mov     %eax, %edi
+        
+       POSTCODE(0x1)
+
+       /* Map physical memory from zero to 0xC0000000 */
+        xorl    %eax, %eax
+        xorl    %ebx, %ebx
+        movl    $(KPTDI), %ecx
+        fillpse( $(HPTD), $(PTE_W) )
+
+        /* Map 0 again at 0xC0000000 */
+        xorl    %eax, %eax
+        movl    $(KPTDI), %ebx
+        movl    $(KERNEL_MAP_SIZE >> PDESHIFT), %ecx
+        fillpse( $(HPTD), $(PTE_W) )
+               
+       movl    $(HPTD), %eax
+       movl    %eax, %cr3
+
+        POSTCODE(0x3)
+        
+       movl    %cr4,%eax
+        orl     $(CR4_PSE),%eax
+        movl    %eax,%cr4               /* enable page size extensions */
+       movl    %cr0, %eax
+       orl     $(CR0_PG|CR0_WP|CR0_PE), %eax
+       movl    %eax, %cr0      /* ready paging */
+       
+        POSTCODE(0x4)
+
+       lgdt    PA(EXT(gdtptr))         /* load GDT */
+       lidt    PA(EXT(idtptr))         /* load IDT */
+       
+        POSTCODE(0x5)
+
+        LJMP   (KERNEL_CS,EXT(hstart))  /* paging on and go to correct vaddr */
+
+/* Hib restart code now running with correct addresses */
+LEXT(hstart)
+       POSTCODE(0x6)
+
+       mov     $(KERNEL_DS),%ax        /* set kernel data segment */
+       mov     %ax,%ds
+       mov     %ax,%es
+       mov     %ax,%ss
+       
+       mov     $0,%ax                  /* fs must be zeroed; */
+       mov     %ax,%fs                 /* some bootstrappers don`t do this */
+       mov     %ax,%gs
+       
+       lea     EXT(gIOHibernateRestoreStackEnd),%esp   /* switch to the bootup stack */
+
+        POSTCODE(0x7)  
+       
+        xorl    %eax, %eax              /* Video memory - N/A */
+        pushl   %eax
+        mov     %edi, %eax              /* Pointer to hibernate header */
+        pushl   %eax
+        call    EXT(hibernate_kernel_entrypoint)
+        /* NOTREACHED */
+        hlt
+
+
+        
+/*
+void 
+hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags);
+*/
+
+                       .align  5
+                       .globl  EXT(hibernate_restore_phys_page)
+
+        /* XXX doesn't handle 64-bit addresses yet */
+       /* XXX can only deal with exactly one page */
+LEXT(hibernate_restore_phys_page)
+       pushl   %edi
+       pushl   %esi
+
+       movl    8+ 4(%esp),%esi         /* source virtual address */
+        addl    $0, %esi
+        jz      2f                      /* If source == 0, nothing to do */
+        
+
+       movl    8+ 12(%esp),%edi        /* destination physical address */
+        cmpl    $(LINEAR_KERNELBASE), %edi
+        jl      1f                      /* no need to map, below 0xC0000000 */
+
+        movl    %edi, %eax              /* destination physical address */
+        /* Map physical address to virt. address 0xffc00000 (4GB - 4MB) */
+        andl    $0xFFC00000, %eax
+        orl     $(PTE_V | PTE_PS | PTE_W), %eax
+        movl    %eax, (HPTD + (0x3FF * 4))
+        orl     $0xFFC00000, %edi
+        invlpg  (%edi)
+
+1:      
+       movl    8+ 20(%esp),%edx        /* number of bytes */
+       cld
+/* move longs*/
+       movl    %edx,%ecx
+       sarl    $2,%ecx
+       rep
+       movsl
+/* move bytes*/
+       movl    %edx,%ecx
+       andl    $3,%ecx
+       rep
+       movsb
+2:
+       popl    %esi
+       popl    %edi
+       ret
index d66ffd4efe98f7bf59ce703a85ca6c03abab98d4..e419a5374e752bd5950926e6dcbeb57b964d32cb 100644 (file)
@@ -263,6 +263,8 @@ i386_vm_init(unsigned int maxmem, KernelBootArgs_t *args)
 
           // save other regions
           if (kMemoryRangeNVS == mptr->type) {
+              // Mark this as a memory range (for hibernation),
+              // but don't count as usable memory
               pmptr->base = base;
               pmptr->end = ((mptr->base + mptr->length + I386_PGBYTES - 1) >> I386_PGSHIFT) - 1;
               pmptr->alloc = pmptr->end;
index a9f8c45b02ddf585a967b5542461b45aebc07208..830c1284b433f635befd03659ed319a6df96cef1 100644 (file)
@@ -92,16 +92,21 @@ EXT(_kick_buffer_):
 /*
  * Interrupt and bootup stack for initial processor.
  */
+        /* in the __HIB section since the hibernate restore code uses this stack. */
         .section __HIB, __data
        .align  ALIGN
 
        .globl  EXT(intstack)
 EXT(intstack):
+       .globl  EXT(gIOHibernateRestoreStack)
+EXT(gIOHibernateRestoreStack):
 
        .set    ., .+INTSTACK_SIZE
 
        .globl  EXT(eintstack)
 EXT(eintstack:)
+       .globl  EXT(gIOHibernateRestoreStackEnd)
+EXT(gIOHibernateRestoreStackEnd):
 
 /*
  * Pointers to GDT and IDT.  These contain linear addresses.
index 5fde4553411e484fe979b396e287def7d5232059..2edc405596e4eb3551388f95fb03eb75977a4e52 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -775,7 +775,7 @@ ipc_kmsg_get_from_kernel(
        ipc_port_t      dest_port;
 
        assert(size >= sizeof(mach_msg_header_t));
-       assert((size & 3) == 0);
+//     assert((size & 3) == 0);
 
        assert(IP_VALID((ipc_port_t) msg->msgh_remote_port));
        dest_port = (ipc_port_t)msg->msgh_remote_port;
diff --git a/osfmk/kern/hibernate.c b/osfmk/kern/hibernate.c
new file mode 100644 (file)
index 0000000..e43ee94
--- /dev/null
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <kern/kalloc.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/thread.h>
+#include <kern/processor.h>
+#include <mach/machine.h>
+#include <mach/processor_info.h>
+#include <mach/mach_types.h>
+#include <default_pager/default_pager_internal.h>
+#include <IOKit/IOPlatformExpert.h>
+#define KERNEL
+
+#include <IOKit/IOHibernatePrivate.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+static vm_page_t hibernate_gobble_queue;
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Zero every bank bitmap in 'list' (zero bit == page must be saved), then set
+ * the out-of-range bits in the last word of each bank so pages past last_page
+ * are never treated as needing to be saved.  Banks are variable-length, so we
+ * step to the next bank by advancing past this bank's bitmap words.
+ */
+static void
+hibernate_page_list_zero(hibernate_page_list_t *list)
+{
+    uint32_t             bank;
+    hibernate_bitmap_t * bitmap;
+
+    bitmap = &list->bank_bitmap[0];
+    for (bank = 0; bank < list->bank_count; bank++)
+    {
+        uint32_t bit, last_bit;
+        uint32_t *bitmap_word;
+
+       /* bitmapwords << 2 converts the word count to bytes for bzero */
+       bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2); 
+
+        // Set out-of-bound bits at end of bitmap.
+        bitmap_word = &bitmap->bitmap[bitmap->bitmapwords - 1];
+        last_bit = ((bitmap->last_page - bitmap->first_page) & 31);
+        for (bit = 31; bit > last_bit; bit--) {
+            *bitmap_word |= (0x80000000 >> bit);
+        }
+
+       /* advance to the next (variable-length) bank header */
+       bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
+    }
+}
+
+
+/*
+ * Decide whether pageable page 'm' can be discarded (freed on wakeup) instead
+ * of being written into the hibernation image.  A page qualifies only if it is
+ * unwired, not precious/busy/cleaning/absent/unusual/in-error, its object is
+ * alive and can be try-locked, and it is clean (after refreshing the dirty bit
+ * from the pmap's referenced/modified state).
+ *
+ * Returns TRUE if the page may be discarded.
+ * NOTE(review): takes the object lock with vm_object_lock_try and assumes the
+ * caller already holds whatever page-queue locking is required -- confirm.
+ */
+static boolean_t 
+consider_discard(vm_page_t m)
+{
+    register vm_object_t object = 0;
+    int                  refmod_state;
+    boolean_t            discard = FALSE;
+
+    do
+    {
+        if(m->private)
+            panic("consider_discard: private");
+
+        /* non-blocking: if the object is contended, just keep the page */
+        if (!vm_object_lock_try(m->object))
+            break;
+
+        object = m->object;
+
+       if (m->wire_count != 0)
+            break;
+        if (m->precious)
+            break;
+
+        if (m->busy || !object->alive)
+           /*
+            *  Somebody is playing with this page.
+            */
+            break;
+
+        if (m->absent || m->unusual || m->error)
+           /*
+            * If it's unusual in any way, ignore it
+            */
+            break;
+    
+        if (m->cleaning)
+            break;
+
+        if (!m->dirty)
+        {
+            /* pull the hardware referenced/modified state into the page
+             * so a pmap-level modification isn't missed */
+            refmod_state = pmap_get_refmod(m->phys_page);
+        
+            if (refmod_state & VM_MEM_REFERENCED)
+                m->reference = TRUE;
+            if (refmod_state & VM_MEM_MODIFIED)
+                m->dirty = TRUE;
+        }
+   
+        /*
+         * If it's clean we can discard the page on wakeup.
+         */
+        discard = !m->dirty;
+    }
+    while (FALSE);
+
+    if (object)
+        vm_object_unlock(object);
+
+    return (discard);
+}
+
+
+/*
+ * Free a page previously judged discardable by consider_discard().
+ * Disconnects it from all pmaps first (unless no_isync says no mapping state
+ * needs flushing) and panics if the page turns out to be dirty, in laundry,
+ * private, or fictitious -- those states mean the earlier decision was stale.
+ */
+static void
+discard_page(vm_page_t m)
+{
+    if (m->absent || m->unusual || m->error)
+       /*
+        * If it's unusual in any way, ignore
+        */
+        return;
+
+    if (!m->no_isync) 
+    {
+        /* remove all mappings; returns accumulated ref/mod state */
+        int refmod_state = pmap_disconnect(m->phys_page);
+
+        if (refmod_state & VM_MEM_REFERENCED)
+            m->reference = TRUE;
+        if (refmod_state & VM_MEM_MODIFIED)
+            m->dirty = TRUE;
+    }
+
+    /* a discardable page must still be clean and ordinary here */
+    if (m->dirty)
+        panic("discard_page(%p) dirty", m);
+    if (m->laundry)
+        panic("discard_page(%p) laundry", m);
+    if (m->private)
+        panic("discard_page(%p) private", m);
+    if (m->fictitious)
+        panic("discard_page(%p) fictitious", m);
+
+    vm_page_free(m);
+}
+
+/*
+ Bits zero in the bitmaps => needs to be saved. All pages default to be saved,
+ pages known to VM to not need saving are subtracted.
+ Wired pages to be saved are present in page_list_wired, pageable in page_list.
+*/
+
+/*
+ * Build the save bitmaps for the hibernation image (see comment above on bit
+ * semantics: zero bit == save the page).
+ *
+ *   page_list        - receives bits for every page that need NOT be saved
+ *   page_list_wired  - receives bits for every page that is not wired
+ *   pagesOut         - returns the resulting count of pages to be saved
+ *
+ * Gobbled and free pages are marked not-needed in both lists; pageable pages
+ * (zero-fill, inactive, active queues) are marked not-wired, and additionally
+ * marked not-needed if gIOHibernateMode allows discarding them and they are
+ * clean.  Finally wired pages are removed from page_list by OR-ing in the
+ * complement of the wired bitmap, and machine-dependent adjustments applied.
+ */
+void
+hibernate_page_list_setall(hibernate_page_list_t * page_list,
+                          hibernate_page_list_t * page_list_wired,
+                          uint32_t * pagesOut)
+{
+    uint64_t start, end, nsec;
+    vm_page_t m;
+    uint32_t pages = page_list->page_count;
+    uint32_t count_zf = 0, count_inactive = 0, count_active = 0;
+    uint32_t count_wire = pages;                /* assume all wired until seen on a queue */
+    uint32_t count_discard_active = 0, count_discard_inactive = 0;
+    uint32_t i;
+
+    HIBLOG("hibernate_page_list_setall start\n");
+
+    clock_get_uptime(&start);
+
+    hibernate_page_list_zero(page_list);
+    hibernate_page_list_zero(page_list_wired);
+
+    /* pages gobbled by hibernate_setup() never need saving */
+    m = (vm_page_t) hibernate_gobble_queue;
+    while(m)
+    {
+       pages--;
+       count_wire--;
+       hibernate_page_bitset(page_list,       TRUE, m->phys_page);
+       hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+       m = (vm_page_t) m->pageq.next;
+    }
+
+    /* free pages never need saving */
+    m = (vm_page_t) vm_page_queue_free;
+    while(m)
+    {
+       pages--;
+       count_wire--;
+       hibernate_page_bitset(page_list,       TRUE, m->phys_page);
+       hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+       m = (vm_page_t) m->pageq.next;
+    }
+
+    /* zero-fill queue: clean pages may be discarded when the mode allows it */
+    queue_iterate( &vm_page_queue_zf,
+                    m,
+                    vm_page_t,
+                    pageq )
+    {
+        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 
+         && consider_discard(m))
+        {
+            hibernate_page_bitset(page_list, TRUE, m->phys_page);
+            count_discard_inactive++;
+        }
+        else
+            count_zf++;
+       count_wire--;
+       hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+    }
+
+    queue_iterate( &vm_page_queue_inactive,
+                    m,
+                    vm_page_t,
+                    pageq )
+    {
+        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 
+         && consider_discard(m))
+        {
+            hibernate_page_bitset(page_list, TRUE, m->phys_page);
+            count_discard_inactive++;
+        }
+        else
+            count_inactive++;
+       count_wire--;
+       hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+    }
+
+    queue_iterate( &vm_page_queue_active,
+                    m,
+                    vm_page_t,
+                    pageq )
+    {
+        if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) 
+         && consider_discard(m))
+        {
+            hibernate_page_bitset(page_list, TRUE, m->phys_page);
+            count_discard_active++;
+        }
+        else
+            count_active++;
+       count_wire--;
+       hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
+    }
+
+    // pull wired from hibernate_bitmap
+
+    uint32_t             bank;
+    hibernate_bitmap_t * bitmap;
+    hibernate_bitmap_t * bitmap_wired;
+
+    /* zero bits in page_list_wired are the wired pages; OR their complement
+     * into page_list so wired pages are not treated as pageable-save pages */
+    bitmap = &page_list->bank_bitmap[0];
+    bitmap_wired = &page_list_wired->bank_bitmap[0];
+    for (bank = 0; bank < page_list->bank_count; bank++)
+    {
+       for (i = 0; i < bitmap->bitmapwords; i++)
+           bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
+       bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
+       bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
+    }
+
+    // machine dependent adjustments
+    hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
+
+    clock_get_uptime(&end);
+    absolutetime_to_nanoseconds(end - start, &nsec);
+    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
+
+    HIBLOG("pages %d, wire %d, act %d, inact %d, zf %d, could discard act %d inact %d\n", 
+                pages, count_wire, count_active, count_inactive, count_zf,
+                count_discard_active, count_discard_inactive);
+
+    *pagesOut = pages;
+}
+
+/*
+ * Walk the zero-fill, inactive, and active page queues and free every page
+ * whose bit is set in 'page_list' (i.e. pages hibernate_page_list_setall()
+ * decided could be discarded instead of saved).  Logs elapsed time and counts.
+ *
+ * Each loop captures pageq.next before discarding, since discard_page() ->
+ * vm_page_free() unlinks the page from its queue.
+ */
+void
+hibernate_page_list_discard(hibernate_page_list_t * page_list)
+{
+    uint64_t  start, end, nsec;
+    vm_page_t m;
+    vm_page_t next;
+    uint32_t  count_discard_active = 0, count_discard_inactive = 0;
+
+    clock_get_uptime(&start);
+
+    m = (vm_page_t) queue_first(&vm_page_queue_zf);
+    while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
+    {
+        next = (vm_page_t) m->pageq.next;
+        if (hibernate_page_bittst(page_list, m->phys_page))
+        {
+            discard_page(m);
+            count_discard_inactive++;
+        }
+        m = next;
+    }
+
+    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
+    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
+    {
+        next = (vm_page_t) m->pageq.next;
+        if (hibernate_page_bittst(page_list, m->phys_page))
+        {
+            discard_page(m);
+            count_discard_inactive++;
+        }
+        m = next;
+    }
+
+    m = (vm_page_t) queue_first(&vm_page_queue_active);
+    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
+    {
+        next = (vm_page_t) m->pageq.next;
+        if (hibernate_page_bittst(page_list, m->phys_page))
+        {
+            discard_page(m);
+            count_discard_active++;
+        }
+        m = next;
+    }
+
+    clock_get_uptime(&end);
+    absolutetime_to_nanoseconds(end - start, &nsec);
+    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d\n",
+                nsec / 1000000ULL,
+                count_discard_active, count_discard_inactive);
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Prepare for writing a hibernation image.
+ *
+ *   header               - image header, handed to hibernate_processor_setup()
+ *   free_page_ratio      - percent of all pages to try to "gobble" (force free)
+ *                          to shrink the image
+ *   free_page_time       - time budget for gobbling, in ms (used as a deadline)
+ *   page_list_ret        - receives the pageable-page bitmap list
+ *   page_list_wired_ret  - receives the wired-page bitmap list
+ *   encryptedswap        - receives whether the default pager encrypts swap
+ *                          (dp_encryption), so the image can match
+ *
+ * Gobbled pages are kept on hibernate_gobble_queue and returned to the VM by
+ * hibernate_teardown().  Returns KERN_RESOURCE_SHORTAGE if either page list
+ * cannot be allocated; no failures are possible after that point.
+ */
+kern_return_t 
+hibernate_setup(IOHibernateImageHeader * header,
+                        uint32_t free_page_ratio,
+                        uint32_t free_page_time,
+                       hibernate_page_list_t ** page_list_ret,
+                       hibernate_page_list_t ** page_list_wired_ret,
+                        boolean_t * encryptedswap)
+{
+    hibernate_page_list_t * page_list = NULL;
+    hibernate_page_list_t * page_list_wired = NULL;
+    vm_page_t              m;
+    uint32_t               i, gobble_count;
+
+    *page_list_ret       = NULL;
+    *page_list_wired_ret = NULL;
+
+
+    page_list = hibernate_page_list_allocate();
+    if (!page_list)
+        return (KERN_RESOURCE_SHORTAGE);
+    page_list_wired = hibernate_page_list_allocate();
+    if (!page_list_wired)
+    {
+        kfree(page_list, page_list->list_size);
+        return (KERN_RESOURCE_SHORTAGE);
+    }
+
+    *encryptedswap = dp_encryption;
+
+    // pages we could force out to reduce hibernate image size
+    gobble_count = (((uint64_t) page_list->page_count) * ((uint64_t) free_page_ratio)) / 100;
+
+    // no failures hereafter
+
+    hibernate_processor_setup(header);
+
+    HIBLOG("hibernate_alloc_pages flags %08lx, gobbling %d pages\n", 
+           header->processorFlags, gobble_count);
+
+    if (gobble_count)
+    {
+        uint64_t start, end, timeout, nsec;
+        clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
+        clock_get_uptime(&start);
+    
+        for (i = 0; i < gobble_count; i++)
+        {
+            /* wait for a free page until the time budget runs out */
+            while (VM_PAGE_NULL == (m = vm_page_grab()))
+            {
+                clock_get_uptime(&end);
+                if (end >= timeout)
+                    break;
+                VM_PAGE_WAIT();
+            }
+            if (!m)
+                break;                  /* deadline hit with no page available */
+            m->busy = FALSE;
+            vm_page_gobble(m);
+    
+            /* push onto the singly-linked gobble queue for teardown */
+            m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
+            hibernate_gobble_queue = m;
+        }
+    
+        clock_get_uptime(&end);
+        absolutetime_to_nanoseconds(end - start, &nsec);
+        HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
+    }
+
+    *page_list_ret       = page_list;
+    *page_list_wired_ret = page_list_wired;
+
+    return (KERN_SUCCESS);
+}
+
+/*
+ * Undo hibernate_setup(): return all gobbled pages to the free list and
+ * release the two page-list allocations (either may be NULL).
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t 
+hibernate_teardown(hibernate_page_list_t * page_list,
+                    hibernate_page_list_t * page_list_wired)
+{
+    vm_page_t m, next;
+    uint32_t  count = 0;
+
+    /* drain the singly-linked gobble queue built by hibernate_setup() */
+    m = (vm_page_t) hibernate_gobble_queue;
+    while(m)
+    {
+        next = (vm_page_t) m->pageq.next;
+        vm_page_free(m);
+        count++;
+        m = next;
+    }
+    hibernate_gobble_queue = VM_PAGE_NULL;
+    
+    if (count)
+        HIBLOG("Freed %d pages\n", count);
+
+    if (page_list)
+        kfree(page_list, page_list->list_size);
+    if (page_list_wired)
+        kfree(page_list_wired, page_list_wired->list_size);
+
+    return (KERN_SUCCESS);
+}
+
index b29a1ee40484e5ece4c637531ab57599b877bae2..122d2ae6d332e4d55b3a94af5da258b95eb86be5 100644 (file)
@@ -82,6 +82,7 @@
 #include <kern/task.h>
 #include <kern/thread.h>
 
+#include <IOKit/IOHibernatePrivate.h>
 
 /*
  *     Exported variables:
@@ -266,6 +267,8 @@ processor_doshutdown(
                simple_unlock(&pset->sched_lock);
                processor_unlock(processor);
 
+               hibernate_vm_lock();
+
                processor_lock(processor);
                simple_lock(&pset->sched_lock);
        }
@@ -276,6 +279,8 @@ processor_doshutdown(
        simple_unlock(&pset->sched_lock);
        processor_unlock(processor);
 
+       if (pcount == 1)
+               hibernate_vm_unlock();
 
        /*
         *      Continue processor shutdown in shutdown context.
index 01b353cc1ea414849c3ae961a295d43a3fd6c72d..aaece698a70baf3bdaa5028468f59effe968a206 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
 
 #include <sys/kdebug.h>
 
+#ifdef __ppc__
+#include <ppc/pms.h>
+#endif
+
 #define                DEFAULT_PREEMPTION_RATE         100             /* (1/s) */
 int                    default_preemption_rate = DEFAULT_PREEMPTION_RATE;
 
@@ -2523,7 +2527,13 @@ idle_thread(void)
        lcount = &processor->runq.count;
        gcount = &processor->processor_set->runq.count;
 
-       (void)splsched();
+
+       (void)splsched();                       /* Turn interruptions off */
+
+#ifdef __ppc__
+       pmsDown();                                      /* Step power down.  Note: interruptions must be disabled for this call */
+#endif
+
        while ( (*threadp == THREAD_NULL)                               &&
                                (*gcount == 0) && (*lcount == 0)        ) {
 
@@ -2546,6 +2556,10 @@ idle_thread(void)
        pset = processor->processor_set;
        simple_lock(&pset->sched_lock);
 
+#ifdef __ppc__
+       pmsStep(0);                                     /* Step up out of idle power, may start timer for next step */
+#endif
+
        state = processor->state;
        if (state == PROCESSOR_DISPATCHING) {
                /*
index 3fc7c61066ed483aa2ae05d182174775f34249ba..b1aedddae623c44f1276a05a3646855dbd5913e2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -65,7 +65,7 @@ ppc_trap(CHUDCall,0x6009)
 ppc_trap(ppcNull,0x600A)       
 ppc_trap(perfmon_control,0x600B)       
 ppc_trap(ppcNullinst,0x600C)   
-
+ppc_trap(pmsCntrl,0x600D)      
 #endif /* _MACH_SYSCALL_SW_H_ */
 
 #endif /* _MACH_PPC_SYSCALL_SW_H_ */
index 7368591220b1af3f7abcba4aaf4551f0a4bcc324..d13104750993e7c3e63b2ab95b4d596ce3eac6a1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -2347,7 +2347,6 @@ stsslbm:  slbmfee r6,r5
                        cmplwi  r5,64
                        addi    r4,r4,16
                        blt             stsslbm
-
                        
                        mtmsr   r0
                        isync
index b978cc6765946ebac82b1419e09514724abc01b8..5649d653bac65f6d6bcb48a546a6b7b1b441d227 100644 (file)
@@ -20,6 +20,7 @@ EXPORT_ONLY_FILES =           \
        machine_routines.h      \
        mappings.h              \
        savearea.h              \
+       pms.h                   \
        simple_lock.h
 
 INSTALL_MD_DIR = ppc
index 5d5f75f5136aa56dc4ca16796e6af1328e64cfcf..e8beeb20d1ae671aab0853df920811294beaf123 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -54,7 +54,7 @@ PPCcallEnt    PPCcalls[] = {
        PPCcall(ppcNull),                               /* 0x600A Null PPC syscall */
        PPCcall(perfmon_control),               /* 0x600B performance monitor */
        PPCcall(ppcNullinst),                   /* 0x600C Instrumented Null PPC syscall */
-       PPCcall(dis),                                   /* 0x600D disabled */
+       PPCcall(pmsCntrl),                              /* 0x600D Power Management Stepper */
        PPCcall(dis),                                   /* 0x600E disabled */
        PPCcall(dis),                                   /* 0x600F disabled */
        PPCcall(dis),                                   /* 0x6010 disabled */
index 7145a2493fcb899f75d29db94d8091a32adb3825..4f703ebba67719dc0851a30c165cfec945013de4 100644 (file)
@@ -172,15 +172,20 @@ EXT(dbspecrs):
  *             Boot processor Interrupt and debug stacks go here.
  */
 
+                /* in the __HIB section since the hibernate restore code uses this stack. */
                .section __HIB, __data
 
                .align  PPC_PGSHIFT
      
                .globl  EXT(intstack)
 EXT(intstack):
+               .globl  EXT(gIOHibernateRestoreStack)
+EXT(gIOHibernateRestoreStack):
 
                .set    .,.+INTSTACK_SIZE
 
+               .globl  EXT(gIOHibernateRestoreStackEnd)
+EXT(gIOHibernateRestoreStackEnd):
 
                 /* back to the regular __DATA section. */
 
index 842167e3e4acaf3076fbd7bb83b291487fee336c..320af7d1fb821888b93fb2229ebeca6e742a7c83 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
 #define busFIR 0x0A00
 #define busFIRrst 0x0A10
 
-; PowerTune
-#define PowerTuneControlReg    0x0AA001
-#define PowerTuneStatusReg     0x408001
-
 ;      HID4
 #define hid4RMCI 23
 #define hid4FAlgn 24
 #define GUSMstgttim 0x00000038
 #define GUSMstgttoff 0x00000004
 
+/* PowerTune */
+#define PowerTuneControlReg    0x0AA001
+#define PowerTuneStatusReg     0x408001
+
 /* Tags are placed before Immediately Following Code (IFC) for the debugger
  * to be able to deduce where to find various registers when backtracing
  * 
index f70af13a4b971b5ada87e7f695e929fccbe4b03b..cd42fdd121ae938e4270402c0a54c1f13694b58d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -33,6 +33,7 @@
 #include <kern/processor.h>
 
 #include <vm/pmap.h>
+#include <IOKit/IOHibernatePrivate.h>
 
 #include <ppc/proc_reg.h>
 #include <ppc/misc_protos.h>
@@ -47,6 +48,8 @@
 #include <ppc/Diagnostics.h>
 #include <ppc/trap.h>
 #include <ppc/machine_cpu.h>
+#include <ppc/pms.h>
+#include <ppc/rtclock.h>
 
 decl_mutex_data(static,ppt_lock);
 
@@ -117,6 +120,8 @@ cpu_init(
                mttbu(proc_info->save_tbu);
                mttb(proc_info->save_tbl);
        }
+       
+       setTimerReq();                          /* Now that the time base is sort of correct, request the next timer pop */
 
        proc_info->cpu_type = CPU_TYPE_POWERPC;
        proc_info->cpu_subtype = (cpu_subtype_t)proc_info->pf.rptdProc;
@@ -150,6 +155,24 @@ cpu_machine_init(
 
        PE_cpu_machine_init(proc_info->cpu_id, !(proc_info->cpu_flags & BootDone));
 
+       if (proc_info->hibernate) {
+               uint32_t        tbu, tbl;
+
+               do {
+                       tbu = mftbu();
+                       tbl = mftb();
+               } while (mftbu() != tbu);
+
+           proc_info->hibernate = 0;
+           hibernate_machine_init();
+
+               // hibernate_machine_init() could take minutes and we don't want timeouts
+               // to fire as soon as scheduling starts. Reset timebase so it appears
+               // no time has elapsed, as it would for regular sleep.
+               mttb(0);
+               mttbu(tbu);
+               mttb(tbl);
+       }
 
        if (proc_info != mproc_info) {
        while (!((mproc_info->cpu_flags) & SignalReady)) 
@@ -167,6 +190,7 @@ cpu_machine_init(
                        thread_wakeup(&proc_info->cpu_flags);
                }
                simple_unlock(&SignalReadyLock);
+               pmsPark();                                              /* Timers should be cool now, park the power management stepper */
        }
 }
 
@@ -183,30 +207,29 @@ cpu_per_proc_alloc(
        void                    *interrupt_stack=0;
        void                    *debugger_stack=0;
 
-       if ((proc_info = (struct per_proc_info*)kalloc(PAGE_SIZE)) == (struct per_proc_info*)0)
-               return (struct per_proc_info *)NULL;;
+       if ((proc_info = (struct per_proc_info*)kalloc(sizeof(struct per_proc_info))) == (struct per_proc_info*)0)
+               return (struct per_proc_info *)NULL;
        if ((interrupt_stack = kalloc(INTSTACK_SIZE)) == 0) {
-               kfree(proc_info, PAGE_SIZE);
-               return (struct per_proc_info *)NULL;;
+               kfree(proc_info, sizeof(struct per_proc_info));
+               return (struct per_proc_info *)NULL;
        }
-#if     MACH_KDP || MACH_KDB
+
        if ((debugger_stack = kalloc(KERNEL_STACK_SIZE)) == 0) {
-               kfree(proc_info, PAGE_SIZE);
+               kfree(proc_info, sizeof(struct per_proc_info));
                kfree(interrupt_stack, INTSTACK_SIZE);
-               return (struct per_proc_info *)NULL;;
+               return (struct per_proc_info *)NULL;
        }
-#endif
 
        bzero((void *)proc_info, sizeof(struct per_proc_info));
 
+       proc_info->pp2ndPage = (addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)proc_info + 0x1000) << PAGE_SHIFT;       /* Set physical address of the second page */
        proc_info->next_savearea = (uint64_t)save_get_init();
        proc_info->pf = BootProcInfo.pf;
        proc_info->istackptr = (vm_offset_t)interrupt_stack + INTSTACK_SIZE - FM_SIZE;
        proc_info->intstack_top_ss = proc_info->istackptr;
-#if     MACH_KDP || MACH_KDB
        proc_info->debstackptr = (vm_offset_t)debugger_stack + KERNEL_STACK_SIZE - FM_SIZE;
        proc_info->debstack_top_ss = proc_info->debstackptr;
-#endif  /* MACH_KDP || MACH_KDB */
+
        return proc_info;
 
 }
@@ -225,7 +248,7 @@ cpu_per_proc_free(
                return;
        kfree((void *)(proc_info->intstack_top_ss - INTSTACK_SIZE + FM_SIZE), INTSTACK_SIZE);
        kfree((void *)(proc_info->debstack_top_ss -  KERNEL_STACK_SIZE + FM_SIZE), KERNEL_STACK_SIZE);
-       kfree((void *)proc_info, PAGE_SIZE);
+       kfree((void *)proc_info, sizeof(struct per_proc_info));                 /* Release the per_proc */
 }
 
 
@@ -248,7 +271,7 @@ cpu_per_proc_register(
        cpu = real_ncpus;
        proc_info->cpu_number = cpu;
        PerProcTable[cpu].ppe_vaddr = proc_info;
-       PerProcTable[cpu].ppe_paddr = ((addr64_t)pmap_find_phys(kernel_pmap, (vm_offset_t)proc_info)) << PAGE_SHIFT;
+       PerProcTable[cpu].ppe_paddr = (addr64_t)pmap_find_phys(kernel_pmap, (addr64_t)proc_info) << PAGE_SHIFT;
        eieio();
        real_ncpus++;
        mutex_unlock(&ppt_lock);
@@ -281,7 +304,13 @@ cpu_start(
                proc_info->interrupts_enabled = 0;
                proc_info->pending_ast = AST_NONE;
                proc_info->istackptr = proc_info->intstack_top_ss;
-               proc_info->rtcPop = 0xFFFFFFFFFFFFFFFFULL;
+               proc_info->rtcPop = EndOfAllTime;
+               proc_info->FPU_owner = 0;
+               proc_info->VMX_owner = 0;
+               proc_info->pms.pmsStamp = 0;                                                                    /* Dummy transition time */
+               proc_info->pms.pmsPop = EndOfAllTime;                                                   /* Set the pop way into the future */
+               proc_info->pms.pmsState = pmsParked;                                                    /* Park the stepper */
+               proc_info->pms.pmsCSetCmd = pmsCInit;                                                   /* Set dummy initial hardware state */
                mp = (mapping_t *)(&proc_info->ppUMWmp);
                mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1;
                mp->mpSpace = invalSpace;
@@ -697,6 +726,8 @@ cpu_sync_timebase(
        while (*(volatile int *)&(syncClkSpot.done) == FALSE)
                continue;
 
+       setTimerReq();                                                                  /* Start the timer */
+       
        (void)ml_set_interrupts_enabled(intr);
 }
 
index 9bf612b488d8cdda6b1508920d4a897e5f22392e..72454fed557a2d9ff28289cfb1e9ab7ff8be2804 100644 (file)
@@ -440,7 +440,7 @@ void db_check_pmaps(db_expr_t addr, int have_addr, db_expr_t count, char * modif
 /*
  *             Displays iokit junk
  *
-  *            dp
+  *            di
  */
 
 void db_piokjunk(void);
index 1994d53a83bbee583e64e065c04faec8d5e98971..41ccba26b5dcf555f6e9f1405f3b1a0deb473265 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -44,6 +44,8 @@
 #include <pexpert/pexpert.h>
 #include <IOKit/IOInterrupts.h>
 #include <ppc/machine_routines.h>
+#include <ppc/pms.h>
+#include <ppc/rtclock.h>
 
 /*     Per processor CPU features */
 #pragma pack(4)                                                        /* Make sure the structure stays as we defined it */
@@ -129,12 +131,10 @@ struct procFeatures {
        unsigned int    pfPowerModes;           /* 0x07C */
 #define pmDPLLVmin             0x00010000
 #define pmDPLLVminb            15
-#define pmPowerTune            0x00000004
-#define pmPowerTuneb   29
+#define pmType                 0x000000FF
+#define pmPowerTune            0x00000003
 #define pmDFS                  0x00000002
-#define pmDFSb                 30
 #define pmDualPLL              0x00000001
-#define pmDualPLLb             31
        unsigned int    pfPowerTune0;           /* 0x080 */
        unsigned int    pfPowerTune1;           /* 0x084 */
        unsigned int    rsrvd88[6];                     /* 0x088 */
@@ -344,13 +344,7 @@ struct per_proc_info {
        void *                  pp_cbfr;
        void *                  pp_chud;
        uint64_t                rtclock_tick_deadline;
-       struct rtclock_timer {
-               uint64_t                deadline;
-               uint32_t
-               /*boolean_t*/   is_set:1,
-                                               has_expired:1,
-                                               :0;
-       }                               rtclock_timer;
+       rtclock_timer_t rtclock_timer;
        unsigned int    ppbbTaskEnv;            /* BlueBox Task Environment */
     
        /* PPC cache line boundary here - 160 */
@@ -377,7 +371,7 @@ struct per_proc_info {
        ppnum_t                 VMMareaPhys;            /* vmm state page physical addr */
        unsigned int    VMMXAFlgs;                      /* vmm extended flags */
        unsigned int    FAMintercept;           /* vmm FAM Exceptions to intercept */
-       unsigned int    ppinfo_reserved1;
+       unsigned int    hibernate;                      /* wake from hibernate */
        uint32_t                save_tbl;
        uint32_t                save_tbu;
        
@@ -518,10 +512,28 @@ struct per_proc_info {
 
        hwCtrs                  hwCtr;                                  /* Hardware exception counters */
 /*                                                                - A00 */
-
-       unsigned int    processor[384];                 /* processor structure */
+       addr64_t                pp2ndPage;                              /* Physical address of the second page of the per_proc */
+       uint32_t                pprsvd0A08[6];
+/*                                                                - A20 */
+       pmsd                    pms;                                    /* Power Management Stepper control */
+       unsigned int    pprsvd0A40[368];                /* Reserved out to next page boundary */
 /*                                                                - 1000 */
 
+/*
+ *     This is the start of the second page of the per_proc block.  Because we do not
+ *     allocate physically contiguous memory, it may be physically discontiguous from the
+ *     first page.  Currently there isn't anything here that is accessed translation off,
+ *     but if we need it, pp2ndPage contains the physical address.
+ *
+ *     Note that the boot processor's per_proc is statically allocated, so it will be a
+ *     V=R contiguous area.  That allows access during early boot before we turn translation on
+ *     for the first time.
+ */
+
+       unsigned int    processor[384];                 /* processor structure */
+       
+       unsigned int    pprsvd1[640];                   /* Reserved out to next page boundary */
+/*                                                                - 2000 */
 
 };
 
@@ -529,7 +541,7 @@ struct per_proc_info {
 
 
 /*
- * Macro to conver a processor_t processor to its attached per_proc_info_t per_proc
+ * Macro to convert a processor_t processor to its attached per_proc_info_t per_proc
  */
 #define PROCESSOR_TO_PER_PROC(x)                                                                               \
                        ((struct per_proc_info*)((unsigned int)(x)                                      \
@@ -540,9 +552,9 @@ extern struct per_proc_info BootProcInfo;
 #define        MAX_CPUS        256
 
 struct per_proc_entry {
-       addr64_t                                ppe_paddr;
+       addr64_t                                ppe_paddr;              /* Physical address of the first page of per_proc, 2nd is in pp2ndPage. */
        unsigned int                    ppe_pad4[1];
-       struct per_proc_info    *ppe_vaddr;
+       struct per_proc_info    *ppe_vaddr;             /* Virtual address of the per_proc */
 };
 
 extern struct per_proc_entry PerProcTable[MAX_CPUS-1];
@@ -550,7 +562,7 @@ extern      struct per_proc_entry PerProcTable[MAX_CPUS-1];
 
 extern char *trap_type[];
 
-#endif /* ndef ASSEMBLER */                                                                                    /* with this savearea should be redriven */
+#endif /* ndef ASSEMBLER */                                    /* with this savearea should be redriven */
 
 /* cpu_flags defs */
 #define SIGPactive     0x8000
index 46bbd060cfc8336dec2f416c6b649f10f7077df5..a30f7da5ee6240d306ddb4450272c59d06b1d835 100644 (file)
@@ -68,6 +68,8 @@
 #include <ppc/boot.h>
 #include <ppc/lowglobals.h>
 
+/* Undefine standard offsetof because it is different than the one here */
+#undef offsetof
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE)0)->MEMBER)
 
 #define DECLARE(SYM,VAL) \
@@ -129,6 +131,7 @@ int main(int argc, char *argv[])
        DECLARE("PP_INTSTACK_TOP_SS",   offsetof(struct per_proc_info *, intstack_top_ss));
        DECLARE("PP_DEBSTACKPTR",               offsetof(struct per_proc_info *, debstackptr));
        DECLARE("PP_DEBSTACK_TOP_SS",   offsetof(struct per_proc_info *, debstack_top_ss));
+       DECLARE("PP_HIBERNATE", offsetof(struct per_proc_info *, hibernate));
        DECLARE("FPUowner",                             offsetof(struct per_proc_info *, FPU_owner));
        DECLARE("VMXowner",                             offsetof(struct per_proc_info *, VMX_owner));
        DECLARE("holdQFret",                    offsetof(struct per_proc_info *, holdQFret));
@@ -225,14 +228,12 @@ int main(int argc, char *argv[])
        DECLARE("pfPowerModes",                 offsetof(struct per_proc_info *, pf.pfPowerModes));
        DECLARE("pfPowerTune0",                 offsetof(struct per_proc_info *, pf.pfPowerTune0));
        DECLARE("pfPowerTune1",                 offsetof(struct per_proc_info *, pf.pfPowerTune1));
+       DECLARE("pmType",                               pmType);
        DECLARE("pmDPLLVmin",                   pmDPLLVmin);
        DECLARE("pmDPLLVminb",                  pmDPLLVminb);
        DECLARE("pmPowerTune",                  pmPowerTune);
-       DECLARE("pmPowerTuneb",                 pmPowerTuneb);
        DECLARE("pmDFS",                                pmDFS);
-       DECLARE("pmDFSb",                               pmDFSb);
        DECLARE("pmDualPLL",                    pmDualPLL);
-       DECLARE("pmDualPLLb",                   pmDualPLLb);
        DECLARE("pfPTEG",                               offsetof(struct per_proc_info *, pf.pfPTEG));
        DECLARE("pfMaxVAddr",                   offsetof(struct per_proc_info *, pf.pfMaxVAddr));
        DECLARE("pfMaxPAddr",                   offsetof(struct per_proc_info *, pf.pfMaxPAddr));
@@ -1367,6 +1368,7 @@ int main(int argc, char *argv[])
 
        DECLARE("lgVerCode",                    offsetof(struct lowglo *, lgVerCode));
        DECLARE("lgPPStart",                    offsetof(struct lowglo *, lgPPStart));
+       DECLARE("maxDec",                               offsetof(struct lowglo *, lgMaxDec));
        DECLARE("mckFlags",                     offsetof(struct lowglo *, lgMckFlags));
        DECLARE("lgPMWvaddr",                   offsetof(struct lowglo *, lgPMWvaddr));
        DECLARE("lgUMWvaddr",                   offsetof(struct lowglo *, lgUMWvaddr));
diff --git a/osfmk/ppc/hibernate_ppc.c b/osfmk/ppc/hibernate_ppc.c
new file mode 100644 (file)
index 0000000..7bf11a5
--- /dev/null
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <kern/kern_types.h>
+#include <kern/kalloc.h>
+#include <kern/machine.h>
+#include <kern/misc_protos.h>
+#include <kern/thread.h>
+#include <kern/processor.h>
+#include <mach/machine.h>
+#include <mach/processor_info.h>
+#include <mach/mach_types.h>
+#include <ppc/proc_reg.h>
+#include <ppc/misc_protos.h>
+#include <ppc/machine_routines.h>
+#include <ppc/machine_cpu.h>
+#include <ppc/exception.h>
+#include <ppc/asm.h>
+#include <ppc/hw_perfmon.h>
+#include <pexpert/pexpert.h>
+#include <kern/cpu_data.h>
+#include <ppc/mappings.h>
+#include <ppc/Diagnostics.h>
+#include <ppc/trap.h>
+#include <ppc/mem.h>
+#include <IOKit/IOPlatformExpert.h>
+#define KERNEL
+
+#include <IOKit/IOHibernatePrivate.h>
+#include <vm/vm_page.h>
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+hibernate_page_list_t *
+hibernate_page_list_allocate(void)
+{
+    vm_size_t               size;
+    uint32_t                bank;
+    uint32_t               pages, page_count;
+    hibernate_page_list_t * list;
+    hibernate_bitmap_t *    bitmap;
+
+    page_count = 0;
+    size = sizeof(hibernate_page_list_t);
+
+    for (bank = 0; bank < (uint32_t) pmap_mem_regions_count; bank++)
+    {
+       size += sizeof(hibernate_bitmap_t);
+       pages = pmap_mem_regions[bank].mrEnd + 1 - pmap_mem_regions[bank].mrStart;
+       page_count += pages;
+       size += ((pages + 31) >> 5) * sizeof(uint32_t);
+    }
+
+    list = kalloc(size);
+    if (!list)
+       return (list);
+       
+    list->list_size  = size;
+    list->page_count = page_count;
+    list->bank_count = pmap_mem_regions_count;
+
+    bitmap = &list->bank_bitmap[0];
+    for (bank = 0; bank < list->bank_count; bank++)
+    {
+       bitmap->first_page  =  pmap_mem_regions[bank].mrStart;
+       bitmap->last_page   =  pmap_mem_regions[bank].mrEnd;
+       bitmap->bitmapwords = (pmap_mem_regions[bank].mrEnd + 1
+                            - pmap_mem_regions[bank].mrStart + 31) >> 5;
+
+       bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
+    }
+    return (list);
+}
+
+void
+hibernate_page_list_setall_machine(hibernate_page_list_t * page_list,
+                                    hibernate_page_list_t * page_list_wired,
+                                    uint32_t * pagesOut)
+{
+    uint32_t page, count, PCAsize;
+
+    /* Get total size of PCA table */
+    PCAsize = round_page((hash_table_size / PerProcTable[0].ppe_vaddr->pf.pfPTEG) 
+                          * sizeof(PCA_t));
+
+    page = atop_64(hash_table_base - PCAsize);
+    count = atop_64(hash_table_size + PCAsize);
+
+    hibernate_set_page_state(page_list, page_list_wired, page, count, 0);
+    pagesOut -= count;
+
+    HIBLOG("removed hash, pca: %d pages\n", count);
+
+    save_snapshot();
+}
+
+kern_return_t 
+hibernate_processor_setup(IOHibernateImageHeader * header)
+{
+    header->processorFlags = PerProcTable[0].ppe_vaddr->pf.Available;
+
+    PerProcTable[0].ppe_vaddr->hibernate = 1;
+
+    return (KERN_SUCCESS);
+}
+
+void
+hibernate_vm_lock(void)
+{
+    if (getPerProc()->hibernate)
+    {
+        vm_page_lock_queues();
+        mutex_lock(&vm_page_queue_free_lock);
+    }
+}
+
+void
+hibernate_vm_unlock(void)
+{
+    if (getPerProc()->hibernate)
+    {
+        mutex_unlock(&vm_page_queue_free_lock);
+        vm_page_unlock_queues();
+    }
+}
+
+void ml_ppc_sleep(void)
+{
+    struct per_proc_info *proc_info;
+    boolean_t dohalt;
+
+    proc_info = getPerProc();
+    if (!proc_info->hibernate)
+    {
+       ml_ppc_do_sleep();
+       return;
+    }
+
+    {
+        uint64_t start, end, nsec;
+
+       HIBLOG("mapping_hibernate_flush start\n");
+       clock_get_uptime(&start);
+
+       mapping_hibernate_flush();
+
+       clock_get_uptime(&end);
+       absolutetime_to_nanoseconds(end - start, &nsec);
+       HIBLOG("mapping_hibernate_flush time: %qd ms\n", nsec / 1000000ULL);
+    }
+
+    dohalt = hibernate_write_image();
+
+    if (dohalt)
+    {
+       // off
+       HIBLOG("power off\n");
+       if (PE_halt_restart) 
+           (*PE_halt_restart)(kPEHaltCPU);
+    }
+    else
+    {
+       // sleep
+       HIBLOG("sleep\n");
+
+       // should we come back via regular wake, set the state in memory.
+       PerProcTable[0].ppe_vaddr->hibernate = 0;
+
+       PE_cpu_machine_quiesce(proc_info->cpu_id);
+       return;
+    }
+}
+
diff --git a/osfmk/ppc/hibernate_restore.s b/osfmk/ppc/hibernate_restore.s
new file mode 100644 (file)
index 0000000..1d46fe2
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#include <ppc/asm.h>
+#include <ppc/proc_reg.h>
+#include <assym.s>
+
+/*
+This code is linked into the kernel but part of the "__HIB" section, which means
+its used by code running in the special context of restoring the kernel text and data
+from the hibernation image read by the booter. hibernate_kernel_entrypoint() and everything
+it calls or references (ie. hibernate_restore_phys_page())
+needs to be careful to only touch memory also in the "__HIB" section.
+*/
+
+/*
+void 
+hibernate_restore_phys_page(uint64_t src, uint64_t dst, uint32_t len, uint32_t procFlags);
+*/
+
+                       .align  5
+                       .globl  EXT(hibernate_restore_phys_page)
+                       .globl  EXT(hibernate_machine_entrypoint)
+
+LEXT(hibernate_restore_phys_page)
+
+       andi.           r0, r8, pf64Bit
+       bne             hibernate_restore_phys_page64
+
+        srwi           r10,r7,5                                ; r10 <- 32-byte chunks to xfer
+        mtctr          r10
+       cmpwi           r4, 0
+       beq             hibernate_restore_phys_pageFlush
+
+hibernate_restore_phys_pageCopy:
+        lwz            r0,0(r4)
+        lwz            r2,4(r4)
+        lwz            r7,8(r4)
+        lwz            r8,12(r4)
+        lwz            r9,16(r4)
+        lwz            r10,20(r4)
+        lwz            r11,24(r4)
+        lwz            r12,28(r4)
+
+        dcbz           0,r6                                    ; avoid prefetch of next cache line
+        stw            r0,0(r6)
+        stw            r2,4(r6)
+        stw            r7,8(r6)
+        stw            r8,12(r6)
+        stw            r9,16(r6)
+        stw            r10,20(r6)
+        stw            r11,24(r6)
+        stw            r12,28(r6)
+        
+       dcbf            0, r6
+       sync
+       icbi            0, r6
+       isync
+       sync
+
+        addi           r4,r4,32
+        addi           r6,r6,32
+
+        bdnz           hibernate_restore_phys_pageCopy         ; loop if more chunks
+        blr
+
+hibernate_restore_phys_pageFlush:
+       dcbf            0, r6
+       sync
+       icbi            0, r6
+       isync
+       sync
+
+        addi           r6,r6,32
+        bdnz           hibernate_restore_phys_pageFlush                ; loop if more chunks
+        blr
+
+
+hibernate_restore_phys_page64:
+       rlwinm          r3,r3,0,1,0                     ; Duplicate high half of long long paddr into top of reg
+       rlwimi          r3,r4,0,0,31                    ; Combine bottom of long long to full 64-bits
+       rlwinm          r4,r5,0,1,0                     ; Duplicate high half of long long paddr into top of reg
+       rlwimi          r4,r6,0,0,31                    ; Combine bottom of long long to full 64-bits
+
+       mfmsr           r9                              ; Get the MSR
+       li              r0,1                            ; Note - we use this in a couple places below
+       rldimi          r9,r0,63,MSR_SF_BIT             ; set SF on in MSR we will copy with
+       mtmsrd          r9                              ; turn 64-bit addressing on
+       isync                                           ; wait for it to happen
+
+        srwi   r10,r7,7                                ; r10 <- 128-byte chunks to xfer
+        mtctr  r10
+       cmpdi   r3, 0
+       beq     hibernate_restore_phys_page64Flush
+
+hibernate_restore_phys_page64Copy:
+        ld             r0,0(r3)
+        ld             r2,8(r3)
+        ld             r7,16(r3)
+        ld             r8,24(r3)
+        ld             r9,32(r3)
+        ld             r10,40(r3)
+        ld             r11,48(r3)
+        ld             r12,56(r3)
+
+        dcbz128                0,r4                            ; avoid prefetch of next cache line
+        std            r0,0(r4)
+        std            r2,8(r4)
+        std            r7,16(r4)
+        std            r8,24(r4)
+        std            r9,32(r4)
+        std            r10,40(r4)
+        std            r11,48(r4)
+        std            r12,56(r4)
+        
+        ld             r0,64(r3)                       ; load 2nd half of chunk
+        ld             r2,72(r3)
+        ld             r7,80(r3)
+        ld             r8,88(r3)
+        ld             r9,96(r3)
+        ld             r10,104(r3)
+        ld             r11,112(r3)
+        ld             r12,120(r3)
+
+        std            r0,64(r4)
+        std            r2,72(r4)
+        std            r7,80(r4)
+        std            r8,88(r4)
+        std            r9,96(r4)
+        std            r10,104(r4)
+        std            r11,112(r4)
+        std            r12,120(r4)
+
+       dcbf            0, r4
+       sync
+       icbi            0, r4
+       isync
+       sync
+
+        addi           r3,r3,128
+        addi           r4,r4,128
+
+        bdnz           hibernate_restore_phys_page64Copy               ; loop if more chunks
+
+
+hibernate_restore_phys_page64Done:
+       mfmsr           r9                              ; Get the MSR we used to copy
+       rldicl          r9,r9,0,MSR_SF_BIT+1            ; clear SF
+        mtmsrd         r9                              ; turn 64-bit mode off
+       isync                                           ; wait for it to happen
+        blr
+
+hibernate_restore_phys_page64Flush:
+       dcbf            0, r4
+       sync
+       icbi            0, r4
+       isync
+       sync
+
+        addi           r4,r4,128
+
+        bdnz           hibernate_restore_phys_page64Flush              ; loop if more chunks
+       b               hibernate_restore_phys_page64Done
+
+LEXT(hibernate_machine_entrypoint)
+        b               EXT(hibernate_kernel_entrypoint)
+
index f2099e2a74416530efd8e6e0d4f66aa0ccddef3f..4c66797de05f8eb5904771f462133ad8960d84f5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -434,7 +434,7 @@ noassist:   cmplwi  r15,0x7000                                              ; Do we have a fast path trap?
 ;                      and the savearea/pcb as the first parameter.
 ;                      It is up to the callee to enable interruptions if
 ;                      they should be.  We are in a state here where
-;                      both interrupts and preemption is ok, but because we could
+;                      both interrupts and preemption are ok, but because we could
 ;                      be calling diagnostic code we will not enable.
 ;                      
 ;                      Also, the callee is responsible for finding any parameters
@@ -504,11 +504,11 @@ LEXT(ppcscret)
  */
        
 ksystrace:     
-                       mr              r4,r30                                          ; Pass in saved state
+                       mr              r4,r30                                                  ; Pass in saved state
                        bl      EXT(syscall_trace)
                        
-                       cmplw   r31,r29                                         ; Is this syscall in the table? 
-                       add             r31,r27,r28                                     ; Point right to the syscall table entry
+                       cmplw   r31,r29                                                 ; Is this syscall in the table? 
+                       add             r31,r27,r28                                             ; Point right to the syscall table entry
 
                        bge-    .L_call_server_syscall_exception        ; The syscall number is invalid
        
@@ -520,20 +520,20 @@ ksystrace:
 
 .L_ksystrace_munge:
                        cmplwi  r0,0                                                    ; do we have a munger to call?
-                       mtctr   r0                                                      ; Set the function call address
-                       addi    r3,r30,saver3                                           ; Pointer to args from save area
-                       addi    r4,r1,FM_ARG0+ARG_SIZE                          ; Pointer for munged args
+                       mtctr   r0                                                              ; Set the function call address
+                       addi    r3,r30,saver3                                   ; Pointer to args from save area
+                       addi    r4,r1,FM_ARG0+ARG_SIZE                  ; Pointer for munged args
                        beq--   .L_ksystrace_trapcall                   ; just make the trap call
-                       bctrl                                                           ; Call the munge function
+                       bctrl                                                                   ; Call the munge function
 
 .L_ksystrace_trapcall:         
-                       lwz     r0,MACH_TRAP_FUNCTION(r31)                      ; Pick up the function address
-                       mtctr   r0                                                      ; Set the function call address
-                       addi    r3,r1,FM_ARG0+ARG_SIZE                          ; Pointer to munged args
+                       lwz             r0,MACH_TRAP_FUNCTION(r31)              ; Pick up the function address
+                       mtctr   r0                                                              ; Set the function call address
+                       addi    r3,r1,FM_ARG0+ARG_SIZE                  ; Pointer to munged args
                        bctrl
 
-                       mr              r4,r30                                          ; Pass in the savearea
-                       bl              EXT(syscall_trace_end)          ; Trace the exit of the system call     
+                       mr              r4,r30                                                  ; Pass in the savearea
+                       bl              EXT(syscall_trace_end)                  ; Trace the exit of the system call     
                        b               .L_mach_return
 
        
@@ -553,27 +553,27 @@ ksystrace:
 ; Call a function that can print out our syscall info 
 ; Note that we don t care about any volatiles yet
 ;
-                       lwz             r10,ACT_TASK(r13)                       ; Get our task 
+                       lwz             r10,ACT_TASK(r13)                               ; Get our task 
                        lwz             r0,saver0+4(r30)
-                       lis             r8,hi16(EXT(kdebug_enable))     ; Get top of kdebug_enable 
+                       lis             r8,hi16(EXT(kdebug_enable))             ; Get top of kdebug_enable 
                        lis             r28,hi16(EXT(mach_trap_table))  ; Get address of table
                        ori             r8,r8,lo16(EXT(kdebug_enable))  ; Get bottom of kdebug_enable 
-                       lwz             r8,0(r8)                                        ; Get kdebug_enable 
+                       lwz             r8,0(r8)                                                ; Get kdebug_enable 
 
-                       lwz             r7,TASK_SYSCALLS_MACH(r10)      ; Get the current count
-                       neg             r31,r0                                          ; Make this positive
-                       mr              r3,r31                                          ; save it
-                       slwi    r27,r3,4                                        ; multiply by 16
-                       slwi    r3,r3,2                                         ; and the original by 4
+                       lwz             r7,TASK_SYSCALLS_MACH(r10)              ; Get the current count
+                       neg             r31,r0                                                  ; Make this positive
+                       mr              r3,r31                                                  ; save it
+                       slwi    r27,r3,4                                                ; multiply by 16
+                       slwi    r3,r3,2                                                 ; and the original by 4
                        ori             r28,r28,lo16(EXT(mach_trap_table))      ; Get address of table
-                       add             r27,r27,r3                                      ; for a total of 20x (5 words/entry)
-                       addi    r7,r7,1                                         ; Bump TASK_SYSCALLS_MACH count
-                       cmplwi  r8,0                                            ; Is kdebug_enable non-zero
-                       stw             r7,TASK_SYSCALLS_MACH(r10)      ; Save count
-                       bne--   ksystrace                                       ; yes, tracing enabled
+                       add             r27,r27,r3                                              ; for a total of 20x (5 words/entry)
+                       addi    r7,r7,1                                                 ; Bump TASK_SYSCALLS_MACH count
+                       cmplwi  r8,0                                                    ; Is kdebug_enable non-zero
+                       stw             r7,TASK_SYSCALLS_MACH(r10)              ; Save count
+                       bne--   ksystrace                                               ; yes, tracing enabled
                        
-                       cmplwi  r31,MACH_TRAP_TABLE_COUNT       ; Is this syscall in the table? 
-                       add             r31,r27,r28                                     ; Point right to the syscall table entry
+                       cmplwi  r31,MACH_TRAP_TABLE_COUNT               ; Is this syscall in the table? 
+                       add             r31,r27,r28                                             ; Point right to the syscall table entry
 
                        bge--   .L_call_server_syscall_exception        ; The syscall number is invalid
 
@@ -586,15 +586,15 @@ ksystrace:
 .L_kernel_syscall_munge:
                        cmplwi  r0,0                                                    ; test for null munger
                        mtctr   r0                                                              ; Set the function call address
-                       addi    r3,r30,saver3                                           ; Pointer to args from save area
-                       addi    r4,r1,FM_ARG0+ARG_SIZE                          ; Pointer for munged args
-                       beq--   .L_kernel_syscall_trapcall              ;   null munger - skip to trap call
-                       bctrl                                                           ; Call the munge function
+                       addi    r3,r30,saver3                                   ; Pointer to args from save area
+                       addi    r4,r1,FM_ARG0+ARG_SIZE                  ; Pointer for munged args
+                       beq--   .L_kernel_syscall_trapcall              ; null munger - skip to trap call
+                       bctrl                                                                   ; Call the munge function
 
 .L_kernel_syscall_trapcall:            
-                       lwz     r0,MACH_TRAP_FUNCTION(r31)                      ; Pick up the function address
-                       mtctr   r0                                                      ; Set the function call address
-                       addi    r3,r1,FM_ARG0+ARG_SIZE                          ; Pointer to munged args
+                       lwz             r0,MACH_TRAP_FUNCTION(r31)              ; Pick up the function address
+                       mtctr   r0                                                              ; Set the function call address
+                       addi    r3,r1,FM_ARG0+ARG_SIZE                  ; Pointer to munged args
 
 #if FPFLOOD
                        stfd    f31,emfp31(r25)                                 ; (TEST/DEBUG)
@@ -611,12 +611,12 @@ ksystrace:
  */
 
 .L_mach_return:
-                       srawi  r0,r3,31                                         ; properly extend the return code
-                       cmpi    cr0,r3,KERN_INVALID_ARGUMENT            ; deal with invalid system calls
-                       mr              r31,r16                                         ; Move the current thread pointer
-                       stw     r0, saver3(r30)                                 ; stash the high part of the return code
-                       stw     r3,saver3+4(r30)                                        ; Stash the low part of the return code
-                       beq-    cr0,.L_mach_invalid_ret                         ; otherwise fall through into the normal return path
+                       srawi  r0,r3,31                                                 ; properly extend the return code
+                       cmpi    cr0,r3,KERN_INVALID_ARGUMENT    ; deal with invalid system calls
+                       mr              r31,r16                                                 ; Move the current thread pointer
+                       stw             r0, saver3(r30)                                 ; stash the high part of the return code
+                       stw             r3,saver3+4(r30)                                ; Stash the low part of the return code
+                       beq--   cr0,.L_mach_invalid_ret                 ; otherwise fall through into the normal return path
 .L_mach_invalid_arg:           
 
 
@@ -681,12 +681,12 @@ scrnotkern:
  * we want to pass the error code back to the caller
  */
                        lwz             r0,saver0+4(r30)                                ; reload the original syscall number
-                       neg             r28,r0                                          ; Make this positive
-                       mr              r4,r28                                          ; save a copy
-                       slwi            r27,r4,4                                                ; multiply by 16
-                       slwi            r4,r4,2                                         ; and another 4
+                       neg             r28,r0                                                  ; Make this positive
+                       mr              r4,r28                                                  ; save a copy
+                       slwi    r27,r4,4                                                ; multiply by 16
+                       slwi    r4,r4,2                                                 ; and another 4
                        lis             r28,hi16(EXT(mach_trap_table))  ; Get address of table
-                       add             r27,r27,r4                                      ; for a total of 20x (5 words/entry)
+                       add             r27,r27,r4                                              ; for a total of 20x (5 words/entry)
                        ori             r28,r28,lo16(EXT(mach_trap_table))      ; Get address of table
                        add             r28,r27,r28                                             ; Point right to the syscall table entry
                        lwz             r27,MACH_TRAP_FUNCTION(r28)             ; Pick up the function address
@@ -957,7 +957,7 @@ ihbootnover:                                                                                ; (TEST/DEBUG)
                        mr              r4,r30
                        lwz             r5,savedsisr(r30)                               ; Get the DSISR
                        lwz             r6,savedar+4(r30)                               ; Get the DAR 
-
+                       
 #if FPFLOOD
                        stfd    f31,emfp31(r25)                                 ; (TEST/DEBUG)
 #endif
@@ -982,7 +982,7 @@ LEXT(ihandler_ret)                                                                  ; Marks our return point from debugger entry
                        lwz             r10,ACT_PER_PROC(r8)                    ; Get the per_proc block 
                
                        lwz             r7,SAVflags(r3)                                 ; Pick up the flags
-                       lwz             r9,SAVprev+4(r3)                                        ; Get previous save area
+                       lwz             r9,SAVprev+4(r3)                                ; Get previous save area
                        cmplwi  cr1,r8,0                                                ; Are we still initializing?
                        lwz             r12,savesrr1+4(r3)                              ; Get the MSR we will load on return 
                        andis.  r11,r7,hi16(SAVrststk)                  ; Is this the first on the stack?
index 7ebbedecd2890b73fa97d17553b4499a36c5f398..01b1bdced7ba4989751a19317c1e039eb72dd46c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -1080,7 +1080,7 @@ mlckslow1:
                        bl              lockDisa                                                ; Go get a lock on the mutex's interlock lock
                        mr.             r4,r3                                                   ; Did we get it?
                        lwz             r3,FM_ARG0(r1)                                  ; Restore the lock address
-                       bne+    mlGotInt                                                ; We got it just fine...
+                       bne++   mlGotInt                                                ; We got it just fine...
                        mr              r4,r11                                                  ; Saved lock addr
                        lis             r3,hi16(mutex_failed1)                  ; Get the failed mutex message
                        ori             r3,r3,lo16(mutex_failed1)               ; Get the failed mutex message
@@ -1297,9 +1297,9 @@ L_mutex_try_slow:
                        bne-    mtFail                                                  ; Someone's got it already...
 
                        bl              lockDisa                                                ; Go get a lock on the mutex's interlock lock
-                       mr.             r4,r3                                                   ; Did we get it? */
+                       mr.             r4,r3                                                   ; Did we get it?
                        lwz             r3,FM_ARG0(r1)                                  ; Restore the lock address
-                       bne+    mtGotInt                                                ; We got it just fine...
+                       bne++   mtGotInt                                                ; We got it just fine...
                        mr              r4,r11                                                  ; Saved lock addr
                        lis             r3,hi16(mutex_failed2)                  ; Get the failed mutex message
                        ori             r3,r3,lo16(mutex_failed2)               ; Get the failed mutex message
@@ -1490,7 +1490,7 @@ L_mutex_unlock_slow:
                        bl              lockDisa                                                ; Go get a lock on the mutex's interlock lock
                        mr.             r4,r3                                                   ; Did we get it?
                        lwz             r3,FM_ARG0(r1)                                  ; Restore the lock address
-                       bne+    muGotInt                                                ; We got it just fine...
+                       bne++   muGotInt                                                ; We got it just fine...
                        mr              r4,r11                                                  ; Saved lock addr
                        lis             r3,hi16(mutex_failed3)                  ; Get the failed mutex message
                        ori             r3,r3,lo16(mutex_failed3)               ; Get the failed mutex message
index 988b3e37346dc0e1934f85a1e5f8bf6e59582bc9..e13bd5cfb6bb1124fa6e04c56ab846f87f49e507 100644 (file)
@@ -31,8 +31,6 @@
 #include <ppc/Performance.h>
 #include <ppc/exception.h>
 #include <mach/ppc/vm_param.h>
-
-#define INSTRUMENT 0
        
                        .text
 
@@ -199,53 +197,28 @@ hamRescan:        lwz             r4,mpVAddr(r31)                         ; Get the new vaddr top half
                        mr              r29,r4                                          ; Save top half of vaddr for later
                        mr              r30,r5                                          ; Save bottom half of vaddr for later
                        
-#if INSTRUMENT
-                       mfspr   r0,pmc1                                         ; INSTRUMENT - saveinstr[16] - Take stamp before mapSearchFull
-                       stw             r0,0x6100+(16*16)+0x0(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc2                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(16*16)+0x4(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc3                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(16*16)+0x8(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc4                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(16*16)+0xC(0)        ; INSTRUMENT - Save it
-#endif                 
-                       
                        bl              EXT(mapSearchFull)                      ; Go see if we can find it
                        
-#if INSTRUMENT
-                       mfspr   r0,pmc1                                         ; INSTRUMENT - saveinstr[14] - Take stamp after mapSearchFull
-                       stw             r0,0x6100+(17*16)+0x0(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc2                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(17*16)+0x4(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc3                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(17*16)+0x8(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc4                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(17*16)+0xC(0)        ; INSTRUMENT - Save it
-#endif                 
-
-                       rlwinm  r0,r24,0,mpType                         ; Isolate the mapping type
-                       rlwinm  r23,r23,12,0,19                         ; Convert standard block size to bytes
-                       cmplwi  r0,mpNest                                       ; Is this a nested type?
-                       cmplwi  cr1,r0,mpLinkage                        ; Linkage type?
-                       cror    cr0_eq,cr1_eq,cr0_eq            ; Nested or linkage type?
+                       li              r22,lo16(0x800C)                        ; Get 0xFFFF800C
+                       rlwinm  r0,r24,mpBSub+1,31,31           ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
+                       addi    r23,r23,1                                       ; Get actual length
+                       rlwnm   r22,r22,r0,27,31                        ; Rotate to get 12 or 25
                        lis             r0,0x8000                                       ; Get 0xFFFFFFFF80000000
-                       li              r22,0                                           ; Assume high part of size is 0
-                       bne++   hamNoNest                                       ; This is not a nested or linkage type
-                       
-                       rlwinm  r22,r23,16,16,31                        ; Convert partially converted size to segments
-                       rlwinm  r23,r23,16,0,3                          ; Finish shift
-                       
-hamNoNest:     add             r0,r0,r0                                        ; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit
+                       slw             r9,r23,r22                                      ; Isolate the low part
+                       rlwnm   r22,r23,r22,22,31                       ; Extract the high order
+                       addic   r23,r9,-4096                            ; Get the length to the last page
+                       add             r0,r0,r0                                        ; Get 0xFFFFFFFF00000000 for 64-bit or 0 for 32-bit
+                       addme   r22,r22                                         ; Do high order as well...
                        mr.             r3,r3                                           ; Did we find a mapping here?
-                       or              r0,r0,r30                                       ; Make sure a carry will propagate all the way in 64-bit
-                       crmove  cr5_eq,cr0_eq                           ; Remember that if we found the mapping
+                       or              r0,r30,r0                                       ; Fill high word of 64-bit with 1s so we will properly carry
+                       bne--   hamOverlay                                      ; We found a mapping, this is no good, can not double map...
+
                        addc    r9,r0,r23                                       ; Add size to get last page in new range
                        or.             r0,r4,r5                                        ; Are we beyond the end?
                        adde    r8,r29,r22                                      ; Add the rest of the length on
-                       bne--   cr5,hamOverlay                          ; Yeah, this is no good, can not double map...
                        rlwinm  r9,r9,0,0,31                            ; Clean top half of sum
                        beq++   hamFits                                         ; We are at the end...
-                       
+
                        cmplw   cr1,r9,r5                                       ; Is the bottom part of our end less?
                        cmplw   r8,r4                                           ; Is our end before the next (top part)
                        crand   cr0_eq,cr0_eq,cr1_lt            ; Is the second half less and the first half equal?
@@ -283,32 +256,10 @@ hamFits:  mr.             r21,r21                                         ; Do we already have the exclusive lock?
                        
                        .align  5
 
-hamGotX:       
-#if INSTRUMENT
-                       mfspr   r3,pmc1                                         ; INSTRUMENT - saveinstr[18] - Take stamp before mapSearchFull
-                       stw             r3,0x6100+(18*16)+0x0(0)        ; INSTRUMENT - Save it
-                       mfspr   r3,pmc2                                         ; INSTRUMENT - Get stamp
-                       stw             r3,0x6100+(18*16)+0x4(0)        ; INSTRUMENT - Save it
-                       mfspr   r3,pmc3                                         ; INSTRUMENT - Get stamp
-                       stw             r3,0x6100+(18*16)+0x8(0)        ; INSTRUMENT - Save it
-                       mfspr   r3,pmc4                                         ; INSTRUMENT - Get stamp
-                       stw             r4,0x6100+(18*16)+0xC(0)        ; INSTRUMENT - Save it
-#endif                 
-                       mr              r3,r28                                          ; Get the pmap to insert into
+hamGotX:       mr              r3,r28                                          ; Get the pmap to insert into
                        mr              r4,r31                                          ; Point to the mapping
                        bl              EXT(mapInsert)                          ; Insert the mapping into the list
 
-#if INSTRUMENT
-                       mfspr   r4,pmc1                                         ; INSTRUMENT - saveinstr[19] - Take stamp before mapSearchFull
-                       stw             r4,0x6100+(19*16)+0x0(0)        ; INSTRUMENT - Save it
-                       mfspr   r4,pmc2                                         ; INSTRUMENT - Get stamp
-                       stw             r4,0x6100+(19*16)+0x4(0)        ; INSTRUMENT - Save it
-                       mfspr   r4,pmc3                                         ; INSTRUMENT - Get stamp
-                       stw             r4,0x6100+(19*16)+0x8(0)        ; INSTRUMENT - Save it
-                       mfspr   r4,pmc4                                         ; INSTRUMENT - Get stamp
-                       stw             r4,0x6100+(19*16)+0xC(0)        ; INSTRUMENT - Save it
-#endif                 
-       
                        rlwinm  r11,r24,mpPcfgb+2,mpPcfg>>6     ; Get the index into the page config table
                        lhz             r8,mpSpace(r31)                         ; Get the address space
                        lwz             r11,lgpPcfg(r11)                        ; Get the page config
@@ -384,18 +335,7 @@ hamReturn: bt++    pf64Bitb,hamR64                         ; Yes...
 hamR64:                mtmsrd  r17                                                     ; Restore enables/translation/etc.
                        isync                                                           
                        
-hamReturnC:    
-#if INSTRUMENT
-                       mfspr   r0,pmc1                                         ; INSTRUMENT - saveinstr[20] - Take stamp before mapSearchFull
-                       stw             r0,0x6100+(20*16)+0x0(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc2                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(20*16)+0x4(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc3                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(20*16)+0x8(0)        ; INSTRUMENT - Save it
-                       mfspr   r0,pmc4                                         ; INSTRUMENT - Get stamp
-                       stw             r0,0x6100+(20*16)+0xC(0)        ; INSTRUMENT - Save it
-#endif                 
-                       lwz             r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)       ; Get the return
+hamReturnC:    lwz             r0,(FM_ALIGN((31-17+1)*4)+FM_SIZE+FM_LR_SAVE)(r1)       ; Get the return
                        lwz             r17,FM_ARG0+0x00(r1)            ; Save a register
                        lwz             r18,FM_ARG0+0x04(r1)            ; Save a register
                        lwz             r19,FM_ARG0+0x08(r1)            ; Save a register
@@ -948,13 +888,18 @@ hrmPanic: lis             r0,hi16(Choke)                          ; System abend
 
                        .align  5
                        
-hrmBlock32:    
-                       lhz             r23,mpSpace(r31)                        ; Get the address space hash
+hrmBlock32:    lis             r29,0xD000                                      ; Get shift to 32MB bsu
+                       rlwinm  r24,r20,mpBSub+1+2,29,29        ; Rotate to get 0 if 4K bsu or 13 if 32MB bsu
                        lhz             r25,mpBSize(r31)                        ; Get the number of pages in block
+                       lhz             r23,mpSpace(r31)                        ; Get the address space hash
                        lwz             r9,mpBlkRemCur(r31)                     ; Get our current remove position
+                       rlwnm   r29,r29,r24,28,31                       ; Rotate to get 0 or 13
+                       addi    r25,r25,1                                       ; Account for zero-based counting
                        ori             r0,r20,mpRIP                            ; Turn on the remove in progress flag
+                       slw             r25,r25,r29                                     ; Adjust for 32MB if needed
                        mfsdr1  r29                                                     ; Get the hash table base and size
                        rlwinm  r24,r23,maxAdrSpb,32-maxAdrSpb-maxAdrSpb,31-maxAdrSpb   ; Get high order of hash
+                       subi    r25,r25,1                                       ; Convert back to zero-based counting
                        lwz             r27,mpVAddr+4(r31)                      ; Get the base vaddr
                        sub             r4,r25,r9                                       ; Get number of pages left
                        cmplw   cr1,r9,r25                                      ; Have we already hit the end?
@@ -1279,13 +1224,18 @@ hrmRetn64:      rldicr  r8,r31,0,51                                     ; Find start of page
 
                        .align  5
                        
-hrmBlock64:                            
+hrmBlock64:    lis             r29,0xD000                                      ; Get shift to 32MB bsu                 
+                       rlwinm  r10,r20,mpBSub+1+2,29,29        ; Rotate to get 0 if 4K bsu or 13 if 32MB bsu
                        lhz             r24,mpSpace(r31)                        ; Get the address space hash
                        lhz             r25,mpBSize(r31)                        ; Get the number of pages in block
                        lwz             r9,mpBlkRemCur(r31)                     ; Get our current remove position
+                       rlwnm   r29,r29,r10,28,31                       ; Rotate to get 0 or 13
+                       addi    r25,r25,1                                       ; Account for zero-based counting
                        ori             r0,r20,mpRIP                            ; Turn on the remove in progress flag
+                       slw             r25,r25,r29                                     ; Adjust for 32MB if needed
                        mfsdr1  r29                                                     ; Get the hash table base and size
                        ld              r27,mpVAddr(r31)                        ; Get the base vaddr
+                       subi    r25,r25,1                                       ; Convert back to zero-based counting
                        rlwinm  r5,r29,0,27,31                          ; Isolate the size
                        sub             r4,r25,r9                                       ; Get number of pages left
                        cmplw   cr1,r9,r25                                      ; Have we already hit the end?
@@ -2545,7 +2495,7 @@ hcmNextPhys32:
             addi       r3,r3,physEntrySize                     ; Next phys_entry
                        
 hcmNextMap32:
-                       rlwinm. r4,r4,0,0,25                            ; Clean and test mapping address
+                       rlwinm. r4,r4,0,~ppFlags                        ; Clean and test mapping address
                        beq             hcmNoMap32                                      ; Did not find one...
 
                        lwz             r0,mpPte(r4)                            ; Grab the offset to the PTE
@@ -2688,7 +2638,7 @@ LEXT(hw_walk_phys)
                        blt             hwpSrc32                                        ; Do TLB invalidate/purge/merge/reload for each mapping
                        beq             hwpMSrc32                                       ; Do TLB merge for each mapping
                        
-hwpQSrc32:     rlwinm. r31,r31,0,0,25                          ; Clean and test mapping address
+hwpQSrc32:     rlwinm. r31,r31,0,~ppFlags                      ; Clean and test mapping address
                        beq             hwpNone32                                       ; Did not find one...
                        
                        bctrl                                                           ; Call the op function
@@ -2698,7 +2648,7 @@ hwpQSrc32:        rlwinm. r31,r31,0,0,25                          ; Clean and test mapping address
                        b               hwpQSrc32                                       ; Check it out...
 
                        .align  5                       
-hwpMSrc32:     rlwinm. r31,r31,0,0,25                          ; Clean and test mapping address
+hwpMSrc32:     rlwinm. r31,r31,0,~ppFlags                      ; Clean and test mapping address
                        beq             hwpNone32                                       ; Did not find one...
                        
                        bl              mapMergeRC32                            ; Merge reference and change into mapping and physent
@@ -3541,11 +3491,11 @@ mapFLPNitr:     lwz             r3,mrPhysTab(r9)                        ; Get the actual table address
                        lwz             r5,mrStart(r9)                          ; Get start of table entry
                        lwz             r0,mrEnd(r9)                            ; Get end of table entry
                        addi    r9,r9,mrSize                            ; Point to the next slot
-                       cmplwi  cr2,r3,0                                        ; Are we at the end of the table?
+                       cmplwi  cr7,r3,0                                        ; Are we at the end of the table?
                        cmplw   r2,r5                                           ; See if we are in this table
                        cmplw   cr1,r2,r0                                       ; Check end also
                        sub             r4,r2,r5                                        ; Calculate index to physical entry
-                       beq--   cr2,mapFLPNmiss                         ; Leave if we did not find an entry...
+                       beq--   cr7,mapFLPNmiss                         ; Leave if we did not find an entry...
                        cror    cr0_lt,cr0_lt,cr1_gt            ; Set CR0_LT if it is NOT this entry
                        slwi    r4,r4,3                                         ; Get offset to physical entry
 
index 170ad22272d644163e384ab7cfafb477cc3bc712..aba525045e714972128303b0e9b826a240f6f831 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -86,9 +86,15 @@ struct savearea * interrupt(
                        }
 #endif
 
+                       now = mach_absolute_time();                             /* Find out what time it is */
+                       
+                       if(now >= proc_info->pms.pmsPop) {              /* Is it time for power management state change? */
+                               pmsStep(1);                                                     /* Yes, advance step */
+                               now = mach_absolute_time();                     /* Get the time again since we ran a bit */
+                       }
+
                        thread = current_thread();                                      /* Find ourselves */
                        if(thread->machine.qactTimer != 0) {    /* Is the timer set? */
-                               clock_get_uptime(&now);                         /* Find out what time it is */
                                if (thread->machine.qactTimer <= now) { /* It is set, has it popped? */
                                        thread->machine.qactTimer = 0;          /* Clear single shot timer */
                                        if((unsigned int)thread->machine.vmmControl & 0xFFFFFFFE) {     /* Are there any virtual machines? */
@@ -97,7 +103,7 @@ struct savearea * interrupt(
                                }
                        }
 
-                       rtclock_intr(0, ssp, 0);
+                       rtclock_intr(ssp);
                        break;
        
                case T_INTERRUPT:
@@ -130,10 +136,8 @@ struct savearea * interrupt(
        
                                
                default:
-#if     MACH_KDP || MACH_KDB
-                        if (!Call_Debugger(type, ssp))
-#endif
-                        unresolved_kernel_trap(type, ssp, dsisr, dar, NULL);
+                       if (!Call_Debugger(type, ssp))
+                               unresolved_kernel_trap(type, ssp, dsisr, dar, NULL);
                        break;
        }
 
index 60a1688adf5a5faea49ddaaaa1f4f4db2f9d5af1..3c47658ec6788641b15ef948e6536d294e8d7938 100644 (file)
@@ -68,7 +68,7 @@ io_map(phys_addr, size)
                
                (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), 
                        (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded),       /* Map as I/O page */
-                       size >> 12, VM_PROT_READ|VM_PROT_WRITE);
+                       (size >> 12), VM_PROT_READ|VM_PROT_WRITE);
 
                return (start + (phys_addr & PAGE_MASK));       /* Pass back the physical address */
        
@@ -117,7 +117,7 @@ vm_offset_t io_map_spec(vm_offset_t phys_addr, vm_size_t size)
 
        (void)mapping_make(kernel_pmap, (addr64_t)start, (ppnum_t)(phys_addr >> 12), 
                (mmFlgBlock | mmFlgUseAttr | mmFlgCInhib | mmFlgGuarded),       /* Map as I/O page */
-               size >> 12, VM_PROT_READ|VM_PROT_WRITE);
+               (size >> 12), VM_PROT_READ|VM_PROT_WRITE);
 
        return (start + (phys_addr & PAGE_MASK));
 }
index 512cef481d0f3ba7a78c9f0ede207a1d8cefc286..debb37c22d4521d4fba1da18595b220a255e6ec4 100644 (file)
@@ -60,7 +60,9 @@ typedef struct lowglo {
        uint64_t                lgPMWvaddr;                             /* 5020 physical memory window virtual address */
        uint64_t                lgUMWvaddr;                             /* 5028 user memory window virtual address */
        unsigned int    lgVMMforcedFeats;               /* 5030 VMM boot-args forced feature flags */
-       unsigned int    lgRsv034[19];                   /* 5034 reserved */
+       unsigned int    lgMaxDec;                               /* 5034 Maximum decrementer we can set */
+       unsigned int    lgPmsCtlp;                              /* 5038 Pointer to power management stepper control */
+       unsigned int    lgRsv03C[17];                   /* 503C reserved */
        traceWork               lgTrcWork;                              /* 5080 Tracing control block - trcWork */
        unsigned int    lgRsv0A0[24];                   /* 50A0 reserved */
        struct Saveanchor       lgSaveanchor;           /* 5100 Savearea anchor - saveanchor */
index a3b0e9e5a8cdfdc9d77cfba4cd0c7bbd126caf75..54b63596f270782f7a93803e090e1bb8c981ddf3 100644 (file)
@@ -369,26 +369,26 @@ notDCache:        mtcrf   255,r13                                                 ; Restore CRs
 .L_handlerC00:
                        mtsprg  3,r11                                                   ; Save R11
                        mtsprg  2,r13                                                   ; Save R13
-                       rlwinm  r11,r0,0,0xFFFFFFF8                             ; mask off low 3 bits of syscall number
-                       xori    r13,r11,0x7FF0                                  ; start to check for the 0x7FFx traps
-                       addi    r11,r11,8                                               ; make a 0 iff this is a 0xFFFFFFF8 trap
-                       cntlzw  r13,r13                                                 ; set bit 0x20 iff a 0x7FFx trap
-                       cntlzw  r11,r11                                                 ; set bit 0x20 iff a 0xFFFFFFF8 trap
+            rlwinm  r11,r0,0,0xFFFFFFF8             ; mask off low 3 bits of syscall number
+            xori    r13,r11,0x7FF0                  ; start to check for the 0x7FFx traps
+            addi    r11,r11,8                       ; make a 0 iff this is a 0xFFFFFFF8 trap
+            cntlzw  r13,r13                         ; set bit 0x20 iff a 0x7FFx trap
+            cntlzw  r11,r11                         ; set bit 0x20 iff a 0xFFFFFFF8 trap
                        xoris   r0,r0,0x8000                                    ; Flip bit to make 0 iff 0x80000000
-                       rlwimi  r11,r13,31,0x10                                 ; move 0x7FFx bit into position
+            rlwimi  r11,r13,31,0x10                 ; move 0x7FFx bit into position
                        cntlzw  r13,r0                                                  ; Set bit 0x20 iff 0x80000000
                        xoris   r0,r0,0x8000                                    ; Flip bit to restore R0
                        rlwimi  r11,r13,2,0x80                                  ; Set bit 0x80 iff CutTrace
-                       xori    r13,r0,0x6004                                   ; start to check for 0x6004
-                       rlwimi  r11,r0,1,0xE                                    ; move in low 3 bits of syscall number
-                       cntlzw  r13,r13                                                 ; set bit 0x20 iff 0x6004
-                       rlwinm  r11,r11,0,0,30                                  ; clear out bit 31
-                       rlwimi  r11,r13,1,0x40                                  ; move 0x6004 bit into position
-                       lhz             r11,lo16(scTable)(r11)                  ; get branch address from sc table
-                       mfctr   r13                                                             ; save caller's ctr in r13
-                       mtctr   r11                                                             ; set up branch to syscall handler
-                       mfsprg  r11,0                                                   ; get per_proc, which most UFTs use
-                       bctr                                                                    ; dispatch (r11 in sprg3, r13 in sprg2, ctr in r13, per_proc in r11)
+            xori    r13,r0,0x6004                   ; start to check for 0x6004
+            rlwimi  r11,r0,1,0xE                    ; move in low 3 bits of syscall number
+            cntlzw  r13,r13                         ; set bit 0x20 iff 0x6004
+            rlwinm  r11,r11,0,0,30                  ; clear out bit 31
+            rlwimi  r11,r13,1,0x40                  ; move 0x6004 bit into position
+            lhz     r11,lo16(scTable)(r11)          ; get branch address from sc table
+            mfctr   r13                             ; save caller's ctr in r13
+            mtctr   r11                             ; set up branch to syscall handler
+            mfsprg  r11,0                           ; get per_proc, which most UFTs use
+            bctr                                    ; dispatch (r11 in sprg3, r13 in sprg2, ctr in r13, per_proc in r11)
 
 /*
  *                     Trace - generated by single stepping
@@ -850,84 +850,84 @@ scTable:                                            ; ABCD E
  *      sprg3 = holds caller's r11
  */
 
-;                      Handle "vmm_dispatch" (0x6004), of which only some selectors are UFTs.
+;           Handle "vmm_dispatch" (0x6004), of which only some selectors are UFTs.
 
 uftVMM:
-                       mtctr   r13                                                             ; restore caller's ctr
-                       lwz             r11,spcFlags(r11)                               ; get the special flags word from per_proc
-                       mfcr    r13                                                             ; save caller's entire cr (we use all fields below)
+            mtctr   r13                             ; restore caller's ctr
+            lwz     r11,spcFlags(r11)               ; get the special flags word from per_proc
+            mfcr    r13                             ; save caller's entire cr (we use all fields below)
                        rlwinm  r11,r11,16,16,31                                ; Extract spcFlags upper bits
                        andi.   r11,r11,hi16(runningVM|FamVMena|FamVMmode)
                        cmpwi   cr0,r11,hi16(runningVM|FamVMena|FamVMmode)      ; Test in VM FAM
-                       bne--   uftNormal80                                             ; not eligible for FAM UFTs
+                       bne--   uftNormal80                     ; not eligible for FAM UFTs
                        cmpwi   cr5,r3,kvmmResumeGuest                  ; Compare r3 with kvmmResumeGuest
                        cmpwi   cr2,r3,kvmmSetGuestRegister             ; Compare r3 with kvmmSetGuestRegister
                        cror    cr1_eq,cr5_lt,cr2_gt                    ; Set true if out of VMM Fast syscall range
-                       bt--    cr1_eq,uftNormalFF                              ; Exit if out of range (the others are not UFTs)
+                       bt--    cr1_eq,uftNormalFF              ; Exit if out of range (the others are not UFTs)
                        b               EXT(vmm_ufp)                                    ; handle UFT range of vmm_dispatch syscall
 
-                       
-;                      Handle blue box UFTs (syscalls -1 and -2).
+            
+;           Handle blue box UFTs (syscalls -1 and -2).
 
 uftIsPreemptiveTask:
 uftIsPreemptiveTaskEnv:
-                       mtctr   r13                                                             ; restore caller's ctr
-                       lwz             r11,spcFlags(r11)                               ; get the special flags word from per_proc
-                       mfcr    r13,0x80                                                ; save caller's cr0 so we can use it
-                       andi.   r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need
+            mtctr   r13                             ; restore caller's ctr
+            lwz     r11,spcFlags(r11)               ; get the special flags word from per_proc
+            mfcr    r13,0x80                        ; save caller's cr0 so we can use it
+            andi.      r11,r11,bbNoMachSC|bbPreemptive ; Clear what we do not need
                        cmplwi  r11,bbNoMachSC                                  ; See if we are trapping syscalls
-                       blt--   uftNormal80                                             ; No...
-                       cmpwi   r0,-2                                                   ; is this call IsPreemptiveTaskEnv?
+                       blt--   uftNormal80                     ; No...
+            cmpwi   r0,-2                           ; is this call IsPreemptiveTaskEnv?
                        rlwimi  r13,r11,bbPreemptivebit-cr0_eq,cr0_eq,cr0_eq    ; Copy preemptive task flag into user cr0_eq
                        mfsprg  r11,0                                                   ; Get the per proc once more
-                       bne++   uftRestoreThenRFI                               ; do not load r0 if IsPreemptiveTask
+            bne++   uftRestoreThenRFI               ; do not load r0 if IsPreemptiveTask
                        lwz             r0,ppbbTaskEnv(r11)                             ; Get the shadowed taskEnv (only difference)
-                       b               uftRestoreThenRFI                               ; restore modified cr0 and return
+            b       uftRestoreThenRFI               ; restore modified cr0 and return
 
 
-;                      Handle "Thread Info" UFT (0x7FF2)
+;           Handle "Thread Info" UFT (0x7FF2)
 
-                       .globl  EXT(uft_uaw_nop_if_32bit)
+            .globl  EXT(uft_uaw_nop_if_32bit)
 uftThreadInfo:
-                       lwz             r3,UAW+4(r11)                                   ; get user assist word, assuming a 32-bit processor
+            lwz     r3,UAW+4(r11)                   ; get user assist word, assuming a 32-bit processor
 LEXT(uft_uaw_nop_if_32bit)
-                       ld              r3,UAW(r11)                                             ; get the whole doubleword if 64-bit (patched to nop if 32-bit)
-                       mtctr   r13                                                             ; restore caller's ctr
-                       b               uftRFI                                                  ; done
+            ld      r3,UAW(r11)                     ; get the whole doubleword if 64-bit (patched to nop if 32-bit)
+            mtctr   r13                             ; restore caller's ctr
+            b       uftRFI                          ; done
 
 
-;                      Handle "Facility Status" UFT (0x7FF3)
+;           Handle "Facility Status" UFT (0x7FF3)
 
 uftFacilityStatus:
-                       lwz             r3,spcFlags(r11)                                ; get "special flags" word from per_proc
-                       mtctr   r13                                                             ; restore caller's ctr
-                       b               uftRFI                                                  ; done
+            lwz     r3,spcFlags(r11)                ; get "special flags" word from per_proc
+            mtctr   r13                             ; restore caller's ctr
+            b       uftRFI                          ; done
 
 
-;                      Handle "Load MSR" UFT (0x7FF4).  This is not used on 64-bit processors, though it would work.
+;           Handle "Load MSR" UFT (0x7FF4).  This is not used on 64-bit processors, though it would work.
 
 uftLoadMSR:
-                       mfsrr1  r11                                                             ; get caller's MSR
-                       mtctr   r13                                                             ; restore caller's ctr
-                       mfcr    r13,0x80                                                ; save caller's cr0 so we can test PR
-                       rlwinm. r11,r11,0,MSR_PR_BIT,MSR_PR_BIT ; really in the kernel?
-                       bne-    uftNormal80                                             ; do not permit from user mode
-                       mfsprg  r11,0                                                   ; restore per_proc
+            mfsrr1  r11                             ; get caller's MSR
+            mtctr   r13                             ; restore caller's ctr
+            mfcr    r13,0x80                        ; save caller's cr0 so we can test PR
+            rlwinm. r11,r11,0,MSR_PR_BIT,MSR_PR_BIT ; really in the kernel?
+            bne-    uftNormal80                     ; do not permit from user mode
+            mfsprg  r11,0                           ; restore per_proc
                        mtsrr1  r3                                                              ; Set new MSR
 
 
-;                      Return to caller after UFT.      When called:
-;                              r11 = per_proc ptr
-;                              r13 = callers cr0 in upper nibble (if uftRestoreThenRFI called)
-;                              sprg2 = callers r13
-;                              sprg3 = callers r11
+;           Return to caller after UFT.  When called:
+;               r11 = per_proc ptr
+;               r13 = callers cr0 in upper nibble (if uftRestoreThenRFI called)
+;               sprg2 = callers r13
+;               sprg3 = callers r11
 
-uftRestoreThenRFI:                                                                     ; WARNING: can drop down to here
-                       mtcrf   0x80,r13                                                ; restore caller's cr0
+uftRestoreThenRFI:                                  ; WARNING: can drop down to here
+            mtcrf   0x80,r13                        ; restore caller's cr0
 uftRFI:
-                       .globl  EXT(uft_nop_if_32bit)
+            .globl  EXT(uft_nop_if_32bit)
 LEXT(uft_nop_if_32bit)
-                       b               uftX64                                                  ; patched to NOP if 32-bit processor
+            b       uftX64                          ; patched to NOP if 32-bit processor
                        
 uftX32:                lwz             r11,pfAvailable(r11)                    ; Get the feature flags
                        mfsprg  r13,2                                                   ; Restore R13
@@ -1217,20 +1217,20 @@ ctbail64:       mtcrf   0x80,r25                                                ; Restore the used condition register field
 
 
 
-;                      Handle a system call that is not a UFT and which thus goes upstairs.
+;           Handle a system call that is not a UFT and which thus goes upstairs.
 
-uftNormalFF:                                                                           ; here with entire cr in r13
-                       mtcr    r13                                                             ; restore all 8 fields
+uftNormalFF:                                        ; here with entire cr in r13
+            mtcr    r13                             ; restore all 8 fields
                        b               uftNormalSyscall1                               ; Join common...
-                       
-uftNormal80:                                                                           ; here with callers cr0 in r13
-                       mtcrf   0x80,r13                                                ; restore cr0
+            
+uftNormal80:                                        ; here with callers cr0 in r13
+            mtcrf   0x80,r13                        ; restore cr0
                        b               uftNormalSyscall1                               ; Join common...
-                       
-uftNormalSyscall:                                                                      ; r13 = callers ctr
-                       mtctr   r13                                                             ; restore ctr
+            
+uftNormalSyscall:                                   ; r13 = callers ctr
+            mtctr   r13                             ; restore ctr
 uftNormalSyscall1:
-                       li              r11,T_SYSTEM_CALL|T_FAM                 ; this is a system call (and fall through)
+            li      r11,T_SYSTEM_CALL|T_FAM         ; this is a system call (and fall through)
 
 
 /*<><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>*/
@@ -1257,7 +1257,7 @@ uftNormalSyscall1:
  *     misses, so these stores won't take all that long. Except the first line that is because
  *     we can't do a DCBZ if the L1 D-cache is off.  The rest we will skip if they are
  *     off also.
- * 
+ *
  *     Note that if we are attempting to sleep (as opposed to nap or doze) all interruptions
  *     are ignored.
  */
@@ -1548,7 +1548,7 @@ noPerfMonSave32:
                        lwz             r25,traceMask(0)                                ; Get the trace mask
                        li              r0,SAVgeneral                                   ; Get the savearea type value
                        lhz             r19,PP_CPU_NUMBER(r2)                   ; Get the logical processor number                                                                                      
-                       rlwinm  r22,r11,30,0,31                                 ; Divide interrupt code by 4
+                       rlwinm  r22,r11,30,0,31                                 ; Divide interrupt code by 2
                        stb             r0,SAVflags+2(r13)                              ; Mark valid context
                        addi    r22,r22,10                                              ; Adjust code so we shift into CR5
                        li              r23,trcWork                                             ; Get the trace work area address
@@ -3661,8 +3661,16 @@ EXT(mckFlags):
                        .long   0                                                               ; 5028 user memory window virtual address
                        .long   0                                                               ; 502C user memory window virtual address
                        .long   0                                                               ; 5030 VMM boot-args forced feature flags
-                       .long   0                                                               ; 5034 reserved
-                       .long   0                                                               ; 5038 reserved
+
+                       .globl  EXT(maxDec)
+EXT(maxDec):
+                       .long   0x7FFFFFFF                                              ; 5034 maximum decrementer value
+                       
+
+                       .globl  EXT(pmsCtlp)
+EXT(pmsCtlp):
+                       .long   0                                                               ; 5038 Pointer to power management stepper control
+                       
                        .long   0                                                               ; 503C reserved
                        .long   0                                                               ; 5040 reserved
                        .long   0                                                               ; 5044 reserved
index 02c5d403e02657af8f345515e9ec71ad63a13373..438dfd53322dd9ca2db6644ae7404e2b18b15ac4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -36,7 +36,7 @@ extern void   cpu_doshutdown(
 extern void    cpu_signal_handler(
        void);
 
-typedef void (*broadcastFunc) (int);
+typedef void (*broadcastFunc) (uint32_t);
 
 int32_t cpu_broadcast(uint32_t *, broadcastFunc, uint32_t);
 
index e424bed03d7627e21e35cb0caf6e83e9118387d6..749446f7715a554d76db4ed624696628bb66e031 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -380,9 +380,9 @@ ml_processor_register(
        else
                proc_info->time_base_enable = (void(*)(cpu_id_t, boolean_t ))NULL;
 
-       if (proc_info->pf.pfPowerModes & pmPowerTune) {
-         proc_info->pf.pfPowerTune0 = in_processor_info->power_mode_0;
-         proc_info->pf.pfPowerTune1 = in_processor_info->power_mode_1;
+       if((proc_info->pf.pfPowerModes & pmType) == pmPowerTune) {
+               proc_info->pf.pfPowerTune0 = in_processor_info->power_mode_0;
+               proc_info->pf.pfPowerTune1 = in_processor_info->power_mode_1;
        }
 
        donap = in_processor_info->supports_nap;        /* Assume we use requested nap */
@@ -626,57 +626,64 @@ void
 ml_set_processor_speed(unsigned long speed)
 {
        struct per_proc_info    *proc_info;
-       uint32_t                powerModes, cpu;
+       uint32_t                cpu;
        kern_return_t           result;
        boolean_t               current_state;
         unsigned int           i;
   
        proc_info = PerProcTable[master_cpu].ppe_vaddr;
-       powerModes = proc_info->pf.pfPowerModes;
 
-       if (powerModes & pmDualPLL) {
-
-               ml_set_processor_speed_dpll(speed);
-
-       } else if (powerModes & pmDFS) {
-
-               for (cpu = 0; cpu < real_ncpus; cpu++) {
-                       /*
-                        * cpu_signal() returns after .5ms if it fails to signal a running cpu
-                        * retry cpu_signal() for .1s to deal with long interrupt latency at boot
-                        */
-                       for (i=200; i>0; i--) {
-                               current_state = ml_set_interrupts_enabled(FALSE);
-                               if (cpu != cpu_number()) {
-                                       if (PerProcTable[cpu].ppe_vaddr->cpu_flags & SignalReady)
-                                               /*
-                                                * Target cpu is off-line, skip
-                                                */
+       switch (proc_info->pf.pfPowerModes & pmType) {  /* Figure specific type */
+               case pmDualPLL:
+
+                       ml_set_processor_speed_dpll(speed);
+                       break;
+                       
+               case pmDFS:
+
+                       for (cpu = 0; cpu < real_ncpus; cpu++) {
+                               /*
+                                * cpu_signal() returns after .5ms if it fails to signal a running cpu
+                                * retry cpu_signal() for .1s to deal with long interrupt latency at boot
+                                */
+                               for (i=200; i>0; i--) {
+                                       current_state = ml_set_interrupts_enabled(FALSE);
+                                       if (cpu != cpu_number()) {
+                                                       if (PerProcTable[cpu].ppe_vaddr->cpu_flags & SignalReady)
+                                                       /*
+                                                        * Target cpu is off-line, skip
+                                                        */
+                                                       result = KERN_SUCCESS;
+                                               else {
+                                                       simple_lock(&spsLock);
+                                                       result = cpu_signal(cpu, SIGPcpureq, CPRQsps, speed);   
+                                                       if (result == KERN_SUCCESS) 
+                                                               thread_sleep_simple_lock(&spsLock, &spsLock, THREAD_UNINT);
+                                                       simple_unlock(&spsLock);
+                                               }
+                                       } else {
+                                               ml_set_processor_speed_dfs(speed);
                                                result = KERN_SUCCESS;
-                                       else {
-                                               simple_lock(&spsLock);
-                                               result = cpu_signal(cpu, SIGPcpureq, CPRQsps, speed);   
-                                               if (result == KERN_SUCCESS) 
-                                                       thread_sleep_simple_lock(&spsLock, &spsLock, THREAD_UNINT);
-                                               simple_unlock(&spsLock);
                                        }
-                               } else {
-                                       ml_set_processor_speed_dfs(speed);
-                                       result = KERN_SUCCESS;
+                                       (void) ml_set_interrupts_enabled(current_state);
+                                       if (result == KERN_SUCCESS)
+                                               break;
                                }
-                               (void) ml_set_interrupts_enabled(current_state);
-                               if (result == KERN_SUCCESS)
-                                       break;
+                               if (result != KERN_SUCCESS)
+                                       panic("ml_set_processor_speed(): Fail to set cpu%d speed\n", cpu);
                        }
-                       if (result != KERN_SUCCESS)
-                               panic("ml_set_processor_speed(): Fail to set cpu%d speed\n", cpu);
-               }
-
-       } else if (powerModes & pmPowerTune) {
-
-               ml_set_processor_speed_powertune(speed);
+                       break;
+                       
+               case pmPowerTune:
+       
+                       ml_set_processor_speed_powertune(speed);
+                       break;
+                       
+               default:                                        
+                       break;
 
        }
+       return;
 }
 
 /*
index 85492843418f101241fad5cbd24520eff5ea8d50..afd81129a7f516304ce6cd4d626c521205c33726 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -989,10 +989,8 @@ LEXT(machine_idle_ret)
 
 ;                      Force a line boundry here
                        .align  5
-                       .globl  EXT(ml_ppc_sleep)
-LEXT(ml_ppc_sleep)
-
                        .globl  EXT(ml_ppc_do_sleep)
+
 LEXT(ml_ppc_do_sleep)
 
 #if 0
@@ -2027,8 +2025,6 @@ LEXT(ml_set_processor_speed_powertune)
                        mfsprg  r31,1                                                                   ; Get the current activation
                        lwz             r31,ACT_PER_PROC(r31)                                   ; Get the per_proc block
 
-                       lwz             r30, pfPowerModes(r31)                                  ; Get the supported power modes
-
                        rlwinm  r28, r3, 31-dnap, dnap, dnap                    ; Shift the 1 bit to the dnap+32 bit
                        rlwinm  r3, r3, 2, 29, 29                                               ; Shift the 1 to a 4 and mask
                        addi    r3, r3, pfPowerTune0                                    ; Add in the pfPowerTune0 offset
@@ -2137,7 +2133,11 @@ spsDPLL2:
 
 
 /*
-**      ml_set_processor_speed_dfs()
+**      ml_set_processor_speed_dfs(divideby)
+**                     divideby == 0 then divide by 1 (full speed)
+**                     divideby == 1 then divide by 2 (half speed)
+**                     divideby == 2 then divide by 4 (quarter speed)
+**                     divideby == 3 then divide by 4 (quarter speed) - preferred
 **
 */
 ;                      Force a line boundry here
@@ -2145,19 +2145,13 @@ spsDPLL2:
                        .globl  EXT(ml_set_processor_speed_dfs)
 
 LEXT(ml_set_processor_speed_dfs)
-                       mfsprg  r5,1                                                                    ; Get the current activation
-                       lwz             r5,ACT_PER_PROC(r5)                                             ; Get the per_proc block
-
-                       cmplwi  r3, 0                                                                   ; full speed?
-                       mfspr   r3, hid1                                                                ; Get the current HID1
-                       rlwinm  r3, r3, 0, hid1dfs1+1, hid1dfs0-1               ; assume full speed, clear dfs bits
-                       beq             spsDFS
-                       oris    r3, r3, hi16(hid1dfs1m)                                 ; slow, set half speed dfs1 bit
 
-spsDFS:
-                       stw             r3, pfHID1(r5)                                                  ; Save the new hid1 value
+                       mfspr   r4,hid1                                                                 ; Get the current HID1
+                       mfsprg  r5,0                                                                    ; Get the per_proc_info
+                       rlwimi  r4,r3,31-hid1dfs1,hid1dfs0,hid1dfs1             ; Stick the new divider bits in
+                       stw             r4,pfHID1(r5)                                                   ; Save the new hid1 value
                        sync
-                       mtspr   hid1, r3                                                                ; Set the new HID1
+                       mtspr   hid1,r4                                                                 ; Set the new HID1
                        sync
                        isync
                        blr
@@ -2272,3 +2266,49 @@ mhrcalc: mftb    r8                                                                      ; Get time now
                        sub             r3,r2,r9                                                        ; How many ticks?
                        mtmsrd  r12,1                                                           ; Flip EE on if needed
                        blr                                                                                     ; Leave...
+
+
+;
+;                      int setPop(time)
+;      
+;                      Calculates the number of ticks to the supplied event and
+;                      sets the decrementer.  Never set the time for less that the
+;                      minimum, which is 10, nor more than maxDec, which is usually 0x7FFFFFFF
+;                      and never more than that but can be set by root.
+;
+;
+
+                       .align  7
+                       .globl  EXT(setPop)
+
+#define kMin   10
+
+LEXT(setPop)
+
+spOver:                mftbu   r8                                                                      ; Get upper time
+                       addic   r2,r4,-kMin                                                     ; Subtract minimum from target
+                       mftb    r9                                                                      ; Get lower
+                       addme   r11,r3                                                          ; Do you have any bits I could borrow?
+                       mftbu   r10                                                                     ; Get upper again
+                       subfe   r0,r0,r0                                                        ; Get -1 if we went negative 0 otherwise
+                       subc    r7,r2,r9                                                        ; Subtract bottom and get carry
+                       cmplw   r8,r10                                                          ; Did timebase upper tick?
+                       subfe   r6,r8,r11                                                       ; Get the upper difference accounting for borrow
+                       lwz             r12,maxDec(0)                                           ; Get the maximum decrementer size 
+                       addme   r0,r0                                                           ; Get -1 or -2 if anything negative, 0 otherwise
+                       addic   r2,r6,-1                                                        ; Set carry if diff < 2**32
+                       srawi   r0,r0,1                                                         ; Make all foxes
+                       subi    r10,r12,kMin                                            ; Adjust maximum for minimum adjust
+                       andc    r7,r7,r0                                                        ; Pin time at 0 if under minimum
+                       subfe   r2,r2,r2                                                        ; 0 if diff > 2**32, -1 otherwise               
+                       sub             r7,r7,r10                                                       ; Negative if duration is less than (max - min)
+                       or              r2,r2,r0                                                        ; If the duration is negative, it isn't too big
+                       srawi   r0,r7,31                                                        ; -1 if duration is too small
+                       and             r7,r7,r2                                                        ; Clear duration if high part too big
+                       and             r7,r7,r0                                                        ; Clear duration if low part too big
+                       bne--   spOver                                                          ; Timer ticked...
+                       add             r3,r7,r12                                                       ; Add back the max for total                            
+                       mtdec   r3                                                                      ; Set the decrementer
+                       blr                                                                                     ; Leave...
+
+
index ebeef928bfddb25784a077989b51243b0a8aef41..2f80bc88c150f617a8a86582d1f3921790964442 100644 (file)
@@ -272,7 +272,7 @@ addr64_t mapping_remove(pmap_t pmap, addr64_t va) {         /* Remove a single mapping
  *                     perm                                    Mapping is permanent
  *                     cache inhibited                 Cache inhibited (used if use attribute or block set )
  *                     guarded                                 Guarded access (used if use attribute or block set )
- *             size                                            size of block (not used if not block)
+ *             size                                            size of block in pages - 1 (not used if not block)
  *             prot                                            VM protection bits
  *             attr                                            Cachability/Guardedness    
  *
@@ -337,6 +337,12 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags,
                 
                pattr = flags & (mmFlgCInhib | mmFlgGuarded);                   /* Use requested attributes */
                mflags |= mpBlock;                                                                              /* Show that this is a block */
+       
+               if(size > pmapSmallBlock) {                                                             /* Is it one? */
+                       if(size & 0x00001FFF) return mapRtBadSz;                        /* Fail if bigger than 256MB and not a 32MB multiple */
+                       size = size >> 13;                                                                      /* Convert to 32MB chunks */
+                       mflags = mflags | mpBSu;                                                        /* Show 32MB basic size unit */
+               }
        }
        
        wimg = 0x2;                                                                                                     /* Set basic PPC wimg to 0b0010 - Coherent */
@@ -348,7 +354,7 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags,
        if(flags & mmFlgPerm) mflags |= mpPerm;                                         /* Set permanent mapping */
        
        size = size - 1;                                                                                        /* Change size to offset */
-       if(size > 0xFFFF) return 1;                                                                     /* Leave if size is too big */
+       if(size > 0xFFFF) return mapRtBadSz;                                            /* Leave if size is too big */
        
        nlists = mapSetLists(pmap);                                                                     /* Set number of lists this will be on */
        
@@ -371,7 +377,7 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags,
                
                switch (rc) {
                        case mapRtOK:
-                               return 0;                                                                               /* Mapping added successfully */
+                               return mapRtOK;                                                                 /* Mapping added successfully */
                                
                        case mapRtRemove:                                                                       /* Remove in progress */
                                (void)mapping_remove(pmap, colladdr);                   /* Lend a helping hand to another CPU doing block removal */
@@ -379,12 +385,12 @@ addr64_t mapping_make(pmap_t pmap, addr64_t va, ppnum_t pa, unsigned int flags,
                                
                        case mapRtMapDup:                                                                       /* Identical mapping already present */
                                mapping_free(mp);                                                               /* Free duplicate mapping */
-                               return 0;                                                                               /* Return success */
+                               return mapRtOK;                                                                         /* Return success */
                                
                        case mapRtSmash:                                                                        /* Mapping already present but does not match new mapping */
                                mapping_free(mp);                                                               /* Free duplicate mapping */
-                               return (colladdr | 1);                                                  /* Return colliding address, with some dirt added to avoid
-                                                                                                                                   confusion if effective address is 0 */
+                               return (colladdr | mapRtSmash);                                 /* Return colliding address, with some dirt added to avoid
+                                                                                                                                  confusion if effective address is 0 */
                        default:
                                panic("mapping_make: hw_add_map failed - collision addr = %016llX, code = %02X, pmap = %08X, va = %016llX, mapping = %08X\n",
                                        colladdr, rc, pmap, va, mp);                            /* Die dead */
@@ -1739,6 +1745,23 @@ void mapping_phys_unused(ppnum_t pa) {
        
 }
        
+void mapping_hibernate_flush(void)
+{
+    int bank;
+    unsigned int page;
+    struct phys_entry * entry;
+
+    for (bank = 0; bank < pmap_mem_regions_count; bank++)
+    {
+       entry = (struct phys_entry *) pmap_mem_regions[bank].mrPhysTab;
+       for (page = pmap_mem_regions[bank].mrStart; page <= pmap_mem_regions[bank].mrEnd; page++)
+       {
+           hw_walk_phys(entry, hwpNoop, hwpNoop, hwpNoop, 0, hwpPurgePTE);
+           entry++;
+       }
+    }
+}
+
 
 
 
index 9d7a7c75919c0650357f7482a3bada202a88312a..6d910c3defefb3754ce31d8c26b3366febc9d001 100644 (file)
@@ -336,7 +336,8 @@ typedef struct mappingblok {
 #define mapRtMapDup    7
 #define mapRtGuest     8
 #define mapRtEmpty     9
-#define mapRtSmash     0xA                                     /* Mapping already exists and doesn't match new mapping */
+#define mapRtSmash     10                                      /* Mapping already exists and doesn't match new mapping */
+#define mapRtBadSz     11                                      /* Requested size too big, or more than 256MB and not a multiple of 32MB */
 
 /*
  *     This struct describes available physical page configurations
@@ -400,6 +401,7 @@ extern phys_entry_t  *mapping_phys_lookup(ppnum_t pp, unsigned int *pindex);        /*
 extern int                     mapalc1(struct mappingblok *mb);                        /* Finds and allcates a 1-bit mapping entry */
 extern int                     mapalc2(struct mappingblok *mb);                        /* Finds and allcates a 2-bit mapping entry */
 extern void                    ignore_zero_fault(boolean_t type);                      /* Sets up to ignore or honor any fault on page 0 access for the current thread */
+extern void                    mapping_hibernate_flush(void);
 
 extern void                    mapping_fake_zone_info(         /* return mapping usage stats as a fake zone info */
                                                int *count,
index 33d39c41bf609d4ca67164ecf7ebf0cbcbf65a1f..8ca2624a299d5ab4153791bb45fa1fb18a5b615d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -257,6 +257,12 @@ ENTRY(mfsda, TAG_NO_FRAME_USED)
        mfspr   r3,sda
        blr
 
+       .globl  EXT(hid1get)
+LEXT(hid1get)
+
+       mfspr   r3,hid1                                 ; Get the HID1
+       blr
+
        .globl  EXT(hid0get64)
 LEXT(hid0get64)
 
index db35a19ae30a826172398521e84f46d2c4c22cf0..b88e4af6482211457f0ebd90acecd245ddfd44ec 100644 (file)
@@ -305,7 +305,11 @@ void
 machine_init(void)
 {
        clock_config();
+/*     Note that we must initialize the stepper tables AFTER the clock is configured. */
+       if(pmsExperimental & 1) pmsCPUConf();   /* (EXPERIMENTAL) Initialize the stepper tables */
        perfmon_init();
+       return;
+
 }
 
 void slave_machine_init(void)
index 7453b8aa57c82c87386827388549c6b32f8c4601..875ee6912b1da84204222abbafdc76679b5ad17e 100644 (file)
@@ -259,23 +259,30 @@ void
 pmap_map_physical()
 {
        unsigned region;
+       uint64_t msize, size;
+       addr64_t paddr, vaddr, colladdr;
 
        /* Iterate over physical memory regions, block mapping each into the kernel's address map */    
        for (region = 0; region < (unsigned)pmap_mem_regions_count; region++) {
-               addr64_t paddr = ((addr64_t)pmap_mem_regions[region].mrStart << 12);
-               addr64_t size  = (((addr64_t)pmap_mem_regions[region].mrEnd + 1) << 12) - paddr;
+               paddr = ((addr64_t)pmap_mem_regions[region].mrStart << 12);     /* Get starting physical address */
+               size  = (((addr64_t)pmap_mem_regions[region].mrEnd + 1) << 12) - paddr;
+
+               vaddr = paddr + lowGlo.lgPMWvaddr;                                      /* Get starting virtual address */
+
                while (size > 0) {
-                       /* Block mappings are limited to 256M, so we map in blocks of up to 256M */
-                       addr64_t vaddr = paddr + lowGlo.lgPMWvaddr;
-                       unsigned msize = ((size > 0x10000000)? 0x10000000 : size);
-                       addr64_t colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12),
-                                                        (mmFlgBlock | mmFlgPerm), (msize >> 12),
-                                                        (VM_PROT_READ | VM_PROT_WRITE));  
+                       
+                       msize = ((size > 0x0000020000000000ULL) ? 0x0000020000000000ULL : size);        /* Get size, but no more than 2TBs */
+                       
+                       colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12),
+                               (mmFlgBlock | mmFlgPerm), (msize >> 12),
+                               (VM_PROT_READ | VM_PROT_WRITE));
                        if (colladdr) {
-                               panic ("pmap_map_physical: collision with previously mapped range - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n",
+                               panic ("pmap_map_physical: mapping failure - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n",
                                           vaddr, (paddr >> 12), (msize >> 12), colladdr);
                        }
-                       paddr += msize;
+
+                       vaddr = vaddr + (uint64_t)msize;                                /* Point to the next virtual addr */
+                       paddr = paddr + (uint64_t)msize;                                /* Point to the next physical addr */
                        size  -= msize;
                }
        }
@@ -290,19 +297,28 @@ pmap_map_physical()
 void
 pmap_map_iohole(addr64_t paddr, addr64_t size)
 {
+
+       addr64_t vaddr, colladdr, msize;
+       uint32_t psize;
+
+       vaddr = paddr + lowGlo.lgPMWvaddr;                                              /* Get starting virtual address */              
+
        while (size > 0) {
-               addr64_t vaddr = paddr + lowGlo.lgPMWvaddr;
-               unsigned msize = ((size > 0x10000000)? 0x10000000 : size);
-               addr64_t colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12),
-                                                                                (mmFlgBlock | mmFlgPerm | mmFlgGuarded | mmFlgCInhib), (msize >> 12),
-                                                                                (VM_PROT_READ | VM_PROT_WRITE));
+
+               msize = ((size > 0x0000020000000000ULL) ? 0x0000020000000000ULL : size);        /* Get size, but no more than 2TBs */
+               
+               colladdr = mapping_make(kernel_pmap, vaddr, (paddr >> 12),
+                       (mmFlgBlock | mmFlgPerm | mmFlgGuarded | mmFlgCInhib), (msize >> 12),
+                       (VM_PROT_READ | VM_PROT_WRITE));
                if (colladdr) {
-                       panic ("pmap_map_iohole: collision with previously mapped range - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n",
-                                       vaddr, (paddr >> 12), (msize >> 12), colladdr);
+                       panic ("pmap_map_iohole: mapping failed - va = %016llX, pa = %08X, size = %08X, collision = %016llX\n",
+                                  vaddr, (paddr >> 12), (msize >> 12), colladdr);
                }
-               paddr += msize;
+
+               vaddr = vaddr + (uint64_t)msize;                                        /* Point to the next virtual addr */
+               paddr = paddr + (uint64_t)msize;                                        /* Point to the next physical addr */
                size  -= msize;
-       }
+       }       
 }
 
 /*
@@ -1108,11 +1124,13 @@ pmap_enter(pmap_t pmap, vm_map_offset_t va, ppnum_t pa, vm_prot_t prot,
  *             not be changed.  The block must be unmapped and then remapped with the new stuff.
  *             We also do not keep track of reference or change flags.
  *
+ *             Any block that is larger than 256MB must be a multiple of 32MB.  We panic if it is not.
+ *
  *             Note that pmap_map_block_rc is the same but doesn't panic if collision.
  *
  */
  
-void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags) {      /* Map an autogenned block */
+void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags) {       /* Map an autogenned block */
 
        unsigned int            mflags;
        addr64_t                        colva;
@@ -1125,20 +1143,19 @@ void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_pro
 //     kprintf("pmap_map_block: (%08X) va = %016llX, pa = %08X, size = %08X, prot = %08X, attr = %08X, flags = %08X\n",        /* (BRINGUP) */
 //             current_thread(), va, pa, size, prot, attr, flags);     /* (BRINGUP) */
 
-
        mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1);    /* Convert to our mapping_make flags */
        if(flags) mflags |= mmFlgPerm;                                  /* Mark permanent if requested */
        
-       colva = mapping_make(pmap, va, pa, mflags, (size >> 12), prot); /* Enter the mapping into the pmap */
+       colva = mapping_make(pmap, va, pa, mflags, size, prot); /* Enter the mapping into the pmap */
        
        if(colva) {                                                                             /* If there was a collision, panic */
-               panic("pmap_map_block: collision at %016llX, pmap = %08X\n", colva, pmap);
+               panic("pmap_map_block: mapping error %d, pmap = %08X, va = %016llX\n", (uint32_t)(colva & mapRetCode), pmap, va);
        }
        
        return;                                                                                 /* Return */
 }
 
-int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags) {    /* Map an autogenned block */
+int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags) {     /* Map an autogenned block */
 
        unsigned int            mflags;
        addr64_t                        colva;
@@ -1150,8 +1167,8 @@ int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_p
 
        mflags = mmFlgBlock | mmFlgUseAttr | (attr & VM_MEM_GUARDED) | ((attr & VM_MEM_NOT_CACHEABLE) >> 1);    /* Convert to our mapping_make flags */
        if(flags) mflags |= mmFlgPerm;                                  /* Mark permanent if requested */
-       
-       colva = mapping_make(pmap, va, pa, mflags, (size >> 12), prot); /* Enter the mapping into the pmap */
+
+       colva = mapping_make(pmap, va, pa, mflags, size, prot); /* Enter the mapping into the pmap */
        
        if(colva) return 0;                                                             /* If there was a collision, fail */
        
@@ -1625,7 +1642,7 @@ void pmap_switch(pmap_t map)
  *     subord = the pmap that goes into the grand
  *     vstart  = start of range in pmap to be inserted
  *     nstart  = start of range in pmap nested pmap
- *     size   = Size of nest area (up to 16TB)
+ *     size   = Size of nest area (up to 2TB)
  *
  *     Inserts a pmap into another.  This is used to implement shared segments.
  *     On the current PPC processors, this is limited to segment (256MB) aligned
@@ -1634,8 +1651,6 @@ void pmap_switch(pmap_t map)
  *     We actually kinda allow recursive nests.  The gating factor is that we do not allow 
  *     nesting on top of something that is already mapped, i.e., the range must be empty.
  *
- *     
- *
  *     Note that we depend upon higher level VM locks to insure that things don't change while
  *     we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
  *     or do 2 nests at once.
@@ -1648,9 +1663,8 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t n
        int nlists;
        mapping_t *mp;
        
-       
        if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;     /* We can only do this for multiples of 256MB */
-       if((size >> 28) > 65536)  return KERN_INVALID_VALUE;    /* Max size we can nest is 16TB */
+       if((size >> 25) > 65536)  return KERN_INVALID_VALUE;    /* Max size we can nest is 2TB */
        if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;   /* We can only do this aligned to 256MB */
        if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE;   /* We can only do this aligned to 256MB */
        
@@ -1658,13 +1672,13 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t n
                panic("pmap_nest: size is invalid - %016llX\n", size);
        }
        
-       msize = (size >> 28) - 1;                                                       /* Change size to blocks of 256MB */
+       msize = (size >> 25) - 1;                                                       /* Change size to blocks of 32MB */
        
        nlists = mapSetLists(grand);                                            /* Set number of lists this will be on */
 
        mp = mapping_alloc(nlists);                                                     /* Get a spare mapping block */
        
-       mp->mpFlags = 0x01000000 | mpNest | mpPerm | nlists;
+       mp->mpFlags = 0x01000000 | mpNest | mpPerm | mpBSu | nlists;    /* Make this a permanent nested pmap with a 32MB basic size unit */
                                                                                                                /* Set the flags. Make sure busy count is 1 */
        mp->mpSpace = subord->space;                                            /* Set the address space/pmap lookup ID */
        mp->u.mpBSize = msize;                                                          /* Set the size */
@@ -1800,10 +1814,10 @@ void MapUserMemoryWindowInit(void) {
        
        mp = mapping_alloc(nlists);                                                     /* Get a spare mapping block */
 
-       mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | nlists;
+       mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | mpBSu | nlists; /* Make this a permanent nested pmap with a 32MB basic size unit */
                                                                                                                /* Set the flags. Make sure busy count is 1 */
        mp->mpSpace = kernel_pmap->space;                                       /* Set the address space/pmap lookup ID */
-       mp->u.mpBSize = 1;                                                                      /* Set the size to 2 segments */
+       mp->u.mpBSize = 15;                                                                     /* Set the size to 2 segments in 32MB chunks - 1 */
        mp->mpPte = 0;                                                                          /* Means nothing */
        mp->mpPAddr = 0;                                                                        /* Means nothing */
        mp->mpVAddr = lowGlo.lgUMWvaddr;                                        /* Set the address range we cover */
@@ -2042,9 +2056,3 @@ coredumpok(
 {
        return TRUE;
 }
-
-/*
-;;; Local Variables: ***
-;;; tab-width:4 ***
-;;; End: ***
-*/
index 2d88a66f5acc94b490b7c0c25c2b1c5cb51784d8..e8b137b6447e9269fc92ad6d1b4bc66c1cafa983 100644 (file)
@@ -250,6 +250,7 @@ extern pmapTransTab *pmapTrans;                     /* Space to pmap translate table */
 #define PHYS_MEM_WINDOW_VADDR  0x0000000100000000ULL
 #define IO_MEM_WINDOW_VADDR            0x0000000080000000ULL
 #define IO_MEM_WINDOW_SIZE             0x0000000080000000ULL
+#define pmapSmallBlock 65536
 
 #define pmap_kernel()                  (kernel_pmap)
 #define        pmap_resident_count(pmap)       ((pmap)->stats.resident_count)
@@ -302,8 +303,8 @@ extern void invalidate_icache(vm_offset_t va, unsigned length, boolean_t phys);
 extern void invalidate_icache64(addr64_t va, unsigned length, boolean_t phys);
 extern void pmap_sync_page_data_phys(ppnum_t pa);
 extern void pmap_sync_page_attributes_phys(ppnum_t pa);
-extern void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags);
-extern int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, vm_size_t size, vm_prot_t prot, int attr, unsigned int flags);
+extern void pmap_map_block(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags);
+extern int pmap_map_block_rc(pmap_t pmap, addr64_t va, ppnum_t pa, uint32_t size, vm_prot_t prot, int attr, unsigned int flags);
 
 extern kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size);
 extern kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr);
diff --git a/osfmk/ppc/pms.c b/osfmk/ppc/pms.c
new file mode 100644 (file)
index 0000000..58c47f9
--- /dev/null
@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <ppc/machine_routines.h>
+#include <ppc/machine_cpu.h>
+#include <ppc/exception.h>
+#include <ppc/misc_protos.h>
+#include <ppc/Firmware.h>
+#include <ppc/pmap.h>
+#include <ppc/proc_reg.h>
+#include <ppc/pms.h>
+#include <ppc/savearea.h>
+#include <ppc/exception.h>
+#include <kern/processor.h>
+
+extern int real_ncpus;
+
+static uint32_t pmsSyncrolator = 0;                                    /* Only one control operation at a time please */
+uint32_t pmsBroadcastWait = 0;                                         /* Number of outstanding broadcasts */
+
+int pmsInstalled = 0;                                                          /* Power Management Stepper can run and has table installed */
+int pmsExperimental = 0;                                                       /* Power Management Stepper in experimental mode */
+decl_simple_lock_data(,pmsBuildLock)                           /* Ensure only one thread can replace the table at a time */
+
+static pmsDef *altDpmsTab = 0;                                         /* Alternate step definition table */
+static uint32_t altDpmsTabSize = 0;                                    /* Size of alternate step definition table */
+
+pmsDef pmsDummy = {                                                                    /* This is the dummy step for initialization.  All it does is to park */
+       .pmsLimit = 0,                                                                  /* Time doesn't matter for a park */
+       .pmsStepID = pmsMaxStates - 1,                                  /* Use the very last ID number for the dummy */
+       .pmsSetCmd = pmsParkIt,                                                 /* Force us to be parked */
+       .sf.pmsSetFuncInd = 0,                                                  /* No platform call for this one */
+       .pmsDown = pmsPrepSleep,                                                /* We always park */
+       .pmsNext = pmsPrepSleep                                                 /* We always park */
+};
+
+pmsStat pmsStatsd[4][pmsMaxStates];                                    /* Generate enough statistics blocks for 4 processors */
+
+pmsCtl pmsCtls = {                                                                     /* Power Management Stepper control */
+       .pmsStats = &pmsStatsd
+};
+
+pmsSetFunc_t pmsFuncTab[pmsSetFuncMax] = {0};          /* This is the function index table */
+pmsQueryFunc_t pmsQueryFunc = 0;                                       /* Pointer to pmsQuery function */
+uint32_t pmsPlatformData = 0;                                          /* Data provided by and passed to platform functions */
+
+
+/*
+ *     Do any initialization needed
+ */
+void pmsInit(void) {
+
+       int i;
+       
+       simple_lock_init(&pmsBuildLock, 0);                             /* Initialize the build lock */
+       for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy;       /* Initialize the table to dummy steps */
+
+       return;
+}
+
+
+/*
+ *     Start the power management stepper on all processors
+ *
+ *     All processors must be parked.  This should be called when the hardware
+ *     is ready to step.  Probably only at boot and after wake from sleep.
+ *
+ */
+ void pmsStart(void) {
+
+       boolean_t       intr;
+
+       if(!pmsInstalled) return;                                               /* We can't do this if no table installed */
+
+       intr = ml_set_interrupts_enabled(FALSE);                /* No interruptions in here */
+       pmsRun(pmsStartUp);                                                             /* Start running the stepper everywhere */
+       (void)ml_set_interrupts_enabled(intr);                  /* Restore interruptions */
+
+       return;
+ }
+
+/*
+ *     Park the stepper execution.  This will force the stepper on this
+ *     processor to abandon its current step and stop.  No changes to the
+ *     hardware state is made and any previous step is lost.
+ *     
+ *     This is used as the initial state at startup and when the step table
+ *     is being changed.
+ *
+ */
+void pmsPark(void) {
+
+       boolean_t       intr;
+
+       if(!pmsInstalled) return;                                               /* We can't do this if no table installed */
+
+       intr = ml_set_interrupts_enabled(FALSE);                /* No interruptions in here */
+       pmsSetStep(pmsParked, 0);                                               /* Park the stepper */
+       (void)ml_set_interrupts_enabled(intr);                  /* Restore interruptions */
+       
+       return;
+
+}
+
+/*
+ *     Steps down to a lower power.
+ *     Interrupts must be off...
+ */
+
+void pmsDown(void) {
+
+       struct per_proc_info *pp;
+       uint32_t nstate;
+       
+       pp = getPerProc();                                                              /* Get our per_proc */
+       
+       if(!pmsInstalled || pp->pms.pmsState == pmsParked) return;              /* No stepping if parked or not installed */
+       
+       nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsDown;    /* Get the downward step */
+       pmsSetStep(nstate, 0);                                                  /* Step to it */
+       return;
+}
+
+
+/*
+ *     Steps up to a higher power.  The "timer" parameter is true if the
+ *     step was driven due to the pms timer expiring.
+ *
+ *     Interrupts must be off...
+ */
+void pmsStep(int timer) {
+
+       struct per_proc_info *pp;
+       uint32_t nstate;
+       int dir;
+       
+       pp = getPerProc();                                                              /* Get our per_proc */
+
+       if(!pmsInstalled || pp->pms.pmsState == pmsParked) return;      /* No stepping if parked or not installed */
+       
+       nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsNext;    /* Assume a normal step */
+       dir = 1;                                                                                /* A normal step is a step up */
+       
+       if(timer && (pmsCtls.pmsDefs[pp->pms.pmsState]->pmsSetCmd == pmsDelay)) {       /* If the timer expired and we are in a delay step, use the delay branch */
+               nstate = pmsCtls.pmsDefs[pp->pms.pmsState]->pmsTDelay;  /* Get the delayed step */
+               dir = 0;                                                                        /* Delayed steps are a step down for accounting purposes. */
+       }
+
+       pmsSetStep(nstate, dir);                                                /* Step to it  */
+       return;
+}
+
+
+/*
+ *     Set a specific step
+ *
+ *     We do not do statistics if exiting park
+ *     Interrupts must be off...
+ *
+ */
+
+void pmsSetStep(uint32_t nstep, int dir) {
+
+       struct per_proc_info *pp;
+       uint32_t pstate, ret, nCSetCmd, mCSetCmd;
+       pmsDef *pnstate, *pcstate;
+       uint64_t tb, nt, dur;
+       int cpu, frompark;
+
+       pp = getPerProc();                                                              /* Get our per_proc */
+       cpu = cpu_number();                                                             /* Get our processor */
+       
+       while(1) {                                                                              /* Keep stepping until we get a delay */
+               
+               if(pp->pms.pmsCSetCmd & pmsMustCmp) {           /* Do we have to finish the delay before changing? */
+                       while(mach_absolute_time() < pp->pms.pmsPop);   /* Yes, spin here... */
+               }
+               
+               if((nstep == pmsParked) || ((uint32_t)pmsCtls.pmsDefs[nstep]->pmsSetCmd == pmsParkIt)) {        /* Are we parking? */
+                       
+                       tb = mach_absolute_time();                              /* What time is it? */
+                       pp->pms.pmsStamp = tb;                                  /* Show transition now */
+                       pp->pms.pmsPop = HalfwayToForever;              /* Set the pop way into the future */
+                       pp->pms.pmsState = pmsParked;                   /* Make sure we are parked */
+                       setTimerReq();                                                  /* Cancel our timer if going */
+                       return;
+               }
+
+               pnstate = pmsCtls.pmsDefs[nstep];                       /* Point to the state definition */ 
+               pstate = pp->pms.pmsState;                                      /* Save the current step */
+               pp->pms.pmsState = nstep;                                       /* Set the current to the next step */
+
+               if(pnstate->pmsSetCmd != pmsDelay) {            /* If this is not a delayed state, change the actual hardware now */
+                       if(pnstate->pmsSetCmd & pmsCngCPU) pmsCPUSet(pnstate->pmsSetCmd);       /* We have some CPU work to do... */
+                       if((uint32_t)pnstate->sf.pmsSetFunc) pnstate->sf.pmsSetFunc(pnstate->pmsSetCmd, cpu, pmsPlatformData);  /* Tell the platform to set power mode */
+       
+                       mCSetCmd = pnstate->pmsSetCmd & (pmsCngXClk | pmsCngCPU | pmsCngVolt);  /* Isolate just the change flags */
+                       mCSetCmd = (mCSetCmd - (mCSetCmd >> 7)) | pmsSync | pmsMustCmp | pmsPowerID;    /* Form mask of bits that come from new command */
+                       nCSetCmd = pp->pms.pmsCSetCmd & ~mCSetCmd;      /* Clear changing bits */
+                       nCSetCmd = nCSetCmd | (pnstate->pmsSetCmd & mCSetCmd);  /* Flip on the changing bits and the always copy bits */
+       
+                       pp->pms.pmsCSetCmd = nCSetCmd;                  /* Set it for real */
+               }
+       
+               tb = mach_absolute_time();                                      /* What time is it? */
+               pp->pms.pmsPop = tb + pnstate->pmsLimit;        /* Set the next pop */
+       
+               if((pnstate->pmsSetCmd != pmsDelay) && (pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0)) {    /* Is this a synchronous command with a delay? */
+                       while(mach_absolute_time() < pp->pms.pmsPop);   /* Yes, spin here and wait it out... */
+               }
+
+/*
+ *             Gather some statistics
+ */
+         
+               dur = tb - pp->pms.pmsStamp;                            /* Get the amount of time we were in the old step */
+               pp->pms.pmsStamp = tb;                                          /* Set the new timestamp */
+               if(!(pstate == pmsParked)) {                            /* Only take stats if we were not parked */
+                       pcstate = pmsCtls.pmsDefs[pstate];              /* Get the previous step */
+                       pmsCtls.pmsStats[cpu][pcstate->pmsStepID].stTime[dir] += dur;   /* Accumulate the total time in the old step */ 
+                       pmsCtls.pmsStats[cpu][pcstate->pmsStepID].stCnt[dir] += 1;      /* Count transitions */
+               }
+
+/*
+ *             See if we are done chaining steps
+ */
+               if((pnstate->pmsSetCmd == pmsDelay) 
+                       || (!(pp->pms.pmsCSetCmd & pmsSync) && (pnstate->pmsLimit != 0))) {     /* Is this not syncronous and a non-zero delay or a delayed step? */
+                       setTimerReq();                                                  /* Start the timers ticking */
+                       break;                                                                  /* We've stepped as far as we're going to... */
+               }
+               
+               nstep = pnstate->pmsNext;                                       /* Chain on to the next */
+       }
+
+       return;
+
+}
+
+/*
+ *     Either park the stepper or force the step on a parked stepper for local processor only
+ *
+ */
+void pmsRunLocal(uint32_t nstep) {
+
+       struct per_proc_info *pp;
+       uint32_t lastState;
+       int cpu, i;
+       boolean_t       intr;
+
+       if(!pmsInstalled) return;                                               /* Ignore this if no step programs installed... */
+
+       intr = ml_set_interrupts_enabled(FALSE);                /* No interruptions in here */
+
+       pp = getPerProc();                                                              /* Get our per_proc */
+
+       if(nstep == pmsStartUp) {                                               /* Should we start up? */
+               pmsCPUInit();                                                           /* Get us up to full with high voltage and park */
+               nstep = pmsNormHigh;                                            /* Change request to transition to normal high */
+       }
+
+       lastState = pp->pms.pmsState;                                   /* Remember if we are parked now */
+
+       pmsSetStep(nstep, 1);                                                   /* Step to the new state */
+       
+       if((lastState == pmsParked) && (pp->pms.pmsState != pmsParked)) {       /* Did we just unpark? */
+               cpu = cpu_number();                                                     /* Get our processor */
+               for(i = 0; i < pmsMaxStates; i++) {                     /* Step through the steps and clear the statistics since we were parked */
+                       pmsCtls.pmsStats[cpu][i].stTime[0] = 0; /* Clear accumulated time - downward */ 
+                       pmsCtls.pmsStats[cpu][i].stTime[1] = 0; /* Clear accumulated time - forward */  
+                       pmsCtls.pmsStats[cpu][i].stCnt[0] = 0;  /* Clear transition count - downward */
+                       pmsCtls.pmsStats[cpu][i].stCnt[1] = 0;  /* Clear transition count - forward */
+               }
+       }
+
+       (void)ml_set_interrupts_enabled(intr);                  /* Restore interruptions */
+
+       return;
+
+}
+
+/*
+ *     Control the Power Management Stepper.
+ *     Called from user state by the superuser via a ppc system call.
+ *     Interruptions disabled.
+ *
+ */
+
+int pmsCntrl(struct savearea *save) {
+
+/*
+ *     NOTE(review): always returns 1 (syscall handled); the actual status is
+ *     passed back to user space in save->save_r3.
+ */
+       uint32_t request, nstep, reqsize, result, presult;
+       int ret, cpu;
+       kern_return_t kret;
+       pmsDef *ndefs;
+       struct per_proc_info *pp;
+
+       pp = getPerProc();                                                              /* Get our per_proc */
+       cpu = cpu_number();                                                             /* Get our processor */
+       
+       if(!is_suser()) {                                                               /* We are better than most, */
+               save->save_r3 = KERN_FAILURE;                           /* so we will only talk to the superuser. */
+               return 1;                                                                       /* Turn up our noses, say "harrumph," and walk away... */
+       }
+       
+       if(save->save_r3 >= pmsCFree) {                                 /* Can we understand the request? */
+               save->save_r3 = KERN_INVALID_ARGUMENT;          /* What language are these guys talking in, anyway? */
+               return 1;                                                                       /* Cock head like a confused puppy and run away... */
+       }
+       
+       request = (int)save->save_r3;                                   /* Remember the request */
+       reqsize = (uint32_t)save->save_r5;                              /* Get the size of the config table */
+
+       if(request == pmsCQuery) {                                              /* Are we just checking? */
+               result = pmsCPUquery() & pmsCPU;                        /* Get the processor data and make sure there is no slop */
+               presult = 0;                                                            /* Assume nothing */
+               if((uint32_t)pmsQueryFunc) presult = pmsQueryFunc(cpu, pmsPlatformData);        /* Go get the platform state */
+               result = result | (presult & (pmsXClk | pmsVoltage | pmsPowerID));      /* Merge the platform state with no slop */
+               save->save_r3 = result;                                         /* Tell 'em... */
+               return 1;
+       }
+       
+       if(request == pmsCExperimental) {                               /* Enter experimental mode? */
+       
+               if(pmsInstalled || (pmsExperimental & 1)) {     /* Are we already running or in experimental? */
+                       save->save_r3 = KERN_FAILURE;                   /* Fail, since we are already running */
+                       return 1;
+               }
+               
+               pmsExperimental |= 1;                                           /* Flip us into experimental but don't change other flags */
+               
+               pmsCPUConf();                                                           /* Configure for this machine */
+               pmsStart();                                                                     /* Start stepping */
+               save->save_r3 = KERN_SUCCESS;                           /* We are victorious... */
+               return 1;
+       
+       }
+
+       if(request == pmsCCnfg) {                                               /* Do some up-front checking before we commit to doing this */
+               if((reqsize > (pmsMaxStates * sizeof(pmsDef))) || (reqsize < (pmsFree * sizeof(pmsDef)))) {     /* Check that the size is reasonable */
+                       save->save_r3 = KERN_NO_SPACE;                  /* Tell them that they messed up */
+                       return 1;                                                               /* l8r... */
+               }
+       }
+
+
+/*
+ *     We are committed after here.  If there are any errors detected, we shouldn't die, but we
+ *     will be stuck in park.
+ *
+ *     Also, we can possibly end up on another processor after the broadcast.
+ *
+ */
+               
+       if(!hw_compare_and_store(0, 1, &pmsSyncrolator)) {      /* Are we already doing this? */
+               save->save_r3 = KERN_RESOURCE_SHORTAGE;         /* Tell them that we are already busy and to try again */
+               return 1;                                                                       /* G'wan away and don't bother me... */
+       }
+       save->save_r3 = KERN_SUCCESS;                                   /* Assume success */
+
+//     NOTE:  We will block in the following code until everyone has finished the prepare
+
+       pmsRun(pmsPrepCng);                                                             /* Get everyone parked and in a proper state for step table changes, including me */
+       
+       if(request == pmsCPark) {                                               /* Is all we're supposed to do park? */
+               pmsSyncrolator = 0;                                                     /* Free us up */
+               return 1;                                                                       /* Well, then we're done... */
+       }
+       
+       switch(request) {                                                               /* Select the routine */
+
+               case pmsCStart:                                                         /* Starts normal stepping */
+                       nstep = pmsNormHigh;                                    /* Set the request */
+                       break;
+
+               case pmsCFLow:                                                          /* Forces low power */
+                       nstep = pmsLow;                                                 /* Set request */
+                       break;
+
+               case pmsCFHigh:                                                         /* Forces high power */
+                       nstep = pmsHigh;                                                /* Set request */
+                       break;
+
+               case pmsCCnfg:                                                          /* Loads new stepper program */
+                       
+                       if(!(ndefs = (pmsDef *)kalloc(reqsize))) {      /* Get memory for the whole thing */
+                               save->save_r3 = KERN_INVALID_ADDRESS;   /* Return invalid address */
+                               pmsSyncrolator = 0;                                     /* Free us up */
+                               return 1;                                                       /* All done... */
+                       }
+                       
+                       ret = copyin((user_addr_t)((unsigned int)(save->save_r4)), (void *)ndefs, reqsize);     /* Get the new config table */
+                       if(ret) {                                                               /* Hmmm, something went wrong with the copyin */
+                               save->save_r3 = KERN_INVALID_ADDRESS;   /* Return invalid address */
+                               kfree((vm_offset_t)ndefs, reqsize);     /* Free up the copied in data */
+                               pmsSyncrolator = 0;                                     /* Free us up */
+                               return 1;                                                       /* All done... */
+                       }
+
+                       kret = pmsBuild(ndefs, reqsize, 0, 0, 0);       /* Go build and replace the tables.  Make sure we keep the old platform stuff */
+                       if(kret) {                                                              /* Hmmm, something went wrong with the compilation */
+                               save->save_r3 = kret;                           /* Pass back the passed back return code */
+                               kfree((vm_offset_t)ndefs, reqsize);     /* Free up the copied in data */
+                               pmsSyncrolator = 0;                                     /* Free us up */
+                               return 1;                                                       /* All done... */
+                       }
+
+                       kfree((vm_offset_t)ndefs, reqsize);     /* pmsBuild internalizes its own copy, so release the copied-in table (was leaked on success) */
+                       nstep = pmsNormHigh;                                    /* Set the request */
+                       break;
+
+               default:
+                       panic("pmsCntrl: stepper control is so very, very confused = %08X\n", request);
+       
+       }
+
+       pmsRun(nstep);                                                                  /* Get everyone into step */
+       pmsSyncrolator = 0;                                                             /* Free us up */
+       return 1;                                                                               /* All done... */
+
+}
+
+/*
+ *     Broadcast a change to all processors including ourselves.
+ *     This must transition before broadcasting because we may block and end up on a different processor.
+ *
+ *     This will block until all processors have transitioned, so
+ *     obviously, this can block.
+ *
+ *     Called with interruptions disabled.
+ *
+ */
+void pmsRun(uint32_t nstep) {
+       /* Transition the local processor first; note we may block and migrate
+          to another processor during the broadcast below */
+       pmsRunLocal(nstep);
+
+       /* Now ask every other processor to make the same transition */
+       (void)cpu_broadcast(&pmsBroadcastWait, pmsRemote, nstep);
+}
+
+/*
+ *     Receive a broadcast and react.
+ *     This is called from the interprocessor signal handler.
+ *     We wake up the initiator after we are finished.
+ *
+ */
+       
+void pmsRemote(uint32_t nstep) {
+
+       pmsRunLocal(nstep);                                             /* Make the requested transition on this processor */
+
+       if(hw_atomic_sub(&pmsBroadcastWait, 1) == 0) {  /* Decrement the outstanding-responder count */
+               thread_wakeup((event_t)&pmsBroadcastWait);      /* Last responder wakes the broadcast initiator */
+       }
+}
+
+
+/*
+ *     Build the tables needed for the stepper.  This includes both the step definitions and the step control table.
+ *
+ *     We most absolutely need to be parked before this happens because we're gonna change the table.
+ *     We're going to have to be pretty complete about checking for errors.
+ *     Also, a copy is always made because we don't want to be crippled by not being able to change
+ *     the table or description formats.
+ *
+ *     We pass in a table of external functions and the new stepper def uses the corresponding 
+ *     indexes rather than actual function addresses.  This is done so that a proper table can be
+ *     built with the control syscall.  It can't supply addresses, so the index has to do.  We
+ *     internalize the table so our caller does not need to keep it.  Note that passing in a 0
+ *     will use the current function table.  Also note that entry 0 is reserved and must be 0,
+ *     we will check and fail the build.
+ *
+ *     The platformData parameter is a 32-bit word of data that is passed unaltered to the set function.
+ *
+ *     The queryFunc parameter is the address of a function that will return the current state of the platform.
+ *     The format of the data returned is the same as the platform specific portions of pmsSetCmd, i.e., pmsXClk,
+ *     pmsVoltage, and any part of pmsPowerID that is maintained by the platform hardware (an example would be
+ *     the values of the gpios that correspond to pmsPowerID).  The value should be constructed by querying
+ *     hardware rather than returning a value cached by software. One of the intents of this function is to 
+ *     help recover lost or determine initial power states.
+ *
+ */
+kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc) {
+
+       int steps, newsize, i, cstp, nstps, oldAltSize, xdsply;
+       uint32_t setf;
+       uint64_t nlimit;
+       pmsDef *newpd, *oldAlt;
+       boolean_t intr;
+
+       xdsply = (pmsExperimental & 3) != 0;                    /* Turn on kprintfs if requested or in experimental mode */
+
+       if(pdsize % sizeof(pmsDef)) return KERN_INVALID_ARGUMENT;       /* Length not multiple of definition size */
+       
+       steps = pdsize / sizeof(pmsDef);                                /* Get the number of steps supplied */
+
+       /* NOTE(review): rejects steps == pmsMaxStates even though pmsCntrl's size
+          pre-check permits exactly pmsMaxStates entries — confirm intended */
+       if((steps >= pmsMaxStates) || (steps < pmsFree))        /* Complain if too big or too small */
+                       return KERN_INVALID_ARGUMENT;                   /* Squeak loudly!!! */
+                       
+       if((uint32_t)functab && (uint32_t)functab[0])   /* Verify that if they supplied a new function table, entry 0 is 0 */
+               return KERN_INVALID_ARGUMENT;                           /* Fail because they didn't reserve entry 0 */
+                       
+       if(xdsply) kprintf("\n  StepID   Down   Next    HWSel  HWfun                Limit\n");
+
+       for(i = 0; i < steps; i++) {                                    /* Step through and verify the definitions */
+
+               if(xdsply) kprintf("  %6d %6d %6d %08X %6d %20lld\n", pd[i].pmsStepID, pd[i].pmsDown, 
+                       pd[i].pmsNext, pd[i].pmsSetCmd,
+                       pd[i].sf.pmsSetFuncInd, pd[i].pmsLimit);
+
+               if((pd[i].pmsLimit != 0) && (pd[i].pmsLimit < 100ULL)) {
+                       if(xdsply) kprintf("error step %3d: pmsLimit too small\n", i);
+                       return KERN_INVALID_ARGUMENT;   /* Has to be 100µS or more */
+               }
+               
+               if((pd[i].pmsLimit != 0xFFFFFFFFFFFFFFFFULL) && (pd[i].pmsLimit > (HalfwayToForever / 1000ULL))) {
+                       if(xdsply) kprintf("error step %3d: pmsLimit too big\n", i);
+                       return KERN_INVALID_ARGUMENT;                   /* Can't be too big */
+               }
+               
+               if(pd[i].pmsStepID != i) {
+                       if(xdsply) kprintf("error step %3d: step ID does not match (%d)\n", i, pd[i].pmsStepID);
+                       return KERN_INVALID_ARGUMENT;   /* ID must match */
+               }
+
+               if(pd[i].sf.pmsSetFuncInd >= pmsSetFuncMax) {
+                       if(xdsply) kprintf("error step %3d: function invalid (%d)\n", i, pd[i].sf.pmsSetFuncInd);
+                       return KERN_INVALID_ARGUMENT;   /* Fail if this function is not in the table */
+               }
+               
+               if((pd[i].pmsDown != pmsParked) && pd[i].pmsDown >= steps) {
+                       if(xdsply) kprintf("error step %3d: pmsDown out of range (%d)\n", i, pd[i].pmsDown);
+                       return KERN_INVALID_ARGUMENT;   /* Step down must be in the table or park */
+               }
+               
+               if((pd[i].pmsNext != pmsParked) && pd[i].pmsNext >= steps) {
+                       if(xdsply) kprintf("error step %3d: pmsNext out of range (%d)\n", i, pd[i].pmsNext);
+                       return KERN_INVALID_ARGUMENT;   /* Step up must be in the table or park */
+               }
+               
+               if((pd[i].pmsSetCmd == pmsDelay) && (pd[i].pmsTDelay >= steps)) {
+                       if(xdsply) kprintf("error step %3d: pmsTDelay out of range (%d)\n", i, pd[i].pmsTDelay);
+                       return KERN_INVALID_ARGUMENT;   /* Delayed step must be in the table */
+               }
+               
+               if((pd[i].pmsSetCmd == pmsDelay) && (pd[i].pmsLimit == 0xFFFFFFFFFFFFFFFFULL)) {
+                       if(xdsply) kprintf("error step %3d: delay time limit must not be infinite\n", i);
+                       return KERN_INVALID_ARGUMENT;   /* Delayed step must have a time limit */
+               }
+               
+       }
+       
+/*
+ *     Verify that there are no infinite synchronous forward loops in the table
+ */
+       if(xdsply) kprintf("\nInitial scan passed, start in loop check\n");
+       for(i = 0; i < steps; i++) {                                    /* Start with each step. Inefficient, but who cares */
+               cstp = i;                                                                       /* Set starting point */
+               nstps = 0;                                                                      /* Initialize chain length counter */
+               while(1) {                                                                      /* Do until we hit the end */
+                       if(pd[cstp].pmsSetCmd == pmsParkIt) break;      /* Parking always terminates a chain so no endless loop here */
+                       if(pd[cstp].pmsSetCmd == pmsDelay) break;       /* Delayed steps always terminate a chain so no endless loop here */
+                       if((pd[cstp].pmsLimit != 0) && ((pd[cstp].pmsSetCmd & pmsSync) != pmsSync)) break;      /* If time limit is not 0 and not synchronous, no endless loop */
+                       if(pd[cstp].pmsNext == pmsParked) break;        /* If the next step is parked, no endless loop */
+                       
+                       cstp = pd[cstp].pmsNext;                                /* Chain to the next */
+                       nstps = nstps + 1;                                              /* Count this step */
+                       if(nstps >= steps) {                                    /* We've stepped for more steps than we have, must be an endless loop! */
+                               if(xdsply) kprintf("error step %3d: infinite pmsNext loop\n", i);
+                               return KERN_INVALID_ARGUMENT;           /* Suggest to our caller that they can't program... */
+                       }
+               }
+       }
+       
+       if((pmsExperimental & 4) && (pmsInstalled) && ((uint32_t)functab != 0)) {       /* If we are already initted and experimental is locked in, and we are doing first */
+               if(xdsply) kprintf("Experimental locked, ignoring driver pmsBuild\n");
+               return KERN_RESOURCE_SHORTAGE;                          /* Just ignore the request. */
+       }
+       
+       
+       
+/*
+ *     Well, things look ok, let's do it to it...
+ */
+
+       if(xdsply) kprintf("Loop check passed, building and installing table\n");
+
+       newsize = steps * sizeof(pmsDef);                               /* Get the size needed for the definition blocks */
+
+       if(!(newpd = (pmsDef *)kalloc(newsize))) {              /* Get memory for the whole thing */
+               return KERN_RESOURCE_SHORTAGE;                          /* No storage... */
+       }
+       
+       bzero((void *)newpd, newsize);                                  /* Make it pretty */
+       
+/*
+ *     Ok, this is it, finish initializing, switch the tables, and pray...
+ *     We want no interruptions at all and we need to lock the table.  Everybody should be parked,
+ *     so no one should ever touch this.  The lock is to keep multiple builders safe.  It probably
+ *     will never ever happen, but paranoia is a good thing...
+ */
+       intr = ml_set_interrupts_enabled(FALSE);                /* No interruptions in here */
+       simple_lock(&pmsBuildLock);                                             /* Lock out everyone... */
+       
+       if(platformData) pmsPlatformData = platformData;        /* Remember the platform data word passed in if any was... */
+       if((uint32_t)queryFunc) pmsQueryFunc = queryFunc;       /* Remember the query function passed in, if it was... */
+       
+       oldAlt = altDpmsTab;                                                    /* Remember any old alternate we had */
+       oldAltSize = altDpmsTabSize;                                    /* Remember its size */
+
+       altDpmsTab = newpd;                                                             /* Point to the new table */
+       altDpmsTabSize = newsize;                                               /* Set the size */
+       
+       if((uint32_t)functab) {                                                 /* Did we get a new function table? */
+               for(i = 0; i < pmsSetFuncMax; i++) pmsFuncTab[i] = functab[i];  /* Copy in the new table */
+       }
+
+       for(i = 0; i < pmsMaxStates; i++) pmsCtls.pmsDefs[i] = &pmsDummy;       /* Initialize the table to point to the dummy step */
+
+       for(i = 0; i < steps; i++) {                                    /* Replace the step table entries */
+               if(pd[i].pmsLimit == 0xFFFFFFFFFFFFFFFFULL) nlimit = century;   /* Default to 100 years */
+               else nlimit = pd[i].pmsLimit;                           /* Otherwise use what was supplied */
+               
+               nanoseconds_to_absolutetime(nlimit * 1000ULL, &newpd[i].pmsLimit);      /* Convert microseconds to nanoseconds and then to ticks */
+       
+               setf = pd[i].sf.pmsSetFuncInd;                                  /* Make convenient */
+               newpd[i].sf.pmsSetFunc = pmsFuncTab[setf];              /* Replace the index with the function address */
+        
+               newpd[i].pmsStepID  = pd[i].pmsStepID;          /* Set the step ID */ 
+               newpd[i].pmsSetCmd  = pd[i].pmsSetCmd;          /* Set the hardware selector ID */
+               newpd[i].pmsDown    = pd[i].pmsDown;            /* Set the downward step */
+               newpd[i].pmsNext    = pd[i].pmsNext;            /* Set the next step */
+               newpd[i].pmsTDelay  = pd[i].pmsTDelay;          /* Set the delayed step */
+               pmsCtls.pmsDefs[i]  = &newpd[i];                        /* Copy it in */
+       }
+       
+       pmsCtlp = (uint32_t)&pmsCtls;                                   /* Point to the new pms table */
+       
+       pmsInstalled = 1;                                                               /* The stepper has been born or born again... */
+
+       simple_unlock(&pmsBuildLock);                                   /* Free play! */
+       (void)ml_set_interrupts_enabled(intr);                  /* Interrupts back the way they were */
+
+       if((uint32_t)oldAlt) kfree((vm_offset_t)oldAlt, oldAltSize);    /* If we already had an alternate, free it */
+
+       if(xdsply) kprintf("Stepper table installed\n");
+       
+       return KERN_SUCCESS;                                                    /* We're in fate's hands now... */
+}
diff --git a/osfmk/ppc/pms.h b/osfmk/ppc/pms.h
new file mode 100644 (file)
index 0000000..799b9a4
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+
+#ifdef KERNEL_PRIVATE
+
+#ifndef _PPC_PMS_H_
+#define _PPC_PMS_H_
+
+#define pmsMaxStates 64
+#define HalfwayToForever 0x7FFFFFFFFFFFFFFFULL
+#define century 790560000000000ULL
+
+typedef void (*pmsSetFunc_t)(uint32_t, uint32_t, uint32_t);    /* Function used to set hardware power state */
+typedef uint32_t (*pmsQueryFunc_t)(uint32_t, uint32_t);        /* Function used to query hardware power state */
+
+typedef struct pmsStat {
+       uint64_t        stTime[2];                      /* Total time until switch to next step */
+       uint32_t        stCnt[2];                       /* Number of times switched to next step */
+} pmsStat;
+
+typedef struct pmsDef {
+       uint64_t        pmsLimit;                       /* Max time in this state in microseconds */
+       uint32_t        pmsStepID;                      /* Unique ID for this step */
+       uint32_t        pmsSetCmd;                      /* Command to select power state */
+#define pmsCngXClk  0x80000000         /* Change external clock */
+#define pmsXUnk        0x7F                    /* External clock unknown  */
+#define pmsXClk     0x7F000000         /* External clock frequency */
+#define pmsCngCPU   0x00800000         /* Change CPU parameters */
+#define pmsSync     0x00400000         /* Make changes synchronously, i.e., spin until delay finished */
+#define pmsMustCmp  0x00200000         /* Delay must complete before next change */
+#define pmsCPU      0x001F0000         /* CPU frequency */
+#define pmsCPUUnk      0x1F                    /* CPU frequency unknown */
+#define pmsCngVolt  0x00008000         /* Change voltage */
+#define pmsVoltage  0x00007F00         /* Voltage */
+#define pmsVoltUnk     0x7F                    /* Voltage unknown */
+#define pmsPowerID  0x000000FF         /* Identify power state to HW */
+
+/*     Special commands - various things */
+#define pmsDelay    0xFFFFFFFD         /* Delayed step, no processor or platform changes.  Timer expiration causes transition to pmsTDelay */
+#define pmsParkIt      0xFFFFFFFF              /* Enters the parked state.  No processor or platform changes.  Timers cancelled */
+#define pmsCInit       ((pmsXUnk << 24) | (pmsCPUUnk << 16) | (pmsVoltUnk << 8))       /* Initial current set command value */
+/*     Note:  pmsSetFuncInd is an index into a table of function pointers and pmsSetFunc is the address
+ *     of a function.  Initially, when you create a step table, this field is set as an index into
+ *     a table of function addresses that gets passed as a parameter to pmsBuild.  When pmsBuild
+ *     internalizes the step and function tables, it converts the index to the function address.
+ */
+       union sf {
+               pmsSetFunc_t    pmsSetFunc;     /* Function used to set platform power state */
+               uint32_t        pmsSetFuncInd;  /* Index to function in function table */
+       } sf;
+
+       uint32_t        pmsDown;                        /* Next state if going lower */
+       uint32_t        pmsNext;                        /* Normal next state */
+       uint32_t        pmsTDelay;                      /* State if command was pmsDelay and timer expired */
+} pmsDef;
+
+typedef struct pmsCtl {
+       pmsStat         (*pmsStats)[pmsMaxStates];      /* Pointer to statistics information, 0 if not enabled */
+       pmsDef          *pmsDefs[pmsMaxStates]; /* Indexed pointers to steps */
+} pmsCtl;
+
+/*
+ *     Note that this block is in the middle of the per_proc and the size (32 bytes)
+ *     can't be changed without moving it.
+ */
+
+typedef struct pmsd {
+       uint32_t        pmsState;                       /* Current power management state */
+       uint32_t        pmsCSetCmd;                     /* Current select command */
+       uint64_t        pmsPop;                         /* Time of next step */
+       uint64_t        pmsStamp;                       /* Time of transition to current state */
+       uint64_t        pmsTime;                        /* Total time in this state */
+} pmsd;
+
+/*
+ *     Required power management step programs
+ */
+enum {
+       pmsIdle      = 0,                               /* Power state in idle loop */
+       pmsNorm      = 1,                               /* Normal step - usually low power */
+       pmsNormHigh  = 2,                               /* Highest power in normal step */
+       pmsBoost     = 3,                               /* Boost/overdrive step */
+       pmsLow       = 4,                               /* Lowest non-idle power state, no transitions */
+       pmsHigh      = 5,                               /* Power step for full on, no transitions */
+       pmsPrepCng   = 6,                               /* Prepare for step table change */
+       pmsPrepSleep = 7,                               /* Prepare for sleep */
+       pmsOverTemp  = 8,                               /* Machine is too hot */
+       pmsEnterNorm = 9,                               /* Enter into the normal step program */
+       pmsFree      = 10,                              /* First available empty step */
+       pmsStartUp   = 0xFFFFFFFE,              /* Start stepping */
+       pmsParked    = 0xFFFFFFFF               /* Power parked - used when changing stepping table */
+};
+
+/*
+ *     Power Management Stepper Control requests
+ */
+enum {
+       pmsCPark = 0,                                   /* Parks the stepper */
+       pmsCStart = 1,                                  /* Starts normal stepping */
+       pmsCFLow = 2,                                   /* Forces low power */
+       pmsCFHigh = 3,                                  /* Forces high power */
+       pmsCCnfg = 4,                                   /* Loads new stepper program */
+       pmsCQuery = 5,                                  /* Query current step and state */
+       pmsCExperimental = 6,                   /* Enter experimental mode */
+       pmsCFree = 7                                    /* Next control command to be assigned */
+};
+
+extern pmsCtl pmsCtls;                         /* Power Management Stepper control */
+extern uint32_t pmsCtlp;
+extern uint32_t pmsBroadcastWait;      /* Number of outstanding broadcasts */
+extern pmsDef pmsDefault[];
+extern int pmsInstalled;
+extern int pmsExperimental;
+
+#define pmsSetFuncMax 32
+extern pmsSetFunc_t pmsFuncTab[pmsSetFuncMax];
+extern pmsQueryFunc_t pmsQueryFunc;
+extern uint32_t pmsPlatformData;
+
+extern int pmsCntrl(struct savearea *save);
+extern void pmsInit(void);
+extern void pmsStep(int timer);
+extern void pmsDown(void);
+extern void pmsSetStep(uint32_t nstep, int dir);
+extern void pmsRemote(uint32_t nstep);
+extern void pmsCPUSet(uint32_t sel);
+extern uint32_t pmsCPUquery(void);
+extern void pmsCPUConf(void);
+extern void pmsCPUInit(void);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern kern_return_t pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc);
+extern void pmsRun(uint32_t nstep);
+extern void pmsRunLocal(uint32_t nstep);
+extern void pmsPark(void);
+extern void pmsStart(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _PPC_PMS_H_ */
+#endif /* KERNEL_PRIVATE */
diff --git a/osfmk/ppc/pmsCPU.c b/osfmk/ppc/pmsCPU.c
new file mode 100644 (file)
index 0000000..3350292
--- /dev/null
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <ppc/machine_routines.h>
+#include <ppc/machine_cpu.h>
+#include <ppc/exception.h>
+#include <ppc/misc_protos.h>
+#include <ppc/Firmware.h>
+#include <ppc/pmap.h>
+#include <ppc/asm.h>
+#include <ppc/proc_reg.h>
+#include <ppc/pms.h>
+#include <ppc/savearea.h>
+#include <ppc/Diagnostics.h>
+#include <kern/processor.h>
+
+
/*
 *	Default stepper program consumed by pmsBuild() in pmsCPUConf().
 *	Every entry uses a dummy power level (pmsSetCmd = 0) and a dummy set
 *	function index; the platform substitutes real ones via pmsBuild's
 *	function table.  NOTE(review): each entry's pmsStepID names the step it
 *	describes (pmsIdle .. pmsOverTemp, declared in pms.h) -- the table
 *	appears to be keyed by those IDs, so do not reorder without checking
 *	how pmsBuild indexes it.
 */
pmsDef pmsDefault[] = {
	{
		.pmsLimit = century,							/* We can normally stay here for 100 years */
		.pmsStepID = pmsIdle,							/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsIdle,								/* We stay here */
		.pmsNext = pmsNorm								/* Next step */
	},
	{
		.pmsLimit = century,							/* We can normally stay here for 100 years */
		.pmsStepID = pmsNorm,							/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsIdle,								/* Down to idle */
		.pmsNext = pmsNorm								/* Next step */
	},
	{
		.pmsLimit = century,							/* We can normally stay here for 100 years */
		.pmsStepID = pmsNormHigh,						/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsIdle,								/* Down to idle */
		.pmsNext = pmsNormHigh							/* Next step */
	},
	{
		.pmsLimit = century,							/* We can normally stay here for 100 years */
		.pmsStepID = pmsBoost,							/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsIdle,								/* Step down */
		.pmsNext = pmsBoost								/* Next step */
	},
	{
		.pmsLimit = century,							/* We can normally stay here for 100 years */
		.pmsStepID = pmsLow,							/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsLow,								/* We always stay here */
		.pmsNext = pmsLow								/* We always stay here */
	},
	{
		.pmsLimit = century,							/* We can normally stay here for 100 years */
		.pmsStepID = pmsHigh,							/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsHigh,								/* We always stay here */
		.pmsNext = pmsHigh								/* We always stay here */
	},
	{
		.pmsLimit = 0,									/* Time doesn't matter for a prepare for change */
		.pmsStepID = pmsPrepCng,						/* Unique identifier to this step */
		.pmsSetCmd = pmsParkIt,							/* Force us to be parked */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsPrepCng,							/* We always stay here */
		.pmsNext = pmsPrepCng							/* We always stay here */
	},
	{
		.pmsLimit = 0,									/* Time doesn't matter for a prepare for sleep */
		.pmsStepID = pmsPrepSleep,						/* Unique identifier to this step */
		.pmsSetCmd = pmsParkIt,							/* Force us to be parked */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsPrepSleep,						/* We always stay here */
		.pmsNext = pmsPrepSleep							/* We always stay here */
	},
	{
		.pmsLimit = 0,									/* Overtemp entry: time limit doesn't apply here either */
		.pmsStepID = pmsOverTemp,						/* Unique identifier to this step */
		.pmsSetCmd = 0,									/* Dummy platform power level */
		.sf.pmsSetFuncInd = 0,							/* Dummy platform set function */
		.pmsDown = pmsOverTemp,							/* We always stay here */
		.pmsNext = pmsOverTemp							/* We always stay here */
	}
};
+
+
+
+/*
+ *     This is where the CPU part of the stepper code lives.   
+ *
+ *     It also contains the "hacked kext" experimental code.  This is/was used for
+ *     experimentation and bringup.  It should neither live long nor prosper.
+ *
+ */
+
+/*
+ *     Set the processor frequency and stuff
+ */
+
+void pmsCPUSet(uint32_t sel) {
+       int nvoltage, nfreq;
+       uint32_t oldaack;
+       struct per_proc_info *pp;
+
+       pp = getPerProc();                                                                      /* Get our per_proc */
+
+       if(!((sel ^ pp->pms.pmsCSetCmd) & pmsCPU)) return;      /* If there aren't any changes, bail now... */
+
+       nfreq = (sel & pmsCPU) >> 16;                                           /* Isolate the new frequency */
+       
+       switch(pp->pf.pfPowerModes & pmType) {                          /* Figure out what type to do */
+       
+               case pmDFS:                                                                             /* This is a DFS machine */
+                       ml_set_processor_speed_dfs(nfreq);                      /* Yes, set it */
+                       break;
+       
+               case pmDualPLL:
+                       ml_set_processor_speed_dpll(nfreq);                     /* THIS IS COMPLETELY UNTESTED!!! */
+                       break;
+
+               case pmPowerTune:                                                               /* This is a PowerTune machine */
+                       ml_set_processor_speed_powertune(nfreq);        /* Diddle the deal */
+                       break;
+                       
+               default:                                                                                /* Not this time dolt!!! */
+                       panic("pmsCPUSet: unsupported power manager type: %08X\n", pp->pf.pfPowerModes);
+                       break;
+       
+       }
+       
+}
+
+/*
+ *     This code configures the initial step tables.  It should be called after the timebase frequency is initialized.
+ */
+
+void pmsCPUConf(void) {
+
+       int i;
+       kern_return_t ret;
+       pmsSetFunc_t pmsDfltFunc[pmsSetFuncMax];                        /* List of functions for the external power control to use */
+
+       for(i = 0; i < pmsSetFuncMax; i++) pmsDfltFunc[i] = 0;  /* Clear this */
+
+
+       ret = pmsBuild((pmsDef *)&pmsDefault, sizeof(pmsDefault), pmsDfltFunc, 0, (pmsQueryFunc_t)0);   /* Configure the default stepper */
+
+pCCfinish:
+       if(ret != KERN_SUCCESS) {                                                       /* Some screw up? */
+               panic("pmsCPUConf: initial stepper table build failed, ret = %08X\n", ret);     /* Squeal */
+       }
+       
+       pmsSetStep(pmsHigh, 1);                                                         /* Slew to high speed */
+       pmsPark();                                                                                      /* Then park */
+       return;
+}
+
+/*
+ *     This function should be called once for each processor to force the
+ *     processor to the correct voltage and frequency.
+ */
+void pmsCPUInit(void) {
+
+       int cpu;
+
+       cpu = cpu_number();                                                                     /* Who are we? */
+       
+       kprintf("************ Initializing stepper hardware, cpu %d ******************\n", cpu);        /* (BRINGUP) */
+       
+       pmsSetStep(pmsHigh, 1);                                                         /* Slew to high speed */
+       pmsPark();                                                                                      /* Then park */
+
+       kprintf("************ Stepper hardware initialized, cpu %d ******************\n", cpu); /* (BRINGUP) */
+
+       return;
+}
+
+uint32_t pmsCPUquery(void) {
+
+       uint32_t result;
+       struct per_proc_info *pp;
+       uint64_t scdata;
+
+       pp = getPerProc();                                                                      /* Get our per_proc */
+
+       switch(pp->pf.pfPowerModes & pmType) {                          /* Figure out what type to do */
+       
+               case pmDFS:                                                                             /* This is a DFS machine */
+                       result = hid1get();                                                     /* Get HID1 */
+                       result = (result >> 6) & 0x00030000;            /* Isolate the DFS bits */
+                       break;
+                       
+               case pmPowerTune:                                                               /* This is a PowerTune machine */               
+                       (void)ml_scom_read(PowerTuneStatusReg, &scdata);        /* Get the current power level */
+                       result = (scdata >> (32 + 8)) & 0x00030000;     /* Shift the data to align with the set command */
+                       break;
+                       
+               default:                                                                                /* Query not supported for this kind */
+                       result = 0;                                                                     /* Return highest if not supported */
+                       break;
+       
+       }
+
+       return result;
+}
+
+
index 71168cd3af7881f5fde95fdb0ff1067f344e60a8..f1dfa33b48bcc115c9272bbc3004925a982b5c55 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -48,6 +48,8 @@
 #include <ppc/mem.h>
 #include <ppc/mappings.h>
 #include <ppc/locks.h>
+#include <ppc/pms.h>
+#include <ppc/rtclock.h>
 
 #include <pexpert/pexpert.h>
 
@@ -118,6 +120,7 @@ patch_entry_t patch_table[] = {
     {NULL,                  0x00000000, PATCH_END_OF_TABLE, 0}
        };
 
+
 /*
  * Forward definition
  */
@@ -153,7 +156,7 @@ ppc_init(
 
        BootProcInfo.cpu_number = 0;
        BootProcInfo.cpu_flags = 0;
-       BootProcInfo.istackptr = 0;     /* we're on the interrupt stack */
+       BootProcInfo.istackptr = 0;                                                     /* we're on the interrupt stack */
        BootProcInfo.intstack_top_ss = (vm_offset_t)&intstack + INTSTACK_SIZE - FM_SIZE;
        BootProcInfo.debstack_top_ss = (vm_offset_t)&debstack + KERNEL_STACK_SIZE - FM_SIZE;
        BootProcInfo.debstackptr = BootProcInfo.debstack_top_ss;
@@ -162,17 +165,27 @@ ppc_init(
        BootProcInfo.FPU_owner = 0;
        BootProcInfo.VMX_owner = 0;
        BootProcInfo.pp_cbfr = console_per_proc_alloc(TRUE);
-       BootProcInfo.rtcPop = 0xFFFFFFFFFFFFFFFFULL;
+       BootProcInfo.rtcPop = EndOfAllTime;
+       BootProcInfo.pp2ndPage = (addr64_t)&BootProcInfo;       /* Initial physical address of the second page */
+
+       BootProcInfo.pms.pmsStamp = 0;                                          /* Dummy transition time */
+       BootProcInfo.pms.pmsPop = EndOfAllTime;                         /* Set the pop way into the future */
+       
+       BootProcInfo.pms.pmsState = pmsParked;                          /* Park the power stepper */
+       BootProcInfo.pms.pmsCSetCmd = pmsCInit;                         /* Set dummy initial hardware state */
+       
        mp = (mapping_t *)BootProcInfo.ppUMWmp;
        mp->mpFlags = 0x01000000 | mpLinkage | mpPerm | 1;
        mp->mpSpace = invalSpace;
 
+       pmsInit();                                                                                      /* Initialize the stepper */
+
        thread_bootstrap();
 
        thread = current_thread();
        thread->machine.curctx = &thread->machine.facctx;
        thread->machine.facctx.facAct = thread;
-       thread->machine.umwSpace = invalSpace;                                  /* Initialize user memory window space to invalid */
+       thread->machine.umwSpace = invalSpace;                          /* Initialize user memory window space to invalid */
        thread->machine.preemption_count = 1;
 
        cpu_bootstrap();
@@ -185,33 +198,34 @@ ppc_init(
 
        static_memory_end = round_page(args->topOfKernelData);;
       
-       PE_init_platform(FALSE, args);                          /* Get platform expert set up */
+       PE_init_platform(FALSE, args);                                          /* Get platform expert set up */
 
        if (!PE_parse_boot_arg("novmx", &novmx)) novmx=0;       /* Special run without VMX? */
-       if(novmx) {                                                                     /* Yeah, turn it off */
-               BootProcInfo.pf.Available &= ~pfAltivec;        /* Turn off Altivec available */
+       if(novmx) {                                                                                     /* Yeah, turn it off */
+               BootProcInfo.pf.Available &= ~pfAltivec;                /* Turn off Altivec available */
                __asm__ volatile("mtsprg 2,%0" : : "r" (BootProcInfo.pf.Available));    /* Set live value */
        }
 
        if (!PE_parse_boot_arg("fn", &forcenap)) forcenap = 0;  /* If force nap not set, make 0 */
        else {
-               if(forcenap < 2) forcenap = forcenap + 1;                       /* Else set 1 for off, 2 for on */
-               else forcenap = 0;                                                                      /* Clear for error case */
+               if(forcenap < 2) forcenap = forcenap + 1;               /* Else set 1 for off, 2 for on */
+               else forcenap = 0;                                                              /* Clear for error case */
        }
        
-       if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags=0;      /* Set diagnostic flags */
-       if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts=0;                          /* Set lcks options */
+       if (!PE_parse_boot_arg("pmsx", &pmsExperimental)) pmsExperimental = 0;  /* Check if we should start in experimental power management stepper mode */
+       if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts = 0;        /* Set lcks options */
+       if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags = 0;    /* Set diagnostic flags */
        if(dgWork.dgFlags & enaExpTrace) trcWork.traceMask = 0xFFFFFFFF;        /* If tracing requested, enable it */
 
-       if(PE_parse_boot_arg("ctrc", &cputrace)) {                                                      /* See if tracing is limited to a specific cpu */
+       if(PE_parse_boot_arg("ctrc", &cputrace)) {                      /* See if tracing is limited to a specific cpu */
                trcWork.traceMask = (trcWork.traceMask & 0xFFFFFFF0) | (cputrace & 0xF);        /* Limit to 4 */
        }
 
        if(!PE_parse_boot_arg("tb", &trcWork.traceSize)) {      /* See if non-default trace buffer size */
 #if DEBUG
-               trcWork.traceSize = 32;                                 /* Default 32 page trace table for DEBUG */
+               trcWork.traceSize = 32;                                                 /* Default 32 page trace table for DEBUG */
 #else
-               trcWork.traceSize = 8;                                  /* Default 8 page trace table for RELEASE */
+               trcWork.traceSize = 8;                                                  /* Default 8 page trace table for RELEASE */
 #endif
        }
 
@@ -228,7 +242,7 @@ ppc_init(
        else wcte = (wcte != 0);                                                        /* Force to 0 or 1 */
 
        if (!PE_parse_boot_arg("mcklog", &mckFlags)) mckFlags = 0;      /* If machine check flags not specified, clear */
-       else if(mckFlags > 1) mckFlags = 0;                     /* If bogus, clear */
+       else if(mckFlags > 1) mckFlags = 0;                                     /* If bogus, clear */
     
     if (!PE_parse_boot_arg("ht_shift", &hash_table_shift))  /* should we use a non-default hash table size? */
         hash_table_shift = 0;                           /* no, use default size */
@@ -257,9 +271,7 @@ ppc_init(
                        }
                }
        }
-       
-       PE_init_platform(TRUE, args);
-       
+               
        machine_startup(args);
 }
 
index 21a24e3f32dac24de61395e651ea3e7c3472d75d..cca618d4ff361adecef5ace31a1be6a927b86495 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -64,6 +64,8 @@ extern int disableConsoleOutput;
 
 struct shadowBAT shadow_BAT;
 
+
+
 /*
  *     NOTE: mem_size is bogus on large memory machines.  We will pin it to 0x80000000 if there is more than 2 GB
  *     This is left only for compatibility and max_mem should be used.
@@ -329,6 +331,8 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args)
  */
 
        hw_start_trans();                                       /* Start translating */
+       PE_init_platform(TRUE, args);           /* Initialize this right off the bat */
+
 
 #if 0
        GratefulDebInit((bootBumbleC *)&(args->Video)); /* Initialize the GratefulDeb debugger */
index bd97881bd024dff5bebe97bfaa05175988099b41..ecd5ee24f9887a960e3bd520e907e9b9a89c9435 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -44,6 +44,8 @@
 #include <machine/machine_routines.h>
 #include <ppc/exception.h>
 #include <ppc/proc_reg.h>
+#include <ppc/pms.h>
+#include <ppc/rtclock.h>
 
 #include <IOKit/IOPlatformExpert.h>
 
@@ -53,8 +55,6 @@ int           sysclk_config(void);
 
 int            sysclk_init(void);
 
-void treqs(uint32_t dec);
-
 kern_return_t  sysclk_gettime(
        mach_timespec_t                 *cur_time);
 
@@ -140,21 +140,12 @@ static void               nanotime_to_absolutetime(
                                        uint32_t                nanosecs,
                                        uint64_t                *result);
 
-static int             deadline_to_decrementer(
-                                       uint64_t                deadline,
-                                       uint64_t                now);
-
 static void            rtclock_alarm_expire(
                                        timer_call_param_t              p0,
                                        timer_call_param_t              p1);
 
 /* global data declarations */
 
-#define DECREMENTER_MAX                0x7FFFFFFFUL
-#define DECREMENTER_MIN                0xAUL
-
-natural_t              rtclock_decrementer_min;
-
 decl_simple_lock_data(static,rtclock_lock)
 
 /*
@@ -234,28 +225,16 @@ sysclk_config(void)
 int
 sysclk_init(void)
 {
-       uint64_t                                abstime, nexttick;
-       int                                             decr1, decr2;
-       struct rtclock_timer    *mytimer;
+       uint64_t                                abstime;
        struct per_proc_info    *pp;
 
-       decr1 = decr2 = DECREMENTER_MAX;
-
        pp = getPerProc();
-       mytimer = &pp->rtclock_timer;
 
        abstime = mach_absolute_time();
-       nexttick = abstime + rtclock_tick_interval;
-       pp->rtclock_tick_deadline = nexttick;
-       decr1 = deadline_to_decrementer(nexttick, abstime);
-
-       if (mytimer->is_set)
-               decr2 = deadline_to_decrementer(mytimer->deadline, abstime);
-
-       if (decr1 > decr2)
-               decr1 = decr2;
-
-       treqs(decr1);
+       pp->rtclock_tick_deadline = abstime + rtclock_tick_interval;    /* Get the time we need to pop */
+       pp->rtcPop = pp->rtclock_tick_deadline; /* Set the rtc pop time the same for now */
+       
+       (void)setTimerReq();                    /* Start the timers going */
 
        return (1);
 }
@@ -595,6 +574,43 @@ clock_set_calendar_microtime(
 
     commpage_set_timestamp(0,0,0,0);
 
+       /*
+        *      Cancel any adjustment in progress.
+        */
+       if (rtclock_calend.adjdelta < 0) {
+               uint64_t        now, t64;
+               uint32_t        delta, t32;
+
+               delta = -rtclock_calend.adjdelta;
+
+               sys = rtclock_calend.epoch;
+               microsys = rtclock_calend.microepoch;
+
+               now = mach_absolute_time();
+
+               if (now > rtclock_calend.epoch1)
+                       t64 = now - rtclock_calend.epoch1;
+               else
+                       t64 = 0;
+
+               t32 = (t64 * USEC_PER_SEC) / rtclock_sec_divisor;
+
+               if (t32 > delta)
+                       TIME_ADD(sys, 0, microsys, (t32 - delta), USEC_PER_SEC);
+
+               rtclock_calend.epoch = sys;
+               rtclock_calend.microepoch = microsys;
+
+               sys = t64 = now / rtclock_sec_divisor;
+               now -= (t64 * rtclock_sec_divisor);
+               microsys = (now * USEC_PER_SEC) / rtclock_sec_divisor;
+
+               TIME_SUB(rtclock_calend.epoch, sys, rtclock_calend.microepoch, microsys, USEC_PER_SEC);
+       }
+
+       rtclock_calend.epoch1 = 0;
+       rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0;
+
        /*
         *      Calculate the new calendar epoch based on
         *      the new value and the system clock.
@@ -613,12 +629,6 @@ clock_set_calendar_microtime(
        rtclock_calend.epoch = secs;
        rtclock_calend.microepoch = microsecs;
 
-       /*
-        *      Cancel any adjustment in progress.
-        */
-       rtclock_calend.epoch1 = 0;
-       rtclock_calend.adjdelta = rtclock_calend.adjtotal = 0;
-
        simple_unlock(&rtclock_lock);
 
        /*
@@ -877,9 +887,9 @@ void
 clock_set_timer_deadline(
        uint64_t                                deadline)
 {
-       uint64_t                                abstime;
        int                                             decr;
-       struct rtclock_timer    *mytimer;
+       uint64_t                                abstime;
+       rtclock_timer_t                 *mytimer;
        struct per_proc_info    *pp;
        spl_t                                   s;
 
@@ -887,21 +897,15 @@ clock_set_timer_deadline(
        pp = getPerProc();
        mytimer = &pp->rtclock_timer;
        mytimer->deadline = deadline;
-       mytimer->is_set = TRUE;
-       if (!mytimer->has_expired) {
-               abstime = mach_absolute_time();
-               if (    mytimer->deadline < pp->rtclock_tick_deadline                   ) {
-                       decr = deadline_to_decrementer(mytimer->deadline, abstime);
-                       if (    rtclock_decrementer_min != 0                            &&
-                                       rtclock_decrementer_min < (natural_t)decr               )
-                               decr = rtclock_decrementer_min;
-
-                       treqs(decr);
-
-                       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
-                                                                               | DBG_FUNC_NONE, decr, 2, 0, 0, 0);
-               }
+
+       if (!mytimer->has_expired && (deadline < pp->rtclock_tick_deadline)) {          /* Has the timer already expired or is less that set? */
+               pp->rtcPop = deadline;                  /* Yes, set the new rtc pop time */
+               decr = setTimerReq();                   /* Start the timers going */
+
+               KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
+                                                                       | DBG_FUNC_NONE, decr, 2, 0, 0, 0);
        }
+
        splx(s);
 }
 
@@ -917,64 +921,67 @@ clock_set_timer_func(
        UNLOCK_RTC(s);
 }
 
-void
-rtclock_intr(
-       int                                     device,
-       struct savearea         *ssp,
-       spl_t                           old);
-
 /*
  * Real-time clock device interrupt.
  */
 void
-rtclock_intr(
-       __unused int                    device,
-       struct savearea                 *ssp,
-       __unused spl_t                  old_spl)
-{
+rtclock_intr(struct savearea *ssp) {
+       
        uint64_t                                abstime;
-       int                                             decr1, decr2;
-       struct rtclock_timer    *mytimer;
+       int                                             decr;
+       rtclock_timer_t                 *mytimer;
        struct per_proc_info    *pp;
 
-       decr1 = decr2 = DECREMENTER_MAX;
-
        pp = getPerProc();
+       mytimer = &pp->rtclock_timer;
 
        abstime = mach_absolute_time();
-       if (    pp->rtclock_tick_deadline <= abstime            ) {
+       if (pp->rtclock_tick_deadline <= abstime) {     /* Have we passed the pop time? */
                clock_deadline_for_periodic_event(rtclock_tick_interval, abstime,
                                                                                                &pp->rtclock_tick_deadline);
                hertz_tick(USER_MODE(ssp->save_srr1), ssp->save_srr0);
+               abstime = mach_absolute_time();                 /* Refresh the current time since we went away */
        }
 
-       mytimer = &pp->rtclock_timer;
-
-       abstime = mach_absolute_time();
-       if (    mytimer->is_set                                 &&
-                       mytimer->deadline <= abstime            ) {
-               mytimer->has_expired = TRUE; mytimer->is_set = FALSE;
-               (*rtclock_timer_expire)(abstime);
+       if (mytimer->deadline <= abstime) {                     /* Have we expired the deadline? */
+               mytimer->has_expired = TRUE;                    /* Remember that we popped */
+               mytimer->deadline = EndOfAllTime;               /* Set timer request to the end of all time in case we have no more events */
+               (*rtclock_timer_expire)(abstime);               /* Process pop */
                mytimer->has_expired = FALSE;
        }
 
-       abstime = mach_absolute_time();
-       decr1 = deadline_to_decrementer(pp->rtclock_tick_deadline, abstime);
+       pp->rtcPop = (pp->rtclock_tick_deadline < mytimer->deadline) ?  /* Get shortest pop */
+               pp->rtclock_tick_deadline :                             /* It was the periodic timer */
+               mytimer->deadline;                                              /* Actually, an event request */
+       
+       decr = setTimerReq();                                           /* Request the timer pop */
 
-       if (mytimer->is_set)
-               decr2 = deadline_to_decrementer(mytimer->deadline, abstime);
+       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
+                                                 | DBG_FUNC_NONE, decr, 3, 0, 0, 0);
+}
 
-       if (decr1 > decr2)
-               decr1 = decr2;
+/*
+ *     Request an interruption at a specific time 
+ *
+ *     Sets the decrementer to pop at the right time based on the timebase.
+ *     The value is chosen by comparing the rtc request with the power management
+ *     request.  We may add other values at a future time.
+ *
+ */
+int setTimerReq(void) {
 
-       if (    rtclock_decrementer_min != 0                                    &&
-                       rtclock_decrementer_min < (natural_t)decr1              )
-               decr1 = rtclock_decrementer_min;
+       struct per_proc_info *pp;
+       int decr;
+       uint64_t nexttime;
+       
+       pp = getPerProc();                                                      /* Get per_proc */
 
-       treqs(decr1);
+       nexttime = pp->rtcPop;                                          /* Assume main timer */
 
-       KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1)
-                                                 | DBG_FUNC_NONE, decr1, 3, 0, 0, 0);
+       decr = setPop((pp->pms.pmsPop < nexttime) ? pp->pms.pmsPop : nexttime); /* Schedule timer pop */
+
+       return decr;                                                            /* Pass back what we actually set */
 }
 
 static void
@@ -989,22 +996,6 @@ rtclock_alarm_expire(
        clock_alarm_intr(SYSTEM_CLOCK, &timestamp);
 }
 
-static int
-deadline_to_decrementer(
-       uint64_t                deadline,
-       uint64_t                now)
-{
-       uint64_t                delt;
-
-       if (deadline <= now)
-               return DECREMENTER_MIN;
-       else {
-               delt = deadline - now;
-               return (delt >= (DECREMENTER_MAX + 1))? DECREMENTER_MAX:
-                               ((delt >= (DECREMENTER_MIN + 1))? (delt - 1): DECREMENTER_MIN);
-       }
-}
-
 static void
 nanotime_to_absolutetime(
        uint32_t                        secs,
@@ -1110,23 +1101,3 @@ machine_delay_until(
        } while (now < deadline);
 }
 
-/*
- *     Request a decrementer pop
- *
- */
-
-void treqs(uint32_t dec) {
-
-
-       struct per_proc_info *pp;
-       uint64_t nowtime, newtime;
-       
-       nowtime = mach_absolute_time();                                         /* What time is it? */
-       pp = getPerProc();                                                                      /* Get our processor block */
-       newtime = nowtime + (uint64_t)dec;                                      /* Get requested pop time */
-       pp->rtcPop = newtime;                                                           /* Copy it */
-       
-       mtdec((uint32_t)(newtime - nowtime));                           /* Set decrementer */
-       return;
-
-}
diff --git a/osfmk/ppc/rtclock.h b/osfmk/ppc/rtclock.h
new file mode 100644 (file)
index 0000000..4c2800d
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2004-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ * 
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ * 
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ * 
+ * @APPLE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * @APPLE_FREE_COPYRIGHT@
+ */
+/*
+ *     File:           rtclock.h
+ *     Purpose:        Routines for handling the machine dependent
+ *                             real-time clock.
+ */
+
+#ifndef _PPC_RTCLOCK_H_
+#define _PPC_RTCLOCK_H_
+
+#define EndOfAllTime   0xFFFFFFFFFFFFFFFFULL
+
+extern void rtclock_intr(struct savearea *ssp);
+extern int setTimerReq(void);
+
+#pragma pack(push,4)
+struct rtclock_timer_t  {
+       uint64_t                deadline;
+       uint32_t
+       /*boolean_t*/   is_set:1,
+                                       has_expired:1,
+                                       :0;
+};
+#pragma pack(pop)
+typedef struct rtclock_timer_t rtclock_timer_t;
+
+#endif /* _PPC_RTCLOCK_H_ */
index 640b063fcbb820fa08c8a31536cb24e0a984b01e..492b0dda3a33aeca5a8779dc8144e795d7c4b64e 100644 (file)
@@ -67,7 +67,7 @@ typedef struct savearea_comm {
     
                                                 /* offset 0x040 */
        uint64_t                save_misc0;                                     /* Various stuff */
-       uint64_t                save_misc1;                                     /* Various stuff */
+       uint64_t                save_misc1;                                     /* Various stuff - snapshot chain during hibernation */
        unsigned int    sac_alloc;                                      /* Bitmap of allocated slots */
     unsigned int       save_054;
     unsigned int       save_misc2;
index 848c8c25b2969cdf45c9b0d5dfdaeb9fefc24509..176ff88f304146d836682108329ebf7a4c2f1ec5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -99,7 +99,7 @@ struct scc_tty scc_tty[NSCC_LINE];
 extern unsigned int disableSerialOuput;
 
 int    serial_initted = 0;
-unsigned int scc_parm_done = 0;                                /* (TEST/DEBUG) */
+unsigned int scc_parm_done = 0;
 
 extern unsigned int serialmode;
 
@@ -182,7 +182,7 @@ boolean_t scc_funnel_initted = FALSE;
  * Adapt/Probe/Attach functions
  */
 boolean_t      scc_uses_modem_control = FALSE;/* patch this with adb */
-decl_simple_lock_data(,scc_stomp)                      /* (TEST/DEBUG) */
+decl_simple_lock_data(,scc_stomp)
 
 /* This is called VERY early on in the init and therefore has to have
  * hardcoded addresses of the serial hardware control registers. The
@@ -210,7 +210,7 @@ initialize_serial( caddr_t scc_phys_base, int32_t serial_baud )
                return;
        }
 
-       simple_lock_init(&scc_stomp, FALSE);                            /* (TEST/DEBUG) */
+       simple_lock_init(&scc_stomp, FALSE);
        
        if (serial_baud == -1) serial_baud = DEFAULT_SPEED;
        
@@ -241,7 +241,7 @@ initialize_serial( caddr_t scc_phys_base, int32_t serial_baud )
 
                        scc_read_reg_zero(regs, 0, bits);/* Clear the status */
                }
-                scc_parm_done = 1;                     /* (TEST/DEBUG) */
+               scc_parm_done = 1;
        }
 
        serial_initted = TRUE;
@@ -323,12 +323,14 @@ scc_getc(int unit, int line, boolean_t wait, boolean_t raw)
        register scc_regmap_t   regs;
        unsigned char   c, value;
        int             rcvalue, from_line;
+       uint32_t        fcrmunge;
        spl_t           s = splhigh();
        DECL_FUNNEL_VARS
 
        FUNNEL_ENTER(&SCC_FUNNEL);
 
-       simple_lock(&scc_stomp);                                        /* (TEST/DEBUG) */
+
+       simple_lock(&scc_stomp);
        regs = scc_softc[0].regs;
 
        /*
@@ -344,7 +346,7 @@ again:
                        break;
 
                if (!wait) {
-                       simple_unlock(&scc_stomp);                      /* (TEST/DEBUG) */
+                       simple_unlock(&scc_stomp);
                        splx(s);
                        FUNNEL_EXIT(&SCC_FUNNEL);
                        return -1;
@@ -362,14 +364,14 @@ again:
        if (console_is_serial() &&
            c == ('_' & 0x1f)) {
                /* Drop into the debugger */
-               simple_unlock(&scc_stomp);                              /* (TEST/DEBUG) */
+               simple_unlock(&scc_stomp);
                Debugger("Serial Line Request");
-               simple_lock(&scc_stomp);                                /* (TEST/DEBUG) */
+               simple_lock(&scc_stomp);
                scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
                if (wait) {
                        goto again;
                }
-               simple_unlock(&scc_stomp);                              /* (TEST/DEBUG) */
+               simple_unlock(&scc_stomp);
                splx(s);
                FUNNEL_EXIT(&SCC_FUNNEL);
                return -1;
@@ -390,7 +392,7 @@ again:
 
        scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
 
-       simple_unlock(&scc_stomp);                                      /* (TEST/DEBUG) */
+       simple_unlock(&scc_stomp);
        splx(s);
 
        FUNNEL_EXIT(&SCC_FUNNEL);
@@ -408,14 +410,16 @@ scc_putc(int unit, int line, int c)
        scc_regmap_t    regs;
        spl_t            s;
        unsigned char    value;
+       uint32_t fcrmunge;
        DECL_FUNNEL_VARS
 
+
        if (disableSerialOuput)
                return 0;
 
        s = splhigh();
        FUNNEL_ENTER(&SCC_FUNNEL);
-       simple_lock(&scc_stomp);                                /* (TEST/DEBUG) */
+       simple_lock(&scc_stomp);                
 
        regs = scc_softc[0].regs;
 
@@ -435,7 +439,7 @@ scc_putc(int unit, int line, int c)
                        break;
        } while (1);
        scc_write_reg(regs, line, SCC_RR0, SCC_RESET_HIGHEST_IUS);
-       simple_unlock(&scc_stomp);                              /* (TEST/DEBUG) */
+       simple_unlock(&scc_stomp);              
 
        splx(s);
 
@@ -485,7 +489,7 @@ scc_param(struct scc_tty *tp)
        assert(FUNNEL_IN_USE(&SCC_FUNNEL));
        
        s = splhigh();
-       simple_lock(&scc_stomp);                                /* (TEST/DEBUG) */
+       simple_lock(&scc_stomp);
 
        chan = scc_chan(tp->t_dev);
        scc = &scc_softc[0];
@@ -497,29 +501,29 @@ scc_param(struct scc_tty *tp)
        if ((sr->flags & (TF_ODDP|TF_EVENP)) == (tp->t_flags & (TF_ODDP|TF_EVENP))
            && sr->speed == tp->t_ispeed) {
                assert(FUNNEL_IN_USE(&SCC_FUNNEL));
-               simple_unlock(&scc_stomp);                                      /* (TEST/DEBUG) */
-               splx(s);                                                                                        /* (TEST/DEBUG) */
-               return 0;                                                                                       /* (TEST/DEBUG) */
+               simple_unlock(&scc_stomp);
+               splx(s);
+               return 0;
        }
 
        if(scc_parm_done)       {                                                               
                
-               scc_write_reg(regs,  chan,  3, SCC_WR3_RX_8_BITS|SCC_WR3_RX_ENABLE);    /* (TEST/DEBUG) */
-               sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;      /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  1, sr->wr1);                        /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan, 15, SCC_WR15_ENABLE_ESCC);   /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  7, SCC_WR7P_RX_FIFO);       /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);       /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);       /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);       /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  9, SCC_WR9_MASTER_IE|SCC_WR9_NV);   /* (TEST/DEBUG) */
-               scc_read_reg_zero(regs, 0, bits);                                       /* (TEST/DEBUG) */
-               sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;      /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  1, sr->wr1);                        /* (TEST/DEBUG) */
-               scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);       /* (TEST/DEBUG) */
-               simple_unlock(&scc_stomp);                                                      /* (TEST/DEBUG) */
-               splx(s);                                                                                        /* (TEST/DEBUG) */
-               return 0;                                                                                       /* (TEST/DEBUG) */
+               scc_write_reg(regs,  chan,  3, SCC_WR3_RX_8_BITS|SCC_WR3_RX_ENABLE);
+               sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;
+               scc_write_reg(regs,  chan,  1, sr->wr1);
+               scc_write_reg(regs,  chan, 15, SCC_WR15_ENABLE_ESCC);
+               scc_write_reg(regs,  chan,  7, SCC_WR7P_RX_FIFO);
+               scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);
+               scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);
+               scc_write_reg(regs,  chan,  0, SCC_RESET_EXT_IP);
+               scc_write_reg(regs,  chan,  9, SCC_WR9_MASTER_IE|SCC_WR9_NV);
+               scc_read_reg_zero(regs, 0, bits);
+               sr->wr1 = SCC_WR1_RXI_FIRST_CHAR | SCC_WR1_EXT_IE;
+               scc_write_reg(regs,  chan,  1, sr->wr1);
+               scc_write_reg(regs,  chan,  0, SCC_IE_NEXT_CHAR);
+               simple_unlock(&scc_stomp);
+               splx(s);
+               return 0;
        }
        
        sr->flags = tp->t_flags;
@@ -529,7 +533,7 @@ scc_param(struct scc_tty *tp)
        if (tp->t_ispeed == 0) {
                sr->wr5 &= ~SCC_WR5_DTR;
                scc_write_reg(regs,  chan, 5, sr->wr5);
-               simple_unlock(&scc_stomp);                                                      /* (TEST/DEBUG) */
+               simple_unlock(&scc_stomp);
                splx(s);
 
                assert(FUNNEL_IN_USE(&SCC_FUNNEL));
@@ -635,7 +639,7 @@ scc_param(struct scc_tty *tp)
        sr->wr5 |= SCC_WR5_TX_ENABLE;
        scc_write_reg(regs,  chan,  5, sr->wr5);
 
-       simple_unlock(&scc_stomp);                      /* (TEST/DEBUG) */
+       simple_unlock(&scc_stomp);
        splx(s);
 
        assert(FUNNEL_IN_USE(&SCC_FUNNEL));
@@ -671,6 +675,8 @@ serial_keyboard_start(void)
        panic("serial_keyboard_start: we can't get back here\n");
 }
 
+static int ptestxxx = 0;
+
 void
 serial_keyboard_poll(void)
 {
@@ -678,6 +684,7 @@ serial_keyboard_poll(void)
        uint64_t next;
        extern void cons_cinput(char ch);       /* The BSD routine that gets characters */
 
+
        while(1) {                              /* Do this for a while */
                chr = scc_getc(0, 1, 0, 1);     /* Get a character if there is one */
                if(chr < 0) break;              /* The serial buffer is empty */
index 5acc66143b727ebada303e1477c068f40c75e504..d5653260d0aeaed1ac3f864930e430a38045e71c 100644 (file)
@@ -149,34 +149,26 @@ mapSrch64d:
             ; never for the most-common case of finding a scalar mapping.  The full searches
             ; must check _in_ the inner loop, to get the prev ptrs right.
 
-            mr.                r9,r9                                   ; was there a prev ptr?
-            li         r3,0                                    ; assume we are going to return null
-            ld         r4,pmapSkipLists(r6)    ; assume prev ptr null... so next is first
-            beq--      mapSrch64Exit                   ; prev ptr was null, search failed
-            lwz                r0,mpFlags(r9)                  ; get flag bits from prev mapping
-            ld         r10,mpVAddr(r9)                 ; re-fetch base address of prev ptr
-            ld         r4,mpList0(r9)                  ; get 64-bit ptr to next mapping, if any
-            lhz                r11,mpBSize(r9)                 ; get #pages/#segments in block/submap mapping
-            
-            rlwinm     r0,r0,0,mpType                  ; isolate mapping type code
-            cmplwi     cr1,r0,mpBlock                  ; cr1_eq <- block type?
-            cmplwi     r0,mpNest                               ; cr0_eq <- nested type?
-            cror       cr0_eq,cr1_eq,cr0_eq    ; cr0_eq <- block or nested type?
-            cmplwi     cr5,r0,mpLinkage                ; cr5_eq <- linkage type?
-            cror       cr0_eq,cr5_eq,cr0_eq    ; cr0_eq <- block or nested or linkage type?
-            
-            rldicr     r10,r10,0,51                    ; zero low 12 bits of mapping va
-            bne                mapSrch64Exit                   ; prev mapping was just a scalar page, search failed
-            sldi       r0,r11,12                               ; assume block mapping, get size in bytes - 4k
-            beq                cr1,mapSrch64f                  ; we guessed right, it was a block mapping
-            addi       r11,r11,1                               ; mpBSize is 1 too low
-            sldi       r11,r11,28                              ; in a nested pmap, mpBSize is in units of segments
-            subi       r0,r11,4096                             ; get address of last page in submap
-mapSrch64f:
-            add                r10,r10,r0                              ; r10 <- last page in this mapping
-            cmpld      r5,r10                                  ; does this mapping cover our page?
-            bgt                mapSrch64Exit                   ; no, search failed
-            mr         r3,r9                                   ; yes, we found it
+                       mr.             r9,r9                                   ; was there a prev ptr?
+                       li              r3,0                                    ; assume we are going to return null
+                       ld              r4,pmapSkipLists(r6)    ; assume prev ptr null... so next is first
+                       beq--   mapSrch64Exit                   ; prev ptr was null, search failed
+                       lwz             r0,mpFlags(r9)                  ; get flag bits from prev mapping
+                       lhz             r11,mpBSize(r9)                 ; get #pages/#segments in block/submap mapping
+                       
+                       rlwinm  r0,r0,mpBSub+1,31,31    ; 0 if 4K bsu or 1 if 32MB bsu
+                       ld              r10,mpVAddr(r9)                 ; re-fetch base address of prev ptr
+                       ori             r0,r0,0x3216                    ; OR in 0x00003216 (0x3200 and a base rotate of 22)
+                       addi    r11,r11,1                               ; Convert 0-based to 1-based
+                       rlwnm   r0,r0,r0,27,31                  ; Rotate to get 12 or 25
+                       ld              r4,mpList0(r9)                  ; get 64-bit ptr to next mapping, if any
+                       sld             r11,r11,r0                              ; Get the length in bytes
+                       rldicr  r10,r10,0,51                    ; zero low 12 bits of mapping va
+                       subi    r0,r11,4096                             ; get offset last page in mapping
+                       add             r10,r10,r0                              ; r10 <- last page in this mapping
+                       cmpld   r5,r10                                  ; does this mapping cover our page?
+                       bgt             mapSrch64Exit                   ; no, search failed
+                       mr              r3,r9                                   ; yes, we found it
 
             ; found the mapping
             ;   r2 = count of nodes visited
@@ -245,34 +237,26 @@ mapSrch32d:
             ; never for the most-common case of finding a scalar mapping.  The full searches
             ; must check _in_ the inner loop, to get the prev ptrs right.
 
-            mr.                r9,r9                                   ; was there a prev ptr?
-            li         r3,0                                    ; assume we are going to return null
-            lwz                r4,pmapSkipLists+4(r6)  ; assume prev ptr null... so next is first
-            beq-       mapSrch32Exit                   ; prev ptr was null, search failed
-            lwz                r0,mpFlags(r9)                  ; get flag bits from prev mapping
-            lwz                r10,mpVAddr+4(r9)               ; re-fetch base address of prev ptr
-            lwz                r4,mpList0+4(r9)                ; get ptr to next mapping, if any
-
-            rlwinm     r0,r0,0,mpType                  ; isolate mapping type code
-            cmplwi     cr1,r0,mpBlock                  ; cr1_eq <- block type?
-            cmplwi     r0,mpNest                               ; cr0_eq <- nested type?
-            cror       cr0_eq,cr1_eq,cr0_eq    ; cr0_eq <- block or nested type?
-            cmplwi     cr5,r0,mpLinkage                ; cr5_eq <- linkage type?
-            cror       cr0_eq,cr5_eq,cr0_eq    ; cr0_eq <- block or nested or linkage type?
-
-            bne                mapSrch32Exit                   ; prev mapping was just a scalar page, search failed
-            lhz                r11,mpBSize(r9)                 ; get #pages/#segments in block/submap mapping
-            rlwinm     r10,r10,0,0,19                  ; zero low 12 bits of block mapping va
-            slwi       r0,r11,12                               ; assume block mapping, get size in bytes - 4k
-            beq                cr1,mapSrch32f                  ; we guessed right, it was a block mapping
-            addi       r11,r11,1                               ; mpBSize is 1 too low
-            slwi       r11,r11,28                              ; in a nested pmap, mpBSize is in units of segments
-            subi       r0,r11,4096                             ; get address of last page in submap
-mapSrch32f:
-            add                r10,r10,r0                              ; r10 <- last page in this mapping
-            cmplw      r5,r10                                  ; does this mapping cover our page?
-            bgt                mapSrch32Exit                   ; no, search failed
-            mr         r3,r9                                   ; yes, we found it
+                       mr.             r9,r9                                   ; was there a prev ptr?
+                       li              r3,0                                    ; assume we are going to return null
+                       lwz             r4,pmapSkipLists+4(r6)  ; assume prev ptr null... so next is first
+                       beq-    mapSrch32Exit                   ; prev ptr was null, search failed
+                       lwz             r0,mpFlags(r9)                  ; get flag bits from prev mapping
+                       lhz             r11,mpBSize(r9)                 ; get #pages/#segments in block/submap mapping
+                       lwz             r10,mpVAddr+4(r9)               ; re-fetch base address of prev ptr
+                       
+                       rlwinm  r0,r0,mpBSub+1,31,31    ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
+                       addi    r11,r11,1                               ; Convert 0-based to 1-based
+                       ori             r0,r0,0x3216                    ; OR in 0x00003216 (0x3200 and a base rotate of 22)
+                       rlwnm   r0,r0,r0,27,31                  ; Rotate to get 12 or 25
+                       lwz             r4,mpList0+4(r9)                ; get ptr to next mapping, if any
+                       slw             r11,r11,r0                              ; Get length in bytes
+                       rlwinm  r10,r10,0,0,19                  ; zero low 12 bits of block mapping va
+                       subi    r0,r11,4096                             ; get address of last page in submap
+                       add             r10,r10,r0                              ; r10 <- last page in this mapping
+                       cmplw   r5,r10                                  ; does this mapping cover our page?
+                       bgt             mapSrch32Exit                   ; no, search failed
+                       mr              r3,r9                                   ; yes, we found it
 
             ; found the mapping
             ;   r2 = count of nodes visited
@@ -378,35 +362,36 @@ LEXT(mapSearchFull)
             ;  r7 = current skip list number * 8
             ;  r8 = ptr to skip list vector of mapping pointed to by r9
             ;  r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap)
-            ;  r10 = prev mappings va, or 0 if r9==pmap 
+            ;  r10 = lowest expected next va, 0 at the beginning of the search 
             ;  r12 = ptr to the skipListPrev vector in the per-proc
             
             .align     5
 mapSrchFull64a:                                                                ; loop over each mapping
-            ld         r4,mpVAddr(r3)                  ; get va for this mapping (plus flags in low 12 bits)
-            addi       r2,r2,1                                 ; count mappings visited
-            lwz                r0,mpFlags(r3)                  ; get mapping flag bits
-            
-            cmpld      cr0,r10,r4                              ; make sure VAs come in strictly ascending order
+                       addi    r2,r2,1                                 ; count mappings visited
+                       lwz             r0,mpFlags(r3)                  ; get mapping flag bits
+                       lhz             r11,mpBSize(r3)                 ; get #pages/#segments in block/submap mapping
+                       ld              r4,mpVAddr(r3)                  ; get va for this mapping (plus flags in low 12 bits)
+
+                       rlwinm  r0,r0,mpBSub+1,31,31    ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
+                       addi    r11,r11,1                               ; Convert 0-based to 1-based
+                       ori             r0,r0,0x3216                    ; OR in 0x00003216 (0x3200 and a base rotate of 22)
+                       rlwnm   r0,r0,r0,27,31                  ; Rotate to get 12 or 25
+                       sld             r11,r11,r0                              ; Get the length in bytes
             rldicr     r4,r4,0,51                              ; zero low 12 bits of mapping va
-            cmpld      cr1,r5,r4                               ; compare the vas
-            bge--      cr0,mapSkipListPanic    ; die if keys are out of order
+            addic.     r0,r11,-4096                    ; get offset last page in mapping (set cr0_eq if 1 page)
 
-            rlwinm     r0,r0,0,mpType                  ; isolate mapping type code
-            cmplwi     r0,mpNest                               ; cr0_eq <- nested type?
-            cmplwi     cr5,r0,mpLinkage                ; cr5_eq <- linkage type?
-            cror       cr0_eq,cr5_eq,cr0_eq    ; cr0_eq <- nested type or linkage type?
-            cmplwi     cr5,r0,mpBlock                  ; cr5_eq <- block type?
-            cror       cr0_eq,cr5_eq,cr0_eq    ; cr0_eq <- block or nested or linkage type?
+            cmpld      cr5,r10,r4                              ; make sure VAs come in strictly ascending order
+            cmpld      cr1,r5,r4                               ; compare the vas
+            bgt--      cr5,mapSkipListPanic    ; die if keys are out of order
 
             blt                cr1,mapSrchFull64d              ; key is less, try next list
             beq                cr1,mapSrchFull64Found  ; this is the correct mapping
-            beq--      cr0,mapSrchFull64e              ; handle block mapping or nested pmap
+            bne--      cr0,mapSrchFull64e              ; handle mapping larger than one page
 mapSrchFull64b:
             la         r8,mpList0(r3)                  ; point to skip list vector in this mapping
             mr         r9,r3                                   ; current becomes previous
             ldx                r3,r7,r8                                ; get ptr to next mapping in current list
-            mr         r10,r4                                  ; remember prev ptrs VA
+            addi       r10,r4,0x1000                   ; Get the lowest VA we can get next
 mapSrchFull64c:
             mr.                r3,r3                                   ; was there another mapping on current list?
             bne++      mapSrchFull64a                  ; was another, so loop
@@ -427,13 +412,6 @@ mapSrchFull64d:
             ; the end of the block to see if key fits within it.
 
 mapSrchFull64e:            
-            lhz                r11,mpBSize(r3)                 ; get #pages/#segments in block/submap mapping (if nonscalar)
-            sldi       r0,r11,12                               ; assume block mapping, get size in bytes - 4k
-            beq                cr5,mapSrchFull64f              ; we guessed right, it was a block mapping
-            addi       r11,r11,1                               ; mpBSize is 1 too low
-            sldi       r11,r11,28                              ; in a nested pmap, mpBSize is in units of segments
-            subi       r0,r11,4096                             ; get address of last page in submap
-mapSrchFull64f:
             add                r4,r4,r0                                ; r4 <- last page in this mapping
             cmpld      r5,r4                                   ; does this mapping cover our page?
             bgt                mapSrchFull64b                  ; no, try next mapping (r4 is advanced to end of range)
@@ -467,35 +445,36 @@ mapSrchFull64Found:                                                       ; WARNING: can drop down to here
             ;  r7 = current skip list number * 8
             ;  r8 = ptr to skip list vector of mapping pointed to by r9
             ;  r9 = prev ptr, ie highest mapping that comes before search target (initially the pmap)
-            ;  r10 = prev mappings va, or 0 if r9==pmap 
+            ;  r10 = lowest expected next va, 0 at the beginning of the search 
             ;  r12 = ptr to the skipListPrev vector in the per-proc
             
             .align     4
 mapSrchFull32a:                                                                ; loop over each mapping
-            lwz                r4,mpVAddr+4(r3)                ; get va for this mapping (plus flags in low 12 bits)
-            addi       r2,r2,1                                 ; count mappings visited
-            lwz                r0,mpFlags(r3)                  ; get mapping flag bits
-                        
-            cmplw      cr0,r10,r4                              ; make sure VAs come in strictly ascending order
-            rlwinm     r4,r4,0,0,19                    ; zero low 12 bits of mapping va
-            cmplw      cr1,r5,r4                               ; compare the vas
-            bge-       cr0,mapSkipListPanic    ; die if keys are out of order
-
-            rlwinm     r0,r0,0,mpType                  ; isolate mapping type code
-            cmplwi     cr5,r0,mpLinkage                ; cr5_eq <- linkage type?
-            cmplwi     r0,mpNest                               ; cr0_eq <- nested type?
-            cror       cr0_eq,cr5_eq,cr0_eq    ; cr0_eq <- linkage type or nested type?
-            cmplwi     cr5,r0,mpBlock                  ; cr5_eq <- block type?
-            cror       cr0_eq,cr5_eq,cr0_eq    ; cr0_eq <- block or nested or linkage type?
+                       addi    r2,r2,1                                 ; count mappings visited
+                       lwz             r0,mpFlags(r3)                  ; get mapping flag bits
+                       lhz             r11,mpBSize(r3)                 ; get #pages/#segments in block/submap mapping
+                       lwz             r4,mpVAddr+4(r3)                ; get va for this mapping (plus flags in low 12 bits)
+                                               
+                       rlwinm  r0,r0,mpBSub+1,31,31    ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
+                       addi    r11,r11,1                               ; Convert 0-based to 1-based
+                       ori             r0,r0,0x3216                    ; OR in 0x00003216 (0x3200 and a base rotate of 22)
+                       rlwnm   r0,r0,r0,27,31                  ; Rotate to get 12 or 25
+                       slw             r11,r11,r0                              ; Get the length in bytes
+                       rlwinm  r4,r4,0,0,19                    ; zero low 12 bits of mapping va
+            addic.     r0,r11,-4096                    ; get offset last page in mapping (set cr0_eq if 1 page)
 
-            blt                cr1,mapSrchFull32d              ; key is less than this va, try next list
-            beq-       cr1,mapSrchFull32Found  ; this is the correct mapping
-            beq-       cr0,mapSrchFull32e              ; handle block mapping or nested pmap
+                       cmplw   cr0,r10,r4                              ; make sure VAs come in strictly ascending order
+                       cmplw   cr1,r5,r4                               ; compare the vas
+                       bgt-    cr0,mapSkipListPanic    ; die if keys are out of order
+                       
+                       blt             cr1,mapSrchFull32d              ; key is less than this va, try next list
+                       beq             cr1,mapSrchFull32Found  ; this is the correct mapping
+                       bne-    cr0,mapSrchFull32e              ; handle mapping larger than one page
 mapSrchFull32b:
             la         r8,mpList0+4(r3)                ; point to skip list vector in this mapping
             mr         r9,r3                                   ; current becomes previous
             lwzx       r3,r7,r8                                ; get ptr to next mapping in current list
-            mr         r10,r4                                  ; remember prev ptrs VA
+            addi       r10,r4,0x1000                   ; Get the lowest VA we can get next
 mapSrchFull32c:
             mr.                r3,r3                                   ; next becomes current
             bne+       mapSrchFull32a                  ; was another, so loop
@@ -516,13 +495,6 @@ mapSrchFull32d:
             ; the end of the block to see if our key fits within it.
 
 mapSrchFull32e:            
-            lhz                r11,mpBSize(r3)                 ; get #pages/#segments in block/submap mapping (if nonscalar)
-            slwi       r0,r11,12                               ; assume block mapping, get size in bytes - 4k
-            beq                cr5,mapSrchFull32f              ; we guessed right, it was a block mapping
-            addi       r11,r11,1                               ; mpBSize is 1 too low
-            slwi       r11,r11,28                              ; in a nested pmap, mpBSize is in units of segments
-            subi       r0,r11,4096                             ; get address of last page in submap
-mapSrchFull32f:
             add                r4,r4,r0                                ; r4 <- last page in this mapping
             cmplw      r5,r4                                   ; does this mapping cover our page?
             bgt                mapSrchFull32b                  ; no, try next mapping
@@ -1089,25 +1061,17 @@ mapVer64a:
             ; Do some additional checks (so we only do them once per mapping.)
             ; First, if a block mapping or nested pmap, compute block end.
             
-            rlwinm     r29,r29,0,mpType                ; isolate mapping type code
-            cmplwi     r29,mpNest                              ; cr0_eq <- nested type?
-            cmplwi     cr1,r29,mpLinkage               ; cr1_eq <- linkage type?
-            cror       cr0_eq,cr1_eq,cr0_eq    ; cr0_eq <- linkage type or nested type?
-            cmplwi     cr1,r29,mpBlock                 ; cr1_eq <- block type?
-            cror       cr0_eq,cr1_eq,cr0_eq    ; cr0_eq <- block or nested or linkage type?
-            
-            subi       r21,r21,1                               ; count mappings in this pmap
-            bne++      mapVer64b                               ; not nested or pmap
-            lhz                r27,mpBSize(r26)                ; get #pages or #segments
-            sldi       r29,r27,12                              ; assume block mapping, units are (pages-1)
-            beq                cr1,mapVer64b                   ; guessed correctly
-            addi       r27,r27,1                               ; units of nested pmap are (#segs-1)
-            sldi       r29,r27,28                              ; convert to #bytes
-            subi       r29,r29,4096                    ; get offset to last byte in nested pmap
+                       lhz             r27,mpBSize(r26)                ; get #pages or #segments
+                       rlwinm  r29,r29,mpBSub+1,31,31  ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
+                       addi    r27,r27,1                               ; units of nested pmap are (#segs-1)
+                       ori             r29,r29,0x3216                  ; OR in 0x00003216 (0x3200 and a base rotate of 22)
+                       rlwnm   r29,r29,r29,27,31               ; Rotate to get 12 or 25
+                       subi    r21,r21,1                               ; count mappings in this pmap
+                       sld             r29,r27,r29                             ; Get the length in bytes
+                       subi    r29,r29,4096                    ; get offset to last byte in nested pmap
             
             ; Here with r29 = size of block - 4k, or 0 if mapping is a scalar page.
 
-mapVer64b:
             add                r24,r28,r29                             ; r24 <- address of last valid page in this mapping
             la         r28,mpList0(r26)                ; get base of this mappings vector            
             lwz                r27,mpFlags(r26)                ; Get the number of lists
@@ -1213,32 +1177,22 @@ mapVer32a:
             ; Do some additional checks (so we only do them once per mapping.)
             ; First, make sure upper words of the mpList vector are 0.
 
-            subi       r21,r21,1                               ; count mappings in this pmap
+                       lhz             r27,mpBSize(r26)                ; get #blocks
+                       rlwinm  r29,r29,mpBSub+1,31,31  ; Rotate to get 0 if 4K bsu or 1 if 32MB bsu
+                       addi    r27,r27,1                               ; units of nested pmap are (#segs-1)
+                       ori             r29,r29,0x3216                  ; OR in 0x00003216 (0x3200 and a base rotate of 22)
+                       rlwnm   r29,r29,r29,27,31               ; Rotate to get 12 or 25
+                       subi    r21,r21,1                               ; count mappings in this pmap
+                       slw             r29,r27,r29                             ; Get the length in bytes
+                       subi    r29,r29,4096                    ; get offset to last byte in nested pmap
+
             lwz                r24,mpFlags(r26)                ; Get number of lists
             la         r30,mpList0(r26)                ; point to base of skiplist vector
                        andi.   r24,r24,mpLists                 ; Clean the number of lists
                        bl              mapVerUpperWordsAre0    ; make sure upper words are all 0 (uses r24 and r27)
-            
-            ; Then, if a block mapping or nested pmap, compute block end.
-            
-            rlwinm     r29,r29,0,mpType                ; isolate mapping type code
-            cmplwi     cr1,r29,mpLinkage               ; cr1_eq <- linkage type?
-            cmplwi     r29,mpNest                              ; cr0_eq <- nested type?
-            cror       cr0_eq,cr1_eq,cr0_eq    ; cr0_eq <- linkage type or nested type?
-            cmplwi     cr1,r29,mpBlock                 ; cr1_eq <- block type?
-            cror       cr0_eq,cr1_eq,cr0_eq    ; cr0_eq <- block or nested or linkage type?
-            
-            bne+       mapVer32b                               ; not block or nested type
-            lhz                r27,mpBSize(r26)                ; get #pages or #segments
-            slwi       r29,r27,12                              ; assume block mapping, units are pages
-            beq                cr1,mapVer32b                   ; guessed correctly
-            addi       r27,r27,1                               ; units of nested pmap are (#segs-1)
-            slwi       r29,r27,28                              ; convert to #bytes
-            subi       r29,r29,4096                    ; get offset to last byte in nested pmap
-            
+                        
             ; Here with r29 = size of block - 4k, or 0 if mapping is a scalar page.
 
-mapVer32b:
             add                r24,r28,r29                             ; r24 <- address of last valid page in this mapping
             la         r28,mpList0+4(r26)              ; get base of this mappings vector            
             lwz                r27,mpFlags(r26)                ; Get the number of lists
index c5d46ed5c1167aed8fe0c5c250bfc0c83fe606ff..8222e4c39b183a1695ad4a40a1d76f6452ce02f9 100644 (file)
@@ -425,6 +425,13 @@ noVector:
                        bt              bootCPU,run32                                   
 
                        mfsprg  r30,0                                                           ; Phys per proc
+                       lwz             r29,PP_HIBERNATE(r30)
+            andi.      r29, r29, 1
+                       beq             noHashTableInit                                         ; Skip following if not waking from from hibernate
+                       bl              EXT(hw_clear_maps)                                      ; Mark all maps as absent from hash table
+                       bl              EXT(hw_hash_init)                                       ; Clear hash table
+                       bl              EXT(save_snapshot_restore)                      ; Reset save area chains
+noHashTableInit:
                        bl      EXT(hw_setup_trans)                                             ; Set up hardware needed for translation
                        bl      EXT(hw_start_trans)                                             ; Start translating 
 
index 2d5cdb785f1e954ff6a874f7da1282f85828eece..538553085858ef4dc714bc03bedc36f1b5db212b 100644 (file)
@@ -1223,7 +1223,7 @@ swap64:           lwz             r22,vmmXAFlgs(r27)                      ; Get the eXtended Architecture flags
                        lwz             r15,vmmppcpc(r5)                        ; First line of context 
                        lis             r22,hi16(MSR_IMPORT_BITS)       ; Get the MSR bits that are controllable by user
                        lwz             r23,vmmppcmsr(r5)                               
-                       ori             r22,r25,lo16(MSR_IMPORT_BITS)   ; Get the rest of the MSR bits that are controllable by user
+                       ori             r22,r22,lo16(MSR_IMPORT_BITS)   ; Get the rest of the MSR bits that are controllable by user
                        lwz             r17,vmmppcr0(r5)                                
                        lwz             r18,vmmppcr1(r5)                
                        and             r23,r23,r22                                     ; Keep only the controllable bits               
@@ -1259,7 +1259,7 @@ sw64x1:           ld              r15,vmmppcXpc(r5)                       ; First line of context
                        lis             r22,hi16(MSR_IMPORT_BITS)       ; Get the MSR bits that are controllable by user (we will also allow 64-bit here)
                        sldi    r0,r0,63                                        ; Get 64-bit bit
                        ld              r23,vmmppcXmsr(r5)                              
-                       ori             r22,r25,lo16(MSR_IMPORT_BITS)   ; Get the rest of the MSR bits that are controllable by user
+                       ori             r22,r22,lo16(MSR_IMPORT_BITS)   ; Get the rest of the MSR bits that are controllable by user
                        ld              r17,vmmppcXr0(r5)               
                        or              r22,r22,r0                                      ; Add the 64-bit bit            
                        ld              r18,vmmppcXr1(r5)               
index c601cb0da3d6c82c424c9e68295bff9157c083d1..75d1c3f23f12bd5617b388c74f8850cfab70ba01 100644 (file)
@@ -3081,8 +3081,8 @@ FastPmapEnter:
                                    (entry->object.vm_object->shadow_offset)) 
                                        + entry->offset + 
                                        (laddr - entry->vme_start) 
-                                                       - ldelta)>>12,
-                               ldelta + hdelta, prot, 
+                                                       - ldelta) >> 12,
+                               ((ldelta + hdelta) >> 12), prot, 
                                (VM_WIMG_MASK & (int)object->wimg_bits), 0);
                        } else { 
                                /* Set up a block mapped area */
@@ -3091,8 +3091,8 @@ FastPmapEnter:
                                   (((vm_map_offset_t)
                                    (entry->object.vm_object->shadow_offset)) 
                                       + entry->offset + 
-                                      (laddr - entry->vme_start) - ldelta)>>12,
-                                  ldelta + hdelta, prot, 
+                                      (laddr - entry->vme_start) - ldelta) >> 12,
+                                  ((ldelta + hdelta) >> 12), prot, 
                                   (VM_WIMG_MASK & (int)object->wimg_bits), 0);
                        }
                }
index d75ec79ded59f4755dec18d3587e69f0ec7cea6c..0ccb3e1ac03bafa563198fceb007fa08f59fcf81 100644 (file)
@@ -5305,7 +5305,7 @@ vm_paging_map_object(
                        pmap_map_block(kernel_pmap,
                                       page_map_offset,
                                       page->phys_page,
-                                      PAGE_SIZE,
+                                      1,                                               /* Size is number of 4k pages */
                                       VM_PROT_DEFAULT,
                                       ((int) page->object->wimg_bits &
                                        VM_WIMG_MASK),
index 02b5251d65e24a40d24f6d43901b4a5c8af22ad0..8c617634794b234849e19de774e55342c799e0f0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
@@ -32,6 +32,7 @@
 #include <kern/debug.h>
 #include <kern/sched_prim.h>
 
+
 /* extern references */
 void pe_identify_machine(void);
 
@@ -148,7 +149,13 @@ void PE_init_iokit(void)
 
 void PE_init_platform(boolean_t vm_initialized, void *_args)
 {
-        boot_args *args = (boot_args *)_args;
+       DTEntry dsouth, dnorth, root, dcpu;
+       char *model;
+       int msize, size;
+       uint32_t *south, *north, *pdata, *ddata;
+       int i;
+       
+       boot_args *args = (boot_args *)_args;
 
        if (PE_state.initialized == FALSE)
        {
@@ -179,6 +186,7 @@ void PE_init_platform(boolean_t vm_initialized, void *_args)
        else
        {
            pe_init_debug();
+       
        }
 }