git.saurik.com Git - apple/xnu.git/commitdiff
snapshot: xnu-1228.7.58.tar.gz   tags: mac-os-x-1055, v1228.7.58
author    Apple <opensource@apple.com>
          Mon, 15 Sep 2008 21:01:57 +0000 (21:01 +0000)
committer Apple <opensource@apple.com>
          Mon, 15 Sep 2008 21:01:57 +0000 (21:01 +0000)
47 files changed:
bsd/dev/dtrace/dtrace.c
bsd/hfs/hfs_btreeio.c
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_cnode.h
bsd/hfs/hfs_endian.c
bsd/hfs/hfs_endian.h
bsd/hfs/hfs_link.c
bsd/hfs/hfs_lookup.c
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfscommon/BTree/BTree.c
bsd/hfs/hfscommon/BTree/BTreeAllocate.c
bsd/hfs/hfscommon/BTree/BTreeMiscOps.c
bsd/hfs/hfscommon/BTree/BTreeNodeOps.c
bsd/hfs/hfscommon/BTree/BTreeScanner.c
bsd/hfs/hfscommon/BTree/BTreeTreeOps.c
bsd/hfs/hfscommon/headers/BTreesInternal.h
bsd/hfs/hfscommon/headers/BTreesPrivate.h
bsd/kern/kern_sysctl.c
bsd/kern/tty_ptmx.c
bsd/kern/uipc_mbuf.c
bsd/netinet/in_gif.c
bsd/netinet/ip_divert.c
bsd/netinet6/esp_input.c
bsd/netinet6/in6_gif.c
bsd/sys/errno.h
bsd/vfs/vfs_cache.c
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_vnops.c
config/MasterVersion
osfmk/i386/AT386/model_dep.c
osfmk/i386/i386_init.c
osfmk/i386/locore.s
osfmk/i386/mp.c
osfmk/i386/mp.h
osfmk/i386/mp_events.h
osfmk/i386/pmap.c
osfmk/i386/trap.c
osfmk/i386/trap.h
osfmk/vm/bsd_vm.c
osfmk/vm/vm_object.c
osfmk/vm/vm_resident.c

diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c
index aa200c11f8a52c3e4a2942aeb2a6a6b6094b1c81..c28ae6f0b7cc32f092e83ef615ae6df2c608890e 100644 (file)
@@ -1071,6 +1071,21 @@ bad:
        return (0);
 }
 
+#if defined(__APPLE__)
+/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */
+static int
+dtrace_priv_proc_relaxed(dtrace_state_t *state)
+{
+
+       if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
+               return (1);
+
+       cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
+
+       return (0);
+}
+#endif /* __APPLE__ */
+
 static int
 dtrace_priv_kernel(dtrace_state_t *state)
 {
@@ -2709,7 +2724,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
 
 #else
        case DIF_VAR_PID:
-               if (!dtrace_priv_proc(state))
+               if (!dtrace_priv_proc_relaxed(state))
                        return (0);
 
                /*
@@ -2738,7 +2753,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
                return ((uint64_t)curthread->t_procp->p_ppid);
 #else
        case DIF_VAR_PPID:
-               if (!dtrace_priv_proc(state))
+               if (!dtrace_priv_proc_relaxed(state))
                        return (0);
 
                /*
@@ -2800,7 +2815,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
                    mstate->dtms_scratch_base + mstate->dtms_scratch_size)
                        return 0;
                        
-               if (!dtrace_priv_proc(state))
+               if (!dtrace_priv_proc_relaxed(state))
                        return (0);
 
                mstate->dtms_scratch_ptr += scratch_size;
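For reference, dtrace_priv_proc() (its body is outside this hunk) also refuses tracing when the target process has set P_LNOATTACH; the relaxed variant above deliberately skips that test, since pid/ppid/execname reads reveal nothing that P_LNOATTACH is meant to protect. A hedged sketch of the strict check for contrast -- the P_LNOATTACH test shown here is an assumption based on the comment above, not code from this diff:

    /* Hypothetical sketch of the strict check -- not the actual xnu body. */
    static int
    dtrace_priv_proc_strict_sketch(dtrace_state_t *state)
    {
            if ((state->dts_cred.dcr_action & DTRACE_CRA_PROC) &&
                !ISSET(current_proc()->p_lflag, P_LNOATTACH))   /* omitted in _relaxed */
                    return (1);

            cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
            return (0);
    }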
diff --git a/bsd/hfs/hfs_btreeio.c b/bsd/hfs/hfs_btreeio.c
index 0c81c879f594ccb162b71f1b1383646300762895..0b9a39160feeab8651622d8324dce1e8ed638c05 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -76,6 +76,16 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt
 {
     OSStatus    retval = E_NONE;
     struct buf   *bp = NULL;
+       u_int8_t     allow_empty_node;    
+
+       /* If the btree block is being read using a hint, it is
+        * fine for the swap code to find zeroed-out nodes.
+        */
+       if (options & kGetBlockHint) {
+                       allow_empty_node = true;
+       } else {
+                       allow_empty_node = false;
+       }
 
     if (options & kGetEmptyBlock) {
         daddr64_t blkno;
@@ -115,21 +125,21 @@ OSStatus GetBTreeBlock(FileReference vp, u_int32_t blockNum, GetBlockOptions opt
                  * size once the B-tree control block is set up with the node size
                  * from the header record.
                  */
-                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly);
+                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeHeaderRecordOnly, allow_empty_node);
 
                        } else if (block->blockReadFromDisk) {
                /*
                 * The node was just read from disk, so always swap/check it.
                 * This is necessary on big endian since the test below won't trigger.
                 */
-                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost);
+                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
             } else if (*((u_int16_t *)((char *)block->buffer + (block->blockSize - sizeof (u_int16_t)))) == 0x0e00) {
                                /*
                                 * The node was left in the cache in non-native order, so swap it.
                                 * This only happens on little endian, after the node is written
                                 * back to disk.
                                 */
-                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost);
+                retval = hfs_swap_BTNode (block, vp, kSwapBTNodeBigToHost, allow_empty_node);
             }
             
                /*
@@ -191,8 +201,11 @@ btree_swap_node(struct buf *bp, __unused void *arg)
     block.blockReadFromDisk = (buf_fromcache(bp) == 0);
     block.blockSize = buf_count(bp);
 
-    // swap the data now that this node is ready to go to disk
-    retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
+    /* Swap the data now that this node is ready to go to disk.
+     * We allow swapping of zeroed-out nodes here because we might
+     * be writing a node whose last record just got deleted.
+     */
+    retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, true);
     if (retval)
        panic("btree_swap_node: about to write corrupt node!\n");
 }
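The new allow_empty_node argument is derived from kGetBlockHint because a hint only records where a record used to live: by the time the hinted read happens, the node may have been freed and zero-filled, which is harmless, whereas the same zeroes on a non-hinted read indicate on-disk corruption. The if/else above condenses to a one-line derivation:

    /* Equivalent to the if/else in GetBTreeBlock above. */
    u_int8_t allow_empty_node = (options & kGetBlockHint) ? true : false;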
diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c
index 59d20b4efe1e92ea40b23d352900c266ad489edd..04db5d2903b7b5ad836d0c00e270c4da423d79e7 100644 (file)
@@ -286,11 +286,11 @@ cat_releasedesc(struct cat_desc *descp)
 
 /*
  * These Catalog functions allow access to the HFS Catalog (database).
- * The catalog b-tree lock must be aquired before calling any of these routines.
+ * The catalog b-tree lock must be acquired before calling any of these routines.
  */
 
 /*
- * cat_lookup - lookup a catalog node using a cnode decriptor
+ * cat_lookup - lookup a catalog node using a cnode descriptor
  *
  * Note: The caller is responsible for releasing the output
  * catalog descriptor (when supplied outdescp is non-null).
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 17a0cba633470ccff9c56cc72a080091f1fd7f07..7ff95e5933ba165a5329dd2b5591cbcb22453d27 100644 (file)
@@ -127,8 +127,10 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
         */
        if (v_type == VDIR) {
                hfs_reldirhints(cp, 0);
-               if (cp->c_flag & C_HARDLINK)
-                       hfs_relorigins(cp);
+       }               
+       
+       if (cp->c_flag & C_HARDLINK) {
+               hfs_relorigins(cp);
        }
 
        if (cp->c_datafork)
@@ -472,6 +474,10 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
                if (vnode_isdir(vp)) {
                        hfs_reldirhints(cp, 0);
                }
+               
+               if (cp->c_flag & C_HARDLINK) {
+                       hfs_relorigins(cp);
+               }
        }
        /* Release the file fork and related data */
        if (fp) {
diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h
index 22c5c078482de696431f83fde900d040512b838a..7a5b13601a0a1c3ffe91a2405329f40b6f5cd3f7 100644 (file)
@@ -91,7 +91,7 @@ struct linkorigin {
 typedef struct linkorigin linkorigin_t;
 
 #define MAX_CACHED_ORIGINS  10
-
+#define MAX_CACHED_FILE_ORIGINS 8
 
 /*
  * The cnode is used to represent each active (or recently active)
diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c
index 41251934fce9c1e556e4f4ecd250f7c11cee2577..e5775bfbc638d550b7e203df2e3510a9c9de2fc7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -83,7 +83,8 @@ int
 hfs_swap_BTNode (
     BlockDescriptor *src,
     vnode_t vp,
-    enum HFSBTSwapDirection direction
+    enum HFSBTSwapDirection direction,
+    u_int8_t allow_empty_node
 )
 {
     BTNodeDescriptor *srcDesc = src->buffer;
@@ -177,9 +178,13 @@ hfs_swap_BTNode (
              * Sanity check: must be even, and within the node itself.
              *
              * We may be called to swap an unused node, which contains all zeroes.
-             * This is why we allow the record offset to be zero.
+             * Unused nodes are expected only when allow_empty_node is true.
+             * If it is false and a record offset is zero, return an error.
              */
-            if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) {
+            if ((srcOffs[i] & 1) ||
+                ((allow_empty_node == false) && (srcOffs[i] == 0)) ||
+                (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) ||
+                (srcOffs[i] >= src->blockSize)) {
                printf("hfs_swap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
                error = fsBTInvalidHeaderErr;
                goto fail;
@@ -306,9 +311,15 @@ hfs_swap_BTNode (
              * Sanity check: must be even, and within the node itself.
              *
              * We may be called to swap an unused node, which contains all zeroes.
+             * This can happen when the last record from a node gets deleted.
              * This is why we allow the record offset to be zero.
+             * Unused nodes are expected only when allow_empty_node is true
+             * (the caller should set it to true for kSwapBTNodeBigToHost).
              */
-            if ((srcOffs[i] & 1) || (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) || (srcOffs[i] >= src->blockSize)) {
+            if ((srcOffs[i] & 1) ||
+                ((allow_empty_node == false) && (srcOffs[i] == 0)) ||
+                (srcOffs[i] < sizeof(BTNodeDescriptor) && srcOffs[i] != 0) ||
+                (srcOffs[i] >= src->blockSize)) {
                panic("hfs_UNswap_BTNode: record #%d invalid offset (0x%04X)\n", srcDesc->numRecords-i-1, srcOffs[i]);
                error = fsBTInvalidHeaderErr;
                goto fail;
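The combined sanity check reads more clearly as a predicate: a record offset must be even, must lie within the node, and may be zero only when empty (zero-filled) nodes are allowed. A self-contained sketch of the same rule (the helper name and parameters are illustrative, not from xnu; desc_size stands in for sizeof(BTNodeDescriptor)):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    static bool
    btnode_offset_valid(uint16_t off, size_t desc_size, size_t block_size,
        bool allow_empty_node)
    {
            if (off & 1)
                    return false;              /* offsets must be even */
            if (off == 0)
                    return allow_empty_node;   /* zero only in an unused node */
            if (off < desc_size)
                    return false;              /* would point into the descriptor */
            return off < block_size;           /* must land within the node */
    }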
diff --git a/bsd/hfs/hfs_endian.h b/bsd/hfs/hfs_endian.h
index 519c40104df3b0213d4d21d925da1643c3ed765c..c1c46f7aaea725dad78eb12d44a2847b9959b1ab 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000, 2002-2003, 2005-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -93,7 +93,8 @@ enum HFSBTSwapDirection {
        kSwapBTNodeHeaderRecordOnly     =       3
 };
 
-int  hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction);
+int  hfs_swap_BTNode (BlockDescriptor *src, vnode_t vp, enum HFSBTSwapDirection direction, 
+       u_int8_t allow_empty_node);
 
 #ifdef __cplusplus
 }
diff --git a/bsd/hfs/hfs_link.c b/bsd/hfs/hfs_link.c
index a2e08a098713c662ed7d0ecc9e928166f3d567d7..f6c5e8409a82dd48ffe799fdbbf3a03f217cde2e 100644 (file)
@@ -677,12 +677,10 @@ hfs_unlink(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *vp, struct c
                goto out;
        }
 
-       /* Purge any cached origin entries for a directory hard link. */
-       if (cndesc.cd_flags & CD_ISDIR) {
-               hfs_relorigin(cp, dcp->c_fileid);
-               if (dcp->c_fileid != dcp->c_cnid) {
-                       hfs_relorigin(cp, dcp->c_cnid);
-               }
+       /* Purge any cached origin entries for a directory or file hard link. */
+       hfs_relorigin(cp, dcp->c_fileid);
+       if (dcp->c_fileid != dcp->c_cnid) {
+               hfs_relorigin(cp, dcp->c_cnid);
        }
 
        /* Delete the link record. */
@@ -996,7 +994,7 @@ hfs_lookuplink(struct hfsmount *hfsmp, cnid_t linkfileid, cnid_t *prevlinkid,  c
 }
 
 /*
- * Cache the orgin of a directory hard link
+ * Cache the origin of a directory or file hard link
  *
  * cnode must be lock on entry
  */
@@ -1007,6 +1005,7 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid)
        linkorigin_t *origin = NULL;
        void * thread = current_thread();
        int count = 0;
+       int maxorigins = (S_ISDIR(cp->c_mode)) ? MAX_CACHED_ORIGINS : MAX_CACHED_FILE_ORIGINS;
 
        /*
         *  Look for an existing origin first.  If not found, create/steal one.
@@ -1020,7 +1019,7 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid)
        }
        if (origin == NULL) {
                /* Recycle the last (i.e., the oldest) if we have too many. */
-               if (count > MAX_CACHED_ORIGINS) {
+               if (count > maxorigins) {
                        origin = TAILQ_LAST(&cp->c_originlist, hfs_originhead);
                        TAILQ_REMOVE(&cp->c_originlist, origin, lo_link);
                } else {
@@ -1034,7 +1033,7 @@ hfs_savelinkorigin(cnode_t *cp, cnid_t parentcnid)
 }
 
 /*
- * Release any cached origins for a directory hard link
+ * Release any cached origins for a directory or file hard link
  *
  * cnode must be lock on entry
  */
@@ -1051,7 +1050,7 @@ hfs_relorigins(struct cnode *cp)
 }
 
 /*
- * Release a specific origin for a directory hard link
+ * Release a specific origin for a directory or file hard link
  *
  * cnode must be lock on entry
  */
@@ -1073,7 +1072,7 @@ hfs_relorigin(struct cnode *cp, cnid_t parentcnid)
 }
 
 /*
- * Test if a directory hard link has a cached origin
+ * Test if a directory or file hard link has a cached origin
  *
  * cnode must be lock on entry
  */
@@ -1095,7 +1094,7 @@ hfs_haslinkorigin(cnode_t *cp)
 }
 
 /*
- * Obtain the current parent cnid of a directory hard link
+ * Obtain the current parent cnid of a directory or file hard link
  *
  * cnode must be lock on entry
  */
@@ -1117,7 +1116,7 @@ hfs_currentparent(cnode_t *cp)
 }
 
 /*
- * Obtain the current cnid of a directory hard link
+ * Obtain the current cnid of a directory or file hard link
  *
  * cnode must be lock on entry
  */
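hfs_savelinkorigin() above is a small per-cnode LRU keyed by thread: look for the calling thread's entry first, otherwise recycle the tail once the list exceeds its cap (MAX_CACHED_ORIGINS for directories, the new MAX_CACHED_FILE_ORIGINS for files). A self-contained sketch of that recycle-or-allocate pattern with the same <sys/queue.h> macros -- the struct and field names here are illustrative, not xnu's:

    #include <stdlib.h>
    #include <sys/queue.h>

    struct origin {
            TAILQ_ENTRY(origin) link;
            void      *thread;      /* owning thread, as in struct linkorigin */
            unsigned   parentid;
    };
    TAILQ_HEAD(originhead, origin);

    static void
    save_origin(struct originhead *head, int *count, int max,
        void *thread, unsigned parentid)
    {
            struct origin *o;

            /* Reuse the calling thread's existing entry if there is one. */
            TAILQ_FOREACH(o, head, link) {
                    if (o->thread == thread) {
                            TAILQ_REMOVE(head, o, link);
                            goto fill;
                    }
            }
            if (*count > max) {
                    /* Too many: recycle the last (i.e., oldest) entry. */
                    o = TAILQ_LAST(head, originhead);
                    TAILQ_REMOVE(head, o, link);
            } else {
                    if ((o = malloc(sizeof(*o))) == NULL)
                            return;
                    (*count)++;
            }
    fill:
            o->thread = thread;
            o->parentid = parentid;
            TAILQ_INSERT_HEAD(head, o, link);
    }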
diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c
index b59038b3b0b6dab0b17ad74359f62735cd2637a6..6009fb787c705d88504dde14a346eae197debbc9 100644 (file)
@@ -360,9 +360,15 @@ found:
                        }
                        goto exit;
                }
-
-               /* Save the origin info of a directory link for future ".." requests. */
-               if (S_ISDIR(attr.ca_mode) && (attr.ca_recflags & kHFSHasLinkChainMask)) {
+               
+               /* 
+                * Save the origin info for file and directory hardlinks.  Directory hardlinks 
+                * need the origin for '..' lookups, and file hardlinks need it to ensure that 
+                * competing lookups do not cause us to vend different hardlinks than the ones requested.
+                * We want to restrict saving the cache entries to LOOKUP namei operations, since
+                * we're really doing this to protect getattr.
+                */
+               if ((cnp->cn_nameiop == LOOKUP) && (VTOC(tvp)->c_flag & C_HARDLINK)) {
                        hfs_savelinkorigin(VTOC(tvp), VTOC(dvp)->c_fileid);
                }
                *cnode_locked = 1;
@@ -479,6 +485,14 @@ hfs_vnop_lookup(struct vnop_lookup_args *ap)
                                replace_desc(cp, &desc);
                        hfs_systemfile_unlock(VTOHFS(dvp), lockflags);
                }
+
+               /* Save the lookup result in the origin list for future lookups, but
+                * only if it was through a LOOKUP nameiop
+                */
+               if (cnp->cn_nameiop == LOOKUP) {
+                       hfs_savelinkorigin(cp, dcp->c_fileid);
+               }          
+
                hfs_unlock(cp);
        }
 #if NAMEDRSRCFORK
diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index a667aa364fa2ee84ba7b452e61799700736b0b74..958ca6e3a09a081c0e80d9082cd95abaa9bd56c1 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -80,6 +80,8 @@ enum {
 
 /* from bsd/vfs/vfs_cluster.c */
 extern int is_file_clean(vnode_t vp, off_t filesize);
+/* from bsd/hfs/hfs_vfsops.c */
+extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
 
 static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
 static int  hfs_clonefile(struct vnode *, int, int, int);
@@ -1328,7 +1330,11 @@ hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                bufptr = (char *)ap->a_data;
                cnid = strtoul(bufptr, NULL, 10);
 
-               if ((error = hfs_vget(hfsmp, cnid, &file_vp, 1))) {
+               /* We need to call hfs_vfs_vget to leverage the code that will fix the
+                * origin list for us if needed, as opposed to calling hfs_vget, since
+                * we will need it for the subsequent build_path call.  
+                */
+               if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
                        return (error);
                }
                error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
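build_path() reconstructs a path by walking parent links, and for a hardlink the parent is ambiguous unless the origin cache records which link the caller means; hfs_vfs_vget repairs that cache (see its definition later in this commit) where plain hfs_vget does not. Condensed, the ioctl path above is:

    cnid = strtoul(bufptr, NULL, 10);
    /* hfs_vfs_vget, unlike hfs_vget, fixes up the hardlink origin
     * cache that build_path relies on to pick the right parents. */
    if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context)))
            return (error);
    error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);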
@@ -3029,7 +3035,7 @@ hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
                        block.blockSize = buf_count(bp);
     
                        /* Endian un-swap B-Tree node */
-                       retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
+                       retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
                        if (retval)
                                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
                }
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index bf7635ebaba8ce1b01958f17e6c5e65c16da33eb..7b67b6686ddbac8b27fc33c0b55cb3509814208d 100644 (file)
@@ -118,6 +118,9 @@ lck_grp_t *  hfs_mutex_group;
 lck_grp_t *  hfs_rwlock_group;
 
 extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
+/* not static so we can re-use in hfs_readwrite.c for build_path */
+int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
+
 
 static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
 static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
@@ -136,7 +139,6 @@ static int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
 static int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp, 
                       user_addr_t newp, size_t newlen, vfs_context_t context);
 static int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
-static int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
 
 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_long startblk, u_long reclaimblks, vfs_context_t context);
@@ -372,13 +374,18 @@ hfs_changefs_callback(struct vnode *vp, void *cargs)
        struct cat_desc cndesc;
        struct cat_attr cnattr;
        struct hfs_changefs_cargs *args;
+       int lockflags;
+       int error;
 
        args = (struct hfs_changefs_cargs *)cargs;
 
        cp = VTOC(vp);
        vcb = HFSTOVCB(args->hfsmp);
 
-       if (cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL)) {
+       lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+       error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
+       hfs_systemfile_unlock(args->hfsmp, lockflags);
+       if (error) {
                /*
                 * If we couldn't find this guy skip to the next one
                 */
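This hunk introduces the locking pattern repeated throughout the rest of the commit: catalog lookups (cat_lookup, cat_idlookup, cat_findname) are now bracketed by the catalog system-file lock, taken shared. Isolated, the bracket is:

    int lockflags;

    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
    error = cat_lookup(hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
    hfs_systemfile_unlock(hfsmp, lockflags);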
@@ -526,8 +533,9 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
         *
         * hfs_changefs_callback will be called for each vnode
         * hung off of this mount point
-        * the vnode will be
-        * properly referenced and unreferenced around the callback
+        *
+        * The vnode will be properly referenced and unreferenced 
+        * around the callback
         */
        cargs.hfsmp = hfsmp;
        cargs.namefix = namefix;
@@ -561,6 +569,7 @@ hfs_reload_callback(struct vnode *vp, void *cargs)
 {
        struct cnode *cp;
        struct hfs_reload_cargs *args;
+       int lockflags;
 
        args = (struct hfs_reload_cargs *)cargs;
        /*
@@ -585,8 +594,12 @@ hfs_reload_callback(struct vnode *vp, void *cargs)
                datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
 
                /* lookup by fileID since name could have changed */
-               if ((args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork)))
+               lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+               args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
+               hfs_systemfile_unlock(args->hfsmp, lockflags);
+               if (args->error) {
                        return (VNODE_RETURNED_DONE);
+               }
 
                /* update cnode's catalog descriptor */
                (void) replace_desc(cp, &desc);
@@ -2276,33 +2289,48 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
        return (ENOTSUP);
 }
 
-
-static int
+/* hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support the
+ * build_path ioctl.  We use it to leverage the code below that updates the origin
+ * cache if necessary.
+ */
+int
 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
 {
        int error;
+       int lockflags;
+       struct hfsmount *hfsmp;
 
-       error = hfs_vget(VFSTOHFS(mp), (cnid_t)ino, vpp, 1);
+       hfsmp = VFSTOHFS(mp);
+
+       error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1);
        if (error)
                return (error);
 
        /*
         * ADLs may need to have their origin state updated
-        * since build_path needs a valid parent.
+        * since build_path needs a valid parent. The same is true
+        * for hardlinked files as well. There isn't a race window here in re-acquiring
+        * the cnode lock since we aren't pulling any data out of the cnode; instead, we're
+        * going back to the catalog.
         */
-       if (vnode_isdir(*vpp) &&
-           (VTOC(*vpp)->c_flag & C_HARDLINK) &&
+       if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
            (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
                cnode_t *cp = VTOC(*vpp);
                struct cat_desc cdesc;
                
-               if (!hfs_haslinkorigin(cp) &&
-                   (cat_findname(VFSTOHFS(mp), (cnid_t)ino, &cdesc) == 0)) {
-                       if (cdesc.cd_parentcnid !=
-                           VFSTOHFS(mp)->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
-                               hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
+               if (!hfs_haslinkorigin(cp)) {
+                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                       error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
+                       hfs_systemfile_unlock(hfsmp, lockflags);
+                       if (error == 0) {
+                               if ((cdesc.cd_parentcnid !=
+                               hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) && 
+                                       (cdesc.cd_parentcnid != 
+                                       hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
+                                       hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
+                               }
+                               cat_releasedesc(&cdesc);
                        }
-                       cat_releasedesc(&cdesc);
                }
                hfs_unlock(cp);
        }
@@ -2413,6 +2441,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
                cnid_t nextlinkid;
                cnid_t prevlinkid;
                struct cat_desc linkdesc;
+               int lockflags;
 
                cnattr.ca_linkref = linkref;
 
@@ -2422,7 +2451,10 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
                 */
                if ((hfs_lookuplink(hfsmp, linkref, &prevlinkid,  &nextlinkid) == 0) &&
                    (nextlinkid != 0)) {
-                       if (cat_findname(hfsmp, nextlinkid, &linkdesc) == 0) {
+                       lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                       error = cat_findname(hfsmp, nextlinkid, &linkdesc);
+                       hfs_systemfile_unlock(hfsmp, lockflags);
+                       if (error == 0) {
                                cat_releasedesc(&cndesc);
                                bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
                        }
@@ -2452,7 +2484,7 @@ hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock)
        
                error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr, &cnfork, &vp);
 
-               if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK) && vnode_isdir(vp)) {
+               if ((error == 0) && (VTOC(vp)->c_flag & C_HARDLINK)) {
                        hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
                }
                FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
diff --git a/bsd/hfs/hfs_vfsutils.c b/bsd/hfs/hfs_vfsutils.c
index 0433aec327d9680d229052864cf5d203b3eb8fba..736ab6199549cfdfde83c23ff271908a9561b9c5 100644 (file)
@@ -243,13 +243,6 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        /* mark the volume dirty (clear clean unmount bit) */
        vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
 
-       /*
-        * all done with system files so we can unlock now...
-        */
-       hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
-       hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
-       hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
-
     if (error == noErr)
       {
                error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, NULL, NULL, NULL);
@@ -262,6 +255,14 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
             MarkVCBDirty( vcb );                                                               //      mark VCB dirty so it will be written
           }
       }
+
+       /*
+        * all done with system files so we can unlock now...
+        */
+       hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
+       hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
+       hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
+
     goto       CmdDone;
 
     //--       Release any resources allocated so far before exiting with an error:
diff --git a/bsd/hfs/hfs_vnops.c b/bsd/hfs/hfs_vnops.c
index 45eda09382ff942e4dfa2885307fd15734a1d093..eef6b5e9660486b10484d49bd60c81c25a9bcf21 100644 (file)
@@ -384,7 +384,7 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
 
        struct vnode *vp = ap->a_vp;
        struct vnode_attr *vap = ap->a_vap;
-       struct vnode *rvp = NULL;
+       struct vnode *rvp = NULLVP;
        struct hfsmount *hfsmp;
        struct cnode *cp;
        uint64_t data_size;
@@ -516,11 +516,11 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                        }
 
                        if (cp->c_blocks - VTOF(vp)->ff_blocks) {
+                               /* We deal with resource fork vnode iocount at the end of the function */
                                error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE);
                                if (error) {
                                        goto out;
                                }
-               
                                rcp = VTOC(rvp);
                                if (rcp && rcp->c_rsrcfork) {
                                        total_size += rcp->c_rsrcfork->ff_size;
@@ -592,8 +592,15 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
         * which are hardlink-ignorant, will ask for va_linkid.
         */
        vap->va_fileid = (u_int64_t)cp->c_fileid;
-       /* Hardlinked directories have multiple cnids and parents (one per link). */
-       if ((v_type == VDIR) && (cp->c_flag & C_HARDLINK)) {
+       /* 
+        * We need to use the origin cache for both hardlinked files 
+        * and directories. Hardlinked directories have multiple cnids 
+        * and parents (one per link). Hardlinked files also have their 
+        * own parents and link IDs separate from the indirect inode number. 
+        * If we don't use the cache, we could end up vending the wrong ID 
+        * because the cnode will only reflect the link that was looked up most recently.
+        */
+       if (cp->c_flag & C_HARDLINK) {
                vap->va_linkid = (u_int64_t)hfs_currentcnid(cp);
                vap->va_parentid = (u_int64_t)hfs_currentparent(cp);
        } else {
@@ -617,70 +624,79 @@ hfs_vnop_getattr(struct vnop_getattr_args *ap)
                             VNODE_ATTR_va_encoding | VNODE_ATTR_va_rdev |
                             VNODE_ATTR_va_data_size;
 
-       /* if this is the root, let VFS to find out the mount name, which may be different from the real name */
+       /* If this is the root, let VFS find out the mount name, which may be different from the real name.
+        * Otherwise, we just need to take care of hardlinked files, which need to be looked up if necessary.
+        */
        if (VATTR_IS_ACTIVE(vap, va_name) && (cp->c_cnid != kHFSRootFolderID)) {
-               /* Return the name for ATTR_CMN_NAME */
-               if (cp->c_desc.cd_namelen == 0) {
-                   if ((cp->c_flag & C_HARDLINK) && ((cp->c_flag & C_DELETED) == 0 || (cp->c_linkcount > 1))) {
-                       cnid_t nextlinkid;
-                       cnid_t prevlinkid;
-                       struct vnode *file_vp;
-                       
-                       if ((error = hfs_lookuplink(hfsmp, cp->c_fileid, &prevlinkid, &nextlinkid))) {
-                           goto out;
-                       }
-
-                       //
-                       // don't bother trying to get a linkid that's the same
-                       // as the current cnid
-                       //
-                       if (nextlinkid == VTOC(vp)->c_cnid) {
-                           if (prevlinkid == VTOC(vp)->c_cnid) {
-                               hfs_unlock(cp);
-                               goto out2;
-                           } else {
-                               nextlinkid = prevlinkid;
-                           }
-                       }
-                           
-                       hfs_unlock(cp);
-
-                       if (nextlinkid == 0 || (error = hfs_vget(hfsmp, nextlinkid, &file_vp, 1))) {
-                           if (prevlinkid == 0 || (error = hfs_vget(hfsmp, prevlinkid, &file_vp, 1))) {
-                               goto out2;
-                           }
+               struct cat_desc linkdesc;
+               int lockflags;
+               int uselinkdesc = 0;
+               cnid_t nextlinkid = 0;
+               cnid_t prevlinkid = 0;  
+
+               /* Get the name for ATTR_CMN_NAME.  We need to take special care for hardlinks
+                * here because the info for the link ID requested by getattrlist may be
+                * different from what's currently in the cnode.  This is because the cnode
+                * will be filled in with the information for the most recent link ID that went
+                * through namei/lookup().  If there are competing lookups for hardlinks that point
+                * to the same inode, one (or more) getattrlists could be vended incorrect name information.
+                * Also, we need to beware of open-unlinked files, which could have a namelen of 0.  Note
+                * that if another hardlink sibling of this file is being unlinked, that could also thrash
+                * the name fields, but it should *not* be treated like an open-unlinked file here.
+                */
+               if ((cp->c_flag & C_HARDLINK) &&
+                               ((cp->c_desc.cd_namelen == 0) || (vap->va_linkid != cp->c_cnid))) {
+                       /* If we have no name and our linkID is the raw inode number, then we may
+                        * have an open-unlinked file.  Go to the next link in this case. 
+                        */
+                       if ((cp->c_desc.cd_namelen == 0) && (vap->va_linkid == cp->c_fileid)) {
+                               if ((error = hfs_lookuplink(hfsmp, vap->va_linkid, &prevlinkid, &nextlinkid))) {
+                                       goto out;
+                               }
                        }
-                               
-                       cp = VTOC(file_vp);
-                       if (hfs_lock(cp, HFS_SHARED_LOCK) == 0) {
-                           if (cp->c_desc.cd_namelen) {
-                               strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN);
-                           }
-                           hfs_unlock(cp);
-                           vnode_put(file_vp);
-                           goto out2;
+                       else {
+                               nextlinkid = vap->va_linkid;
                        }
-                       
-                       if (vnode_name(file_vp)) {
-                           strlcpy(vap->va_name, vnode_name(file_vp), MAXPATHLEN);
-                       } else {
-                           error = ENOENT;
+                       /* Now probe the catalog for the linkID.  Note that we don't know if we have
+                        * the exclusive lock here for the cnode, so we can't just update the descriptor.  
+                        * Instead, we should just store the descriptor's value locally and then use it to pass
+                        * out the name value as needed below.
+                        */
+                       if (nextlinkid) {
+                               lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
+                               error = cat_findname(hfsmp, nextlinkid, &linkdesc);     
+                               hfs_systemfile_unlock(hfsmp, lockflags);
+                               if (error == 0) {
+                                       uselinkdesc = 1;
+                               }
                        }
-                       vnode_put(file_vp);
-                       goto out2;
-                   } else {
-                       error = ENOENT;
-                       goto out;
-                   }
-               } else {
-                   strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN);
-                   VATTR_SET_SUPPORTED(vap, va_name);
+               }
+               
+               /* By this point, we either patched the name above, and the c_desc points 
+                * to correct data, or it already did, in which case we just proceed by copying
+                * the name into the VAP.  Note that we will never set va_name to supported if
+                * nextlinkid is never initialized.  This could happen in the degenerate case above
+                * involving the raw inode number, where it has no nextlinkid.  In this case, we will
+                * simply not export the name as supported.
+                */
+               if (uselinkdesc) {
+                       strlcpy(vap->va_name, (const char *)linkdesc.cd_nameptr, MAXPATHLEN);
+                       VATTR_SET_SUPPORTED(vap, va_name);
+                       cat_releasedesc(&linkdesc);     
+               }
+               else if (cp->c_desc.cd_namelen) {
+                       strlcpy(vap->va_name, (const char *)cp->c_desc.cd_nameptr, MAXPATHLEN);
+                       VATTR_SET_SUPPORTED(vap, va_name);
                }
        }
 
 out:
        hfs_unlock(cp);
-out2:
+       /* 
+        * We need to drop the iocount on the rsrc fork vnode only *after* we've 
+        * released the cnode lock, since vnode_put can trigger an inactive call, which
+        * will go back into the HFS and try to acquire a cnode lock.    
+        */
        if (rvp) {
                vnode_put(rvp);
        }
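Condensed, the new va_name logic asks one question: which link ID, if any, must be resolved through the catalog? A self-contained sketch of that decision (function and parameter names are mine, not xnu's; sibling_linkid stands for the next/prev link returned by hfs_lookuplink):

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t
    linkid_to_resolve(bool is_hardlink, uint32_t namelen, uint32_t cnid,
        uint32_t fileid, uint32_t requested_linkid, uint32_t sibling_linkid)
    {
            if (!is_hardlink)
                    return 0;               /* cnode descriptor is authoritative */
            if (namelen != 0 && requested_linkid == cnid)
                    return 0;               /* descriptor already matches this link */
            if (namelen == 0 && requested_linkid == fileid)
                    return sibling_linkid;  /* open-unlinked: chase a sibling link */
            return requested_linkid;        /* probe the catalog for this link */
    }

A return of 0 means the cnode's own descriptor (or nothing, for a fully unlinked file) is used, matching the fall-through cases above.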
diff --git a/bsd/hfs/hfscommon/BTree/BTree.c b/bsd/hfs/hfscommon/BTree/BTree.c
index 2ee3159e32b2d206f9fb16d9d11e94d30a404ea3..86307fbcb94147f5d84f63c0ab7274c4849da82f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -334,7 +334,7 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
                ++btreePtr->numReleaseNodes;
                M_ExitOnError (err);
 
-               err = GetNode (btreePtr, kHeaderNodeNum, &nodeRec );
+               err = GetNode (btreePtr, kHeaderNodeNum, 0, &nodeRec );
                M_ExitOnError (err);
        }
 
@@ -504,7 +504,7 @@ OSStatus    BTSearchRecord          (FCB                                            *filePtr,
        {
                nodeNum = searchIterator->hint.nodeNum;
                
-               err = GetNode (btreePtr, nodeNum, &node);
+               err = GetNode (btreePtr, nodeNum, kGetNodeHint, &node);
                if( err == noErr )
                {
                        if ( ((BTNodeDescriptor*) node.buffer)->kind == kBTLeafNode &&
@@ -711,7 +711,7 @@ OSStatus    BTIterateRecord         (FCB                                            *filePtr,
                        goto ErrorExit;
                }
 
-               err = GetNode (btreePtr, nodeNum, &node);
+               err = GetNode (btreePtr, nodeNum, 0, &node);
                M_ExitOnError (err);
 
                if ( ((NodeDescPtr) node.buffer)->kind != kBTLeafNode ||
@@ -763,7 +763,7 @@ OSStatus    BTIterateRecord         (FCB                                            *filePtr,
                                        M_ExitOnError(err);
 
                                        // Look up the left node 
-                                       err = GetNode (btreePtr, nodeNum, &left);
+                                       err = GetNode (btreePtr, nodeNum, 0, &left);
                                        M_ExitOnError (err);
 
                                        // Look up the current node again
@@ -811,7 +811,7 @@ OSStatus    BTIterateRecord         (FCB                                            *filePtr,
                                nodeNum = ((NodeDescPtr) node.buffer)->fLink;
                                if ( nodeNum > 0)
                                {
-                                       err = GetNode (btreePtr, nodeNum, &right);
+                                       err = GetNode (btreePtr, nodeNum, 0, &right);
                                        M_ExitOnError (err);
                                } else {
                                        err = fsBTEndOfIterationErr;
@@ -1019,7 +1019,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator
                        goto ErrorExit;
                }
 
-               err = GetNode(btreePtr, nodeNum, &node);
+               err = GetNode(btreePtr, nodeNum, 0, &node);
                M_ExitOnError(err);
 
                if ( ((NodeDescPtr)node.buffer)->kind != kBTLeafNode ||
@@ -1074,7 +1074,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator
                                        M_ExitOnError(err);
 
                                        // Look up the left node 
-                                       err = GetNode (btreePtr, nodeNum, &left);
+                                       err = GetNode (btreePtr, nodeNum, 0, &left);
                                        M_ExitOnError (err);
 
                                        // Look up the current node again
@@ -1122,7 +1122,7 @@ BTIterateRecords(FCB *filePtr, BTreeIterationOperation operation, BTreeIterator
                                nodeNum = ((NodeDescPtr)node.buffer)->fLink;
                                if ( nodeNum > 0)
                                {
-                                       err = GetNode(btreePtr, nodeNum, &right);
+                                       err = GetNode(btreePtr, nodeNum, 0, &right);
                                        M_ExitOnError(err);
                                } else {
                                        err = fsBTEndOfIterationErr;
@@ -1172,7 +1172,7 @@ ProcessData:
                                nodeNum = ((NodeDescPtr)node.buffer)->fLink;
                                if ( nodeNum > 0)
                                {
-                                       err = GetNode(btreePtr, nodeNum, &right);
+                                       err = GetNode(btreePtr, nodeNum, 0, &right);
                                        M_ExitOnError(err);
                                } else {
                                        err = fsBTEndOfIterationErr;
@@ -1459,7 +1459,7 @@ OSStatus  BTReplaceRecord         (FCB                                            *filePtr,
        {
                insertNodeNum = iterator->hint.nodeNum;
 
-               err = GetNode (btreePtr, insertNodeNum, &nodeRec);
+               err = GetNode (btreePtr, insertNodeNum, kGetNodeHint, &nodeRec);
                if( err == noErr )
                {
                        // XXXdbg
@@ -1602,7 +1602,7 @@ BTUpdateRecord(FCB *filePtr, BTreeIterator *iterator,
        {
                insertNodeNum = iterator->hint.nodeNum;
 
-               err = GetNode (btreePtr, insertNodeNum, &nodeRec);
+               err = GetNode (btreePtr, insertNodeNum, kGetNodeHint, &nodeRec);
                if (err == noErr)
                {
                        if (((NodeDescPtr)nodeRec.buffer)->kind == kBTLeafNode &&
@@ -1870,7 +1870,7 @@ BTReloadData(FCB *filePtr)
 
        REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false);
 
-       err = GetNode(btreePtr, kHeaderNodeNum, &node);
+       err = GetNode(btreePtr, kHeaderNodeNum, 0, &node);
        if (err != noErr)
                return (err);
        
@@ -2042,7 +2042,7 @@ BTGetUserData(FCB *filePtr, void * dataPtr, int dataSize)
 
        REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false);
 
-       err = GetNode(btreePtr, kHeaderNodeNum, &node);
+       err = GetNode(btreePtr, kHeaderNodeNum, 0, &node);
        if (err)
                return (err);
        
@@ -2080,7 +2080,7 @@ BTSetUserData(FCB *filePtr, void * dataPtr, int dataSize)
 
        REQUIRE_FILE_LOCK(btreePtr->fileRefNum, false);
 
-       err = GetNode(btreePtr, kHeaderNodeNum, &node);
+       err = GetNode(btreePtr, kHeaderNodeNum, 0, &node);
        if (err)
                return (err);
        
diff --git a/bsd/hfs/hfscommon/BTree/BTreeAllocate.c b/bsd/hfs/hfscommon/BTree/BTreeAllocate.c
index ff917113e9fc09613fc09f3e2bac25129d847f57..41adf88635b5b6aff2b67ffdced38343329ee359 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003, 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003, 2005-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -451,7 +451,7 @@ OSStatus    ExtendBTree     (BTreeControlBlockPtr   btreePtr,
                        err = UpdateNode (btreePtr, &mapNode, 0, kLockTransaction);
                        M_ExitOnError (err);
                        
-                       err = GetNode (btreePtr, nextNodeNum, &mapNode);
+                       err = GetNode (btreePtr, nextNodeNum, 0, &mapNode);
                        M_ExitOnError (err);
                        
                        // XXXdbg
@@ -558,7 +558,7 @@ OSStatus    GetMapNode (BTreeControlBlockPtr          btreePtr,
                err = ReleaseNode (btreePtr, nodePtr);
                M_ExitOnError (err);
                
-               err = GetNode (btreePtr, nextNodeNum, nodePtr);
+               err = GetNode (btreePtr, nextNodeNum, 0, nodePtr);
                M_ExitOnError (err);
                
                if ( ((NodeDescPtr)nodePtr->buffer)->kind != kBTMapNode)
@@ -570,7 +570,7 @@ OSStatus    GetMapNode (BTreeControlBlockPtr          btreePtr,
                ++btreePtr->numMapNodesRead;
                mapIndex = 0;
        } else {
-               err = GetNode (btreePtr, kHeaderNodeNum, nodePtr);
+               err = GetNode (btreePtr, kHeaderNodeNum, 0, nodePtr);
                M_ExitOnError (err);
                
                if ( ((NodeDescPtr)nodePtr->buffer)->kind != kBTHeaderNode)
diff --git a/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c b/bsd/hfs/hfscommon/BTree/BTreeMiscOps.c
index 0e47310eef6fbc841cc1a5745ff2910d765906de..0b1d265919a33b6933e4cfb72d0eb8f5314b3f8e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003, 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2003, 2005-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -248,7 +248,7 @@ OSStatus UpdateHeader(BTreeControlBlockPtr btreePtr, Boolean forceWrite)
        return  noErr;
        
        
-       err = GetNode (btreePtr, kHeaderNodeNum, &node );
+       err = GetNode (btreePtr, kHeaderNodeNum, 0, &node );
        if (err != noErr) {
                return  err;
        }
@@ -356,7 +356,7 @@ OSStatus    FindIteratorPosition    (BTreeControlBlockPtr    btreePtr,
                goto SearchTheTree;
        }
        
-       err = GetNode (btreePtr, nodeNum, middle);
+       err = GetNode (btreePtr, nodeNum, kGetNodeHint, middle);
        if( err == fsBTInvalidNodeErr ) // returned if nodeNum is out of range
                goto SearchTheTree;
                
@@ -392,7 +392,7 @@ OSStatus    FindIteratorPosition    (BTreeControlBlockPtr    btreePtr,
                M_ExitOnError(err);
 
                // Look up the left node 
-               err = GetNode (btreePtr, nodeNum, left);
+               err = GetNode (btreePtr, nodeNum, 0, left);
                M_ExitOnError (err);
 
                // Look up the current node again
diff --git a/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c b/bsd/hfs/hfscommon/BTree/BTreeNodeOps.c
index ab2962683a6fc7a932fba9a7724349616b7e3ca7..2db71479d50ce2b14384cd98bf49a25f99e4dd31 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2002, 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000, 2002, 2005-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -191,13 +191,15 @@ Result:
 
 OSStatus       GetNode         (BTreeControlBlockPtr    btreePtr,
                                                 u_int32_t                               nodeNum,
+                                                u_int32_t                               flags, 
                                                 NodeRec                                *nodePtr )
 {
        OSStatus                        err;
        GetBlockProcPtr         getNodeProc;
+       u_int32_t                       options;
        
 
-       //\80\80 is nodeNum within proper range?
+       // is nodeNum within proper range?
        if( nodeNum >= btreePtr->totalNodes )
        {
                Panic("\pGetNode:nodeNum >= totalNodes");
@@ -206,17 +208,22 @@ OSStatus  GetNode         (BTreeControlBlockPtr    btreePtr,
        }
        
        nodePtr->blockSize = btreePtr->nodeSize;        // indicate the size of a node
+
+       options = kGetBlock;
+       if ( flags & kGetNodeHint ) 
+       {
+               options |= kGetBlockHint;
+       }
        
        getNodeProc = btreePtr->getBlockProc;
        err = getNodeProc (btreePtr->fileRefNum,
                                           nodeNum,
-                                          kGetBlock,
+                                          options,
                                           nodePtr );
 
        if (err != noErr)
        {
                Panic ("\pGetNode: getNodeProc returned error.");
-       //      nodePtr->buffer = nil;
                goto ErrorExit;
        }
        ++btreePtr->numGetNodes;
diff --git a/bsd/hfs/hfscommon/BTree/BTreeScanner.c b/bsd/hfs/hfscommon/BTree/BTreeScanner.c
index 35aeafc1808ca9321978420dab05eb7c6ef18e8c..7eb4013d4f1f36e3d20e52ff3914a9bbecb499bc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1996-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1996-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -199,8 +199,11 @@ static int FindNextLeafNode(       BTScanState *scanState, Boolean avoidIO )
                
                fref = scanState->btcb->fileRefNum;
                
-               /* This node was read from disk, so it must be swapped/checked. */
-               err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost);
+               /* This node was read from disk, so it must be swapped/checked.
+                * Since we are reading multiple nodes, we might have read an 
+                * unused node.  Therefore we allow swapping of unused nodes.
+                */
+               err = hfs_swap_BTNode(&block, fref, kSwapBTNodeBigToHost, true);
                if ( err != noErr ) {
                        printf("FindNextLeafNode: Error from hfs_swap_BTNode (node %u)\n", scanState->nodeNum);
                        continue;
diff --git a/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c b/bsd/hfs/hfscommon/BTree/BTreeTreeOps.c
index 2aad0a7b1996859351565ff8c105a22e0c6b4107..97e308497214d67cce0872adbf05947807c2becb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -240,7 +240,7 @@ OSStatus    SearchTree      (BTreeControlBlockPtr    btreePtr,
             goto ErrorExit;
         }
         
-        err = GetNode (btreePtr, curNodeNum, &nodeRec);
+        err = GetNode (btreePtr, curNodeNum, 0, &nodeRec);
         if (err != noErr)
         {
                 goto ErrorExit;
@@ -472,7 +472,7 @@ OSStatus    InsertLevel (BTreeControlBlockPtr                btreePtr,
 
                PanicIf ( parentNodeNum == 0, "\p InsertLevel: parent node is zero!?");
 
-               err = GetNode (btreePtr, parentNodeNum, &parentNode);   // released as target node in next level up
+               err = GetNode (btreePtr, parentNodeNum, 0, &parentNode);        // released as target node in next level up
                M_ExitOnError (err);
 #if defined(applec) && !defined(__SC__)
                if (DEBUG_BUILD && level > 1)
@@ -609,7 +609,7 @@ static OSErr        InsertNode      (BTreeControlBlockPtr    btreePtr,
 
                if ( leftNode->buffer == nil )
                {
-                       err = GetNode (btreePtr, leftNodeNum, leftNode);        // will be released by caller or a split below
+                       err = GetNode (btreePtr, leftNodeNum, 0, leftNode);     // will be released by caller or a split below
                        M_ExitOnError (err);
                        // XXXdbg
                        ModifyBlockStart(btreePtr->fileRefNum, leftNode);
@@ -730,7 +730,7 @@ OSStatus    DeleteTree                      (BTreeControlBlockPtr            btreePtr,
                siblingNodeNum = targetNodePtr->bLink;                          // Left Sibling Node
                if ( siblingNodeNum != 0 )
                {
-                       err = GetNode (btreePtr, siblingNodeNum, &siblingNode);
+                       err = GetNode (btreePtr, siblingNodeNum, 0, &siblingNode);
                        M_ExitOnError (err);
 
                        // XXXdbg
@@ -748,7 +748,7 @@ OSStatus    DeleteTree                      (BTreeControlBlockPtr            btreePtr,
                siblingNodeNum = targetNodePtr->fLink;                          // Right Sibling Node
                if ( siblingNodeNum != 0 )
                {
-                       err = GetNode (btreePtr, siblingNodeNum, &siblingNode);
+                       err = GetNode (btreePtr, siblingNodeNum, 0, &siblingNode);
                        M_ExitOnError (err);
 
                        // XXXdbg
@@ -803,7 +803,7 @@ OSStatus    DeleteTree                      (BTreeControlBlockPtr            btreePtr,
 
                //// Get Parent Node and index
                index = treePathTable [level].index;
-               err = GetNode (btreePtr, treePathTable[level].node, &parentNode);
+               err = GetNode (btreePtr, treePathTable[level].node, 0, &parentNode);
                M_ExitOnError (err);
 
                if ( updateRequired )
@@ -889,7 +889,7 @@ static OSStatus     CollapseTree    (BTreeControlBlockPtr           btreePtr,
                M_ExitOnError (err);
                
                //// Get New Root Node
-               err = GetNode (btreePtr, btreePtr->rootNode, blockPtr);
+               err = GetNode (btreePtr, btreePtr->rootNode, 0, blockPtr);
                M_ExitOnError (err);
 
                // XXXdbg
diff --git a/bsd/hfs/hfscommon/headers/BTreesInternal.h b/bsd/hfs/hfscommon/headers/BTreesInternal.h
index a5f151953f54bb060c2f64a4c7d4e7a82afcd364..4e2a1df1244223bb40d7b50e4c2a2a7beecfe290 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -144,6 +144,7 @@ typedef FSBufferDescriptor *FSBufferDescriptorPtr;
 */
 enum {
                kGetBlock                       = 0x00000000,
+               kGetBlockHint           = 0x00000001,   // if set, the block is being looked up using a hint
                kForceReadBlock         = 0x00000002,   //\80\80 how does this relate to Read/Verify? Do we need this?
                kGetEmptyBlock          = 0x00000008
 };
diff --git a/bsd/hfs/hfscommon/headers/BTreesPrivate.h b/bsd/hfs/hfscommon/headers/BTreesPrivate.h
index c5d5ef4addf2bdf63b73853f28818b4d519fae21..6b7a1eb0347f804f029e42a832510615c2eac3cb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -369,19 +369,23 @@ extern OSStatus TreeIsDirty(BTreeControlBlockPtr btreePtr);
 
 OSStatus       GetNode                                 (BTreeControlBlockPtr    btreePtr,
                                                                         u_int32_t                               nodeNum,
+                                                                        u_int32_t                               flags, 
                                                                         NodeRec                                *returnNodePtr );
 
+/* Flags for GetNode() */
+#define                kGetNodeHint    0x1             /* If set, the node is being looked up using a hint */
+
 OSStatus       GetLeftSiblingNode              (BTreeControlBlockPtr    btreePtr,
                                                                         NodeDescPtr                     node,
                                                                         NodeRec                                *left );
 
-#define                GetLeftSiblingNode(btree,node,left)                     GetNode ((btree), ((NodeDescPtr)(node))->bLink, (left))
+#define                GetLeftSiblingNode(btree,node,left)                     GetNode ((btree), ((NodeDescPtr)(node))->bLink, 0, (left))
 
 OSStatus       GetRightSiblingNode             (BTreeControlBlockPtr    btreePtr,
                                                                         NodeDescPtr                     node,
                                                                         NodeRec                                *right );
 
-#define                GetRightSiblingNode(btree,node,right)           GetNode ((btree), ((NodeDescPtr)(node))->fLink, (right))
+#define                GetRightSiblingNode(btree,node,right)           GetNode ((btree), ((NodeDescPtr)(node))->fLink, 0, (right))
 
 
 OSStatus       GetNewNode                              (BTreeControlBlockPtr    btreePtr,
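Callers that position by an iterator hint now pass kGetNodeHint; everything else passes 0, and GetNode translates the flag into kGetBlockHint for the block layer, which in turn tolerates zero-filled nodes. A small standalone check of that translation (constants copied from the two headers in this commit):

    #include <assert.h>
    #include <stdint.h>

    enum { kGetBlock = 0x00000000, kGetBlockHint = 0x00000001 }; /* BTreesInternal.h */
    #define kGetNodeHint    0x1                                  /* BTreesPrivate.h */

    static uint32_t
    node_flags_to_block_options(uint32_t flags)
    {
            uint32_t options = kGetBlock;
            if (flags & kGetNodeHint)
                    options |= kGetBlockHint;
            return options;
    }

    int
    main(void)
    {
            assert(node_flags_to_block_options(0) == kGetBlock);
            assert(node_flags_to_block_options(kGetNodeHint) == kGetBlockHint);
            return 0;
    }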
diff --git a/bsd/kern/kern_sysctl.c b/bsd/kern/kern_sysctl.c
index 27f0e09066f220aa3a25dbdeabf1317a372f7899..82f89c2deddc8035ce1a74c0585c0ab23170a49f 100644 (file)
@@ -731,6 +731,8 @@ debug_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
        /* all sysctl names at this level are name and field */
        if (namelen != 2)
                return (ENOTDIR);               /* overloaded */
+       if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID)
+                return (ENOTSUP);
        cdp = debugvars[name[0]];
        if (cdp->debugname == 0)
                return (ENOTSUP);
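name[0] arrives straight from userspace and indexes the debugvars[] table, so without a bounds test a negative or oversized value reads outside the CTL_DEBUG_MAXID-sized array. The guard, isolated:

    /* Validate the user-supplied table index before dereferencing. */
    if (name[0] < 0 || name[0] >= CTL_DEBUG_MAXID)
            return (ENOTSUP);
    cdp = debugvars[name[0]];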
diff --git a/bsd/kern/tty_ptmx.c b/bsd/kern/tty_ptmx.c
index 4bd8398010d152a95cff75ce6c45c44cdfcd2d27..0f05583d57229b661850c90b6403654b4912c57e 100644 (file)
@@ -367,6 +367,16 @@ ptmx_get_ioctl(int minor, int open_flag)
                        _state.pis_total += PTMX_GROW_VECTOR;
                        if (old_pis_ioctl_list)
                                FREE(old_pis_ioctl_list, M_TTYS);
+               } 
+               
+               if (_state.pis_ioctl_list[minor] != NULL) {
+                       ttyfree(new_ptmx_ioctl->pt_tty);
+                       DEVFS_UNLOCK();
+                       FREE(new_ptmx_ioctl, M_TTYS);
+
+                       /* Special error value so we know to redrive the open, we've been raced */
+                       return (struct ptmx_ioctl*)-1; 
+
                }
 
                /* Vector is large enough; grab a new ptmx_ioctl */
@@ -419,8 +429,6 @@ ptmx_free_ioctl(int minor, int open_flag)
        if (!(_state.pis_ioctl_list[minor]->pt_flags & (PF_OPEN_M|PF_OPEN_S))) {
                /* Mark as free so it can be reallocated later */
                old_ptmx_ioctl = _state.pis_ioctl_list[ minor];
-               _state.pis_ioctl_list[ minor] = NULL;
-               _state.pis_free++;
        }
        DEVFS_UNLOCK();
 
@@ -436,6 +444,12 @@ ptmx_free_ioctl(int minor, int open_flag)
                        devfs_remove(old_ptmx_ioctl->pt_devhandle);
                ttyfree(old_ptmx_ioctl->pt_tty);
                FREE(old_ptmx_ioctl, M_TTYS);
+
+               /* Don't remove the entry until the devfs slot is free */
+               DEVFS_LOCK();
+               _state.pis_ioctl_list[ minor] = NULL;
+               _state.pis_free++;
+               DEVFS_UNLOCK();
        }
 
        return (0);     /* Success */
@@ -767,9 +781,11 @@ ptmx_open(dev_t dev, __unused int flag, __unused int devtype, __unused proc_t p)
        int error = 0;
        boolean_t   funnel_state;
 
-
-       if ((pti = ptmx_get_ioctl(minor(dev), PF_OPEN_M)) == NULL) {
+       pti = ptmx_get_ioctl(minor(dev), PF_OPEN_M);
+       if (pti == NULL) {
                return (ENXIO);
+       } else if (pti == (struct ptmx_ioctl*)-1) {
+               return (EREDRIVEOPEN);
        }
        tp = pti->pt_tty;
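
ptmx_get_ioctl() now signals a lost allocation race with a sentinel pointer instead of NULL, and ptmx_open() translates that into EREDRIVEOPEN so the VFS open path can retry. A reduced sketch of the convention; every name below is an illustrative stand-in for the kernel's:

    #include <errno.h>

    #define EREDRIVEOPEN (-6)                        /* in-kernel errno added in this commit */

    struct ptmx_ioctl;
    #define PTMX_RACED ((struct ptmx_ioctl *)-1)     /* sentinel: lost the allocation race */

    extern struct ptmx_ioctl *get_ioctl(int minor);  /* NULL, PTMX_RACED, or a live slot */

    static int
    open_master(int minor)
    {
        struct ptmx_ioctl *pti = get_ioctl(minor);

        if (pti == NULL)
            return ENXIO;           /* out of resources */
        if (pti == PTMX_RACED)
            return EREDRIVEOPEN;    /* tell the VFS open path to start over */
        return 0;
    }
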
 
index 38fe262e695ac104bb2d515ba3b3a3f18d9e50f8..aba70cc665a3f041f5739ac59fb6b9f6cc34de9e 100644 (file)
@@ -3011,8 +3011,9 @@ m_copy_pkthdr(struct mbuf *to, struct mbuf *from)
 #endif /* MAC_NET */
        to->m_pkthdr = from->m_pkthdr;          /* especially tags */
        m_tag_init(from);                       /* purge tags from src */
-       to->m_flags = from->m_flags & M_COPYFLAGS;
-       to->m_data = (to)->m_pktdat;
+       to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
+       if ((to->m_flags & M_EXT) == 0)
+               to->m_data = to->m_pktdat;
 }
 
 /*
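
The m_copy_pkthdr() fix keeps the destination's M_EXT bit and only re-points m_data at the internal buffer when no external cluster is attached. In miniature; the flag values and struct layout are assumptions, not mbuf.h's:

    #define M_EXT       0x0001      /* assumed bit: external storage attached */
    #define M_COPYFLAGS 0xff0e      /* assumed mask of header flags safe to copy */

    struct mini_mbuf {
        int   m_flags;
        char *m_data;           /* current data pointer */
        char  m_pktdat[176];    /* internal storage */
    };

    /* Keep the destination's M_EXT and leave m_data alone when an external
     * cluster is attached; resetting it unconditionally was the old bug. */
    static void
    copy_pkthdr_flags(struct mini_mbuf *to, const struct mini_mbuf *from)
    {
        to->m_flags = (from->m_flags & M_COPYFLAGS) | (to->m_flags & M_EXT);
        if ((to->m_flags & M_EXT) == 0)
            to->m_data = to->m_pktdat;
    }
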
index 2076a730df12b9330f2830d82bb6af9b555975d3..0c74fc181baa29f9faaaee4a61bc8733f52ed1b0 100644 (file)
@@ -93,6 +93,8 @@
 
 #include <net/net_osdep.h>
 
+extern u_long  route_generation;
+
 int ip_gif_ttl = GIF_TTL;
 SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_RW,
        &ip_gif_ttl,    0, "");
@@ -189,7 +191,10 @@ in_gif_output(
        bcopy(&iphdr, mtod(m, struct ip *), sizeof(struct ip));
 
        if (dst->sin_family != sin_dst->sin_family ||
-           dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr) {
+           dst->sin_addr.s_addr != sin_dst->sin_addr.s_addr ||
+           (sc->gif_ro.ro_rt != NULL &&
+           (sc->gif_ro.ro_rt->generation_id != route_generation ||
+           sc->gif_ro.ro_rt->rt_ifp == ifp))) {
                /* cache route doesn't match */
                dst->sin_family = sin_dst->sin_family;
                dst->sin_len = sizeof(struct sockaddr_in);
index 12d193d69f695552d7b7ba8ff0df5efb16070ed2..73ab3ea91caef70b4bee4e7f8815fcad4f93f317 100644 (file)
@@ -397,30 +397,6 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
                        m->m_pkthdr.rcvif = ifa->ifa_ifp;
                        ifafree(ifa);
                }
-               
-               if ((~IF_HWASSIST_CSUM_FLAGS(m->m_pkthdr.rcvif->if_hwassist) & 
-                               m->m_pkthdr.csum_flags) == 0) {
-                       if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-                               m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
-                       }
-                       m->m_pkthdr.csum_flags |=
-                               CSUM_DATA_VALID | CSUM_PSEUDO_HDR |
-                               CSUM_IP_CHECKED | CSUM_IP_VALID;
-                       m->m_pkthdr.csum_data = 0xffff;
-               }
-               else if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-                       int     hlen;
-                       
-#ifdef _IP_VHL
-                       hlen = IP_VHL_HL(ip->ip_vhl) << 2;
-#else
-                       hlen = ip->ip_hl << 2;
-#endif
-                       in_delayed_cksum(m);
-                       m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
-                       ip->ip_sum = in_cksum(m, hlen);
-               }
-
 #if CONFIG_MACF_NET
                mac_mbuf_label_associate_socket(so, m);
 #endif
index 1286b439f7f685764e519f8212ffa4a62cf38d12..ef9f0af805ff0d162b413600c49a8498a77cc4a7 100644 (file)
@@ -354,7 +354,7 @@ noreplaycheck:
 
        /* strip off the trailing pad area. */
        m_adj(m, -taillen);
-
+       ip = mtod(m, struct ip *);
 #ifdef IPLEN_FLIPPED
        ip->ip_len = ip->ip_len - taillen;
 #else
@@ -795,7 +795,7 @@ noreplaycheck:
 
        /* strip off the trailing pad area. */
        m_adj(m, -taillen);
-
+       ip6 = mtod(m, struct ip6_hdr *);
        ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - taillen);
 
        /* was it transmitted over the IPsec tunnel SA? */
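
Both esp_input hunks repair the same stale-pointer bug: m_adj() may shift the mbuf's data, so the IP/IPv6 header pointer must be re-derived with mtod() after the trim. A sketch of the rule, where mbuf_datastart() stands in for mtod() and the header struct is illustrative:

    struct mbuf;
    extern void  m_adj(struct mbuf *m, int req_len);    /* trims data; may shift it */
    extern void *mbuf_datastart(struct mbuf *m);        /* stand-in for mtod(m, ...) */

    struct ip_like { unsigned short ip_len; };

    static void
    strip_esp_trailer(struct mbuf *m, int taillen)
    {
        struct ip_like *ip;

        m_adj(m, -taillen);                         /* drop the trailing pad */
        ip = (struct ip_like *)mbuf_datastart(m);   /* re-fetch: old pointer is stale */
        ip->ip_len -= taillen;
    }
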
index 090d0ad317058b0f860d759975109d0eda01bb57..029dd88108887b8f5a2df49343f6217456a2269c 100644 (file)
@@ -67,6 +67,8 @@
 
 #include <net/net_osdep.h>
 
+extern u_long  route_generation;
+
 static __inline__ void*
 _cast_non_const(const void * ptr) {
        union {
@@ -172,7 +174,10 @@ in6_gif_output(
        ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
 
        if (dst->sin6_family != sin6_dst->sin6_family ||
-            !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr)) {
+           !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &sin6_dst->sin6_addr) ||
+           (sc->gif_ro6.ro_rt != NULL &&
+           (sc->gif_ro6.ro_rt->generation_id != route_generation ||
+           sc->gif_ro6.ro_rt->rt_ifp == ifp))) {
                /* cache route doesn't match */
                bzero(dst, sizeof(*dst));
                dst->sin6_family = sin6_dst->sin6_family;
@@ -195,7 +200,7 @@ in6_gif_output(
                }
 
                /* if it constitutes infinite encapsulation, punt. */
-               if (sc->gif_ro.ro_rt->rt_ifp == ifp) {
+               if (sc->gif_ro6.ro_rt->rt_ifp == ifp) {
                        m_freem(m);
                        return ENETUNREACH;     /*XXX*/
                }
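
The two gif hunks apply one test: a cached route is unusable if the global route_generation counter has moved on since it was cached, or if it resolves back to the tunnel interface itself (infinite encapsulation). Condensed below, with rtentry reduced to the two fields the diff reads:

    #include <stdbool.h>

    extern unsigned long route_generation;      /* bumped when the routing table changes */

    struct ifnet;
    struct rtentry {
        unsigned long generation_id;            /* route_generation captured at cache time */
        struct ifnet *rt_ifp;                   /* interface the route resolves to */
    };

    static bool
    cached_tunnel_route_stale(const struct rtentry *rt, const struct ifnet *tunnel_ifp)
    {
        if (rt == NULL)
            return false;                       /* nothing cached; a lookup happens anyway */
        return rt->generation_id != route_generation ||
            rt->rt_ifp == tunnel_ifp;           /* would loop back into the tunnel */
    }
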
index 67aff41a164a6745d582ed167c6ffc993e57333d..ec42fa74d1301474cf03c00acd7889b813084af7 100644 (file)
@@ -258,5 +258,6 @@ __END_DECLS
 #define        ERESTART        (-1)            /* restart syscall */
 #define        EJUSTRETURN     (-2)            /* don't modify regs, just return */
 #define ERECYCLE    (-5)               /* restart lookup under heavy vnode pressure/recycling */
+#define EREDRIVEOPEN   (-6)            /* restart open() after a race (e.g. tty allocation) */
 #endif
 #endif /* _SYS_ERRNO_H_ */
index c4e93ab8e4742db543f2d0f298c94792b4c2b958..95a07d69a423819838815ab80388c1a8f0ecafd9 100644 (file)
@@ -303,33 +303,31 @@ again:
                        }
                        /* Ask the file system for its parent id and for its name (optional). */
                        ret = vnode_getattr(vp, &va, ctx);
+
                        if (fixhardlink) {
-                               if (vp->v_name || VATTR_IS_SUPPORTED(&va, va_name)) {
-                                       if (ret == 0) {
-                                               str = va.va_name;
-                                       } else if (vp->v_name) {
-                                               str = vp->v_name;
-                                               ret = 0;
-                                       } else {
-                                               ret = ENOENT;
-                                               goto bad_news;
-                                       }
+                               if ((ret == 0) && (VATTR_IS_SUPPORTED(&va, va_name))) {
+                                       str = va.va_name;
+                               } else if (vp->v_name) {
+                                       str = vp->v_name;
+                                       ret = 0;
+                               } else {
+                                       ret = ENOENT;
+                                       goto bad_news;
+                               }
+                               len = strlen(str);
 
-                                       len = strlen(str);
-
-                                       /* Check that there's enough space. */
-                                       if ((end - buff) < (len + 1)) {
-                                               ret = ENOSPC;
-                                       } else {
-                                               /* Copy the name backwards. */
-                                               str += len;
-                               
-                                               for (; len > 0; len--) {
-                                                      *--end = *--str;
-                                               }
-                                               /* Add a path separator. */
-                                               *--end = '/';
+                               /* Check that there's enough space. */
+                               if ((end - buff) < (len + 1)) {
+                                       ret = ENOSPC;
+                               } else {
+                                       /* Copy the name backwards. */
+                                       str += len;
+                                       
+                                       for (; len > 0; len--) {
+                                               *--end = *--str;
                                        }
+                                       /* Add a path separator. */
+                                       *--end = '/';
                                }
                          bad_news:
                                FREE_ZONE(va.va_name, MAXPATHLEN, M_NAMEI);
@@ -1644,7 +1642,7 @@ cache_purge(vnode_t vp)
         struct namecache *ncp;
        kauth_cred_t tcred = NULL;
 
-       if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL))
+       if ((LIST_FIRST(&vp->v_nclinks) == NULL) && (LIST_FIRST(&vp->v_ncchildren) == NULL) && (vp->v_cred == NOCRED))
                return;
 
        NAME_CACHE_LOCK();
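
The restructured fixhardlink block still builds the path right-to-left: the component is copied backwards from the end of the buffer, then a '/' is prepended, with ENOSPC when it will not fit. The same loop as a standalone helper; a sketch, not the kernel function:

    #include <errno.h>
    #include <string.h>

    /* Prepend "/<name>" in front of *endp, moving *endp backwards. */
    static int
    prepend_component(char *buff, char **endp, const char *str)
    {
        size_t len = strlen(str);
        char  *end = *endp;

        if ((size_t)(end - buff) < len + 1)     /* room for name + separator? */
            return ENOSPC;
        str += len;                             /* copy the name backwards */
        for (; len > 0; len--)
            *--end = *--str;
        *--end = '/';                           /* add a path separator */
        *endp = end;
        return 0;
    }
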
index 891852d2634f8f523a34e78563ca138d4ee61cd3..3a34e1787f61be54be9aa68585e9f60692a28a70 100644 (file)
@@ -2423,7 +2423,6 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
        long long        zero_cnt1;
        off_t            zero_off1;
        struct cl_extent cl;
-        int              intersection;
        struct cl_writebehind *wbp;
        int              bflag;
        u_int            max_cluster_pgcount;
@@ -2604,7 +2603,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                 * to release the rest of the pages in the upl without modifying
                                 * there state and mark the failed page in error
                                 */
-                               ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
+                               ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
 
                                if (upl_size > PAGE_SIZE)
                                        ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
@@ -2640,7 +2639,7 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                         * need to release the rest of the pages in the upl without
                                         * modifying their state and mark the failed page in error
                                         */
-                                       ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES);
+                                       ubc_upl_abort_range(upl, upl_offset, PAGE_SIZE, UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY);
 
                                        if (upl_size > PAGE_SIZE)
                                                ubc_upl_abort_range(upl, 0, upl_size, UPL_ABORT_FREE_ON_EMPTY);
@@ -2745,6 +2744,33 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                 */
                                cluster_zero(upl, io_size, upl_size - io_size, NULL); 
                        }
+                       /*
+                        * release the upl now if we hold one since...
+                        * 1) pages in it may be present in the sparse cluster map
+                        *    and may span 2 separate buckets there... if they do and 
+                        *    we happen to have to flush a bucket to make room and it intersects
+                        *    this upl, a deadlock may result on page BUSY
+                        * 2) we're delaying the I/O... from this point forward we're just updating
+                        *    the cluster state... no need to hold the pages, so commit them
+                        * 3) IO_SYNC is set...
+                        *    because we had to ask for a UPL that provides currently non-present pages, the
+                        *    UPL has been automatically set to clear the dirty flags (both software and hardware)
+                        *    upon committing it... this is not the behavior we want since it's possible for
+                        *    pages currently present as part of a mapped file to be dirtied while the I/O is in flight.
+                        *    we'll pick these pages back up later with the correct behavior specified.
+                        * 4) we don't want to hold pages busy in a UPL and then block on the cluster lock... if a flush
+                        *    of this vnode is in progress, we will deadlock if the pages being flushed intersect the pages
+                        *    we hold since the flushing context is holding the cluster lock.
+                        */
+                       ubc_upl_commit_range(upl, 0, upl_size,
+                                            UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
+check_cluster:
+                       /*
+                        * calculate the last logical block number 
+                        * that this delayed I/O encompassed
+                        */
+                       cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64);
+
                        if (flags & IO_SYNC)
                                /*
                                 * if the IO_SYNC flag is set then we need to
@@ -2752,35 +2778,20 @@ cluster_write_copy(vnode_t vp, struct uio *uio, u_int32_t io_req_size, off_t old
                                 * the I/O
                                 */
                                goto issue_io;
-check_cluster:
+
                        /*
                         * take the lock to protect our accesses
                         * of the writebehind and sparse cluster state
                         */
                        wbp = cluster_get_wbp(vp, CLW_ALLOCATE | CLW_RETURNLOCKED);
 
-                       /*
-                        * calculate the last logical block number 
-                        * that this delayed I/O encompassed
-                        */
-                       cl.e_addr = (daddr64_t)((upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64);
-
                        if (wbp->cl_scmap) {
 
                                if ( !(flags & IO_NOCACHE)) {
                                        /*
                                         * we've fallen into the sparse
                                         * cluster method of delaying dirty pages
-                                        * first, we need to release the upl if we hold one
-                                        * since pages in it may be present in the sparse cluster map
-                                        * and may span 2 separate buckets there... if they do and 
-                                        * we happen to have to flush a bucket to make room and it intersects
-                                        * this upl, a deadlock may result on page BUSY
                                         */
-                                       if (upl_size)
-                                               ubc_upl_commit_range(upl, 0, upl_size,
-                                                                    UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
                                        sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg);
 
                                        lck_mtx_unlock(&wbp->cl_lockw);
@@ -2793,21 +2804,10 @@ check_cluster:
                                 * to uncached writes on the file, so go ahead
                                 * and push whatever's in the sparse map
                                 * and switch back to normal clustering
-                                *
-                                * see the comment above concerning a possible deadlock...
                                 */
-                               if (upl_size) {
-                                       ubc_upl_commit_range(upl, 0, upl_size,
-                                                            UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-                                       /*
-                                        * setting upl_size to 0 keeps us from committing a
-                                        * second time in the start_new_cluster path
-                                        */
-                                       upl_size = 0;
-                               }
-                               sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg);
-
                                wbp->cl_number = 0;
+
+                               sparse_cluster_push(wbp, vp, newEOF, PUSH_ALL, callback, callback_arg);
                                /*
                                 * no clusters of either type present at this point
                                 * so just go directly to start_new_cluster since
@@ -2817,8 +2817,6 @@ check_cluster:
                                 */
                                goto start_new_cluster;
                        }                   
-                       upl_offset = 0;
-
                        if (wbp->cl_number == 0)
                                /*
                                 * no clusters currently present
@@ -2862,21 +2860,6 @@ check_cluster:
                                                 */
                                                wbp->cl_clusters[cl_index].e_addr = wbp->cl_clusters[cl_index].b_addr + max_cluster_pgcount;
 
-                                               if (upl_size) {
-                                                       daddr64_t start_pg_in_upl;
-
-                                                       start_pg_in_upl = (daddr64_t)(upl_f_offset / PAGE_SIZE_64);
-                                                       
-                                                       if (start_pg_in_upl < wbp->cl_clusters[cl_index].e_addr) {
-                                                               intersection = (int)((wbp->cl_clusters[cl_index].e_addr - start_pg_in_upl) * PAGE_SIZE);
-
-                                                               ubc_upl_commit_range(upl, upl_offset, intersection,
-                                                                                    UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-                                                               upl_f_offset += intersection;
-                                                               upl_offset   += intersection;
-                                                               upl_size     -= intersection;
-                                                       }
-                                               }
                                                cl.b_addr = wbp->cl_clusters[cl_index].e_addr;
                                        }
                                        /*
@@ -2930,21 +2913,6 @@ check_cluster:
                                                 */
                                                wbp->cl_clusters[cl_index].b_addr = wbp->cl_clusters[cl_index].e_addr - max_cluster_pgcount;
 
-                                               if (upl_size) {
-                                                       intersection = (int)((cl.e_addr - wbp->cl_clusters[cl_index].b_addr) * PAGE_SIZE);
-
-                                                       if ((u_int)intersection > upl_size)
-                                                               /*
-                                                                * because the current write may consist of a number of pages found in the cache
-                                                                * which are not part of the UPL, we may have an intersection that exceeds
-                                                                * the size of the UPL that is also part of this write
-                                                                */
-                                                               intersection = upl_size;
-
-                                                       ubc_upl_commit_range(upl, upl_offset + (upl_size - intersection), intersection,
-                                                                            UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-                                                       upl_size -= intersection;
-                                               }
                                                cl.e_addr = wbp->cl_clusters[cl_index].b_addr;
                                        }
                                        /*
@@ -2999,16 +2967,7 @@ check_cluster:
                                 * no more room in the normal cluster mechanism
                                 * so let's switch to the more expansive but expensive
                                 * sparse mechanism....
-                                * first, we need to release the upl if we hold one
-                                * since pages in it may be present in the sparse cluster map (after the cluster_switch)
-                                * and may span 2 separate buckets there... if they do and 
-                                * we happen to have to flush a bucket to make room and it intersects
-                                * this upl, a deadlock may result on page BUSY
                                 */
-                               if (upl_size)
-                                       ubc_upl_commit_range(upl, upl_offset, upl_size,
-                                                            UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
                                sparse_cluster_switch(wbp, vp, newEOF, callback, callback_arg);
                                sparse_cluster_add(wbp, vp, &cl, newEOF, callback, callback_arg);
 
@@ -3042,33 +3001,19 @@ start_new_cluster:
 
                        wbp->cl_number++;
 delay_io:
-                       if (upl_size)
-                               ubc_upl_commit_range(upl, upl_offset, upl_size,
-                                                    UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
                        lck_mtx_unlock(&wbp->cl_lockw);
 
                        continue;
 issue_io:
                        /*
-                        * we don't hold the vnode lock at this point
+                        * we don't hold the lock at this point
                         *
-                        * because we had to ask for a UPL that provides currenty non-present pages, the
-                        * UPL has been automatically set to clear the dirty flags (both software and hardware)
-                        * upon committing it... this is not the behavior we want since it's possible for
-                        * pages currently present as part of a mapped file to be dirtied while the I/O is in flight.
-                        * in order to maintain some semblance of coherency with mapped writes
-                        * we need to drop the current upl and pick it back up with COPYOUT_FROM set
+                        * we've already dropped the current upl, so pick it back up with COPYOUT_FROM set
                         * so that we correctly deal with a change in state of the hardware modify bit...
                         * we do this via cluster_push_now... by passing along the IO_SYNC flag, we force
                         * cluster_push_now to wait until all the I/Os have completed... cluster_push_now is also
                         * responsible for generating the correct sized I/O(s)
                         */
-                       ubc_upl_commit_range(upl, 0, upl_size,
-                                            UPL_COMMIT_SET_DIRTY | UPL_COMMIT_INACTIVATE | UPL_COMMIT_FREE_ON_EMPTY);
-
-                       cl.e_addr = (upl_f_offset + (off_t)upl_size) / PAGE_SIZE_64;
-
                        retval = cluster_push_now(vp, &cl, newEOF, flags, callback, callback_arg);
                }
        }
@@ -4646,19 +4591,6 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
                                goto dont_try;
                }
        }
-       /*
-        * drop the lock while we're firing off the I/Os...
-        * this is safe since I'm working off of a private sorted copy
-        * of the clusters, and I'm going to re-evaluate the public
-        * state after I retake the lock
-        *
-        * we need to drop it to avoid a lock inversion when trying to
-        * grab pages into the UPL... another thread in 'write' may
-        * have these pages in its UPL and be blocked trying to
-        * gain the write-behind lock for this vnode
-        */
-       lck_mtx_unlock(&wbp->cl_lockw);
-
        for (cl_index = 0; cl_index < cl_len; cl_index++) {
                int     flags;
                struct  cl_extent cl;
@@ -4690,8 +4622,6 @@ cluster_try_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_fla
                if ( !(push_flag & PUSH_ALL) )
                        break;
        }
-       lck_mtx_lock(&wbp->cl_lockw);
-
 dont_try:
        if (cl_len > cl_pushed) {
               /*
@@ -4979,23 +4909,8 @@ sparse_cluster_push(struct cl_writebehind *wbp, vnode_t vp, off_t EOF, int push_
 
                wbp->cl_scdirty -= (int)(cl.e_addr - cl.b_addr);
 
-               /*
-                * drop the lock while we're firing off the I/Os...
-                * this is safe since I've already updated the state
-                * this lock is protecting and I'm going to re-evaluate
-                * the public state after I retake the lock
-                *
-                * we need to drop it to avoid a lock inversion when trying to
-                * grab pages into the UPL... another thread in 'write' may
-                * have these pages in its UPL and be blocked trying to
-                * gain the write-behind lock for this vnode
-                */
-               lck_mtx_unlock(&wbp->cl_lockw);
-
                cluster_push_now(vp, &cl, EOF, push_flag & IO_PASSIVE, callback, callback_arg);
 
-               lck_mtx_lock(&wbp->cl_lockw);
-
                if ( !(push_flag & PUSH_ALL) )
                        break;
        }
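
The vfs_cluster.c rework boils down to an ordering rule: commit the UPL, with its pages marked dirty, before taking the write-behind lock, so no thread ever waits on cl_lockw while holding BUSY pages (the deadlock the removed per-path commits used to work around). The shape of that ordering, with the kernel calls replaced by stand-ins:

    struct upl;
    struct cl_writebehind;

    extern void upl_commit_dirty(struct upl *upl);      /* stand-in for ubc_upl_commit_range(..., UPL_COMMIT_SET_DIRTY | ...) */
    extern void wb_lock(struct cl_writebehind *wbp);    /* stand-in for lck_mtx_lock(&wbp->cl_lockw) */
    extern void wb_unlock(struct cl_writebehind *wbp);
    extern void record_delayed_io(struct cl_writebehind *wbp);

    static void
    delay_write(struct upl *upl, struct cl_writebehind *wbp)
    {
        upl_commit_dirty(upl);          /* 1: hand pages back to the VM first... */
        wb_lock(wbp);                   /* 2: ...then take the write-behind lock */
        record_delayed_io(wbp);         /* only cluster bookkeeping; no BUSY pages held */
        wb_unlock(wbp);
    }
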
index 8299bfd335a056552dceeb9a4c7f61f0ebe7da66..bff33c625850dffa54a16c52d0ce3242fa52140f 100644 (file)
@@ -2082,7 +2082,7 @@ check_free_space(journal *jnl, int desired_size)
 
                        lcl_counter = 0;
                        while (jnl->old_start[i] & 0x8000000000000000LL) {
-                               if (lcl_counter++ > 100) {
+                               if (lcl_counter++ > 1000) {
                                        panic("jnl: check_free_space: tr starting @ 0x%llx not flushing (jnl %p).\n",
                                                  jnl->old_start[i], jnl);
                                }
index ca6d9e6b10f5348690630fc52f59eefedb9fd64c..482fb8c46c33d6ddaed9e99bb287f665008d5830 100644 (file)
@@ -3265,7 +3265,6 @@ new_vnode(vnode_t *vpp)
         struct timeval current_tv;
         struct unsafe_fsnode *l_unsafefs = 0;
        proc_t  curproc = current_proc();
-       pid_t current_pid = proc_pid(curproc);
 
 retry:
        microuptime(&current_tv);
@@ -3315,11 +3314,11 @@ retry:
                    if ( !(vp->v_listflag & VLIST_RAGE) || !(vp->v_flag & VRAGE))
                        panic("new_vnode: vp on RAGE list not marked both VLIST_RAGE and VRAGE");
 
-                   // skip vnodes which have a dependency on this process
-                   // (i.e. they're vnodes in a disk image and this process
-                   // is diskimages-helper)
+                   // if we're a dependency-capable process, skip vnodes that can
+                   // cause recycling deadlocks. (i.e. this process is diskimages
+                   // helper and the vnode is in a disk image).
                    //
-                   if (vp->v_mount && vp->v_mount->mnt_dependent_pid != current_pid && vp->v_mount->mnt_dependent_process != curproc) {
+                   if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || vp->v_mount->mnt_dependent_process == NULL) {
                        break;
                    }
 
@@ -3339,11 +3338,11 @@ retry:
                 */
                walk_count = 0;
                TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
-                   // skip vnodes which have a dependency on this process
-                   // (i.e. they're vnodes in a disk image and this process
-                   // is diskimages-helper)
+                   // if we're a dependency-capable process, skip vnodes that can
+                   // cause recycling deadlocks. (i.e. this process is diskimages
+                   // helper and the vnode is in a disk image)
                    //
-                   if (vp->v_mount && vp->v_mount->mnt_dependent_pid != current_pid && vp->v_mount->mnt_dependent_process != curproc) {
+                   if ((curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 || vp->v_mount == NULL || vp->v_mount->mnt_dependent_process == NULL) {
                        break;
                    }
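
new_vnode() now keys the skip decision off P_DEPENDENCY_CAPABLE plus the mount's dependent process, instead of comparing pids. The predicate in isolation; the flag value is assumed and the structs are trimmed to the fields tested:

    #define P_DEPENDENCY_CAPABLE 0x00100000     /* assumed flag value */

    struct proc  { int p_flag; };
    struct mount { struct proc *mnt_dependent_process; };
    struct vnode { struct mount *v_mount; };

    /* A free-list vnode is safe to take unless we are dependency-capable
     * (e.g. diskimages-helper) and the vnode's mount has a dependent
     * process, in which case recycling it could deadlock. */
    static int
    vnode_safe_to_recycle(const struct proc *curproc, const struct vnode *vp)
    {
        return (curproc->p_flag & P_DEPENDENCY_CAPABLE) == 0 ||
            vp->v_mount == NULL ||
            vp->v_mount->mnt_dependent_process == NULL;
    }
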
 
index c7e566bab8d5e8f8d8e6158656afa2f8f990aee6..4dafffdf3d8f3183f582a549152b094813ad72e7 100644 (file)
@@ -427,8 +427,10 @@ bad:
                 * Check for a race against unlink.  We had a vnode
                 * but according to vnode_authorize or VNOP_OPEN it
                 * no longer exists.
+                *
+                * EREDRIVEOPEN means we were hit by the tty allocation race.
                 */
-               if ((error == ENOENT) && (*fmodep & O_CREAT)) {
+               if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN)) {
                        goto again;
                }
        }
index 2c14c9c7c43fcdd363f30afe64ce3685af0b9132..64db93b879ccf0349a22ba9e0afdf708ab1b798a 100644 (file)
@@ -1,4 +1,4 @@
-9.4.0
+9.5.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 6743dc70bf83ea8bd428a79a0aee71bfca166cf0..bd01828889bc641aa913082ccf77a1991e715a8b 100644 (file)
@@ -714,7 +714,7 @@ Debugger(
                __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
 
                /* Print backtrace - callee is internally synchronized */
-               panic_i386_backtrace(stackptr, 16);
+               panic_i386_backtrace(stackptr, 16, NULL, FALSE, NULL);
 
                /* everything should be printed now so copy to NVRAM
                 */
@@ -725,6 +725,7 @@ Debugger(
                   */
                    if (commit_paniclog_to_nvram) {
                        unsigned int bufpos;
+                       uintptr_t cr0;
 
                         debug_putc(0);
 
@@ -749,8 +750,17 @@ Debugger(
                         * since we can subsequently halt the system.
                         */
                        kprintf("Attempting to commit panic log to NVRAM\n");
+/* The following sequence is a workaround for:
+ * <rdar://problem/5915669> SnowLeopard10A67: AppleEFINVRAM should not invoke
+ * any routines that use floating point (MMX in this case) when saving panic
+ * logs to nvram/flash.
+ */
+                       cr0 = get_cr0();
+                       clear_ts();
+
                         pi_size = PESavePanicInfo((unsigned char *)debug_buf,
                            pi_size );
+                       set_cr0(cr0);
 
                        /* Uncompress in-place, to permit examination of
                         * the panic log by debuggers.
@@ -939,10 +949,11 @@ panic_print_symbol_name(vm_address_t search)
 #define DUMPFRAMES 32
 #define PBT_TIMEOUT_CYCLES (5 * 1000 * 1000 * 1000ULL)
 void
-panic_i386_backtrace(void *_frame, int nframes)
+panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdump, x86_saved_state_t *regs)
 {
        cframe_t        *frame = (cframe_t *)_frame;
        vm_offset_t raddrs[DUMPFRAMES];
+       vm_offset_t PC = 0;
        int frame_index;
        volatile uint32_t *ppbtcnt = &pbtcnt;
        uint64_t bt_tsc_timeout;
@@ -959,8 +970,25 @@ panic_i386_backtrace(void *_frame, int nframes)
 
        PE_parse_boot_arg("keepsyms", &keepsyms);
 
-       kdb_printf("Backtrace, "
-           "Format - Frame : Return Address (4 potential args on stack) \n");
+       if (msg != NULL) {
+               kdb_printf(msg);
+       }
+
+       if ((regdump == TRUE) && (regs != NULL)) {
+               x86_saved_state32_t     *ss32p = saved_state32(regs);
+
+               kdb_printf(
+                   "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+                   "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+                   "EFL: 0x%08x, EIP: 0x%08x, CS:  0x%08x, DS:  0x%08x\n",
+                   ss32p->eax,ss32p->ebx,ss32p->ecx,ss32p->edx,
+                   ss32p->cr2,ss32p->ebp,ss32p->esi,ss32p->edi,
+                   ss32p->efl,ss32p->eip,ss32p->cs, ss32p->ds);
+               PC = ss32p->eip;
+       }
+
+       kdb_printf("Backtrace (CPU %d), "
+               "Frame : Return Address (4 potential args on stack)\n", cpu_number());
 
        for (frame_index = 0; frame_index < nframes; frame_index++) {
                vm_offset_t curframep = (vm_offset_t) frame;
@@ -1020,6 +1048,9 @@ out:
        if (frame_index)
                kmod_dump((vm_offset_t *)&raddrs[0], frame_index);
 
+       if (PC != 0)
+               kmod_dump(&PC, 1);
+
        panic_display_system_configuration();
        /* Release print backtrace lock, to permit other callers in the
         * event of panics on multiple processors.
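
The NVRAM workaround brackets PESavePanicInfo() with a CR0 save, clear_ts(), and restore, so MMX use inside the firmware cannot raise a device-not-available fault mid-panic. The bracket pattern on its own; save_panic_info() is a stand-in for the PE call:

    #include <stdint.h>

    extern uintptr_t get_cr0(void);
    extern void      set_cr0(uintptr_t cr0);
    extern void      clear_ts(void);    /* clears CR0.TS so FP/MMX use won't trap */

    extern unsigned int save_panic_info(unsigned char *buf, unsigned int len);

    static unsigned int
    save_panic_info_fp_safe(unsigned char *buf, unsigned int len)
    {
        uintptr_t    cr0 = get_cr0();   /* remember the TS bit */
        unsigned int written;

        clear_ts();                     /* firmware may use MMX while saving */
        written = save_panic_info(buf, len);
        set_cr0(cr0);                   /* put CR0 back exactly as found */
        return written;
    }
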
index 2c0df6147f0f753c78186de36a70f223c0ff0dac..67f0f28032dc98a8b8071bb31d9e3e463143e70b 100644 (file)
@@ -120,6 +120,7 @@ i386_init(vm_offset_t boot_args_start)
        uint64_t        maxmemtouse;
        unsigned int    cpus;
        boolean_t       legacy_mode;
+       boolean_t       fidn;
 
        postcode(I386_INIT_ENTRY);
 
@@ -187,8 +188,10 @@ i386_init(vm_offset_t boot_args_start)
        if (!PE_parse_boot_arg("himemory_mode", &vm_himemory_mode))
                vm_himemory_mode = 0;
 
-       if (!PE_parse_boot_arg("immediate_NMI", &force_immediate_debugger_NMI))
+       if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
                force_immediate_debugger_NMI = FALSE;
+       else
+               force_immediate_debugger_NMI = fidn;
 
        /*
         * At this point we check whether we are a 64-bit processor
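
Switching to PE_parse_boot_argn() makes the destination size explicit, so parsing into a boolean_t cannot scribble past it the way the unsized variant could. The call shape as a sketch; the prototype is assumed from this call site:

    #include <stddef.h>

    typedef int boolean_t;
    #define FALSE 0

    /* Sized variant: writes at most 'size' bytes into 'arg'. */
    extern boolean_t PE_parse_boot_argn(const char *name, void *arg, size_t size);

    static boolean_t force_immediate_debugger_NMI = FALSE;

    static void
    parse_immediate_nmi(void)
    {
        boolean_t fidn;

        if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof(fidn)))
            force_immediate_debugger_NMI = FALSE;   /* arg absent: keep default */
        else
            force_immediate_debugger_NMI = fidn;
    }
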
index ac1bb007a7a44a711303f18311f692432b69b032..9fa0eb36fe9ea795f2dcc4b8ab001f3e0c7667fd 100644 (file)
        call    EXT(fn)                 ;\
        movl    %edi, %esp
 
-#define CCALL3(fn, arg1, arg2, arg3)   \
+/*
+ * CCALL5 is used for callee functions with 3 arguments,
+ * where arg2 (a3:a2) and arg3 (a5:a4) are 64-bit values.
+ */
+#define CCALL5(fn, a1, a2, a3, a4, a5) \
        movl    %esp, %edi              ;\
-       subl    $12, %esp               ;\
+       subl    $20, %esp               ;\
        andl    $0xFFFFFFF0, %esp       ;\
-       movl    arg3, 8(%esp)           ;\
-       movl    arg2, 4(%esp)           ;\
-       movl    arg1, 0(%esp)           ;\
+       movl    a5, 16(%esp)            ;\
+       movl    a4, 12(%esp)            ;\
+       movl    a3,  8(%esp)            ;\
+       movl    a2,  4(%esp)            ;\
+       movl    a1,  0(%esp)            ;\
        call    EXT(fn)                 ;\
        movl    %edi, %esp
 
@@ -297,13 +303,13 @@ Entry(timer_grab)
  * Update time on user trap entry.
  * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
  */
-#define        TIME_TRAP_UENTRY        TIMER_EVENT(USER,SYSTEM)
+#define        TIME_TRAP_UENTRY                        TIMER_EVENT(USER,SYSTEM)
 
 /*
  * update time on user trap exit.
  * Uses %eax,%ebx,%ecx,%edx,%esi,%edi.
  */
-#define        TIME_TRAP_UEXIT         TIMER_EVENT(SYSTEM,USER)
+#define        TIME_TRAP_UEXIT                         TIMER_EVENT(SYSTEM,USER)
 
 /*
  * update time on interrupt entry.
@@ -926,7 +932,7 @@ Entry(lo_diag_scall)
        popl    %esp                    // Get back the original stack
        jmp     EXT(return_to_user)     // Normal return, do not check asts...
 2:     
-       CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
+       CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
                // pass what would be the diag syscall
                // error return - cause an exception
        /* no return */
@@ -950,6 +956,8 @@ Entry(lo_diag_scall)
  */
 
 Entry(lo_syscall)
+       TIME_TRAP_UENTRY
+
        /*
         * We can be here either for a mach, unix machdep or diag syscall,
         * as indicated by the syscall class:
@@ -972,13 +980,11 @@ Entry(lo_syscall)
        sti
 
        /* Syscall class unknown */
-       CCALL3(i386_exception, $(EXC_SYSCALL), %eax, $1)
+       CCALL5(i386_exception, $(EXC_SYSCALL), %eax, $0, $1, $0)
        /* no return */
 
 
 Entry(lo64_unix_scall)
-       TIME_TRAP_UENTRY
-
        movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* get current thread     */
        movl    ACT_TASK(%ecx),%ebx                     /* point to current task  */
        addl    $1,TASK_SYSCALLS_UNIX(%ebx)     /* increment call count   */
@@ -1007,8 +1013,6 @@ Entry(lo64_unix_scall)
 
 
 Entry(lo64_mach_scall)
-       TIME_TRAP_UENTRY
-
        movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* get current thread     */
        movl    ACT_TASK(%ecx),%ebx                     /* point to current task  */
        addl    $1,TASK_SYSCALLS_MACH(%ebx)     /* increment call count   */
@@ -1037,8 +1041,6 @@ Entry(lo64_mach_scall)
 
 
 Entry(lo64_mdep_scall)
-       TIME_TRAP_UENTRY
-
        movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* get current thread     */
        movl    ACT_TASK(%ecx),%ebx                     /* point to current task  */
 
@@ -1066,8 +1068,6 @@ Entry(lo64_mdep_scall)
 
 
 Entry(lo64_diag_scall)
-       TIME_TRAP_UENTRY
-
        movl    %gs:CPU_ACTIVE_THREAD,%ecx      /* get current thread     */
        movl    ACT_TASK(%ecx),%ebx                     /* point to current task  */
 
@@ -1094,7 +1094,7 @@ Entry(lo64_diag_scall)
        popl    %esp                    // Get back the original stack
        jmp     EXT(return_to_user)     // Normal return, do not check asts...
 2:     
-       CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
+       CCALL5(i386_exception, $EXC_SYSCALL, $0x6000, $0, $1, $0)
                // pass what would be the diag syscall
                // error return - cause an exception
        /* no return */
index fb7afc4d05b9387fa8a0f798ec7744c4cf37d397..23ce618604ebff3a6b19ab069d38d99d0270cae2 100644 (file)
@@ -126,10 +126,10 @@ static void       mp_broadcast_action(void);
 
 static int             NMIInterruptHandler(x86_saved_state_t *regs);
 static boolean_t       cpu_signal_pending(int cpu, mp_event_t event);
-static void            cpu_NMI_interrupt(int cpu);
 
 boolean_t      smp_initialized = FALSE;
-boolean_t      force_immediate_debugger_NMI = FALSE;
+volatile boolean_t     force_immediate_debugger_NMI = FALSE;
+volatile boolean_t     pmap_tlb_flush_timeout = FALSE;
 
 decl_simple_lock_data(,mp_kdp_lock);
 
@@ -931,10 +931,22 @@ cpu_signal_handler(x86_saved_state_t *regs)
 static int __attribute__((noinline))
 NMIInterruptHandler(x86_saved_state_t *regs)
 {
-       boolean_t state = ml_set_interrupts_enabled(FALSE);
+       void    *stackptr;
+       
        sync_iss_to_iks_unconditionally(regs);
+       __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
+
+       if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {
+               panic_i386_backtrace(stackptr, 10, "Panic: Unresponsive processor\n", TRUE, regs);
+               panic_io_port_read();
+               mca_check_save();
+               if (pmsafe_debug)
+                       pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
+               for(;;) {
+                       cpu_pause();
+               }
+       }
        mp_kdp_wait(FALSE);
-       (void) ml_set_interrupts_enabled(state);
        return 1;
 }
 
@@ -1003,7 +1015,7 @@ cpu_interrupt(int cpu)
 /*
  * Send a true NMI via the local APIC to the specified CPU.
  */
-static void
+void
 cpu_NMI_interrupt(int cpu)
 {
        boolean_t       state;
index 062d2a488cd762898c7f9cde486d29de017134a3..99ba34fe2441bd78ab32d1058d7f8a24e7d756ce 100644 (file)
@@ -192,7 +192,8 @@ extern      int     kdb_debug;
 extern int     kdb_active[];
 
 extern volatile boolean_t mp_kdp_trap;
-extern boolean_t force_immediate_debugger_NMI;
+extern  volatile boolean_t force_immediate_debugger_NMI;
+extern  volatile boolean_t pmap_tlb_flush_timeout;
 
 extern void    mp_kdp_enter(void);
 extern void    mp_kdp_exit(void);
index 43257779a47a18b9509dc0a572f325774f3385b3..0da1d98c02b3f195fe97602c6f1017329ad04374 100644 (file)
@@ -72,6 +72,7 @@ extern void   i386_signal_cpus(mp_event_t event, mp_sync_t mode);
 extern int     i386_active_cpus(void);
 extern void    i386_activate_cpu(void);
 extern void    i386_deactivate_cpu(void);
+extern void    cpu_NMI_interrupt(int /* cpu */);
 
 __END_DECLS
 
index 72ecf5f761a771085e94ed83c0f383deb23f8530..b839471934efd31a9a0cfdaeae2db6b869ae15ec 100644 (file)
@@ -4493,6 +4493,20 @@ vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
   return  vaddr;
 }
 
+static inline void
+pmap_cpuset_NMIPI(cpu_set cpu_mask) {
+       unsigned int cpu, cpu_bit;
+       uint64_t deadline;
+
+       for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+               if (cpu_mask & cpu_bit)
+                       cpu_NMI_interrupt(cpu);
+       }
+       deadline = mach_absolute_time() + (LockTimeOut >> 2);
+       while (mach_absolute_time() < deadline)
+               cpu_pause();
+}
+
 
 /*
  * Called with pmap locked, we:
@@ -4551,28 +4565,33 @@ pmap_flush_tlbs(pmap_t  pmap)
                   (int) pmap, cpus_to_signal, flush_self, 0, 0);
 
        if (cpus_to_signal) {
+               cpu_set cpus_to_respond = cpus_to_signal;
+
                deadline = mach_absolute_time() + LockTimeOut;
                /*
                 * Wait for those other cpus to acknowledge
                 */
-               for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
-                       while ((cpus_to_signal & cpu_bit) != 0) {
-                               if (!cpu_datap(cpu)->cpu_running ||
-                                   cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
-                                   !CPU_CR3_IS_ACTIVE(cpu)) {
-                                       cpus_to_signal &= ~cpu_bit;
-                                       break;
-                               }
-                               if (mach_absolute_time() > deadline) {
-                                       force_immediate_debugger_NMI = TRUE;
-                                       panic("pmap_flush_tlbs() timeout: "
-                                                               "cpu %d failing to respond to interrupts, pmap=%p cpus_to_signal=%lx",
-                                                               cpu, pmap, cpus_to_signal);
+               while (cpus_to_respond != 0) {
+                       if (mach_absolute_time() > deadline) {
+                               pmap_tlb_flush_timeout = TRUE;
+                               pmap_cpuset_NMIPI(cpus_to_respond);
+                               panic("pmap_flush_tlbs() timeout: "
+                                   "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
+                                   pmap, cpus_to_respond);
+                       }
+
+                       for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
+                               if ((cpus_to_respond & cpu_bit) != 0) {
+                                       if (!cpu_datap(cpu)->cpu_running ||
+                                           cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
+                                           !CPU_CR3_IS_ACTIVE(cpu)) {
+                                               cpus_to_respond &= ~cpu_bit;
+                                       }
+                                       cpu_pause();
                                }
-                               cpu_pause();
+                               if (cpus_to_respond == 0)
+                                       break;
                        }
-                       if (cpus_to_signal == 0)
-                               break;
                }
        }
 
@@ -4585,7 +4604,6 @@ pmap_flush_tlbs(pmap_t    pmap)
        if (flush_self)
                flush_tlb();
 
-
        PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
                   (int) pmap, cpus_to_signal, flush_self, 0, 0);
 }
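
pmap_flush_tlbs() now tracks the set of CPUs still owing an acknowledgement and, on timeout, NMIs exactly those CPUs before panicking, which lets NMIInterruptHandler() (above) print a backtrace on each wedged processor. A skeleton of that wait/NMI/panic loop, with the per-CPU ack test folded into a stand-in:

    #include <stdint.h>

    typedef unsigned long cpu_set;

    extern uint64_t mach_absolute_time(void);
    extern void     cpu_pause(void);
    extern void     cpu_NMI_interrupt(int cpu);
    extern void     panic(const char *fmt, ...);
    extern int      cpu_acked_flush(int cpu);   /* stand-in for the running/invalid/CR3 test */

    volatile int pmap_tlb_flush_timeout_flag;   /* seen by the NMI handler */

    static void
    wait_for_tlb_acks(cpu_set cpus_to_respond, int ncpus, uint64_t deadline)
    {
        while (cpus_to_respond != 0) {
            if (mach_absolute_time() > deadline) {
                pmap_tlb_flush_timeout_flag = 1;
                for (int cpu = 0; cpu < ncpus; cpu++)   /* NMI only the stragglers */
                    if (cpus_to_respond & (1UL << cpu))
                        cpu_NMI_interrupt(cpu);
                panic("TLB flush timeout, cpus_to_respond=0x%lx",
                    (unsigned long)cpus_to_respond);
            }
            for (int cpu = 0; cpu < ncpus; cpu++) {
                if ((cpus_to_respond & (1UL << cpu)) && cpu_acked_flush(cpu))
                    cpus_to_respond &= ~(1UL << cpu);
            }
            cpu_pause();
        }
    }
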
index bec974a5d1bde202a486827a7be10b6b9e345894..d65419a236962a76fc7a4f083f04b8ce0621b6b5 100644 (file)
@@ -735,7 +735,7 @@ panic_double_fault(
        /*
         * Print backtrace leading to first fault:
         */
-       panic_i386_backtrace((void *) my_ktss->ebp, 10);
+       panic_i386_backtrace((void *) my_ktss->ebp, 10, NULL, FALSE, NULL);
 #endif
 
        panic("Double fault at 0x%08x, thread:%p, code:0x%x, "
index 2488dd2a8ee7eeb469770eea2576f03005018632..9ae4a8b5fea7358aa9ec0af3e11c0d5f17902c23 100644 (file)
@@ -145,7 +145,7 @@ extern perfCallback perfTrapHook;
 extern perfCallback perfASTHook;
 extern perfCallback perfIntHook;
 
-extern void            panic_i386_backtrace(void *, int);
+extern void            panic_i386_backtrace(void *, int, const char *, boolean_t, x86_saved_state_t *);
 #if MACH_KDP
 extern boolean_t       kdp_i386_trap(
                                unsigned int,
index b0280482533a6bce82bf33e3e15206942bdbe663..e9af2b6ee51fd2c5490efa60111adba558302f71 100644 (file)
@@ -1026,7 +1026,7 @@ int
 fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t  *vnodeaddr, uint32_t  *vid)
 {
 
-       vm_map_t map = task->map;
+       vm_map_t map;
        vm_map_offset_t address = (vm_map_offset_t )arg;
        vm_map_entry_t          tmp_entry;
        vm_map_entry_t          entry;
@@ -1034,16 +1034,23 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
        vm_region_extended_info_data_t extended;
        vm_region_top_info_data_t top;
 
-
-       if (map == VM_MAP_NULL) 
-               return(0);
-
+           task_lock(task);
+           map = task->map;
+           if (map == VM_MAP_NULL) 
+           {
+                       task_unlock(task);
+                       return(0);
+           }
+           vm_map_reference(map); 
+           task_unlock(task);
+           
            vm_map_lock_read(map);
 
            start = address;
            if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
                if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
                        vm_map_unlock_read(map);
+                       vm_map_deallocate(map); 
                        return(0);
                }
            } else {
@@ -1108,11 +1115,13 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
 
                if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
                        vm_map_unlock_read(map);
+                       vm_map_deallocate(map); 
                        return(1);
                }
            }
 
            vm_map_unlock_read(map);
+           vm_map_deallocate(map); 
            return(1);
 }
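
fill_procregioninfo() no longer reads task->map unlocked: it snapshots the pointer under the task lock, takes its own map reference, and pairs every return with vm_map_deallocate(). The take-a-reference-under-the-lock shape in miniature; the accessors are stand-ins:

    struct vm_map;
    struct task;

    extern void task_lock(struct task *t);
    extern void task_unlock(struct task *t);
    extern struct vm_map *task_get_map(struct task *t); /* stand-in for task->map */
    extern void vm_map_reference(struct vm_map *map);
    extern void vm_map_deallocate(struct vm_map *map);
    extern int  inspect_map(struct vm_map *map);        /* stand-in for the region walk */

    static int
    with_task_map(struct task *task)
    {
        struct vm_map *map;
        int ret;

        task_lock(task);
        map = task_get_map(task);
        if (map == NULL) {
            task_unlock(task);
            return 0;
        }
        vm_map_reference(map);          /* map now can't be destroyed under us */
        task_unlock(task);

        ret = inspect_map(map);
        vm_map_deallocate(map);         /* drop the reference on every exit path */
        return ret;
    }
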
 
index a573d49eac4a726824881f931b4be1fc2c39f616..680c07f12f506ffa56905f73ce8ceebad812fcc6 100644 (file)
@@ -1914,7 +1914,7 @@ vm_object_pmap_protect(
                        for (phys_addr = phys_start;
                             phys_addr < phys_end;
                             phys_addr += PAGE_SIZE_64) {
-                               pmap_page_protect(phys_addr >> 12, prot);
+                               pmap_page_protect(phys_addr >> PAGE_SHIFT, prot);
                        }
                }
                return;
@@ -4766,7 +4766,7 @@ vm_object_populate_with_private(
                
                /* shadows on contiguous memory are not allowed */
                /* we therefore can use the offset field */
-               object->shadow_offset = (vm_object_offset_t)(phys_page << 12);
+               object->shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
                object->size = size;
        }
        vm_object_unlock(object);
@@ -6195,7 +6195,7 @@ vm_object_page_op(
                if(object->phys_contiguous) {
                        if (phys_entry) {
                                *phys_entry = (ppnum_t)
-                                       (object->shadow_offset >> 12);
+                                       (object->shadow_offset >> PAGE_SHIFT);
                        }
                        vm_object_unlock(object);
                        return KERN_SUCCESS;
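
The vm_object.c hunks replace hard-coded 12-bit shifts with PAGE_SHIFT and, in vm_object_populate_with_private(), widen the page number before shifting so the result cannot truncate in 32 bits. In miniature; the typedefs are assumed to match osfmk's:

    #include <stdint.h>

    #define PAGE_SHIFT 12                   /* 4K pages, as on i386 */

    typedef uint32_t ppnum_t;               /* physical page number */
    typedef uint64_t vm_object_offset_t;

    /* Widen before shifting: '(uint64_t)page << PAGE_SHIFT' keeps all bits,
     * while the old '(page << 12)' shifted in 32 bits and could truncate. */
    static vm_object_offset_t
    page_to_offset(ppnum_t page)
    {
        return (vm_object_offset_t)page << PAGE_SHIFT;
    }

    static ppnum_t
    offset_to_page(vm_object_offset_t off)
    {
        return (ppnum_t)(off >> PAGE_SHIFT);
    }
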
index 313549ecfb062e468a3d8e4540bb1ac770d88ed1..b92e35ae0abc94b7199d4f442890548be84bc082 100644 (file)
@@ -2164,18 +2164,23 @@ vm_page_free_list(
                nxt = (vm_page_t)(mem->pageq.next);
 
                if (!mem->fictitious) {
-                       mem->free = TRUE;
+                       if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
+                               mem->pageq.next = NULL;
+                               vm_page_release(mem);
+                       } else {
+                               mem->free = TRUE;
 
-                       color = mem->phys_page & vm_color_mask;
-                       if (queue_empty(&free_list[color])) {
-                               inuse[color] = inuse_list_head;
-                               inuse_list_head = color;
+                               color = mem->phys_page & vm_color_mask;
+                               if (queue_empty(&free_list[color])) {
+                                       inuse[color] = inuse_list_head;
+                                       inuse_list_head = color;
+                               }
+                               queue_enter_first(&free_list[color],
+                                                 mem,
+                                                 vm_page_t,
+                                                 pageq);
+                               pg_count++;
                        }
-                       queue_enter_first(&free_list[color],
-                                         mem,
-                                         vm_page_t,
-                                         pageq);
-                       pg_count++;
                } else {
                        assert(mem->phys_page == vm_page_fictitious_addr ||
                               mem->phys_page == vm_page_guard_addr);