From fa4905b191e0d16b0fffd53bd565eca71d01fae0 Mon Sep 17 00:00:00 2001
From: Apple
Date: Sun, 10 Aug 2003 07:38:02 +0000
Subject: [PATCH] xnu-201.19.tar.gz

---
 bsd/conf/version.minor                 |    2 +-
 bsd/hfs/hfs_vfsops.c                   |   38 +-
 bsd/isofs/cd9660/cd9660_node.c         |    7 +-
 bsd/kern/bsd_init.c                    |    4 +-
 bsd/kern/kern_descrip.c                |   43 +-
 bsd/kern/kern_exec.c                   |    3 +-
 bsd/kern/kern_exit.c                   |   35 +-
 bsd/kern/ubc_subr.c                    |    3 -
 bsd/kern/uipc_mbuf.c                   |  346 +++++-
 bsd/kern/uipc_socket.c                 |  122 +-
 bsd/kern/uipc_socket2.c                |   42 +-
 bsd/kern/uipc_usrreq.c                 |    7 +-
 bsd/miscfs/specfs/spec_vnops.c         |    4 +-
 bsd/miscfs/synthfs/synthfs_vfsops.c    |    3 +-
 bsd/netat/adsp_stream.c                |    7 +-
 bsd/netinet/ip_output.c                |    4 +-
 bsd/netinet/tcp_input.c                |    6 +
 bsd/netinet/tcp_output.c               |   59 +-
 bsd/nfs/nfs.h                          |    9 +-
 bsd/nfs/nfs_bio.c                      |  668 +++++-----
 bsd/nfs/nfs_node.c                     |   93 +-
 bsd/nfs/nfs_nqlease.c                  |    5 +-
 bsd/nfs/nfs_serv.c                     |   27 +-
 bsd/nfs/nfs_socket.c                   |  101 +-
 bsd/nfs/nfs_subs.c                     |  104 +-
 bsd/nfs/nfs_vfsops.c                   |   56 +-
 bsd/nfs/nfs_vnops.c                    | 1100 +++++++----
 bsd/nfs/nfsm_subs.h                    |   23 +-
 bsd/nfs/nfsnode.h                      |   12 +-
 bsd/sys/mbuf.h                         |    2 +
 bsd/ufs/ufs/ufs_inode.c                |    7 +-
 bsd/vfs/vfs_bio.c                      |   28 +-
 bsd/vfs/vfs_subr.c                     |   26 +-
 bsd/vfs/vfs_syscalls.c                 |   25 +-
 bsd/vfs/vfs_vnops.c                    |    8 +-
 bsd/vm/vnode_pager.c                   |  142 +--
 .../drvAppleRootDomain/RootDomain.cpp  |    7 +-
 iokit/Kernel/IODeviceTreeSupport.cpp   |   38 +-
 iokit/Kernel/IONVRAM.cpp               |    6 +-
 iokit/Kernel/IOPlatformExpert.cpp      |   77 +-
 iokit/Kernel/IORegistryEntry.cpp       |   26 +-
 iokit/Kernel/IOService.cpp             |   15 +-
 iokit/KernelConfigTables.cpp           |   10 +-
 iokit/conf/version.minor               |    2 +-
 iokit/conf/version.variant             |    1 +
 libkern/conf/version.minor             |    2 +-
 libkern/ppc/OSAtomic.s                 |    7 -
 libsa/conf/version.minor               |    2 +-
 osfmk/conf/kernelversion.minor         |    2 +-
 osfmk/conf/version.minor               |    2 +-
 osfmk/default_pager/dp_backing_store.c |    3 -
 pexpert/conf/version.minor             |    2 +-
 52 files changed, 1893 insertions(+), 1480 deletions(-)

diff --git a/bsd/conf/version.minor b/bsd/conf/version.minor
index 0cfbf0888..00750edc0 100644
--- a/bsd/conf/version.minor
+++ b/bsd/conf/version.minor
@@ -1 +1 @@
-2
+3
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index 001c0f600..6329bd207 100644
--- a/bsd/hfs/hfs_vfsops.c
+++ b/bsd/hfs/hfs_vfsops.c
@@ -1246,6 +1246,8 @@
 	struct proc *p;
 	struct hfsnode *hp;
 	struct hfsmount *hfsmp = VFSTOHFS(mp);
 	ExtendedVCB *vcb;
+	struct vnode *meta_vp[3];
+	int i;
 	int error, allerror = 0;
 	DBG_FUNC_NAME("hfs_sync");
@@ -1285,7 +1287,8 @@
 loop:;
 		nvp = vp->v_mntvnodes.le_next;
 		hp = VTOH(vp);
-		if ((vp->v_type == VNON) || (((hp->h_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
+		if ((vp->v_flag & VSYSTEM) || (vp->v_type == VNON) ||
+		    (((hp->h_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
 		    (vp->v_dirtyblkhd.lh_first == NULL) && !(vp->v_flag & VHASDIRTY))) {
 			simple_unlock(&vp->v_interlock);
 			simple_unlock(&mntvnode_slock);
@@ -1315,30 +1318,31 @@ loop:;
 		simple_lock(&mntvnode_slock);
 	};
-	vcb = HFSTOVCB(hfsmp);
+	vcb = HFSTOVCB(hfsmp);
+	meta_vp[0] = vcb->extentsRefNum;
+	meta_vp[1] = vcb->catalogRefNum;
+	meta_vp[2] = vcb->allocationsRefNum;	/* This is NULL for standard HFS */
+
+	/* Now sync our three metadata files */
+	for (i = 0; i < 3; ++i) {
+		struct vnode *btvp;
+
+		btvp = meta_vp[i];
-	/* Now reprocess the BTree node, stored above */
-	{
-		struct vnode *btvp;
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
- */ - btvp = vcb->extentsRefNum; if ((btvp==0) || (btvp->v_type == VNON) || (btvp->v_mount != mp)) - goto skipBtree; + continue; simple_lock(&btvp->v_interlock); hp = VTOH(btvp); if (((hp->h_nodeflags & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && (btvp->v_dirtyblkhd.lh_first == NULL) && !(btvp->v_flag & VHASDIRTY)) { simple_unlock(&btvp->v_interlock); - goto skipBtree; + continue; } simple_unlock(&mntvnode_slock); error = vget(btvp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); - goto skipBtree; + continue; } if ((error = VOP_FSYNC(btvp, cred, waitfor, p))) allerror = error; @@ -1347,15 +1351,15 @@ loop:; simple_lock(&mntvnode_slock); }; -skipBtree:; - simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ - if ((error = VOP_FSYNC(hfsmp->hfs_devvp, cred, waitfor, p))) - allerror = error; + if (vcb->vcbSigWord == kHFSSigWord) { + if ((error = VOP_FSYNC(hfsmp->hfs_devvp, cred, waitfor, p))) + allerror = error; + } /* * Write back modified superblock. */ diff --git a/bsd/isofs/cd9660/cd9660_node.c b/bsd/isofs/cd9660/cd9660_node.c index c9400cd78..f9e201a32 100644 --- a/bsd/isofs/cd9660/cd9660_node.c +++ b/bsd/isofs/cd9660/cd9660_node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -306,8 +306,9 @@ cd9660_reclaim(ap) */ cache_purge(vp); if (ip->i_devvp) { - vrele(ip->i_devvp); - ip->i_devvp = 0; + struct vnode *tvp = ip->i_devvp; + ip->i_devvp = NULL; + vrele(tvp); } if (ip->i_namep != isonullname) FREE(ip->i_namep, M_TEMP); diff --git a/bsd/kern/bsd_init.c b/bsd/kern/bsd_init.c index 1934f20f9..6a055af65 100644 --- a/bsd/kern/bsd_init.c +++ b/bsd/kern/bsd_init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -496,8 +496,8 @@ bsd_init() /* Get the vnode for '/'. Set fdp->fd_fd.fd_cdir to reference it. */ if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) panic("bsd_init: cannot find root vnode"); - filedesc0.fd_cdir = rootvnode; VREF(rootvnode); + filedesc0.fd_cdir = rootvnode; VOP_UNLOCK(rootvnode, 0, p); diff --git a/bsd/kern/kern_descrip.c b/bsd/kern/kern_descrip.c index 7ede17b05..c592c4140 100644 --- a/bsd/kern/kern_descrip.c +++ b/bsd/kern/kern_descrip.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -58,14 +58,6 @@ * SUCH DAMAGE. * * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95 - * - * History: - * CHW 8/5/98 Added F_SETSIZE command to truncate without - * zero filling space - * CHW 7/6/98 Updated Preallocate command to take a structure - * and return output. 
- * CHW 6/25/98 Fixed a bug in the lock call in fcntl - * Preallocate command */ #include @@ -103,7 +95,6 @@ getdtablesize(p, uap, retval) void *uap; register_t *retval; { - *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); return (0); } @@ -115,7 +106,6 @@ ogetdtablesize(p, uap, retval) void *uap; register_t *retval; { - *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, NOFILE); return (0); } @@ -200,8 +190,7 @@ dup2(p, uap, retval) _fdrelse(fdp, i); goto closeit; } - } - else { + } else { struct file **fpp; char flags; closeit: @@ -214,7 +203,8 @@ closeit: if (*(fpp = &fdp->fd_ofiles[new])) { struct file *fp = *fpp; - *fpp = NULL; (void) closef(fp, p); + *fpp = NULL; + (void) closef(fp, p); } } return (finishdup(fdp, old, new, retval)); @@ -972,9 +962,9 @@ ffree(fp) fp->f_cred = NOCRED; crfree(cred); } -#if 1 || DIAGNOSTIC + fp->f_count = 0; -#endif + nfiles--; FREE_ZONE(fp, sizeof *fp, M_FILE); } @@ -1062,8 +1052,7 @@ fdcopy(p) *fpp = NULL; *flags = 0; } - } - else + } else (void) memset(newfdp->fd_ofiles, 0, i * OFILESIZE); return (newfdp); @@ -1076,9 +1065,10 @@ void fdfree(p) struct proc *p; { - register struct filedesc *fdp; - register struct file **fpp; - register int i; + struct filedesc *fdp; + struct file **fpp; + int i; + struct vnode *tvp; if ((fdp = p->p_fd) == NULL) return; @@ -1093,9 +1083,14 @@ fdfree(p) FREE_ZONE(fdp->fd_ofiles, fdp->fd_nfiles * OFILESIZE, M_OFILETABL); } - vrele(fdp->fd_cdir); - if (fdp->fd_rdir) - vrele(fdp->fd_rdir); + tvp = fdp->fd_cdir; + fdp->fd_cdir = NULL; + vrele(tvp); + if (fdp->fd_rdir) { + tvp = fdp->fd_rdir; + fdp->fd_rdir = NULL; + vrele(tvp); + } FREE_ZONE(fdp, sizeof *fdp, M_FILEDESC); } diff --git a/bsd/kern/kern_exec.c b/bsd/kern/kern_exec.c index 7dd27b97c..88348db3f 100644 --- a/bsd/kern/kern_exec.c +++ b/bsd/kern/kern_exec.c @@ -530,9 +530,10 @@ again: * root set it. */ if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT)) { - vrele(p->p_tracep); + struct vnode *tvp = p->p_tracep; p->p_tracep = NULL; p->p_traceflag = 0; + vrele(tvp); } #endif if (origvattr.va_mode & VSUID) diff --git a/bsd/kern/kern_exit.c b/bsd/kern/kern_exit.c index 84c51a09c..f588872ac 100644 --- a/bsd/kern/kern_exit.c +++ b/bsd/kern/kern_exit.c @@ -267,6 +267,8 @@ proc_exit(struct proc *p) register struct session *sp = p->p_session; if (sp->s_ttyvp) { + struct vnode *ttyvp; + /* * Controlling process. * Signal foreground pgrp, @@ -284,9 +286,10 @@ proc_exit(struct proc *p) if (sp->s_ttyvp) VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } - if (sp->s_ttyvp) - vrele(sp->s_ttyvp); + ttyvp = sp->s_ttyvp; sp->s_ttyvp = NULL; + if (ttyvp) + vrele(ttyvp); /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. @@ -303,8 +306,11 @@ proc_exit(struct proc *p) * release trace file */ p->p_traceflag = 0; /* don't trace the vrele() */ - if (p->p_tracep) - vrele(p->p_tracep); + if (p->p_tracep) { + struct vnode *tvp = p->p_tracep; + p->p_tracep = NULL; + vrele(tvp); + } #endif @@ -520,6 +526,7 @@ wait1(q, uap, retval, compat) register int nfound; register struct proc *p, *t; int status, error; + struct vnode *tvp; retry: if (uap->pid == 0) @@ -610,8 +617,10 @@ loop: /* * Release reference to text vnode */ - if (p->p_textvp) - vrele(p->p_textvp); + tvp = p->p_textvp; + p->p_textvp = NULL; + if (tvp) + vrele(tvp); /* * Finally finished with old proc entry. @@ -824,6 +833,8 @@ vproc_exit(struct proc *p) register struct session *sp = p->p_session; if (sp->s_ttyvp) { + struct vnode *ttyvp; + /* * Controlling process. 
* Signal foreground pgrp, @@ -841,9 +852,10 @@ vproc_exit(struct proc *p) if (sp->s_ttyvp) VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } - if (sp->s_ttyvp) - vrele(sp->s_ttyvp); + ttyvp = sp->s_ttyvp; sp->s_ttyvp = NULL; + if (ttyvp) + vrele(ttyvp); /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. @@ -860,8 +872,11 @@ vproc_exit(struct proc *p) * release trace file */ p->p_traceflag = 0; /* don't trace the vrele() */ - if (p->p_tracep) - vrele(p->p_tracep); + if (p->p_tracep) { + struct vnode *tvp = p->p_tracep; + p->p_tracep = NULL; + vrele(tvp); + } #endif q = p->p_children.lh_first; diff --git a/bsd/kern/ubc_subr.c b/bsd/kern/ubc_subr.c index 32a5924cb..c82fd94a2 100644 --- a/bsd/kern/ubc_subr.c +++ b/bsd/kern/ubc_subr.c @@ -322,13 +322,10 @@ ubc_setsize(struct vnode *vp, off_t nsize) /* * Get the size of the file - * For local file systems the size is locally cached. For NFS - * there might be a network transaction for this. */ off_t ubc_getsize(struct vnode *vp) { - /* XXX deal with NFS */ return (vp->v_ubcinfo->ui_size); } diff --git a/bsd/kern/uipc_mbuf.c b/bsd/kern/uipc_mbuf.c index 7f7250d3c..513cc8e53 100644 --- a/bsd/kern/uipc_mbuf.c +++ b/bsd/kern/uipc_mbuf.c @@ -121,6 +121,13 @@ mbinit() if (nclpp < 1) nclpp = 1; MBUF_LOCKINIT(); // NETISR_LOCKINIT(); + + mbstat.m_msize = MSIZE; + mbstat.m_mclbytes = MCLBYTES; + mbstat.m_minclsize = MINCLSIZE; + mbstat.m_mlen = MLEN; + mbstat.m_mhlen = MHLEN; + if (nmbclusters == 0) nmbclusters = NMBCLUSTERS; MALLOC(mclrefcnt, short *, nmbclusters * sizeof (short), @@ -330,6 +337,14 @@ m_retryhdr(canwait, type) if (m = m_retry(canwait, type)) { m->m_flags |= M_PKTHDR; m->m_data = m->m_pktdat; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.header = NULL; + m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.csum_data = 0; + m->m_pkthdr.aux = (struct mbuf *)NULL; + m->m_pkthdr.reserved1 = NULL; + m->m_pkthdr.reserved2 = NULL; } return (m); } @@ -456,13 +471,18 @@ m_getpacket(void) mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next; m->m_next = m->m_nextpkt = 0; - m->m_ext.ext_free = 0; m->m_type = MT_DATA; m->m_data = m->m_ext.ext_buf; m->m_flags = M_PKTHDR | M_EXT; - m->m_pkthdr.aux = (struct mbuf *)NULL; + m->m_pkthdr.len = 0; + m->m_pkthdr.rcvif = NULL; + m->m_pkthdr.header = NULL; m->m_pkthdr.csum_data = 0; m->m_pkthdr.csum_flags = 0; + m->m_pkthdr.aux = (struct mbuf *)NULL; + m->m_pkthdr.reserved1 = 0; + m->m_pkthdr.reserved2 = 0; + m->m_ext.ext_free = 0; m->m_ext.ext_size = MCLBYTES; m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = &m->m_ext.ext_refs; @@ -485,6 +505,142 @@ m_getpacket(void) return (m); } + +struct mbuf * +m_getpackets(int num_needed, int num_with_pkthdrs, int how) +{ + struct mbuf *m; + struct mbuf **np, *top; + + top = NULL; + np = ⊤ + + m_clalloc(num_needed, how); /* takes the MBUF_LOCK, but doesn't release it... 
*/
+
+	while (num_needed--) {
+	    if (mfree && mclfree) {	/* mbuf + cluster are available */
+		m = mfree;
+		MCHECK(m);
+		mfree = m->m_next;
+		++mclrefcnt[mtocl(m)];
+		mbstat.m_mtypes[MT_FREE]--;
+		mbstat.m_mtypes[MT_DATA]++;
+		m->m_ext.ext_buf = (caddr_t)mclfree; /* get the cluster */
+		++mclrefcnt[mtocl(m->m_ext.ext_buf)];
+		mbstat.m_clfree--;
+		mclfree = ((union mcluster *)(m->m_ext.ext_buf))->mcl_next;
+
+		m->m_next = m->m_nextpkt = 0;
+		m->m_type = MT_DATA;
+		m->m_data = m->m_ext.ext_buf;
+		m->m_ext.ext_free = 0;
+		m->m_ext.ext_size = MCLBYTES;
+		m->m_ext.ext_refs.forward = m->m_ext.ext_refs.backward = &m->m_ext.ext_refs;
+
+		if (num_with_pkthdrs == 0)
+		    m->m_flags = M_EXT;
+		else {
+		    m->m_flags = M_PKTHDR | M_EXT;
+		    m->m_pkthdr.len = 0;
+		    m->m_pkthdr.rcvif = NULL;
+		    m->m_pkthdr.header = NULL;
+		    m->m_pkthdr.csum_flags = 0;
+		    m->m_pkthdr.csum_data = 0;
+		    m->m_pkthdr.aux = (struct mbuf *)NULL;
+		    m->m_pkthdr.reserved1 = NULL;
+		    m->m_pkthdr.reserved2 = NULL;
+
+		    num_with_pkthdrs--;
+		}
+
+	    } else {
+
+		MBUF_UNLOCK();
+
+		if (num_with_pkthdrs == 0) {
+		    MGET(m, how, MT_DATA );
+		} else {
+		    MGETHDR(m, how, MT_DATA);
+
+		    if (m)
+			m->m_pkthdr.len = 0;
+		    num_with_pkthdrs--;
+		}
+		if (m == 0)
+		    return(top);
+
+		MCLGET(m, how);
+		if ((m->m_flags & M_EXT) == 0) {
+		    m_free(m);
+		    return(top);
+		}
+		MBUF_LOCK();
+	    }
+	    *np = m;
+
+	    if (num_with_pkthdrs)
+		np = &m->m_nextpkt;
+	    else
+		np = &m->m_next;
+	}
+	MBUF_UNLOCK();
+
+	return (top);
+}
+
+
+struct mbuf *
+m_getpackethdrs(int num_needed, int how)
+{
+	struct mbuf *m;
+	struct mbuf **np, *top;
+
+	top = NULL;
+	np = &top;
+
+	MBUF_LOCK();
+
+	while (num_needed--) {
+	    if (m = mfree) {	/* mbufs are available */
+		MCHECK(m);
+		mfree = m->m_next;
+		++mclrefcnt[mtocl(m)];
+		mbstat.m_mtypes[MT_FREE]--;
+		mbstat.m_mtypes[MT_DATA]++;
+
+		m->m_next = m->m_nextpkt = 0;
+		m->m_type = MT_DATA;
+		m->m_flags = M_PKTHDR;
+		m->m_data = m->m_pktdat;
+		m->m_pkthdr.len = 0;
+		m->m_pkthdr.rcvif = NULL;
+		m->m_pkthdr.header = NULL;
+		m->m_pkthdr.csum_flags = 0;
+		m->m_pkthdr.csum_data = 0;
+		m->m_pkthdr.aux = (struct mbuf *)NULL;
+		m->m_pkthdr.reserved1 = NULL;
+		m->m_pkthdr.reserved2 = NULL;
+
+	    } else {
+
+		MBUF_UNLOCK();
+
+		m = m_retryhdr(how, MT_DATA);
+
+		if (m == 0)
+		    return(top);
+
+		MBUF_LOCK();
+	    }
+	    *np = m;
+	    np = &m->m_nextpkt;
+	}
+	MBUF_UNLOCK();
+
+	return (top);
+}
+
+
 /* free an mbuf list (m_nextpkt) while following m_next under one lock.
 * returns the count of mbuf packets freed.  Used by the drivers.
*/ @@ -493,22 +649,25 @@ m_freem_list(m) struct mbuf *m; { struct mbuf *nextpkt; - int i, s, count=0; + int i, count=0; -// s = splimp(); MBUF_LOCK(); + while (m) { if (m) - nextpkt = m->m_nextpkt; /* chain of linked mbufs from driver */ + nextpkt = m->m_nextpkt; /* chain of linked mbufs from driver */ else - nextpkt = 0; + nextpkt = 0; count++; + while (m) { /* free the mbuf chain (like mfreem) */ struct mbuf *n = m->m_next; + if (n && n->m_nextpkt) panic("m_freem_list: m_nextpkt of m_next != NULL"); if (m->m_type == MT_FREE) panic("freeing free mbuf"); + if (m->m_flags & M_EXT) { if (MCLHASREFERENCE(m)) { remque((queue_t)&m->m_ext.ext_refs); @@ -526,8 +685,8 @@ m_freem_list(m) } mbstat.m_mtypes[m->m_type]--; (void) MCLUNREF(m); + mbstat.m_mtypes[MT_FREE]++; m->m_type = MT_FREE; - mbstat.m_mtypes[m->m_type]++; m->m_flags = 0; m->m_len = 0; m->m_next = mfree; @@ -536,10 +695,14 @@ m_freem_list(m) } m = nextpkt; /* bump m with saved nextpkt if any */ } - i = m_want; - m_want = 0; + if (i = m_want) + m_want = 0; + MBUF_UNLOCK(); - if (i) wakeup((caddr_t)&mfree); + + if (i) + wakeup((caddr_t)&mfree); + return (count); } @@ -638,24 +801,41 @@ m_copym(m, off0, len, wait) panic("m_copym"); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; - while (off > 0) { + + while (off >= m->m_len) { if (m == 0) panic("m_copym"); - if (off < m->m_len) - break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; + + MBUF_LOCK(); + while (len > 0) { if (m == 0) { if (len != M_COPYALL) panic("m_copym"); break; } - MGET(n, wait, m->m_type); + if (n = mfree) { + MCHECK(n); + ++mclrefcnt[mtocl(n)]; + mbstat.m_mtypes[MT_FREE]--; + mbstat.m_mtypes[m->m_type]++; + mfree = n->m_next; + n->m_next = n->m_nextpkt = 0; + n->m_type = m->m_type; + n->m_data = n->m_dat; + n->m_flags = 0; + } else { + MBUF_UNLOCK(); + n = m_retry(wait, m->m_type); + MBUF_LOCK(); + } *np = n; + if (n == 0) goto nospace; if (copyhdr) { @@ -679,30 +859,158 @@ m_copym(m, off0, len, wait) n->m_len = MHLEN; } if (m->m_flags & M_EXT) { - MBUF_LOCK(); n->m_ext = m->m_ext; insque((queue_t)&n->m_ext.ext_refs, (queue_t)&m->m_ext.ext_refs); - MBUF_UNLOCK(); n->m_data = m->m_data + off; n->m_flags |= M_EXT; - } else + } else { bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (unsigned)n->m_len); + } if (len != M_COPYALL) len -= n->m_len; off = 0; m = m->m_next; np = &n->m_next; } + MBUF_UNLOCK(); + if (top == 0) MCFail++; + return (top); nospace: + MBUF_UNLOCK(); + m_freem(top); MCFail++; return (0); } + + +struct mbuf * +m_copym_with_hdrs(m, off0, len, wait, m_last, m_off) + register struct mbuf *m; + int off0, wait; + register int len; + struct mbuf **m_last; + int *m_off; +{ + register struct mbuf *n, **np; + register int off = off0; + struct mbuf *top = 0; + int copyhdr = 0; + int type; + + if (off == 0 && m->m_flags & M_PKTHDR) + copyhdr = 1; + + if (*m_last) { + m = *m_last; + off = *m_off; + } else { + while (off >= m->m_len) { + off -= m->m_len; + m = m->m_next; + } + } + MBUF_LOCK(); + + while (len > 0) { + if (top == 0) + type = MT_HEADER; + else { + if (m == 0) + panic("m_gethdr_and_copym"); + type = m->m_type; + } + if (n = mfree) { + MCHECK(n); + ++mclrefcnt[mtocl(n)]; + mbstat.m_mtypes[MT_FREE]--; + mbstat.m_mtypes[type]++; + mfree = n->m_next; + n->m_next = n->m_nextpkt = 0; + n->m_type = type; + + if (top) { + n->m_data = n->m_dat; + n->m_flags = 0; + } else { + n->m_data = n->m_pktdat; + n->m_flags = M_PKTHDR; + n->m_pkthdr.len = 0; + n->m_pkthdr.rcvif = NULL; + n->m_pkthdr.header = NULL; + n->m_pkthdr.csum_flags = 0; + n->m_pkthdr.csum_data 
= 0; + n->m_pkthdr.aux = (struct mbuf *)NULL; + n->m_pkthdr.reserved1 = NULL; + n->m_pkthdr.reserved2 = NULL; + } + } else { + MBUF_UNLOCK(); + if (top) + n = m_retry(wait, type); + else + n = m_retryhdr(wait, type); + MBUF_LOCK(); + } + if (n == 0) + goto nospace; + if (top == 0) { + top = n; + np = &top->m_next; + continue; + } else + *np = n; + + if (copyhdr) { + M_COPY_PKTHDR(n, m); + n->m_pkthdr.len = len; + copyhdr = 0; + } + n->m_len = min(len, (m->m_len - off)); + + if (m->m_flags & M_EXT) { + n->m_ext = m->m_ext; + insque((queue_t)&n->m_ext.ext_refs, (queue_t)&m->m_ext.ext_refs); + n->m_data = m->m_data + off; + n->m_flags |= M_EXT; + } else { + bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), + (unsigned)n->m_len); + } + len -= n->m_len; + + if (len == 0) { + if ((off + n->m_len) == m->m_len) { + *m_last = m->m_next; + *m_off = 0; + } else { + *m_last = m; + *m_off = off + n->m_len; + } + break; + } + off = 0; + m = m->m_next; + np = &n->m_next; + } + MBUF_UNLOCK(); + + return (top); +nospace: + MBUF_UNLOCK(); + + if (top) + m_freem(top); + MCFail++; + return (0); +} + + /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. @@ -1172,7 +1480,11 @@ m_dup(register struct mbuf *m, int how) n->m_pkthdr.len = m->m_pkthdr.len; n->m_pkthdr.rcvif = m->m_pkthdr.rcvif; n->m_pkthdr.header = NULL; + n->m_pkthdr.csum_flags = 0; + n->m_pkthdr.csum_data = 0; n->m_pkthdr.aux = NULL; + n->m_pkthdr.reserved1 = 0; + n->m_pkthdr.reserved2 = 0; bcopy(m->m_data, n->m_data, m->m_pkthdr.len); return(n); } diff --git a/bsd/kern/uipc_socket.c b/bsd/kern/uipc_socket.c index f8fbf313d..c5e6b4891 100644 --- a/bsd/kern/uipc_socket.c +++ b/bsd/kern/uipc_socket.c @@ -117,6 +117,7 @@ SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, ""); /* Should we get a maximum also ??? 
*/ +static int sosendmaxchain = 65536; static int sosendminchain = 16384; SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain, 0, ""); @@ -818,7 +819,7 @@ sosend(so, addr, uio, top, control, flags) { struct mbuf **mp; - register struct mbuf *m; + register struct mbuf *m, *freelist = NULL; register long space, len, resid; int clen = 0, error, s, dontroute, mlen, sendflags; int atomic = sosendallatonce(so) || top; @@ -911,6 +912,7 @@ restart: splx(s); mp = ⊤ space -= clen; + do { if (uio == NULL) { /* @@ -920,41 +922,69 @@ restart: if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { - boolean_t funnel_state = TRUE; - int chainmbufs = (sosendminchain > 0 && resid >= sosendminchain); - - if (chainmbufs) - funnel_state = thread_funnel_set(network_flock, FALSE); + boolean_t dropped_funnel = FALSE; + int chainlength; + int bytes_to_copy; + + bytes_to_copy = min(resid, space); + + if (sosendminchain > 0) { + if (bytes_to_copy >= sosendminchain) { + dropped_funnel = TRUE; + (void)thread_funnel_set(network_flock, FALSE); + } + chainlength = 0; + } else + chainlength = sosendmaxchain; + do { - KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0); - if (top == 0) { + + if (bytes_to_copy >= MINCLSIZE) { + if ((m = freelist) == NULL) { + int num_needed; + int hdrs_needed = 0; + + if (top == 0) + hdrs_needed = 1; + num_needed = bytes_to_copy / MCLBYTES; + + if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE) + num_needed++; + + if ((freelist = m_getpackets(num_needed, hdrs_needed, M_WAIT)) == NULL) + goto getpackets_failed; + m = freelist; + } + freelist = m->m_next; + m->m_next = NULL; + + mlen = MCLBYTES; + len = min(mlen, bytes_to_copy); + } else { +getpackets_failed: + if (top == 0) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; - } else { + } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; + } + len = min(mlen, bytes_to_copy); + /* + * For datagram protocols, leave room + * for protocol headers in first mbuf. + */ + if (atomic && top == 0 && len < mlen) + MH_ALIGN(m, len); } - if (resid >= MINCLSIZE) { - MCLGET(m, M_WAIT); - if ((m->m_flags & M_EXT) == 0) - goto nopages; - mlen = MCLBYTES; - len = min(min(mlen, resid), space); - } else { -nopages: - len = min(min(mlen, resid), space); - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. 
- */ - if (atomic && top == 0 && len < mlen) - MH_ALIGN(m, len); - } - KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_NONE, -1, 0, 0, 0, 0); + chainlength += len; + space -= len; + error = uiomove(mtod(m, caddr_t), (int)len, uio); + resid = uio->uio_resid; m->m_len = len; @@ -968,9 +998,12 @@ nopages: top->m_flags |= M_EOR; break; } - } while (space > 0 && (chainmbufs || atomic || resid < MINCLSIZE)); - if (chainmbufs) - funnel_state = thread_funnel_set(network_flock, TRUE); + bytes_to_copy = min(resid, space); + + } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE)); + + if (dropped_funnel == TRUE) + (void)thread_funnel_set(network_flock, TRUE); if (error) goto release; } @@ -1024,6 +1057,9 @@ nopages: { splx(s); if (error == EJUSTRETURN) { sbunlock(&so->so_snd); + + if (freelist) + m_freem_list(freelist); return(0); } goto release; @@ -1056,6 +1092,8 @@ out: m_freem(top); if (control) m_freem(control); + if (freelist) + m_freem_list(freelist); KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, so, @@ -1093,6 +1131,7 @@ soreceive(so, psa, uio, mp0, controlp, flagsp) int *flagsp; { register struct mbuf *m, **mp; + register struct mbuf *free_list, *ml; register int flags, len, error, s, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; @@ -1295,6 +1334,10 @@ dontblock: } moff = 0; offset = 0; + + free_list = m; + ml = (struct mbuf *)0; + while (m && uio->uio_resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) @@ -1357,8 +1400,9 @@ dontblock: so->so_rcv.sb_mb = m = m->m_next; *mp = (struct mbuf *)0; } else { - MFREE(m, so->so_rcv.sb_mb); - m = so->so_rcv.sb_mb; + m->m_nextpkt = 0; + ml = m; + m = m->m_next; } if (m) m->m_nextpkt = nextrecord; @@ -1401,6 +1445,12 @@ dontblock: !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; + + if (ml) { + so->so_rcv.sb_mb = ml->m_next; + ml->m_next = (struct mbuf *)0; + m_freem_list(free_list); + } error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); @@ -1409,10 +1459,18 @@ dontblock: return (0); } m = so->so_rcv.sb_mb; - if (m) + if (m) { nextrecord = m->m_nextpkt; + free_list = m; + } + ml = (struct mbuf *)0; } } + if (ml) { + so->so_rcv.sb_mb = ml->m_next; + ml->m_next = (struct mbuf *)0; + m_freem_list(free_list); + } if (m && pr->pr_flags & PR_ATOMIC) { if (so->so_options & SO_DONTTRUNC) diff --git a/bsd/kern/uipc_socket2.c b/bsd/kern/uipc_socket2.c index 4026fe677..33d2ede3b 100644 --- a/bsd/kern/uipc_socket2.c +++ b/bsd/kern/uipc_socket2.c @@ -71,6 +71,12 @@ #include #include +#include + +#define DBG_FNC_SBDROP NETDBG_CODE(DBG_NETSOCK, 4) +#define DBG_FNC_SBAPPEND NETDBG_CODE(DBG_NETSOCK, 5) + + /* * Primitive routines for operating on sockets and socket buffers */ @@ -281,6 +287,7 @@ sonewconn(head, connstatus) so->so_pgid = head->so_pgid; so->so_uid = head->so_uid; so->so_rcv.sb_flags |= SB_RECV; /* XXX */ + (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if (so->so_proto->pr_sfilter.tqh_first) @@ -572,6 +579,9 @@ sbappend(sb, m) { register struct kextcb *kp; register struct mbuf *n; + + KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_START), sb, m->m_len, 0, 0, 0); + if (m == 0) return; kp = sotokextcb(sbtoso(sb)); @@ -594,6 +604,8 @@ sbappend(sb, m) } while (n->m_next && (n = n->m_next)); } sbcompress(sb, m, n); + + KERNEL_DEBUG((DBG_FNC_SBAPPEND | DBG_FUNC_END), sb, sb->sb_cc, 0, 0, 0); } #ifdef SOCKBUF_DEBUG @@ -920,10 +932,12 @@ sbdrop(sb, len) register struct sockbuf *sb; register int len; { - register 
struct mbuf *m, *mn; - struct mbuf *next; + register struct mbuf *m, *free_list, *ml; + struct mbuf *next, *last; register struct kextcb *kp; + KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_START), sb, len, 0, 0, 0); + kp = sotokextcb(sbtoso(sb)); while (kp) { if (kp->e_sout && kp->e_sout->su_sbdrop) @@ -932,13 +946,15 @@ sbdrop(sb, len) } kp = kp->e_next; } - next = (m = sb->sb_mb) ? m->m_nextpkt : 0; + free_list = last = m; + ml = (struct mbuf *)0; + while (len > 0) { if (m == 0) { if (next == 0) panic("sbdrop"); - m = next; + m = last = next; next = m->m_nextpkt; continue; } @@ -950,20 +966,30 @@ sbdrop(sb, len) } len -= m->m_len; sbfree(sb, m); - MFREE(m, mn); - m = mn; + + ml = m; + m = m->m_next; } while (m && m->m_len == 0) { sbfree(sb, m); - MFREE(m, mn); - m = mn; + + ml = m; + m = m->m_next; + } + if (ml) { + ml->m_next = (struct mbuf *)0; + last->m_nextpkt = (struct mbuf *)0; + m_freem_list(free_list); } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; + postevent(0, sb, EV_RWBYTES); + + KERNEL_DEBUG((DBG_FNC_SBDROP | DBG_FUNC_END), sb, 0, 0, 0, 0); } /* diff --git a/bsd/kern/uipc_usrreq.c b/bsd/kern/uipc_usrreq.c index 35f27aa4f..459a67a05 100644 --- a/bsd/kern/uipc_usrreq.c +++ b/bsd/kern/uipc_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -524,11 +524,12 @@ unp_detach(unp) unp->unp_gencnt = ++unp_gencnt; --unp_count; if (unp->unp_vnode) { + struct vnode *tvp = unp->unp_vnode; unp->unp_vnode->v_socket = 0; + unp->unp_vnode = 0; thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - vrele(unp->unp_vnode); + vrele(tvp); thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); - unp->unp_vnode = 0; } if (unp->unp_conn) unp_disconnect(unp); diff --git a/bsd/miscfs/specfs/spec_vnops.c b/bsd/miscfs/specfs/spec_vnops.c index 86dba1588..81b836085 100644 --- a/bsd/miscfs/specfs/spec_vnops.c +++ b/bsd/miscfs/specfs/spec_vnops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -663,8 +663,8 @@ spec_close(ap) */ if (vcount(vp) == 2 && ap->a_p && vp == ap->a_p->p_session->s_ttyvp) { - vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; + vrele(vp); } /* * If the vnode is locked, then we are in the midst diff --git a/bsd/miscfs/synthfs/synthfs_vfsops.c b/bsd/miscfs/synthfs/synthfs_vfsops.c index 35c00e8dc..121b99dcb 100644 --- a/bsd/miscfs/synthfs/synthfs_vfsops.c +++ b/bsd/miscfs/synthfs/synthfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. 
* * @APPLE_LICENSE_HEADER_START@ * @@ -34,7 +34,6 @@ #include #include #include -//#include #include #include #include diff --git a/bsd/netat/adsp_stream.c b/bsd/netat/adsp_stream.c index 8dd2ec374..c3ea0ae2f 100644 --- a/bsd/netat/adsp_stream.c +++ b/bsd/netat/adsp_stream.c @@ -339,7 +339,12 @@ int adsp_wput(gref, mp) int s; gbuf_t *xm; ioc_t *iocbp; - CCBPtr sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); + CCBPtr sp; + + if (gref->info) + sp = (CCBPtr)gbuf_rptr(((gbuf_t *)gref->info)); + else + sp = 0; if (gbuf_type(mp) == MSG_IOCTL) { iocbp = (ioc_t *)gbuf_rptr(mp); diff --git a/bsd/netinet/ip_output.c b/bsd/netinet/ip_output.c index ff9b2da85..fa63025aa 100644 --- a/bsd/netinet/ip_output.c +++ b/bsd/netinet/ip_output.c @@ -351,8 +351,10 @@ ip_output(m0, opt, ro, flags, imo) */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; - if (imo->imo_multicast_ifp != NULL) + if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; + dl_tag = ifp->if_data.default_proto; + } if (imo->imo_multicast_vif != -1) ip->ip_src.s_addr = ip_mcast_src(imo->imo_multicast_vif); diff --git a/bsd/netinet/tcp_input.c b/bsd/netinet/tcp_input.c index 599c6e0d9..92f3697c2 100644 --- a/bsd/netinet/tcp_input.c +++ b/bsd/netinet/tcp_input.c @@ -721,6 +721,12 @@ findpcb: goto dropwithreset; if (tp->t_state == TCPS_CLOSED) goto drop; + /* + * Bogus state when listening port owned by SharedIP with loopback as the + * only configured interface: BlueBox does not filters loopback + */ + if (tp->t_state == TCP_NSTATES) + goto drop; /* Unscale the window into a 32-bit value. */ if ((thflags & TH_SYN) == 0) diff --git a/bsd/netinet/tcp_output.c b/bsd/netinet/tcp_output.c index c648029c6..56043b059 100644 --- a/bsd/netinet/tcp_output.c +++ b/bsd/netinet/tcp_output.c @@ -93,6 +93,7 @@ #endif #include + #define DBG_LAYER_BEG NETDBG_CODE(DBG_NETTCP, 1) #define DBG_LAYER_END NETDBG_CODE(DBG_NETTCP, 3) #define DBG_FNC_TCP_OUTPUT NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1) @@ -128,14 +129,19 @@ tcp_output(tp) #if INET6 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0; #endif + int last_off; + int m_off; + struct mbuf *m_last = 0; + struct mbuf *m_head = 0; + KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_START, 0,0,0,0,0); + KERNEL_DEBUG(DBG_LAYER_BEG, ((tp->t_template->tt_dport << 16) | tp->t_template->tt_sport), (((tp->t_template->tt_src.s_addr & 0xffff) << 16) | (tp->t_template->tt_dst.s_addr & 0xffff)), 0,0,0); - /* * Determine length of data that should be transmitted, * and flags that will be used. 
@@ -563,33 +569,57 @@ send: m->m_len += hdrlen; m->m_data -= hdrlen; #else - MGETHDR(m, M_DONTWAIT, MT_HEADER); - if (m == NULL) { - error = ENOBUFS; - goto out; - } + m = NULL; #if INET6 if (MHLEN < hdrlen + max_linkhdr) { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + error = ENOBUFS; + goto out; + } MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); error = ENOBUFS; goto out; } + m->m_data += max_linkhdr; + m->m_len = hdrlen; } #endif - m->m_data += max_linkhdr; - m->m_len = hdrlen; if (len <= MHLEN - hdrlen - max_linkhdr) { + if (m == NULL) { + MGETHDR(m, M_DONTWAIT, MT_HEADER); + if (m == NULL) { + error = ENOBUFS; + goto out; + } + m->m_data += max_linkhdr; + m->m_len = hdrlen; + } m_copydata(so->so_snd.sb_mb, off, (int) len, mtod(m, caddr_t) + hdrlen); m->m_len += len; } else { - m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); - if (m->m_next == 0) { - (void) m_free(m); - error = ENOBUFS; - goto out; + if (m != NULL) { + m->m_next = m_copy(so->so_snd.sb_mb, off, (int) len); + if (m->m_next == 0) { + (void) m_free(m); + error = ENOBUFS; + goto out; + } + } else { + if (m_head != so->so_snd.sb_mb || last_off != off) + m_last = NULL; + last_off = off + len; + m_head = so->so_snd.sb_mb; + + if ((m = m_copym_with_hdrs(so->so_snd.sb_mb, off, (int) len, M_DONTWAIT, &m_last, &m_off)) == NULL) { + error = ENOBUFS; + goto out; + } + m->m_data += max_linkhdr; + m->m_len = hdrlen; } } #endif @@ -701,6 +731,7 @@ send: */ tp->snd_up = tp->snd_una; /* drag it along */ + /* * Put TCP length in extended header, and then * checksum extended header and data. @@ -857,8 +888,6 @@ send: KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport), (((thtoti(th)->ti_src.s_addr & 0xffff) << 16) | (thtoti(th)->ti_dst.s_addr & 0xffff)), th->th_seq, th->th_ack, th->th_win); - - #if 1 /* * See if we should do MTU discovery. We do it only if the following diff --git a/bsd/nfs/nfs.h b/bsd/nfs/nfs.h index 00cc5f468..c98c92458 100644 --- a/bsd/nfs/nfs.h +++ b/bsd/nfs/nfs.h @@ -638,9 +638,9 @@ void nfsm_srvpostopattr __P((struct nfsrv_descript *, int, struct vattr *, int netaddr_match __P((int, union nethostaddr *, struct mbuf *)); int nfs_request __P((struct vnode *, struct mbuf *, int, struct proc *, struct ucred *, struct mbuf **, struct mbuf **, - caddr_t *)); + caddr_t *, u_int64_t *)); int nfs_loadattrcache __P((struct vnode **, struct mbuf **, caddr_t *, - struct vattr *)); + struct vattr *, int, u_int64_t *)); int nfs_namei __P((struct nameidata *, fhandle_t *, int, struct nfssvc_sock *, struct mbuf *, struct mbuf **, caddr_t *, struct vnode **, struct proc *, int, int)); @@ -747,6 +747,11 @@ int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); +/* + * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG) + * But some of this code may prove useful someday... 
+ */ +#undef NFSDIAG #if NFSDIAG extern int nfstraceindx; diff --git a/bsd/nfs/nfs_bio.c b/bsd/nfs/nfs_bio.c index 040678d37..926de3419 100644 --- a/bsd/nfs/nfs_bio.c +++ b/bsd/nfs/nfs_bio.c @@ -58,7 +58,6 @@ * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 * FreeBSD-Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $ */ - #include #include #include @@ -86,11 +85,18 @@ #include +#define FSDBG(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_TOP(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_BOT(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) + static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, struct proc *p, int operation)); -static struct buf *nfs_getwriteblk __P((struct vnode *vp, daddr_t bn, - int size, struct proc *p, - struct ucred *cred, int off, int len)); extern int nfs_numasync; extern struct nfsstats nfsstats; @@ -129,7 +135,7 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) p = uio->uio_procp; if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); - /*due to getblk/vm interractions, use vm page size or less values */ + /*due to getblk/vm interractions, use vm page size or less values */ biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); /* * For nfs, cache consistency can only be maintained approximately. @@ -231,7 +237,8 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) */ if (nfs_numasync > 0 && nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && - (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { + (off_t)(lbn + 1 + nra) * biosize < np->n_size; + nra++) { rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p, operation); @@ -335,36 +342,52 @@ again: SET(bp->b_flags, B_READ); error = nfs_doio(bp, cred, p); if (error) { - brelse(bp); - while (error == NFSERR_BAD_COOKIE) { - nfs_invaldir(vp); - error = nfs_vinvalbuf(vp, 0, cred, p, 1); - /* - * Yuck! The directory has been modified on the - * server. The only way to get the block is by - * reading from the beginning to get all the - * offset cookies. - */ - for (i = 0; i <= lbn && !error; i++) { - if (np->n_direofoffset - && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) - return (0); - bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p, operation); - if (!bp) - return (EINTR); - if (!ISSET(bp->b_flags, B_DONE)) { - SET(bp->b_flags, B_READ); - error = nfs_doio(bp, cred, p); - if (error) { - brelse(bp); - } else if (i < lbn) - brelse(bp); - } - } - } - if (error) - return (error); + brelse(bp); } + while (error == NFSERR_BAD_COOKIE) { + nfs_invaldir(vp); + error = nfs_vinvalbuf(vp, 0, cred, p, 1); + /* + * Yuck! The directory has been modified on the + * server. The only way to get the block is by + * reading from the beginning to get all the + * offset cookies. + */ + for (i = 0; i <= lbn && !error; i++) { + if (np->n_direofoffset + && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) + return (0); + bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p, + operation); + if (!bp) + return (EINTR); + if (!ISSET(bp->b_flags, B_CACHE)) { + SET(bp->b_flags, B_READ); + error = nfs_doio(bp, cred, p); + /* + * no error + B_INVAL == directory EOF, + * use the block. 
+ */ + if (error == 0 && (bp->b_flags & B_INVAL)) + break; + } + /* + * An error will throw away the block and the + * for loop will break out. If no error and this + * is not the block we want, we throw away the + * block and go for the next one via the for loop. + */ + if (error || i < lbn) + brelse(bp); + } + } + /* + * The above while is repeated if we hit another cookie + * error. If we hit an error and it wasn't a cookie error, + * we give up. + */ + if (error) + return (error); } /* @@ -377,17 +400,18 @@ again: (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && !(np->n_flag & NQNFSNONCACHE) && !incore(vp, lbn + 1)) { - rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p, operation); + rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p, + operation); if (rabp) { if (!ISSET(rabp->b_flags, (B_CACHE|B_DELWRI))) { - SET(rabp->b_flags, (B_READ | B_ASYNC)); - if (nfs_asyncio(rabp, cred)) { - SET(rabp->b_flags, (B_INVAL|B_ERROR)); - rabp->b_error = EIO; - brelse(rabp); - } + SET(rabp->b_flags, (B_READ | B_ASYNC)); + if (nfs_asyncio(rabp, cred)) { + SET(rabp->b_flags, (B_INVAL|B_ERROR)); + rabp->b_error = EIO; + brelse(rabp); + } } else { - brelse(rabp); + brelse(rabp); } } } @@ -396,6 +420,21 @@ again: * the second term may be negative. */ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); + /* + * Unlike VREG files, whos buffer size ( bp->b_bcount ) is + * chopped for the EOF condition, we cannot tell how large + * NFS directories are going to be until we hit EOF. So + * an NFS directory buffer is *not* chopped to its EOF. Now, + * it just so happens that b_resid will effectively chop it + * to EOF. *BUT* this information is lost if the buffer goes + * away and is reconstituted into a B_CACHE state (recovered + * from VM) later. So we keep track of the directory eof + * in np->n_direofoffset and chop it off as an extra step + * right here. + */ + if (np->n_direofoffset && + n > np->n_direofoffset - uio->uio_offset) + n = np->n_direofoffset - uio->uio_offset; break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); @@ -423,6 +462,7 @@ again: return (error); } + /* * Vnode op for write using bio */ @@ -448,6 +488,9 @@ nfs_write(ap) daddr_t lbn; int bufsize; int n, on, error = 0, iomode, must_commit; + off_t boff; + struct iovec iov; + struct uio auio; #if DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -496,8 +539,8 @@ nfs_write(ap) * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ - /*due to getblk/vm interractions, use vm page size or less values */ - biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); + /*due to getblk/vm interractions, use vm page size or less values */ + biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); do { /* @@ -530,12 +573,6 @@ nfs_write(ap) on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); again: - if (uio->uio_offset + n > np->n_size) { - np->n_size = uio->uio_offset + n; - np->n_flag |= NMODIFIED; - if (UBCISVALID(vp)) - ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ - } bufsize = biosize; #if 0 /* (removed for UBC) */ @@ -544,21 +581,175 @@ again: bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } #endif - bp = nfs_getwriteblk(vp, lbn, bufsize, p, cred, on, n); + /* + * Get a cache block for writing. The range to be written is + * (off..off+len) within the block. We ensure that the block + * either has no dirty region or that the given range is + * contiguous with the existing dirty region. 
+ */ + bp = nfs_getcacheblk(vp, lbn, bufsize, p, BLK_WRITE); if (!bp) return (EINTR); + /* + * Resize nfsnode *after* we busy the buffer to prevent + * readers from reading garbage. + * If there was a partial buf at the old eof, validate + * and zero the new bytes. + */ + if (uio->uio_offset + n > np->n_size) { + struct buf *bp0 = NULL; + daddr_t bn = np->n_size / biosize; + int off = np->n_size & (biosize - 1); + + if (off && bn < lbn && incore(vp, bn)) + bp0 = nfs_getcacheblk(vp, bn, biosize, p, + BLK_WRITE); + np->n_flag |= NMODIFIED; + np->n_size = uio->uio_offset + n; + ubc_setsize(vp, (off_t)np->n_size); /* XXX errors */ + if (bp0) { + bzero((char *)bp0->b_data + off, biosize - off); + bp0->b_validend = biosize; + brelse(bp0); + } + } + /* + * NFS has embedded ucred so crhold() risks zone corruption + */ + if (bp->b_wcred == NOCRED) + bp->b_wcred = crdup(cred); + /* + * If dirtyend exceeds file size, chop it down. This should + * not occur unless there is a race. + */ + if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > + np->n_size) + bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * + DEV_BSIZE; + /* + * UBC doesn't (yet) handle partial pages so nfs_biowrite was + * hacked to never bdwrite, to start every little write right + * away. Running IE Avie noticed the performance problem, thus + * this code, which permits those delayed writes by ensuring an + * initial read of the entire page. The read may hit eof + * ("short read") but that we will handle. + * + * We are quite dependant on the correctness of B_CACHE so check + * that first in case of problems. + */ + if (!ISSET(bp->b_flags, B_CACHE) && n < PAGE_SIZE) { + boff = (off_t)bp->b_blkno * DEV_BSIZE; + auio.uio_iov = &iov; + auio.uio_iovcnt = 1; + auio.uio_offset = boff; + auio.uio_resid = PAGE_SIZE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_procp = p; + iov.iov_base = bp->b_data; + iov.iov_len = PAGE_SIZE; + error = nfs_readrpc(vp, &auio, cred); + if (error) { + bp->b_error = error; + SET(bp->b_flags, B_ERROR); + printf("nfs_write: readrpc %d", error); + } + if (auio.uio_resid > 0) + bzero(iov.iov_base, auio.uio_resid); + bp->b_validoff = 0; + bp->b_validend = PAGE_SIZE - auio.uio_resid; + if (np->n_size > boff + bp->b_validend) + bp->b_validend = min(np->n_size - boff, + PAGE_SIZE); + bp->b_dirtyoff = 0; + bp->b_dirtyend = 0; + } + + /* + * If the new write will leave a contiguous dirty + * area, just update the b_dirtyoff and b_dirtyend, + * otherwise try to extend the dirty region. + */ + if (bp->b_dirtyend > 0 && + (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { + off_t start, end; + + boff = (off_t)bp->b_blkno * DEV_BSIZE; + if (on > bp->b_dirtyend) { + start = boff + bp->b_validend; + end = boff + on; + } else { + start = boff + on + n; + end = boff + bp->b_validoff; + } + + /* + * It may be that the valid region in the buffer + * covers the region we want, in which case just + * extend the dirty region. Otherwise we try to + * extend the valid region. + */ + if (end > start) { + auio.uio_iov = &iov; + auio.uio_iovcnt = 1; + auio.uio_offset = start; + auio.uio_resid = end - start; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_procp = p; + iov.iov_base = bp->b_data + (start - boff); + iov.iov_len = end - start; + error = nfs_readrpc(vp, &auio, cred); + /* + * If we couldn't read, do not do a VOP_BWRITE + * as originally coded. That could also error + * and looping back to "again" as it was doing + * could have us stuck trying to write same buf + * again. 
nfs_write, will get the entire region + * if nfs_readrpc succeeded. If unsuccessful + * we should just error out. Errors like ESTALE + * would keep us looping rather than transient + * errors justifying a retry. We can return here + * instead of altering dirty region later. We + * did not write old dirty region at this point. + */ + if (error) { + bp->b_error = error; + SET(bp->b_flags, B_ERROR); + printf("nfs_write: readrpc2 %d", error); + brelse(bp); + return (error); + } + /* + * The read worked. + * If there was a short read, just zero fill. + */ + if (auio.uio_resid > 0) + bzero(iov.iov_base, auio.uio_resid); + if (on > bp->b_dirtyend) + bp->b_validend = on; + else + bp->b_validoff = on + n; + } + /* + * We now have a valid region which extends up to the + * dirty region which we want. + */ + if (on > bp->b_dirtyend) + bp->b_dirtyend = on; + else + bp->b_dirtyoff = on + n; + } if (ISSET(bp->b_flags, B_ERROR)) { error = bp->b_error; brelse(bp); return (error); } - if (bp->b_wcred == NOCRED) { - /* - * NFS has embedded ucred. - * Can not crhold() here as that causes zone corruption - */ + /* + * NFS has embedded ucred so crhold() risks zone corruption + */ + if (bp->b_wcred == NOCRED) bp->b_wcred = crdup(cred); - } np->n_flag |= NMODIFIED; /* @@ -636,168 +827,6 @@ again: return (0); } -/* - * Get a cache block for writing. The range to be written is - * (off..off+len) within the block. This routine ensures that the - * block is either has no dirty region or that the given range is - * contiguous with the existing dirty region. - */ -static struct buf * -nfs_getwriteblk(vp, bn, size, p, cred, off, len) - struct vnode *vp; - daddr_t bn; - int size; - struct proc *p; - struct ucred *cred; - int off, len; -{ - struct nfsnode *np = VTONFS(vp); - struct buf *bp; - int error; - struct iovec iov; - struct uio uio; - off_t boff; - - again: - bp = nfs_getcacheblk(vp, bn, size, p, BLK_WRITE); - if (!bp) - return (NULL); - if (bp->b_wcred == NOCRED) { - /* - * NFS has embedded ucred. - * Can not crhold() here as that causes zone corruption - */ - bp->b_wcred = crdup(cred); - } - - if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { - bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); - } - - /* - * UBC doesn't (yet) handle partial pages so nfs_biowrite was - * hacked to never bdwrite, to start every little write right away. - * Running IE Avie noticed the performance problem, thus this code, - * which permits those delayed writes by ensuring an initial read - * of the entire page. The read may hit eof ("short read") but - * that we will handle. - * - * We are quite dependant on the correctness of B_CACHE so check - * that first in case of problems. 
- */ - if (!ISSET(bp->b_flags, B_CACHE) && len < PAGE_SIZE) { - struct nfsnode *np = VTONFS(vp); - - boff = (off_t)bp->b_blkno * DEV_BSIZE; - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = boff; - uio.uio_resid = PAGE_SIZE; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_rw = UIO_READ; - uio.uio_procp = p; - iov.iov_base = bp->b_data; - iov.iov_len = PAGE_SIZE; - error = nfs_readrpc(vp, &uio, cred); - if (error) { - bp->b_error = error; - SET(bp->b_flags, B_ERROR); - printf("nfs_getwriteblk: readrpc returned %d", error); - } - if (uio.uio_resid > 0) - bzero(iov.iov_base, uio.uio_resid); - bp->b_validoff = 0; - bp->b_validend = PAGE_SIZE - uio.uio_resid; - if (np->n_size > boff + bp->b_validend) - bp->b_validend = min(np->n_size - boff, PAGE_SIZE); - bp->b_dirtyoff = 0; - bp->b_dirtyend = 0; - } - - /* - * If the new write will leave a contiguous dirty - * area, just update the b_dirtyoff and b_dirtyend, - * otherwise try to extend the dirty region. - */ - if (bp->b_dirtyend > 0 && - (off > bp->b_dirtyend || (off + len) < bp->b_dirtyoff)) { - off_t start, end; - - boff = (off_t)bp->b_blkno * DEV_BSIZE; - if (off > bp->b_dirtyend) { - start = boff + bp->b_validend; - end = boff + off; - } else { - start = boff + off + len; - end = boff + bp->b_validoff; - } - - /* - * It may be that the valid region in the buffer - * covers the region we want, in which case just - * extend the dirty region. Otherwise we try to - * extend the valid region. - */ - if (end > start) { - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_offset = start; - uio.uio_resid = end - start; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_rw = UIO_READ; - uio.uio_procp = p; - iov.iov_base = bp->b_data + (start - boff); - iov.iov_len = end - start; - error = nfs_readrpc(vp, &uio, cred); - if (error) { - /* - * If we couldn't read, do not do a VOP_BWRITE - * as originally coded. That, could also error - * and looping back to "again" as it was doing - * could have us stuck trying to write same buffer - * again. nfs_write, will get the entire region - * if nfs_readrpc was successful. If not successful - * we should just error out. Errors like ESTALE - * would keep us in this loop rather than transient - * errors justifying a retry. We can return from here - * instead of altering dirty region later in routine. - * We did not write out old dirty region at this point. - */ - bp->b_error = error; - SET(bp->b_flags, B_ERROR); - printf("nfs_getwriteblk: readrpc (2) returned %d", error); - return bp; - } else { - /* - * The read worked. - */ - if (uio.uio_resid > 0) { - /* - * If there was a short read, - * just zero fill. - */ - bzero(iov.iov_base, - uio.uio_resid); - } - if (off > bp->b_dirtyend) - bp->b_validend = off; - else - bp->b_validoff = off + len; - } - } - - /* - * We now have a valid region which extends up to the - * dirty region which we want. - */ - if (off > bp->b_dirtyend) - bp->b_dirtyend = off; - else - bp->b_dirtyoff = off + len; - } - - return bp; -} /* * Get an nfs cache block. @@ -816,7 +845,7 @@ nfs_getcacheblk(vp, bn, size, p, operation) { register struct buf *bp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); - /*due to getblk/vm interractions, use vm page size or less values */ + /*due to getblk/vm interractions, use vm page size or less values */ int biosize = min(vp->v_mount->mnt_stat.f_iosize, PAGE_SIZE); if (nmp->nm_flag & NFSMNT_INT) { @@ -887,7 +916,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) * necessary. 
-- EKN */ if ((intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) || - ((error == EINTR) && current_thread_aborted())) { + (error == EINTR && current_thread_aborted())) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; @@ -904,7 +933,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg) } didhold = ubc_hold(vp); if (didhold) { - (void) ubc_clean(vp, 1); /* get the pages out of vm also */ + (void) ubc_clean(vp, 1); /* get the pages out of vm also */ ubc_rele(vp); } return (0); @@ -1053,7 +1082,6 @@ nfs_doio(bp, cr, p) struct iovec io; vp = bp->b_vp; - NFSTRACE(NFSTRC_DIO, vp); np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; @@ -1068,7 +1096,7 @@ nfs_doio(bp, cr, p) * NFS being stateless, this case poses a problem. * By definition, the NFS server should always be consulted * for the data in that page. - * So we choose to clear the B_DONE and to the IO. + * So we choose to clear the B_DONE and to do the IO. * * XXX revisit this if there is a performance issue. * XXX In that case, we could play the attribute cache games ... @@ -1078,13 +1106,10 @@ nfs_doio(bp, cr, p) panic("nfs_doio: done and not async"); CLR(bp->b_flags, B_DONE); } - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_START, - (int)np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount, bp->b_flags, 0); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 257)) | DBG_FUNC_NONE, - bp->b_validoff, bp->b_validend, bp->b_dirtyoff, bp->b_dirtyend, 0); - + FSDBG_TOP(256, np->n_size, bp->b_blkno * DEV_BSIZE, bp->b_bcount, + bp->b_flags); + FSDBG(257, bp->b_validoff, bp->b_validend, bp->b_dirtyoff, + bp->b_dirtyend); /* * Historically, paging was done with physio, but no more. */ @@ -1095,7 +1120,7 @@ nfs_doio(bp, cr, p) io.iov_len = uiop->uio_resid = bp->b_bcount; /* mapping was done by vmapbuf() */ io.iov_base = bp->b_data; - uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; + uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE; if (ISSET(bp->b_flags, B_READ)) { uiop->uio_rw = UIO_READ; nfsstats.read_physios++; @@ -1118,14 +1143,11 @@ nfs_doio(bp, cr, p) uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: - uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; + uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop, cr); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 262)) | DBG_FUNC_NONE, - (int)np->n_size, bp->b_blkno * DEV_BSIZE, uiop->uio_resid, error, 0); - - + FSDBG(262, np->n_size, bp->b_blkno * DEV_BSIZE, + uiop->uio_resid, error); if (!error) { bp->b_validoff = 0; if (uiop->uio_resid) { @@ -1136,33 +1158,32 @@ nfs_doio(bp, cr, p) * Just zero fill the rest of the valid area. */ diff = bp->b_bcount - uiop->uio_resid; - len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE - + diff); - if (len > 0) { - len = min(len, uiop->uio_resid); - bzero((char *)bp->b_data + diff, len); - bp->b_validend = diff + len; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE, - diff, len, 0, 1, 0); - - } else - bp->b_validend = diff; + len = np->n_size - ((u_quad_t)bp->b_blkno * DEV_BSIZE + + diff); + if (len > 0) { + len = min(len, uiop->uio_resid); + bzero((char *)bp->b_data + diff, len); + bp->b_validend = diff + len; + FSDBG(258, diff, len, 0, 1); + } else + bp->b_validend = diff; } else bp->b_validend = bp->b_bcount; #if 1 /* USV + JOE [ */ if (bp->b_validend < bp->b_bufsize) { - /* - * we're about to release a partial buffer after a read... 
the only - * way we should get here is if this buffer contains the EOF - * before releasing it, we'll zero out to the end of the buffer - * so that if a mmap of this page occurs, we'll see zero's even - * if a ftruncate extends the file in the meantime + /* + * we're about to release a partial buffer after a + * read... the only way we should get here is if + * this buffer contains the EOF before releasing it, + * we'll zero out to the end of the buffer so that + * if a mmap of this page occurs, we'll see zero's + * even if a ftruncate extends the file in the + * meantime */ - bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend)); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 258)) | DBG_FUNC_NONE, - bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 2, 0); + bzero((caddr_t)(bp->b_data + bp->b_validend), + bp->b_bufsize - bp->b_validend); + FSDBG(258, bp->b_validend, + bp->b_bufsize - bp->b_validend, 0, 2); } #endif /* ] USV + JOE */ } @@ -1196,28 +1217,40 @@ nfs_doio(bp, cr, p) error = nfs_readdirrpc(vp, uiop, cr); break; default: - printf("nfs_doio: type %x unexpected\n",vp->v_type); + printf("nfs_doio: type %x unexpected\n", vp->v_type); break; }; if (error) { - SET(bp->b_flags, B_ERROR); - bp->b_error = error; + SET(bp->b_flags, B_ERROR); + bp->b_error = error; } } else { - if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) - bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); + /* + * mapped I/O may have altered any bytes, so we extend + * the dirty zone to the valid zone. For best performance + * a better solution would be to save & restore page dirty bits + * around the uiomove which brings write-data into the buffer. + * Then here we'd check if the page is dirty rather than WASMAPPED + * Also vnode_pager would change - if a page is clean it might + * still need to be written due to DELWRI. + */ + if (UBCINFOEXISTS(vp) && ubc_issetflags(vp, UI_WASMAPPED)) { + bp->b_dirtyoff = min(bp->b_dirtyoff, bp->b_validoff); + bp->b_dirtyend = max(bp->b_dirtyend, bp->b_validend); + } + if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size) + bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE; if (bp->b_dirtyend > bp->b_dirtyoff) { - - io.iov_len = uiop->uio_resid = bp->b_dirtyend - - bp->b_dirtyoff; - uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE - + bp->b_dirtyoff; + io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; + uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE + + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; - if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC) + if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == + B_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; @@ -1228,7 +1261,6 @@ nfs_doio(bp, cr, p) else CLR(bp->b_flags, B_NEEDCOMMIT); CLR(bp->b_flags, B_WRITEINPROG); - /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, @@ -1242,20 +1274,17 @@ nfs_doio(bp, cr, p) * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. 
*/ - if (error == EINTR - || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + if (error == EINTR || (!error && bp->b_flags & B_NEEDCOMMIT)) { int s; - CLR(bp->b_flags, (B_INVAL|B_NOCACHE)); + CLR(bp->b_flags, B_INVAL | B_NOCACHE); SET(bp->b_flags, B_DELWRI); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 261)) | DBG_FUNC_NONE, - bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0); - + FSDBG(261, bp->b_validoff, bp->b_validend, + bp->b_bufsize, bp->b_bcount); /* - * Since for the B_ASYNC case, nfs_bwrite() has reassigned the - * buffer to the clean list, we have to reassign it back to the - * dirty one. Ugh. + * Since for the B_ASYNC case, nfs_bwrite() has + * reassigned the buffer to the clean list, we have to + * reassign it back to the dirty one. Ugh. */ if (ISSET(bp->b_flags, B_ASYNC)) { s = splbio(); @@ -1271,51 +1300,54 @@ nfs_doio(bp, cr, p) np->n_flag |= NWRITEERR; } bp->b_dirtyoff = bp->b_dirtyend = 0; - #if 1 /* JOE */ /* - * validoff and validend represent the real data present in this buffer - * if validoff is non-zero, than we have to invalidate the buffer and kill - * the page when biodone is called... the same is also true when validend - * doesn't extend all the way to the end of the buffer and validend doesn't - * equate to the current EOF... eventually we need to deal with this in a - * more humane way (like keeping the partial buffer without making it immediately - * available to the VM page cache). + * validoff and validend represent the real data present + * in this buffer; if validoff is non-zero, then we have + * to invalidate the buffer and kill the page when + * biodone is called... the same is also true when + * validend doesn't extend all the way to the end of the + * buffer and validend doesn't equate to the current + * EOF... 
eventually we need to deal with this in a more + * humane way (like keeping the partial buffer without + * making it immediately available to the VM page cache) */ if (bp->b_validoff) SET(bp->b_flags, B_INVAL); else if (bp->b_validend < bp->b_bufsize) { - if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) == np->n_size) { - bzero((caddr_t)(bp->b_data + bp->b_validend), (bp->b_bufsize - bp->b_validend)); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 259)) | DBG_FUNC_NONE, - bp->b_validend, (bp->b_bufsize - bp->b_validend), 0, 0, 0);; - } - else - SET(bp->b_flags, B_INVAL); + if ((off_t)bp->b_blkno * DEV_BSIZE + + bp->b_validend == np->n_size) { + bzero((caddr_t)(bp->b_data + + bp->b_validend), + bp->b_bufsize - bp->b_validend); + FSDBG(259, bp->b_validend, + bp->b_bufsize - bp->b_validend, 0, + 0); + } else + SET(bp->b_flags, B_INVAL); } #endif } } else { - #if 1 /* JOE */ - if (bp->b_validoff) - SET(bp->b_flags, B_INVAL); - else if (bp->b_validend < bp->b_bufsize) { - if ((((off_t)bp->b_blkno * (off_t)DEV_BSIZE) + bp->b_validend) != np->n_size) - SET(bp->b_flags, B_INVAL); - } - if (bp->b_flags & B_INVAL) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE, - bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0); - } + if (bp->b_validoff || + (bp->b_validend < bp->b_bufsize && + (off_t)bp->b_blkno * DEV_BSIZE + bp->b_validend != + np->n_size)) { + SET(bp->b_flags, B_INVAL); + } + if (bp->b_flags & B_INVAL) { + FSDBG(260, bp->b_validoff, bp->b_validend, + bp->b_bufsize, bp->b_bcount); + } #endif - bp->b_resid = 0; - biodone(bp); - NFSTRACE(NFSTRC_DIO_DONE, vp); - return (0); + bp->b_resid = 0; + biodone(bp); + FSDBG_BOT(256, bp->b_validoff, bp->b_validend, bp->b_bufsize, + np->n_size); + return (0); } } bp->b_resid = uiop->uio_resid; @@ -1323,13 +1355,11 @@ nfs_doio(bp, cr, p) nfs_clearcommit(vp->v_mount); if (bp->b_flags & B_INVAL) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 260)) | DBG_FUNC_NONE, - bp->b_validoff, bp->b_validend, bp->b_bufsize, bp->b_bcount, 0); + FSDBG(260, bp->b_validoff, bp->b_validend, bp->b_bufsize, + bp->b_bcount); } - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 256)) | DBG_FUNC_END, - bp->b_validoff, bp->b_validend, bp->b_bcount, error, 0); + FSDBG_BOT(256, bp->b_validoff, bp->b_validend, bp->b_bcount, error); biodone(bp); - NFSTRACE(NFSTRC_DIO_DONE, vp); return (error); } diff --git a/bsd/nfs/nfs_node.c b/bsd/nfs/nfs_node.c index 383428171..f12696b36 100644 --- a/bsd/nfs/nfs_node.c +++ b/bsd/nfs/nfs_node.c @@ -206,9 +206,9 @@ loop: /* * Lock the new nfsnode. */ - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - return (0); + return (error); } int @@ -253,18 +253,19 @@ nfs_inactive(ap) } else if (vget(ap->a_vp, 0, ap->a_p)) panic("nfs_inactive: vget failed"); (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1); + np->n_size = 0; ubc_setsize(ap->a_vp, (off_t)0); - /* We have a problem. The dvp could have gone away on us - * while in the unmount path. Thus it appears as VBAD and we - * cannot use it. If we tried locking the parent (future), for silly + /* We have a problem. The dvp could have gone away on us while + * in the unmount path. Thus it appears as VBAD and we cannot + * use it. If we tried locking the parent (future), for silly * rename files, it is unclear where we would lock. The unmount * code just pulls unlocked vnodes as it goes thru its list and * yanks them. Could unmount be smarter to see if a busy reg vnode has * a parent, and not yank it yet? Put in more passes at unmount - * time? 
In the meantime, just check if it went away on us. Could - * have gone away during the nfs_vinvalbuf or ubc_setsize which block. - * Or perhaps even before nfs_inactive got called. + * time? In the meantime, just check if it went away on us. + * Could have gone away during the nfs_vinvalbuf or ubc_setsize + * which block. Or perhaps even before nfs_inactive got called. */ if ((sp->s_dvp)->v_type != VBAD) nfs_removeit(sp); /* uses the dvp */ @@ -339,57 +340,30 @@ nfs_reclaim(ap) return (0); } -#if 0 /* * Lock an nfsnode */ int nfs_lock(ap) struct vop_lock_args /* { - struct vnode *a_vp; + struct vnode *a_vp; + int a_flags; + struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; /* * Ugh, another place where interruptible mounts will get hung. - * If you make this sleep interruptible, then you have to fix all + * If you make this call interruptible, then you have to fix all * the VOP_LOCK() calls to expect interruptibility. */ - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - (void) tsleep((caddr_t)vp, PINOD, "nfslck", 0); - } if (vp->v_tag == VT_NON) - return (ENOENT); - -#if 0 - /* - * Only lock regular files. If a server crashed while we were - * holding a directory lock, we could easily end up sleeping - * until the server rebooted while holding a lock on the root. - * Locks are only needed for protecting critical sections in - * VMIO at the moment. - * New vnodes will have type VNON but they should be locked - * since they may become VREG. This is checked in loadattrcache - * and unwanted locks are released there. - */ - if (vp->v_type == VREG || vp->v_type == VNON) { - while (np->n_flag & NLOCKED) { - np->n_flag |= NWANTED; - (void) tsleep((caddr_t) np, PINOD, "nfslck2", 0); - /* - * If the vnode has transmuted into a VDIR while we - * were asleep, then skip the lock. - */ - if (vp->v_type != VREG && vp->v_type != VNON) - return (0); - } - np->n_flag |= NLOCKED; - } -#endif - - return (0); + return (ENOENT); /* ??? -- got to check something and error, but what? */ + + return(lockmgr(&VTONFS(vp)->n_lock, ap->a_flags, &vp->v_interlock, + ap->a_p)); + } /* @@ -397,26 +371,16 @@ nfs_lock(ap) */ int nfs_unlock(ap) - struct vop_unlock_args /* { - struct vnode *a_vp; - } */ *ap; + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; { -#if 0 - struct vnode* vp = ap->a_vp; - struct nfsnode* np = VTONFS(vp); - - if (vp->v_type == VREG || vp->v_type == VNON) { - if (!(np->n_flag & NLOCKED)) - panic("nfs_unlock: nfsnode not locked"); - np->n_flag &= ~NLOCKED; - if (np->n_flag & NWANTED) { - np->n_flag &= ~NWANTED; - wakeup((caddr_t) np); - } - } -#endif + struct vnode *vp = ap->a_vp; - return (0); + return (lockmgr(&VTONFS(vp)->n_lock, ap->a_flags | LK_RELEASE, + &vp->v_interlock, ap->a_p)); } /* @@ -428,9 +392,10 @@ nfs_islocked(ap) struct vnode *a_vp; } */ *ap; { - return VTONFS(ap->a_vp)->n_flag & NLOCKED ? 
1 : 0; + return (lockstatus(&VTONFS(ap->a_vp)->n_lock)); + } -#endif + /* * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually diff --git a/bsd/nfs/nfs_nqlease.c b/bsd/nfs/nfs_nqlease.c index 3f3d51ef0..758242702 100644 --- a/bsd/nfs/nfs_nqlease.c +++ b/bsd/nfs/nfs_nqlease.c @@ -900,6 +900,7 @@ nqnfs_getlease(vp, rwflag, cred, p) struct mbuf *mreq, *mrep, *md, *mb, *mb2; int cachable; u_quad_t frev; + u_int64_t xid; nfsstats.rpccnt[NQNFSPROC_GETLEASE]++; mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_V3FH+2*NFSX_UNSIGNED, @@ -909,7 +910,7 @@ nqnfs_getlease(vp, rwflag, cred, p) *tl++ = txdr_unsigned(rwflag); *tl = txdr_unsigned(nmp->nm_leaseterm); reqtime = time.tv_sec; - nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred); + nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred, &xid); np = VTONFS(vp); nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); cachable = fxdr_unsigned(int, *tl++); @@ -917,7 +918,7 @@ nqnfs_getlease(vp, rwflag, cred, p) if (reqtime > time.tv_sec) { fxdr_hyper(tl, &frev); nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev); - nfsm_loadattr(vp, (struct vattr *)0); + nfsm_loadattr(vp, (struct vattr *)0, &xid); } else error = NQNFS_EXPIRED; nfsm_reqdone; diff --git a/bsd/nfs/nfs_serv.c b/bsd/nfs/nfs_serv.c index 30b112769..f03085e4e 100644 --- a/bsd/nfs/nfs_serv.c +++ b/bsd/nfs/nfs_serv.c @@ -2648,10 +2648,16 @@ again: io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); - if (cookies) { - _FREE((caddr_t)cookies, M_TEMP); - cookies = NULL; + + if (cookies) { + _FREE((caddr_t)cookies, M_TEMP); + cookies = NULL; + } + if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) { + FREE((caddr_t)rbuf, M_TEMP); + nfsm_reply(NFSX_POSTOPATTR(v3)); + nfsm_srvpostop_attr(getret, &at); + return (0); } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (off_t)io.uio_offset; @@ -2922,11 +2928,16 @@ again: io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); if (cookies) { - _FREE((caddr_t)cookies, M_TEMP); - cookies = NULL; - } + _FREE((caddr_t)cookies, M_TEMP); + cookies = NULL; + } + if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp)) { + FREE((caddr_t)rbuf, M_TEMP); + nfsm_reply(NFSX_V3POSTOPATTR); + nfsm_srvpostop_attr(getret, &at); + return (0); + } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (u_quad_t)io.uio_offset; getret = VOP_GETATTR(vp, &at, cred, procp); diff --git a/bsd/nfs/nfs_socket.c b/bsd/nfs/nfs_socket.c index 9c331ce9f..9aaf35b99 100644 --- a/bsd/nfs/nfs_socket.c +++ b/bsd/nfs/nfs_socket.c @@ -95,6 +95,18 @@ #include #include +#include + +#define FSDBG(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_TOP(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_BOT(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) + #define TRUE 1 #define FALSE 0 @@ -128,6 +140,7 @@ extern time_t nqnfsstarttime; extern struct nfsstats nfsstats; extern int nfsv3_procid[NFS_NPROCS]; extern int nfs_ticks; +extern u_long nfs_xidwrap; /* * Defines which timer to use for the procnum. 
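The FSDBG/FSDBG_TOP/FSDBG_BOT macros introduced above replace the old NFSTRACE points with kdebug trace events: FSDBG_TOP and FSDBG_BOT emit DBG_FUNC_START/DBG_FUNC_END events under a shared code so a trace tool can pair an operation's entry and exit, while plain FSDBG emits untyped progress events. A minimal userland sketch of the pattern follows (kdebug_stub and its signature are invented for illustration; the real KERNEL_DEBUG packs the code and the function type into a single argument and logs to the kernel trace buffer):

#include <stdio.h>

#define DBG_FUNC_NONE	0
#define DBG_FUNC_START	1
#define DBG_FUNC_END	2

/* stub standing in for the kernel trace entry point */
static void
kdebug_stub(int code, int func, int a, int b, int c, int d)
{
	printf("%3d %-5s %08x %08x %08x %08x\n", code,
	    func == DBG_FUNC_START ? "start" :
	    func == DBG_FUNC_END ? "end" : "-", a, b, c, d);
}

#define FSDBG(A, B, C, D, E) \
	kdebug_stub((A), DBG_FUNC_NONE, (int)(B), (int)(C), (int)(D), (int)(E))
#define FSDBG_TOP(A, B, C, D, E) \
	kdebug_stub((A), DBG_FUNC_START, (int)(B), (int)(C), (int)(D), (int)(E))
#define FSDBG_BOT(A, B, C, D, E) \
	kdebug_stub((A), DBG_FUNC_END, (int)(B), (int)(C), (int)(D), (int)(E))

int
main(void)
{
	FSDBG_TOP(531, 0x10, 13, 0, 0);	/* entry event, as nfs_request does */
	FSDBG(531, 0x10, 13, 1, 0);	/* progress point under the same code */
	FSDBG_BOT(531, 0, 0x10, 13, 0);	/* exit event; an arg often carries the error */
	return (0);
}
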
@@ -203,6 +216,11 @@ int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, }; #endif /* NFS_NOSERVER */ +/* + * NFSTRACE points were changed to FSDBG (KERNEL_DEBUG) + * But some of this code may prove useful someday... + */ +#undef NFSDIAG #if NFSDIAG int nfstraceindx = 0; struct nfstracerec nfstracebuf[NFSTBUFSIZ] = {{0,0,0,0}}; @@ -470,8 +488,7 @@ nfs_connect(nmp, rep) nmp->nm_sdrtt[3] = 0; nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */ nmp->nm_sent = 0; - NFSTRACE4(NFSTRC_CWND_INIT, nmp, nmp->nm_flag, nmp->nm_soflags, - nmp->nm_cwnd); + FSDBG(529, nmp, nmp->nm_flag, nmp->nm_soflags, nmp->nm_cwnd); nmp->nm_timeouts = 0; return (0); @@ -956,25 +973,23 @@ nfs_reply(myrep) */ if (myrep->r_mrep != NULL) { nfs_rcvunlock(&nmp->nm_flag); - NFSTRACE4(NFSTRC_RCVALREADY, myrep->r_xid, myrep, - myrep->r_nmp, 2); + FSDBG(530, myrep->r_xid, myrep, myrep->r_nmp, -1); return (0); } /* * Get the next Rpc reply off the socket. Assume myrep->r_nmp - * is still in tact by checks done in nfs_rcvlock. + * is still intact by checks done in nfs_rcvlock. */ error = nfs_receive(myrep, &nam, &mrep); /* * Bailout asap if nfsmount struct gone (unmounted). */ if (!myrep->r_nmp) { - NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 2); + FSDBG(530, myrep->r_xid, myrep, nmp, -2); return (ECONNABORTED); } if (error) { - NFSTRACE4(NFSTRC_RCVERR, myrep->r_xid, myrep, nmp, - error); + FSDBG(530, myrep->r_xid, myrep, nmp, error); nfs_rcvunlock(&nmp->nm_flag); /* @@ -1005,7 +1020,7 @@ nfs_reply(myrep) * just check here and get out. (ekn) */ if (!mrep) { - NFSTRACE4(NFSTRC_ECONN, myrep->r_xid, myrep, nmp, 3); + FSDBG(530, myrep->r_xid, myrep, nmp, -3); return (ECONNABORTED); /* sounds good */ } @@ -1073,8 +1088,8 @@ nfsmout: * Do the additive increase of * one rpc/rtt. */ - NFSTRACE4(NFSTRC_CWND_REPLY, rep->r_xid, rep, - nmp->nm_sent, nmp->nm_cwnd); + FSDBG(530, rep->r_xid, rep, nmp->nm_sent, + nmp->nm_cwnd); if (nmp->nm_cwnd <= nmp->nm_sent) { nmp->nm_cwnd += (NFS_CWNDSCALE * NFS_CWNDSCALE + @@ -1127,8 +1142,8 @@ nfsmout: panic("nfs_reply: nil r_mrep"); return (0); } - NFSTRACE4(NFSTRC_NOTMINE, myrep->r_xid, myrep, rep, - rep ? rep->r_xid : myrep->r_flags); + FSDBG(530, myrep->r_xid, myrep, rep, + rep ? 
rep->r_xid : myrep->r_flags); if (myrep->r_flags & R_GETONEREP) return (0); /* this path used by NQNFS */ } @@ -1145,7 +1160,7 @@ nfsmout: * nb: always frees up mreq mbuf list */ int -nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) +nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp, xidp) struct vnode *vp; struct mbuf *mrest; int procnum; @@ -1154,6 +1169,7 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) struct mbuf **mrp; struct mbuf **mdp; caddr_t *dposp; + u_int64_t *xidp; { register struct mbuf *m, *mrep; register struct nfsreq *rep, *rp; @@ -1173,10 +1189,12 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) char *auth_str, *verf_str; NFSKERBKEY_T key; /* save session key */ + if (xidp) + *xidp = 0; nmp = VFSTONFS(vp->v_mount); MALLOC_ZONE(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK); - NFSTRACE4(NFSTRC_REQ, vp, procnum, nmp, rep); + FSDBG_TOP(531, vp, procnum, nmp, rep); /* * make sure if we blocked above, that the file system didn't get @@ -1189,7 +1207,7 @@ nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp) */ if (vp->v_type == VBAD) { - NFSTRACE4(NFSTRC_VBAD, 1, vp, nmp, rep); + FSDBG_BOT(531, 1, vp, nmp, rep); _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (EINVAL); } @@ -1220,6 +1238,7 @@ kerbauth: error = nfs_getauth(nmp, rep, cred, &auth_str, &auth_len, verf_str, &verf_len, key); if (error) { + FSDBG_BOT(531, 2, vp, error, rep); _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); m_freem(mrest); @@ -1236,6 +1255,8 @@ kerbauth: } m = nfsm_rpchead(cred, nmp->nm_flag, procnum, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, &mheadend, &xid); + if (xidp) + *xidp = xid + ((u_int64_t)nfs_xidwrap << 32); if (auth_str) _FREE(auth_str, M_TEMP); @@ -1293,8 +1314,8 @@ tryagain: */ if (!error) { if ((rep->r_flags & R_MUSTRESEND) == 0) { - NFSTRACE4(NFSTRC_CWND_REQ1, rep->r_xid, rep, - nmp->nm_sent, nmp->nm_cwnd); + FSDBG(531, rep->r_xid, rep, nmp->nm_sent, + nmp->nm_cwnd); nmp->nm_sent += NFS_CWNDSCALE; rep->r_flags |= R_SENT; } @@ -1336,8 +1357,7 @@ tryagain: * Decrement the outstanding request count. 
*/ if (rep->r_flags & R_SENT) { - NFSTRACE4(NFSTRC_CWND_REQ2, rep->r_xid, rep, nmp->nm_sent, - nmp->nm_cwnd); + FSDBG(531, rep->r_xid, rep, nmp->nm_sent, nmp->nm_cwnd); rep->r_flags &= ~R_SENT; /* paranoia */ nmp->nm_sent -= NFS_CWNDSCALE; } @@ -1354,7 +1374,7 @@ tryagain: dpos = rep->r_dpos; if (error) { m_freem(rep->r_mreq); - NFSTRACE4(NFSTRC_REQERR, error, rep->r_xid, nmp, rep); + FSDBG_BOT(531, error, rep->r_xid, nmp, rep); _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); } @@ -1379,7 +1399,7 @@ tryagain: error = EACCES; m_freem(mrep); m_freem(rep->r_mreq); - NFSTRACE4(NFSTRC_RPCERR, error, rep->r_xid, nmp, rep); + FSDBG_BOT(531, error, rep->r_xid, nmp, rep); _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); } @@ -1434,8 +1454,7 @@ tryagain: } else m_freem(mrep); m_freem(rep->r_mreq); - NFSTRACE4(NFSTRC_DISSECTERR, error, rep->r_xid, nmp, - rep); + FSDBG_BOT(531, error, rep->r_xid, nmp, rep); _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); @@ -1463,7 +1482,7 @@ tryagain: *mdp = md; *dposp = dpos; m_freem(rep->r_mreq); - NFSTRACE4(NFSTRC_REQFREE, 0xf0f0f0f0, rep->r_xid, nmp, rep); + FSDBG_BOT(531, 0xf0f0f0f0, rep->r_xid, nmp, rep); FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (0); } @@ -1471,7 +1490,7 @@ tryagain: error = EPROTONOSUPPORT; nfsmout: m_freem(rep->r_mreq); - NFSTRACE4(NFSTRC_REQFREE, error, rep->r_xid, nmp, rep); + FSDBG_BOT(531, error, rep->r_xid, nmp, rep); _FREE_ZONE((caddr_t)rep, sizeof (struct nfsreq), M_NFSREQ); return (error); } @@ -1645,8 +1664,8 @@ nfs_softterm(struct nfsreq *rep) { rep->r_flags |= R_SOFTTERM; if (rep->r_flags & R_SENT) { - NFSTRACE4(NFSTRC_CWND_SOFT, rep->r_xid, rep, - rep->r_nmp->nm_sent, rep->r_nmp->nm_cwnd); + FSDBG(532, rep->r_xid, rep, rep->r_nmp->nm_sent, + rep->r_nmp->nm_cwnd); rep->r_nmp->nm_sent -= NFS_CWNDSCALE; rep->r_flags &= ~R_SENT; } @@ -1816,8 +1835,7 @@ rescan: rep->r_flags |= R_SENT; nmp->nm_sent += NFS_CWNDSCALE; } - NFSTRACE4(NFSTRC_CWND_TIMER, xid, rep, - nmp->nm_sent, nmp->nm_cwnd); + FSDBG(535, xid, rep, nmp->nm_sent, nmp->nm_cwnd); thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); @@ -1830,7 +1848,7 @@ rescan: thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); - NFSTRACE4(NFSTRC_CWND_TIMER, xid, error, sent, cwnd); + FSDBG(535, xid, error, sent, cwnd); /* * This is to fix "nfs_sigintr" DSI panics. * We may have slept during the send so the current @@ -1976,24 +1994,23 @@ nfs_rcvlock(rep) register int *flagp = &rep->r_nmp->nm_flag; int slpflag, slptimeo = 0; + FSDBG_TOP(534, rep->r_xid, rep, rep->r_nmp, *flagp); if (*flagp & NFSMNT_INT) slpflag = PCATCH; else slpflag = 0; while (*flagp & NFSMNT_RCVLOCK) { if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) { - NFSTRACE4(NFSTRC_RCVLCKINTR, rep->r_xid, rep, - rep->r_nmp, *flagp); + FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x100); return (EINTR); } else if (rep->r_mrep != NULL) { /* * Don't bother sleeping if reply already arrived */ - NFSTRACE4(NFSTRC_RCVALREADY, rep->r_xid, rep, - rep->r_nmp, 1); + FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x101); return (EALREADY); } - NFSTRACE4(NFSTRC_RCVLCKW, rep->r_xid, rep, rep->r_nmp, *flagp); + FSDBG(534, rep->r_xid, rep, rep->r_nmp, 0x102); *flagp |= NFSMNT_WANTRCV; (void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk", slptimeo); @@ -2003,16 +2020,18 @@ nfs_rcvlock(rep) } /* * Make sure while we slept that the mountpoint didn't go away. - * nfs_sigintr and caller nfs_reply expect it in tact. 
+ * nfs_sigintr and caller nfs_reply expect it intact. */ - if (!rep->r_nmp) + if (!rep->r_nmp) { + FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, 0x103); return (ECONNABORTED); /* don't have lock until out of loop */ + } } /* * nfs_reply will handle it if reply already arrived. * (We may have slept or been preempted while on network funnel). */ - NFSTRACE4(NFSTRC_RCVLCK, rep->r_xid, rep, rep->r_nmp, *flagp); + FSDBG_BOT(534, rep->r_xid, rep, rep->r_nmp, *flagp); *flagp |= NFSMNT_RCVLOCK; return (0); } @@ -2025,15 +2044,13 @@ nfs_rcvunlock(flagp) register int *flagp; { + FSDBG(533, flagp, *flagp, 0, 0); if ((*flagp & NFSMNT_RCVLOCK) == 0) panic("nfs rcvunlock"); *flagp &= ~NFSMNT_RCVLOCK; if (*flagp & NFSMNT_WANTRCV) { - NFSTRACE(NFSTRC_RCVUNLW, flagp); *flagp &= ~NFSMNT_WANTRCV; wakeup((caddr_t)flagp); - } else { - NFSTRACE(NFSTRC_RCVUNL, flagp); } } diff --git a/bsd/nfs/nfs_subs.c b/bsd/nfs/nfs_subs.c index 9018b50a6..e152a0d18 100644 --- a/bsd/nfs/nfs_subs.c +++ b/bsd/nfs/nfs_subs.c @@ -104,6 +104,17 @@ #include #endif +#include + +#define FSDBG(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_TOP(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_BOT(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps @@ -116,6 +127,7 @@ u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_long nfs_xid = 0; +u_long nfs_xidwrap = 0; /* to build a (non-wrapping) 64 bit xid */ static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; @@ -691,8 +703,10 @@ nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, /* * Skip zero xid if it should ever happen. */ - if (++nfs_xid == 0) + if (++nfs_xid == 0) { + nfs_xidwrap++; nfs_xid++; + } *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; @@ -1227,11 +1241,13 @@ nfs_init(vfsp) * copy the attributes to *vaper */ int -nfs_loadattrcache(vpp, mdp, dposp, vaper) +nfs_loadattrcache(vpp, mdp, dposp, vaper, dontshrink, xidp) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; + int dontshrink; + u_int64_t *xidp; { register struct vnode *vp = *vpp; register struct vattr *vap; @@ -1247,18 +1263,24 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode *nvp; int v3; - /* this routine is a good place to check for VBAD again. We caught most of them - * in nfsm_request, but postprocessing may indirectly get here, so check again. - */ - if (vp->v_type==VBAD) - return (EINVAL); - - v3 = NFS_ISV3(vp); - NFSTRACE(NFSTRC_LAC, vp); + FSDBG_TOP(527, vp, 0, *xidp >> 32, *xidp); + /* + * this routine is a good place to check for VBAD again. We caught + * most of them in nfsm_request, but postprocessing may indirectly get + * here, so check again. 
+ */ + if (vp->v_type == VBAD) { + FSDBG_BOT(527, EINVAL, 1, 0, *xidp); + return (EINVAL); + } + + v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; - if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2))) + if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2))) { + FSDBG_BOT(527, error, 2, 0, *xidp); return (error); + } fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); @@ -1308,12 +1330,30 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) * information. */ np = VTONFS(vp); + if (*xidp < np->n_xid) { + /* + * We have already updated attributes with a response from + * a later request. The attributes we have here are probably + * stale so we drop them (just return). However, our + * out-of-order receipt could be correct - if the requests were + * processed out of order at the server. Given the uncertainty + * we invalidate our cached attributes. *xidp is zeroed here + * to indicate the attributes were dropped - only getattr + * cares - it needs to retry the rpc. + */ + np->n_attrstamp = 0; + FSDBG_BOT(527, 0, np, np->n_xid, *xidp); + *xidp = 0; + return (0); + } if (vp->v_type != vtyp) { vp->v_type = vtyp; if (UBCINFOMISSING(vp) || UBCINFORECLAIMED(vp)) - if (error = ubc_info_init(vp)) /* VREG */ + if ((error = ubc_info_init(vp))) { /* VREG */ + FSDBG_BOT(527, error, 3, 0, *xidp); return(error); + } if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; @@ -1342,8 +1382,9 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) } } np->n_mtime = mtime.tv_sec; - NFSTRACE(NFSTRC_LAC_INIT, vp); + FSDBG(527, vp, np->n_mtime, 0, 0); } + np->n_xid = *xidp; vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); @@ -1378,15 +1419,15 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) vap->va_filerev = 0; } + np->n_attrstamp = time.tv_sec; if (vap->va_size != np->n_size) { - NFSTRACE4(NFSTRC_LAC_NP, vp, vap->va_size, np->n_size, - (vap->va_type == VREG) | - (np->n_flag & NMODIFIED ? 2 : 0)); + FSDBG(527, vp, vap->va_size, np->n_size, + (vap->va_type == VREG) | + (np->n_flag & NMODIFIED ? 6 : 4)); if (vap->va_type == VREG) { - int orig_size; + int orig_size; orig_size = np->n_size; - if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; @@ -1394,13 +1435,16 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) np->n_size = vap->va_size; } else np->n_size = vap->va_size; - if (UBCISVALID(vp) && np->n_size > orig_size) - ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ + if (dontshrink && UBCISVALID(vp) && + np->n_size < ubc_getsize(vp)) { + vap->va_size = np->n_size = orig_size; + np->n_attrstamp = 0; + } else + ubc_setsize(vp, (off_t)np->n_size); /* XXX */ } else np->n_size = vap->va_size; } - np->n_attrstamp = time.tv_sec; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { @@ -1410,6 +1454,7 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper) vaper->va_mtime = np->n_mtim; } } + FSDBG_BOT(527, 0, np, 0, *xidp); return (0); } @@ -1427,23 +1472,19 @@ nfs_getattrcache(vp, vaper) register struct vattr *vap; if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) { - NFSTRACE(NFSTRC_GAC_MISS, vp); + FSDBG(528, vp, 0, 0, 1); nfsstats.attrcache_misses++; return (ENOENT); } - NFSTRACE(NFSTRC_GAC_HIT, vp); + FSDBG(528, vp, 0, 0, 2); nfsstats.attrcache_hits++; vap = &np->n_vattr; if (vap->va_size != np->n_size) { - NFSTRACE4(NFSTRC_GAC_NP, vp, vap->va_size, np->n_size, - (vap->va_type == VREG) | - (np->n_flag & NMODIFIED ? 
2 : 0)); + FSDBG(528, vp, vap->va_size, np->n_size, + (vap->va_type == VREG) | + (np->n_flag & NMODIFIED ? 6 : 4)); if (vap->va_type == VREG) { - int orig_size; - - orig_size = np->n_size; - if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; @@ -1451,8 +1492,7 @@ nfs_getattrcache(vp, vaper) np->n_size = vap->va_size; } else np->n_size = vap->va_size; - if (UBCISVALID(vp) && np->n_size > orig_size) - ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ + ubc_setsize(vp, (off_t)np->n_size); /* XXX */ } else np->n_size = vap->va_size; } diff --git a/bsd/nfs/nfs_vfsops.c b/bsd/nfs/nfs_vfsops.c index 9c1c7caad..8281df111 100644 --- a/bsd/nfs/nfs_vfsops.c +++ b/bsd/nfs/nfs_vfsops.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * @@ -57,14 +57,6 @@ * * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 * FreeBSD-Id: nfs_vfsops.c,v 1.52 1997/11/12 05:42:21 julian Exp $ - * - * History: - * - * - * 23-May-97 Umesh Vaishampayan (umeshv@apple.com) - * Added the ability to mount "/private" separately. - * Fixed bug which caused incorrect reporting of "mounted on" - * directory name in case of nfs root. */ #include @@ -228,12 +220,8 @@ static int nfs_iosize(nmp) * space. */ iosize = max(nmp->nm_rsize, nmp->nm_wsize); - if (iosize < PAGE_SIZE) iosize = PAGE_SIZE; -#if 0 - /* XXX UPL changes for UBC do not support multiple pages */ - iosize = PAGE_SIZE; /* XXX FIXME */ -#endif - /* return iosize; */ + if (iosize < PAGE_SIZE) + iosize = PAGE_SIZE; return (trunc_page(iosize)); } @@ -282,12 +270,14 @@ nfs_statfs(mp, sbp, p) struct ucred *cred; u_quad_t tquad; extern int nfs_mount_type; + u_int64_t xid; #ifndef nolint sfp = (struct nfs_statfs *)0; #endif vp = nmp->nm_dvp; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) + return(error); cred = crget(); cred->cr_ngroups = 1; if (v3 && (nmp->nm_flag & NFSMNT_GOTFSINFO) == 0) @@ -295,9 +285,9 @@ nfs_statfs(mp, sbp, p) nfsstats.rpccnt[NFSPROC_FSSTAT]++; nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); nfsm_fhtom(vp, v3); - nfsm_request(vp, NFSPROC_FSSTAT, p, cred); + nfsm_request(vp, NFSPROC_FSSTAT, p, cred, &xid); if (v3) - nfsm_postop_attr(vp, retattr); + nfsm_postop_attr(vp, retattr, &xid); nfsm_dissect(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); /* XXX CSM 12/2/97 Cleanup when/if we integrate FreeBSD mount.h */ @@ -355,12 +345,13 @@ nfs_fsinfo(nmp, vp, cred, p) caddr_t bpos, dpos, cp2; int error = 0, retattr; struct mbuf *mreq, *mrep, *md, *mb, *mb2; + u_int64_t xid; nfsstats.rpccnt[NFSPROC_FSINFO]++; nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); nfsm_fhtom(vp, 1); - nfsm_request(vp, NFSPROC_FSINFO, p, cred); - nfsm_postop_attr(vp, retattr); + nfsm_request(vp, NFSPROC_FSINFO, p, cred, &xid); + nfsm_postop_attr(vp, retattr, &xid); if (!error) { nfsm_dissect(fsp, struct nfsv3_fsinfo *, NFSX_V3FSINFO); pref = fxdr_unsigned(u_long, fsp->fs_wtpref); @@ -562,8 +553,8 @@ nfs_mount_diskless_private(ndmntp, mntname, mntflag, vpp, mpp) /* Get the vnode for '/'. Set fdp->fd_cdir to reference it. */ if (VFS_ROOT(mountlist.cqh_first, &rootvnode)) panic("cannot find root vnode"); + VREF(rootvnode); fdp->fd_cdir = rootvnode; - VREF(fdp->fd_cdir); VOP_UNLOCK(rootvnode, 0, procp); fdp->fd_rdir = NULL; } @@ -947,13 +938,8 @@ nfs_unmount(mp, mntflags, p) * - Decrement reference on the vnode representing remote root. 
* - Close the socket * - Free up the data structures - * - * We need to decrement the ref. count on the nfsnode representing - * the remote root. See comment in mountnfs(). The VFS unmount() - * has done vput on this vnode, otherwise we would get deadlock! */ vp = nmp->nm_dvp; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* * Must handshake with nqnfs_clientd() if it is active. @@ -968,10 +954,9 @@ nfs_unmount(mp, mntflags, p) */ error = vflush(mp, vp, SKIPSWAP | flags); if (mntflags & MNT_FORCE) - error = vflush(mp, NULLVP, flags); + error = vflush(mp, NULLVP, flags); /* locks vp in the process */ else { if (vp->v_usecount > 1) { - VOP_UNLOCK(vp, 0, p); nmp->nm_flag &= ~NFSMNT_DISMINPROG; return (EBUSY); } @@ -979,7 +964,6 @@ nfs_unmount(mp, mntflags, p) } if (error) { - VOP_UNLOCK(vp, 0, p); nmp->nm_flag &= ~NFSMNT_DISMINPROG; return (error); } @@ -993,10 +977,11 @@ nfs_unmount(mp, mntflags, p) /* * Release the root vnode reference held by mountnfs() - * Note: vflush would have done the vgone for us if we - * didn't skip over it due to mount reference held. + * vflush did the vgone for us when we didn't skip over + * it in the MNT_FORCE case. (Thus vp can't be locked when + * called vflush in non-skip vp case.) */ - vput(vp); + vrele(vp); if (!(mntflags & MNT_FORCE)) vgone(vp); mp->mnt_data = 0; /* don't want to end up using stale vp */ @@ -1076,6 +1061,7 @@ loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { + int didhold = 0; /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. @@ -1086,10 +1072,14 @@ loop: continue; if (vget(vp, LK_EXCLUSIVE, p)) goto loop; + didhold = ubc_hold(vp); error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; - vput(vp); + VOP_UNLOCK(vp, 0, p); + if (didhold) + ubc_rele(vp); + vrele(vp); } return (allerror); } diff --git a/bsd/nfs/nfs_vnops.c b/bsd/nfs/nfs_vnops.c index a34b11003..8b64b7d24 100644 --- a/bsd/nfs/nfs_vnops.c +++ b/bsd/nfs/nfs_vnops.c @@ -63,7 +63,6 @@ /* * vnode op calls for Sun NFS version 2 and 3 */ - #include #include #include @@ -111,6 +110,16 @@ #include +#define FSDBG(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_NONE, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_TOP(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_START, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) +#define FSDBG_BOT(A, B, C, D, E) \ + KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, (A))) | DBG_FUNC_END, \ + (int)(B), (int)(C), (int)(D), (int)(E), 0) + #define TRUE 1 #define FALSE 0 @@ -523,21 +532,22 @@ nfs3_access_otw(struct vnode *vp, register caddr_t cp; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); - nfsm_request(vp, NFSPROC_ACCESS, p, cred); - nfsm_postop_attr(vp, attrflag); + nfsm_request(vp, NFSPROC_ACCESS, p, cred, &xid); + nfsm_postop_attr(vp, attrflag, &xid); if (!error) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); np->n_mode = rmode; np->n_modeuid = cred->cr_uid; np->n_modestamp = time_second; - } + } nfsm_reqdone; return error; } @@ -578,21 +588,21 @@ nfs_access(ap) mode = 0; if (vp->v_type == VDIR) { if (ap->a_mode & VWRITE) - mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | - NFSV3ACCESS_DELETE); + mode |= NFSV3ACCESS_MODIFY | + NFSV3ACCESS_EXTEND | 
NFSV3ACCESS_DELETE; if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } else { if (ap->a_mode & VWRITE) - mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); + mode |= NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND; if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | - NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | - NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; + NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | + NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; } else wmode = mode; @@ -600,9 +610,9 @@ nfs_access(ap) * Does our cached result allow us to give a definite yes to * this request? */ - if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && - (ap->a_cred->cr_uid == np->n_modeuid) && - ((np->n_mode & mode) == mode)) { + if (time_second < np->n_modestamp + nfsaccess_cache_timeout && + ap->a_cred->cr_uid == np->n_modeuid && + (np->n_mode & mode) == mode) { /* nfsstats.accesscache_hits++; */ } else { /* @@ -613,8 +623,8 @@ nfs_access(ap) if (!error) { if ((np->n_mode & mode) != mode) error = EACCES; - } - } + } + } } else return (nfsspec_access(ap)); /* NFSv2 case checks for EROFS here */ /* @@ -631,8 +641,8 @@ nfs_access(ap) error = EROFS; default: break; - } } + } return (error); } @@ -644,6 +654,7 @@ nfs_access(ap) * if consistency is lost. */ /* ARGSUSED */ + static int nfs_open(ap) struct vop_open_args /* { @@ -659,10 +670,9 @@ nfs_open(ap) struct vattr vattr; int error; - if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) -{ printf("open eacces vtyp=%d\n",vp->v_type); + if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { return (EACCES); -} + } /* * Get a valid lease. If cached data is stale, flush it. */ @@ -769,8 +779,14 @@ nfs_close(ap) if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { - error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); - np->n_flag &= ~NMODIFIED; + error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 1); + /* + * We cannot clear the NMODIFIED bit in np->n_flag due to + * potential races with other processes (and because + * the commit arg is 0 in the nfs_flush call above.) + * NMODIFIED is a hint + */ + /* np->n_flag &= ~NMODIFIED; */ } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; @@ -804,65 +820,72 @@ nfs_getattr(ap) int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); + u_int64_t xid; + int avoidfloods; + FSDBG_TOP(513, np->n_size, np, np->n_vattr.va_size, np->n_flag); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_START, - (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0); - /* * First look in the cache. 
*/ if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) { - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END, - (int)np->n_size, 0, (int)np->n_vattr.va_size, np->n_flag, 0); - + FSDBG_BOT(513, np->n_size, 0, np->n_vattr.va_size, np->n_flag); return (0); } - if (error != ENOENT) + if (error != ENOENT) { + FSDBG_BOT(513, np->n_size, error, np->n_vattr.va_size, + np->n_flag); return (error); + } error = 0; - + if (v3 && nfsaccess_cache_timeout > 0) { /* nfsstats.accesscache_misses++; */ - if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred)) - return (error); + if (error = nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, + ap->a_cred)) + return (error); if ((error = nfs_getattrcache(vp, ap->a_vap)) == 0) return (0); if (error != ENOENT) return (error); error = 0; } - + avoidfloods = 0; +tryagain: nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); - nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); + nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred, &xid); if (!error) { - nfsm_loadattr(vp, ap->a_vap); + nfsm_loadattr(vp, ap->a_vap, &xid); + if (!xid) { /* out-of-order rpc - attributes were dropped */ + m_freem(mrep); + FSDBG(513, -1, np, np->n_xid << 32, np->n_xid); + if (avoidfloods++ < 100) + goto tryagain; + /* + * avoidfloods>1 is bizarre. at 100 pull the plug + */ + panic("nfs_getattr: getattr flood\n"); + } if (np->n_mtime != ap->a_vap->va_mtime.tv_sec) { - NFSTRACE(NFSTRC_GA_INV, vp); + FSDBG(513, -1, np, -1, vp); if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); - if (!error) { - NFSTRACE(NFSTRC_GA_INV1, vp); + FSDBG(513, -1, np, -2, error); + if (!error) np->n_mtime = ap->a_vap->va_mtime.tv_sec; - } else { - NFSTRACE(NFSTRC_GA_INV2, error); - } } } nfsm_reqdone; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 513)) | DBG_FUNC_END, - (int)np->n_size, -1, (int)np->n_vattr.va_size, error, 0); - + FSDBG_BOT(513, np->n_size, -1, np->n_vattr.va_size, error); return (error); } @@ -888,6 +911,15 @@ nfs_setattr(ap) #ifndef nolint tsize = (u_quad_t)0; #endif + +#ifdef XXX /* enable this code soon! (but test it first) */ + /* + * Setting of flags is not supported. + */ + if (vap->va_flags != VNOVAL) + return (EOPNOTSUPP); +#endif + /* * Disallow write attempts if the filesystem is mounted read-only. 
*/ @@ -919,72 +951,63 @@ nfs_setattr(ap) */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); - np->n_flag |= NMODIFIED; - tsize = np->n_size; - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_START, - (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, np->n_flag, 0); - - if (vap->va_size == 0) - error = nfs_vinvalbuf(vp, 0, - ap->a_cred, ap->a_p, 1); - else - error = nfs_vinvalbuf(vp, V_SAVE, - ap->a_cred, ap->a_p, 1); - - if (UBCISVALID(vp)) - ubc_setsize(vp, (off_t)vap->va_size); /* XXX check error */ - - if (error) { - printf("nfs_setattr: nfs_vinvalbuf %d\n", error); - -#if DIAGNOSTIC - kprintf("nfs_setattr: nfs_vinvalbuf %d\n", - error); -#endif /* DIAGNOSTIC */ - if (UBCISVALID(vp)) - ubc_setsize(vp, (off_t)tsize); /* XXX check error */ - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END, - (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, -1, 0); - - return (error); + FSDBG_TOP(512, np->n_size, vap->va_size, + np->n_vattr.va_size, np->n_flag); + if (np->n_flag & NMODIFIED) { + if (vap->va_size == 0) + error = nfs_vinvalbuf(vp, 0, + ap->a_cred, ap->a_p, 1); + else + error = nfs_vinvalbuf(vp, V_SAVE, + ap->a_cred, ap->a_p, 1); + if (error) { + printf("nfs_setattr: nfs_vinvalbuf %d\n", error); + FSDBG_BOT(512, np->n_size, vap->va_size, + np->n_vattr.va_size, -1); + return (error); + } + } else if (np->n_size > vap->va_size) { /* shrinking? */ + daddr_t obn, bn; + int biosize; + struct buf *bp; + + biosize = min(vp->v_mount->mnt_stat.f_iosize, + PAGE_SIZE); + obn = (np->n_size - 1) / biosize; + bn = vap->va_size / biosize; + for ( ; obn >= bn; obn--) + if (incore(vp, obn)) { + bp = getblk(vp, obn, biosize, 0, + 0, BLK_READ); + FSDBG(512, bp, bp->b_flags, + 0, obn); + SET(bp->b_flags, B_INVAL); + brelse(bp); + } } + tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; - + ubc_setsize(vp, (off_t)vap->va_size); /* XXX */ }; } else if ((vap->va_mtime.tv_sec != VNOVAL || - vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && - vp->v_type == VREG && + vap->va_atime.tv_sec != VNOVAL) && + (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) - return (error); - + return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); - - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 512)) | DBG_FUNC_END, - (int)np->n_size, (int)vap->va_size, (int)np->n_vattr.va_size, error, 0); - + FSDBG_BOT(512, np->n_size, vap->va_size, np->n_vattr.va_size, error); if (error && vap->va_size != VNOVAL) { /* make every effort to resync file size w/ server... 
*/ int err = 0; /* preserve "error" for return */ printf("nfs_setattr: nfs_setattrrpc %d\n", error); -#if DIAGNOSTIC - kprintf("nfs_setattr: nfs_setattrrpc %d\n", error); -#endif /* DIAGNOSTIC */ np->n_size = np->n_vattr.va_size = tsize; - if (UBCISVALID(vp)) - ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ + ubc_setsize(vp, (off_t)np->n_size); /* XXX check error */ vap->va_size = tsize; err = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); - if (err) printf("nfs_setattr1: nfs_setattrrpc %d\n", err); -#if DIAGNOSTIC - if (err) - kprintf("nfs_setattr nfs_setattrrpc %d\n", err); -#endif /* DIAGNOSTIC */ } return (error); } @@ -1007,6 +1030,7 @@ nfs_setattrrpc(vp, vap, cred, procp) int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); @@ -1090,13 +1114,13 @@ nfs_setattrrpc(vp, vap, cred, procp) txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } - nfsm_request(vp, NFSPROC_SETATTR, procp, cred); + nfsm_request(vp, NFSPROC_SETATTR, procp, cred, &xid); if (v3) { - nfsm_wcc_data(vp, wccflag); - if ((!wccflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */ - VTONFS(vp)->n_attrstamp = 0; + nfsm_wcc_data(vp, wccflag, &xid); + if (!wccflag && vp->v_type != VBAD) /* EINVAL on VBAD node */ + VTONFS(vp)->n_attrstamp = 0; } else - nfsm_loadattr(vp, (struct vattr *)0); + nfsm_loadattr(vp, (struct vattr *)0, &xid); nfsm_reqdone; return (error); } @@ -1133,6 +1157,7 @@ nfs_lookup(ap) int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; int worldbuildworkaround = 1; + u_int64_t xid; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) @@ -1144,7 +1169,7 @@ nfs_lookup(ap) wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); - + if (worldbuildworkaround) { /* * Temporary workaround for world builds to not have dvp go @@ -1169,7 +1194,7 @@ nfs_lookup(ap) *vpp = NULLVP; goto error_return; } - + /* got to check to make sure the vnode didn't go away if access went to server */ if ((*vpp)->v_type == VBAD) { error = EINVAL; @@ -1216,7 +1241,7 @@ nfs_lookup(ap) if (error) goto error_return; } - + /* * Got to check to make sure the vnode didn't go away if VOP_GETATTR went to server * or callers prior to this blocked and had it go VBAD. 
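The xid plumbing threaded through these hunks follows one scheme: nfsm_rpchead counts 32-bit xid wraparounds in nfs_xidwrap, nfs_request hands back xid + (nfs_xidwrap << 32) as a monotonically increasing 64-bit value, and nfs_loadattrcache drops any reply whose xid is older than the n_xid already cached on the nfsnode, zeroing *xidp so a caller such as nfs_getattr knows to retry. A minimal userland sketch of that logic (struct nfsnode_model and the function names here are invented for illustration; the real code also clears n_attrstamp when it drops a reply):

#include <stdio.h>
#include <stdint.h>

static uint32_t nfs_xid32;	/* 32-bit xid as sent on the wire */
static uint32_t nfs_xidwrap32;	/* bumped each time the xid wraps to 0 */

/* model of the xid allocation in nfsm_rpchead/nfs_request */
static uint64_t
next_xid64(void)
{
	if (++nfs_xid32 == 0) {	/* skip zero xid, count the wrap */
		nfs_xidwrap32++;
		nfs_xid32++;
	}
	return (nfs_xid32 + ((uint64_t)nfs_xidwrap32 << 32));
}

struct nfsnode_model {
	uint64_t n_xid;		/* xid of the last attributes accepted */
};

/* model of the staleness check added to nfs_loadattrcache */
static int
load_attrs(struct nfsnode_model *np, uint64_t *xidp)
{
	if (*xidp < np->n_xid) {
		/* reply is older than what is cached: drop it and zero
		 * *xidp so the caller can see it was dropped and retry */
		*xidp = 0;
		return (0);
	}
	np->n_xid = *xidp;
	/* ... the attributes would be copied into the cache here ... */
	return (1);
}

int
main(void)
{
	struct nfsnode_model np = { 0 };
	uint64_t x1 = next_xid64();
	uint64_t x2 = next_xid64();

	(void)load_attrs(&np, &x2);	/* the newer reply lands first */
	printf("older reply accepted: %d\n", load_attrs(&np, &x1)); /* 0 */
	return (0);
}
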
@@ -1236,10 +1261,10 @@ nfs_lookup(ap) nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); /* nfsm_request for NFSv2 causes you to goto to nfsmout upon errors */ - nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); + nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred, &xid); if (error) { - nfsm_postop_attr(dvp, attrflag); + nfsm_postop_attr(dvp, attrflag, &xid); m_freem(mrep); goto nfsmout; } @@ -1260,10 +1285,12 @@ nfs_lookup(ap) } newvp = NFSTOV(np); if (v3) { - nfsm_postop_attr(newvp, attrflag); - nfsm_postop_attr(dvp, attrflag); + u_int64_t dxid = xid; + + nfsm_postop_attr(newvp, attrflag, &xid); + nfsm_postop_attr(dvp, attrflag, &dxid); } else - nfsm_loadattr(newvp, (struct vattr *)0); + nfsm_loadattr(newvp, (struct vattr *)0, &xid); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; @@ -1299,10 +1326,12 @@ nfs_lookup(ap) newvp = NFSTOV(np); } if (v3) { - nfsm_postop_attr(newvp, attrflag); - nfsm_postop_attr(dvp, attrflag); + u_int64_t dxid = xid; + + nfsm_postop_attr(newvp, attrflag, &xid); + nfsm_postop_attr(dvp, attrflag, &dxid); } else - nfsm_loadattr(newvp, (struct vattr *)0); + nfsm_loadattr(newvp, (struct vattr *)0, &xid); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && @@ -1364,6 +1393,7 @@ nfs_read(ap) return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } + /* * nfs readlink call */ @@ -1399,17 +1429,18 @@ nfs_readlinkrpc(vp, uiop, cred) int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); - nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); + nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred, &xid); if (v3) - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, &xid); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); - if (len == NFS_MAXPATHLEN) { - struct nfsnode *np = VTONFS(vp); + if (len == NFS_MAXPATHLEN) { + struct nfsnode *np = VTONFS(vp); #if DIAGNOSTIC if (!np) panic("nfs_readlinkrpc: null np"); @@ -1441,13 +1472,15 @@ nfs_readrpc(vp, uiop, cred) struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); + u_int64_t xid; #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; - if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && !v3) + if (((u_int64_t)uiop->uio_offset + (unsigned int)tsiz > 0xffffffff) && + !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; @@ -1463,9 +1496,9 @@ nfs_readrpc(vp, uiop, cred) *tl++ = txdr_unsigned(len); *tl = 0; } - nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); + nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred, &xid); if (v3) { - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, &xid); if (error) { m_freem(mrep); goto nfsmout; @@ -1473,7 +1506,7 @@ nfs_readrpc(vp, uiop, cred) nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else - nfsm_loadattr(vp, (struct vattr *)0); + nfsm_loadattr(vp, (struct vattr *)0, &xid); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); @@ -1506,6 +1539,7 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; + u_int64_t xid; #if DIAGNOSTIC 
if (uiop->uio_iovcnt != 1) @@ -1534,10 +1568,10 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); - nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); + nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred, &xid); if (v3) { wccflag = NFSV3_WCCCHK; - nfsm_wcc_data(vp, wccflag); + nfsm_wcc_data(vp, wccflag, &xid); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); @@ -1576,23 +1610,29 @@ nfs_writerpc(vp, uiop, cred, iomode, must_commit) } } } else - nfsm_loadattr(vp, (struct vattr *)0); - if ((wccflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */ + nfsm_loadattr(vp, (struct vattr *)0, &xid); + + if (wccflag && vp->v_type != VBAD) /* EINVAL set on VBAD node */ VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); - /* - * we seem to have a case where we end up looping on shutdown and taking down nfs servers. - * For V3, error cases, there is no way to terminate loop, if the len was 0, meaning, - * nmp->nm_wsize was trashed. FreeBSD has this fix in it. Let's try it. - */ - if (error) - break; - tsiz -= len; + /* + * we seem to have a case where we end up looping on shutdown + * and taking down nfs servers. For V3, error cases, there is + * no way to terminate loop, if the len was 0, meaning, + * nmp->nm_wsize was trashed. FreeBSD has this fix in it. + * Let's try it. + */ + if (error) + break; + tsiz -= len; } nfsmout: - /* does it make sense to even say it was committed if we had an error? EKN */ - /* okay well just don't on bad vnodes then. EINVAL will be returned on bad vnodes */ - if ((vp->v_type != VBAD) && (vp->v_mount->mnt_flag & MNT_ASYNC)) + /* EKN + * does it make sense to even say it was committed if we had an error? + * okay well just don't on bad vnodes then. 
EINVAL will be + * returned on bad vnodes + */ + if (vp->v_type != VBAD && (vp->v_mount->mnt_flag & MNT_ASYNC)) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) @@ -1625,6 +1665,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap) int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; + u_int64_t xid; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) @@ -1665,9 +1706,9 @@ nfs_mknodrpc(dvp, vpp, cnp, vap) txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } - nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); + nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred, &xid); if (!error) { - nfsm_mtofh(dvp, newvp, v3, gotvp); + nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); if (!gotvp) { if (newvp) { vput(newvp); @@ -1680,7 +1721,7 @@ nfs_mknodrpc(dvp, vpp, cnp, vap) } } if (v3) - nfsm_wcc_data(dvp, wccflag); + nfsm_wcc_data(dvp, wccflag, &xid); nfsm_reqdone; if (error) { if (newvp) @@ -1691,11 +1732,11 @@ nfs_mknodrpc(dvp, vpp, cnp, vap) *vpp = newvp; } FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); - if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ - VTONFS(dvp)->n_flag |= NMODIFIED; - if (!wccflag) - VTONFS(dvp)->n_attrstamp = 0; - } + if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + } vput(dvp); return (error); } @@ -1718,8 +1759,9 @@ nfs_mknod(ap) int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); - if (!error) + if (!error && newvp) vput(newvp); + *ap->a_vpp = 0; return (error); } @@ -1751,6 +1793,7 @@ nfs_create(ap) struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); + u_int64_t xid; /* * Oops, not for me.. @@ -1796,9 +1839,9 @@ again: txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } - nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); + nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred, &xid); if (!error) { - nfsm_mtofh(dvp, newvp, v3, gotvp); + nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); if (!gotvp) { if (newvp) { vput(newvp); @@ -1811,7 +1854,7 @@ again: } } if (v3) - nfsm_wcc_data(dvp, wccflag); + nfsm_wcc_data(dvp, wccflag, &xid); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { @@ -1889,7 +1932,8 @@ nfs_remove(ap) * unnecessary delayed writes later. 
*/ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); - ubc_setsize(vp, (off_t)0); + np->n_size = 0; + ubc_setsize(vp, (off_t)0); /* XXX check error */ /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, @@ -1951,21 +1995,22 @@ nfs_removerpc(dvp, name, namelen, cred, proc) int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); - nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); + nfsm_request(dvp, NFSPROC_REMOVE, proc, cred, &xid); if (v3) - nfsm_wcc_data(dvp, wccflag); + nfsm_wcc_data(dvp, wccflag, &xid); nfsm_reqdone; if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - } + } return (error); } @@ -2075,30 +2120,33 @@ nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, - (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + - nfsm_rndup(tnamelen)); + (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); - nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); + nfsm_request(fdvp, NFSPROC_RENAME, proc, cred, &xid); if (v3) { - nfsm_wcc_data(fdvp, fwccflag); - nfsm_wcc_data(tdvp, twccflag); + u_int64_t txid = xid; + + nfsm_wcc_data(fdvp, fwccflag, &xid); + nfsm_wcc_data(tdvp, twccflag, &txid); } nfsm_reqdone; - if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ - VTONFS(fdvp)->n_flag |= NMODIFIED; - if (!fwccflag) - VTONFS(fdvp)->n_attrstamp = 0; - } - if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ - VTONFS(tdvp)->n_flag |= NMODIFIED; - if (!twccflag) - VTONFS(tdvp)->n_attrstamp = 0; + if (fdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ + VTONFS(fdvp)->n_flag |= NMODIFIED; + if (!fwccflag) + VTONFS(fdvp)->n_attrstamp = 0; + } + if (tdvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ + VTONFS(tdvp)->n_flag |= NMODIFIED; + if (!twccflag) + VTONFS(tdvp)->n_attrstamp = 0; } return (error); } @@ -2124,6 +2172,7 @@ nfs_link(ap) int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); + u_int64_t xid; if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); @@ -2147,18 +2196,20 @@ nfs_link(ap) nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); - nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); + nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred, &xid); if (v3) { - nfsm_postop_attr(vp, attrflag); - nfsm_wcc_data(tdvp, wccflag); + u_int64_t txid = xid; + + nfsm_postop_attr(vp, attrflag, &xid); + nfsm_wcc_data(tdvp, wccflag, &txid); } nfsm_reqdone; FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); VTONFS(tdvp)->n_flag |= NMODIFIED; - if ((!attrflag) && (vp->v_type != VBAD)) /* EINVAL set on VBAD vnode */ + if (!attrflag && vp->v_type != VBAD) /* EINVAL set on VBAD vnode */ VTONFS(vp)->n_attrstamp = 0; - if ((!wccflag) && (tdvp->v_type != VBAD)) /* EINVAL set on 
VBAD vnode */ + if (!wccflag && tdvp->v_type != VBAD) /* EINVAL set on VBAD vnode */ VTONFS(tdvp)->n_attrstamp = 0; vput(tdvp); /* @@ -2195,6 +2246,7 @@ nfs_symlink(ap) struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); @@ -2217,21 +2269,23 @@ nfs_symlink(ap) txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } - nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); + nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred, &xid); if (v3) { + u_int64_t dxid = xid; + if (!error) - nfsm_mtofh(dvp, newvp, v3, gotvp); - nfsm_wcc_data(dvp, wccflag); + nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); + nfsm_wcc_data(dvp, wccflag, &dxid); } nfsm_reqdone; if (newvp) vput(newvp); FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); - if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ - VTONFS(dvp)->n_flag |= NMODIFIED; - if (!wccflag) - VTONFS(dvp)->n_attrstamp = 0; - } + if (dvp->v_type != VBAD) { /* EINVAL set on VBAD vnode */ + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + } vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. @@ -2270,6 +2324,7 @@ nfs_mkdir(ap) struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); + u_int64_t xid, dxid; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc))) { VOP_ABORTOP(dvp, cnp); @@ -2294,17 +2349,18 @@ nfs_mkdir(ap) txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } - nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); + nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred, &xid); + dxid = xid; if (!error) - nfsm_mtofh(dvp, newvp, v3, gotvp); + nfsm_mtofh(dvp, newvp, v3, gotvp, &xid); if (v3) - nfsm_wcc_data(dvp, wccflag); + nfsm_wcc_data(dvp, wccflag, &dxid); nfsm_reqdone; if (dvp->v_type != VBAD) { /* EINVAL set on this case */ VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - } + } /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. 
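A subtlety repeated in the rename, link, symlink, and mkdir hunks above: when one V3 reply carries post-op attributes or wcc data for two vnodes, the reply xid is first copied into a second variable (txid or dxid), because each nfsm_wcc_data/nfsm_postop_attr decode consumes its xid argument against its own nfsnode and may zero it. A hypothetical userland model (names invented; see the sketch earlier for the full check) showing why one shared variable would not do:

#include <stdio.h>
#include <stdint.h>

struct node_model {
	uint64_t n_xid;
};

/* model of the per-vnode xid handling in nfsm_wcc_data/nfsm_postop_attr */
static void
decode_attrs(struct node_model *np, uint64_t *xidp)
{
	if (*xidp < np->n_xid)
		*xidp = 0;		/* stale for this vnode: dropped */
	else
		np->n_xid = *xidp;	/* fresh: accepted */
}

int
main(void)
{
	struct node_model fdvp = { 7 }, tdvp = { 3 };
	uint64_t xid = 5;	/* reply xid: older than fdvp's, newer than tdvp's */
	uint64_t txid = xid;	/* independent copy for the second vnode */

	decode_attrs(&fdvp, &xid);	/* dropped; xid is zeroed here */
	decode_attrs(&tdvp, &txid);	/* still sees 5 and accepts it */
	printf("xid %llu txid %llu tdvp.n_xid %llu\n",
	    (unsigned long long)xid, (unsigned long long)txid,
	    (unsigned long long)tdvp.n_xid);
	return (0);
}
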
@@ -2315,7 +2371,7 @@ nfs_mkdir(ap) newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, - cnp->cn_proc, &np); + cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) @@ -2353,22 +2409,23 @@ nfs_rmdir(ap) int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); - nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); + nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred, &xid); if (v3) - nfsm_wcc_data(dvp, wccflag); + nfsm_wcc_data(dvp, wccflag, &xid); nfsm_reqdone; FREE_ZONE(cnp->cn_pnbuf, cnp->cn_pnlen, M_NAMEI); - if (dvp->v_type != VBAD) { /* EINVAL set on this case */ - VTONFS(dvp)->n_flag |= NMODIFIED; - if (!wccflag) - VTONFS(dvp)->n_attrstamp = 0; - } + if (dvp->v_type != VBAD) { /* EINVAL set on this case */ + VTONFS(dvp)->n_flag |= NMODIFIED; + if (!wccflag) + VTONFS(dvp)->n_attrstamp = 0; + } cache_purge(dvp); cache_purge(vp); vput(vp); @@ -2454,6 +2511,7 @@ nfs_readdirrpc(vp, uiop, cred) int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); + u_int64_t xid; #ifndef nolint dp = (struct dirent *)0; @@ -2493,9 +2551,9 @@ nfs_readdirrpc(vp, uiop, cred) *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); - nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); + nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred, &xid); if (v3) { - nfsm_postop_attr(vp, attrflag); + nfsm_postop_attr(vp, attrflag, &xid); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; @@ -2641,6 +2699,7 @@ nfs_readdirplusrpc(vp, uiop, cred) u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; + u_int64_t xid, savexid; #ifndef nolint dp = (struct dirent *)0; @@ -2678,8 +2737,10 @@ nfs_readdirplusrpc(vp, uiop, cred) *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); - nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); - nfsm_postop_attr(vp, attrflag); + nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred, + &xid); + savexid = xid; + nfsm_postop_attr(vp, attrflag, &xid); if (error) { m_freem(mrep); goto nfsmout; @@ -2776,7 +2837,8 @@ nfs_readdirplusrpc(vp, uiop, cred) dpos = dpossav1; mdsav2 = md; md = mdsav1; - nfsm_loadattr(newvp, (struct vattr *)0); + xid = savexid; + nfsm_loadattr(newvp, (struct vattr *)0, &xid); dpos = dpossav2; md = mdsav2; dp->d_type = @@ -2937,13 +2999,14 @@ nfs_lookitup(dvp, name, len, cred, procp, npp) struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); + u_int64_t xid; nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); - nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); + nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred, &xid); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { @@ -2970,7 +3033,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp) newvp = NFSTOV(np); } if (v3) { - nfsm_postop_attr(newvp, attrflag); + nfsm_postop_attr(newvp, attrflag, &xid); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) 
@@ -2980,7 +3043,7 @@ nfs_lookitup(dvp, name, len, cred, procp, npp) return (ENOENT); } } else - nfsm_loadattr(newvp, (struct vattr *)0); + nfsm_loadattr(newvp, (struct vattr *)0, &xid); } nfsm_reqdone; if (npp && *npp == NULL) { @@ -3014,7 +3077,9 @@ nfs_commit(vp, offset, cnt, cred, procp) caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; + u_int64_t xid; + FSDBG(521, vp, offset, cnt, nmp->nm_flag); if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; @@ -3024,12 +3089,12 @@ nfs_commit(vp, offset, cnt, cred, procp) txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); - nfsm_request(vp, NFSPROC_COMMIT, procp, cred); - nfsm_wcc_data(vp, wccflag); + nfsm_request(vp, NFSPROC_COMMIT, procp, cred, &xid); + nfsm_wcc_data(vp, wccflag, &xid); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, - NFSX_V3WRITEVERF)) { + NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; @@ -3144,7 +3209,6 @@ nfs_fsync(ap) struct proc * a_p; } */ *ap; { - return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } @@ -3171,16 +3235,15 @@ nfs_flush(vp, cred, waitfor, p, commit) u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; - kern_return_t kret; - upl_t *upls = NULL; - - #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; - struct upl_t *upls_on_stack[NFS_COMMITBVECSIZ]; - int bvecsize = 0, bveccount, buplpos; + int bvecsize = 0, bveccount; + kern_return_t kret; + upl_t upl; + + FSDBG_TOP(517, vp, np, waitfor, commit); if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; @@ -3195,16 +3258,18 @@ nfs_flush(vp, cred, waitfor, p, commit) * job. */ again: + FSDBG(518, vp->v_dirtyblkhd.lh_first, np->n_flag, 0, 0); if (vp->v_dirtyblkhd.lh_first) np->n_flag |= NMODIFIED; off = (u_quad_t)-1; endoff = 0; bvecpos = 0; - buplpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers waiting for a commit. + * This is an upper bound - any with dirty pages must be + * written not commited. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { @@ -3212,45 +3277,79 @@ again: if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; + FSDBG(519, bp, bp->b_flags, bveccount, 0); } /* * Allocate space to remember the list of bufs to commit. It is - * important to use M_NOWAIT here to avoid a race with nfs_write. + * important to use M_NOWAIT here to avoid a race with nfs_write * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. 
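
/*
 * Where the nfs_flush() rework is headed: buffers marked
 * B_DELWRI|B_NEEDCOMMIT are gathered into bvec[], and when they all share
 * one write credential a single COMMIT spanning the union of their dirty
 * ranges is sent, rather than one RPC per buffer (that is what the
 * off/endoff bookkeeping above is for).  The range computation, sketched
 * with stand-in types:
 */

#include <stdint.h>

struct cbuf {
    struct cbuf *next;
    uint64_t     off;   /* file offset of the buffer's dirty region */
    uint64_t     len;   /* length of the dirty region */
};

static int
commit_coalesced(struct cbuf *list,
                 int (*commit_rpc)(uint64_t off, uint64_t cnt))
{
    uint64_t off = UINT64_MAX, endoff = 0;
    struct cbuf *bp;

    for (bp = list; bp != NULL; bp = bp->next) {
        if (bp->off < off)
            off = bp->off;
        if (bp->off + bp->len > endoff)
            endoff = bp->off + bp->len;
    }
    if (endoff == 0)
        return 0;                           /* nothing wanted committing */
    return commit_rpc(off, endoff - off);   /* one RPC for the whole set */
}

/*
 * The M_NOWAIT allocation above is safe precisely because failure has a
 * graceful fallback: the on-stack array, and therefore smaller batches.
 */
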
*/ if (bvec != NULL && bvec != bvec_on_stack) - _FREE(bvec, M_TEMP); - if (upls != NULL && upls != (upl_t *) upls_on_stack) - _FREE(upls, M_TEMP); - - bvecsize = NFS_COMMITBVECSIZ; + _FREE(bvec, M_TEMP); if (bveccount > NFS_COMMITBVECSIZ) { MALLOC(bvec, struct buf **, - bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); - MALLOC(upls, upl_t *, - bveccount * sizeof(upl_t), M_TEMP, M_NOWAIT); - if ((bvec == NULL) || (upls == NULL)) { - if (bvec) - _FREE(bvec, M_TEMP); - if (upls) - _FREE(upls, M_TEMP); + bveccount * sizeof(struct buf *), M_TEMP, + M_NOWAIT); + if (bvec == NULL) { bvec = bvec_on_stack; - upls = (upl_t *) upls_on_stack; + bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; - upls = (upl_t *) upls_on_stack; + bvecsize = NFS_COMMITBVECSIZ; } + FSDBG(519, 0, bvecsize, bveccount, 0); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; + /* XXX nbp aok if we sleep in this loop? */ + FSDBG(520, bp, bp->b_flags, bvecpos, bp->b_bufsize); + FSDBG(520, bp->b_validoff, bp->b_validend, + bp->b_dirtyoff, bp->b_dirtyend); if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; + SET(bp->b_flags, B_BUSY); + /* + * we need a upl to see if the page has been + * dirtied (think mmap) since the unstable write, and + * so to prevent vm from paging during our commit rpc + */ + if (ISSET(bp->b_flags, B_PAGELIST)) { + upl = bp->b_pagelist; + } else { + kret = ubc_create_upl(vp, ubc_blktooff(vp, bp->b_lblkno), + bp->b_bufsize, &upl, + NULL, UPL_PRECIOUS); + if (kret != KERN_SUCCESS) + panic("nfs_flush: create upl %d", kret); +#ifdef UBC_DEBUG + upl_ubc_alias_set(upl, current_act(), 1); +#endif /* UBC_DEBUG */ + } + if (upl_dirty_page(ubc_upl_pageinfo(upl), 0)) { + if (!ISSET(bp->b_flags, B_PAGELIST)) { + err = ubc_upl_abort(upl, NULL); + if (err) + printf("nfs_flush: upl abort %d\n", err); + } + /* + * Any/all of it may be modified... + */ + bp->b_dirtyoff = bp->b_validoff; + bp->b_dirtyend = bp->b_validend; + CLR(bp->b_flags, B_BUSY | B_NEEDCOMMIT); + continue; + } + if (!ISSET(bp->b_flags, B_PAGELIST)) { + bp->b_pagelist = upl; + SET(bp->b_flags, B_PAGELIST); + ubc_upl_map(upl, (vm_address_t *)&bp->b_data); + } bremfree(bp); /* * Work out if all buffers are using the same cred @@ -3260,28 +3359,7 @@ again: wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; - SET(bp->b_flags, (B_BUSY | B_WRITEINPROG)); - - /* - * we need ubc_create_upl so if vm decides to - * do paging while we are waiting on commit rpc, - * that it doesn't pick these pages. 
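
/*
 * The loop above is why each candidate buffer now gets a UPL before the
 * COMMIT: a page dirtied again through mmap after the unstable write
 * would be lost by committing, so such a buffer is widened back to its
 * full valid range and pushed through the write path instead.  The shape
 * of that decision, with stand-in fields for b_validoff/b_dirtyoff and
 * B_NEEDCOMMIT:
 */

struct nbuf {
    int valid_off, valid_end;
    int dirty_off, dirty_end;
    int needcommit;
};

static void
check_before_commit(struct nbuf *bp, int page_is_dirty)
{
    if (page_is_dirty) {
        /* any or all of it may be modified: rewrite it, don't commit */
        bp->dirty_off  = bp->valid_off;
        bp->dirty_end  = bp->valid_end;
        bp->needcommit = 0;
    }
    /* else: keep the mapped page list (B_PAGELIST) held across the RPC
     * so the VM system cannot page these pages while it is in flight */
}
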
- */ - if (!ISSET(bp->b_flags, B_PAGELIST)) { - kret = ubc_create_upl(vp, - ubc_blktooff(vp, bp->b_lblkno), - bp->b_bufsize, - &(upls[buplpos]), - NULL, - UPL_PRECIOUS); - if (kret != KERN_SUCCESS) - panic("nfs_getcacheblk: get pagelists failed with (%d)", kret); - -#ifdef UBC_DEBUG - upl_ubc_alias_set(upls[buplpos], ioaddr, 1); -#endif /* UBC_DEBUG */ - buplpos++; /* not same as bvecpos if upl existed already */ - } + SET(bp->b_flags, B_WRITEINPROG); /* * A list of these buffers is kept so that the @@ -3316,6 +3394,8 @@ again: for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; + FSDBG(522, bp, bp->b_blkno * DEV_BSIZE, + bp->b_dirtyoff, bp->b_dirtyend); off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend @@ -3328,21 +3408,6 @@ again: if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); - - for (i = 0; i < buplpos; i++) { - /* - * Before the VOP_BWRITE and biodone(ASYNC)/brelse, we have to undo - * holding the vm page or we we will deadlock on another vm_fault_list_request. - * Here's a convenient place to put it. - * Better if we could hold it by setting the PAGELIST flag and kernel_upl_map - * as does nfs_writebp. Then normal biodones and brelse will clean it up and - * we can avoid this abort. For now make minimal changes. - */ - err = ubc_upl_abort(upls[i], NULL); - if (err) - printf("nfs_flush: kernel_upl_abort %d\n", err); - } - /* * Now, either mark the blocks I/O done or mark the @@ -3350,31 +3415,31 @@ again: * succeeded. */ for (i = 0; i < bvecpos; i++) { - bp = bvec[i]; + FSDBG(523, bp, retv, bp->b_flags, 0); CLR(bp->b_flags, (B_NEEDCOMMIT | B_WRITEINPROG)); if (retv) { - brelse(bp); + brelse(bp); } else { - vp->v_numoutput++; - SET(bp->b_flags, B_ASYNC); - s = splbio(); - CLR(bp->b_flags, (B_READ|B_DONE|B_ERROR|B_DELWRI)); - bp->b_dirtyoff = bp->b_dirtyend = 0; - reassignbuf(bp, vp); - splx(s); - biodone(bp); + s = splbio(); + vp->v_numoutput++; + SET(bp->b_flags, B_ASYNC); + CLR(bp->b_flags, + (B_READ|B_DONE|B_ERROR|B_DELWRI)); + bp->b_dirtyoff = bp->b_dirtyend = 0; + reassignbuf(bp, vp); + splx(s); + biodone(bp); } } } - /* - * Start/do any write(s) that are required. - * There is a window here where B_BUSY protects the buffer. The vm pages have been - * freed up, yet B_BUSY is set. Don't think you will hit any busy/incore problems while - * we sleep, but not absolutely sure. Keep an eye on it. Otherwise we will have to hold - * vm page across this locked. - EKN + * Start/do any write(s) that are required. There is a window here + * where B_BUSY protects the buffer. The vm pages have been freed up, + * yet B_BUSY is set. Don't think you will hit any busy/incore problems + * while we sleep, but not absolutely sure. Keep an eye on it. Otherwise + * we will have to hold vm page across this locked. 
- EKN */ loop: if (current_thread_aborted()) { @@ -3385,34 +3450,36 @@ loop: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (ISSET(bp->b_flags, B_BUSY)) { + FSDBG(524, bp, waitfor, passone, bp->b_flags); if (waitfor != MNT_WAIT || passone) continue; SET(bp->b_flags, B_WANTED); error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), - "nfsfsync", slptimeo); + "nfsfsync", slptimeo); splx(s); if (error) { - if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { - error = EINTR; - goto done; - } - if (slpflag == PCATCH) { - slpflag = 0; - slptimeo = 2 * hz; - } + if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { + error = EINTR; + goto done; + } + if (slpflag == PCATCH) { + slpflag = 0; + slptimeo = 2 * hz; + } } goto loop; } if (!ISSET(bp->b_flags, B_DELWRI)) panic("nfs_fsync: not dirty"); + FSDBG(525, bp, passone, commit, bp->b_flags); if ((passone || !commit) && ISSET(bp->b_flags, B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) - SET(bp->b_flags, (B_BUSY|B_ASYNC)); + SET(bp->b_flags, B_BUSY|B_ASYNC); else - SET(bp->b_flags, (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT)); - + SET(bp->b_flags, + B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; @@ -3442,15 +3509,15 @@ loop: goto loop; } } + FSDBG(526, np->n_flag, np->n_error, 0, 0); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: + FSDBG_BOT(517, vp, np, error, 0); if (bvec != NULL && bvec != bvec_on_stack) _FREE(bvec, M_TEMP); - if (upls != NULL && upls != (upl_t *) upls_on_stack) - _FREE(upls, M_TEMP); return (error); } @@ -3732,51 +3799,31 @@ nfs_writebp(bp, force) splx(s); /* - * Since the B_BUSY flag is set, we need to lock the page before doing nfs_commit. - * Otherwise we may block and get a busy incore pages during a vm pageout. - * Move the existing code up before the commit. + * Since the B_BUSY flag is set, we need to lock the page before doing + * nfs_commit. Otherwise we may block and get a busy incore pages + * during a vm pageout. Move the existing code up before the commit. */ - - if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp)) { - - if (!ISSET(bp->b_flags, B_PAGELIST)) { - kret = ubc_create_upl(vp, - ubc_blktooff(vp, bp->b_lblkno), - bp->b_bufsize, - &upl, - &pl, - UPL_PRECIOUS); - if (kret != KERN_SUCCESS) { - panic("nfs_writebp: get pagelists failed with (%d)", kret); - } - + if (!ISSET(bp->b_flags, B_META) && UBCISVALID(vp) && + !ISSET(bp->b_flags, B_PAGELIST)) { + kret = ubc_create_upl(vp, ubc_blktooff(vp, bp->b_lblkno), + bp->b_bufsize, &upl, &pl, UPL_PRECIOUS); + if (kret != KERN_SUCCESS) + panic("nfs_writebp: ubc_create_upl %d", kret); #ifdef UBC_DEBUG - upl_ubc_alias_set(upl, ioaddr, 2); + upl_ubc_alias_set(upl, current_act(), 2); #endif /* UBC_DEBUG */ + s = splbio(); + bp->b_pagelist = upl; + SET(bp->b_flags, B_PAGELIST); + splx(s); - s = splbio(); - - bp->b_pagelist = upl; - SET(bp->b_flags, B_PAGELIST); - splx(s); - - kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data)); - if (kret != KERN_SUCCESS) { - panic("nfs_writebp: ubc_upl_map() failed with (%d)", kret); - } - if(bp->b_data == 0) - panic("nfs_writebp: upl_map mapped 0"); - - if (!upl_page_present(pl, 0)) { - /* - * may be the page got paged out. - * let's just read it in. 
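
/*
 * nfs_writebp() in this hunk gets the same consolidation: take the page
 * list once, map it, mark the buffer B_PAGELIST, and let the ordinary
 * biodone()/brelse() path clean it up, instead of the old
 * create-then-abort dance in nfs_flush().  Generic shape of that
 * pin-pages-before-I/O idiom; pin_pages() is a hypothetical stand-in for
 * the ubc_create_upl()/ubc_upl_map() pair:
 */

typedef int pagelist_t;             /* stand-in for upl_t */

struct iobuf {
    void       *data;
    int         has_pagelist;       /* analog of B_PAGELIST */
    pagelist_t  pl;
};

/* hypothetical helper: wire the buffer's pages and map them */
extern int pin_pages(struct iobuf *bp, pagelist_t *plp, void **datap);

static int
start_write(struct iobuf *bp)
{
    if (!bp->has_pagelist) {
        if (pin_pages(bp, &bp->pl, &bp->data) != 0)
            return -1;              /* the kernel panics here instead */
        bp->has_pagelist = 1;       /* completion path unmaps and frees */
    }
    /* ... issue the write; pages cannot be reclaimed underneath it ... */
    return 0;
}
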
It is marked - * busy so we should not have any one - * yanking this page underneath the fileIO - */ - panic("nfs_writebp: nopage"); - } - } + kret = ubc_upl_map(upl, (vm_address_t *)&(bp->b_data)); + if (kret != KERN_SUCCESS) + panic("nfs_writebp: ubc_upl_map %d", kret); + if(bp->b_data == 0) + panic("nfs_writebp: ubc_upl_map mapped 0"); + if (!upl_page_present(pl, 0)) /* even more paranoia */ + panic("nfs_writebp: nopage"); } /* @@ -4110,37 +4157,36 @@ nfs_pagein(ap) struct uio auio; struct iovec aiov; struct uio * uio = &auio; - int nocommit = flags & UPL_NOCOMMIT; + int nofreeupl = flags & UPL_NOCOMMIT; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE, - (int)f_offset, size, pl, pl_offset, 0); + FSDBG(322, f_offset, size, pl, pl_offset); + if (pl == (upl_t)NULL) + panic("nfs_pagein: no upl"); if (UBCINVALID(vp)) { -#if DIAGNOSTIC - panic("nfs_pagein: invalid vp"); -#endif /* DIAGNOSTIC */ + printf("nfs_pagein: invalid vnode 0x%x", (int)vp); + if (!nofreeupl) + (void) ubc_upl_abort(pl, NULL); return (EPERM); } - UBCINFOCHECK("nfs_pagein", vp); - if(pl == (upl_t)NULL) { - panic("nfs_pagein: no upl"); - } - cred = ubc_getcred(vp); - if (cred == NOCRED) - cred = ap->a_cred; - - if (size <= 0) + if (size <= 0) { + printf("nfs_pagein: invalid size %d", size); + if (!nofreeupl) + (void) ubc_upl_abort(pl, NULL); return (EINVAL); - - if (f_offset < 0 || f_offset >= np->n_size - || (f_offset & PAGE_MASK_64)) { - if (!nocommit) + } + if (f_offset < 0 || f_offset >= np->n_size || + (f_offset & PAGE_MASK_64)) { + if (!nofreeupl) ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); return (EINVAL); } + cred = ubc_getcred(vp); + if (cred == NOCRED) + cred = ap->a_cred; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; @@ -4149,7 +4195,6 @@ nfs_pagein(ap) auio.uio_rw = UIO_READ; auio.uio_procp = NULL; - if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); biosize = min(vp->v_mount->mnt_stat.f_iosize, size); @@ -4157,59 +4202,6 @@ nfs_pagein(ap) if (biosize & PAGE_MASK) panic("nfs_pagein(%x): biosize not page aligned", biosize); -#if 0 /* Why bother? */ -/* DO NOT BOTHER WITH "approximately maintained cache consistency" */ -/* Does not make sense in paging paths -- Umesh*/ - /* - * For nfs, cache consistency can only be maintained approximately. - * Although RFC1094 does not specify the criteria, the following is - * believed to be compatible with the reference port. - * For nqnfs, full cache consistency is maintained within the loop. - * For nfs: - * If the file's modify time on the server has changed since the - * last read rpc or you have written to the file, - * you may have lost data cache consistency with the - * server, so flush all of the file's data out of the cache. - * Then force a getattr rpc to ensure that you have up to date - * attributes. - * NB: This implies that cache data can be read when up to - * NFS_ATTRTIMEO seconds out of date. If you find that you need current - * attributes this could be forced by setting n_attrstamp to 0 before - * the VOP_GETATTR() call. 
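
/*
 * The pagein rework above (and the pageout rework further down) pins down
 * one ownership rule for the UPL: unless the caller passed UPL_NOCOMMIT,
 * meaning the caller will dispose of the pages itself, every exit path --
 * including the early argument-check failures -- must abort or commit the
 * UPL exactly once.  Skeleton of that discipline; the flag value and
 * helpers are stand-ins, not the real ubc interfaces:
 */

#define XUPL_NOCOMMIT 0x01          /* stand-in for UPL_NOCOMMIT */

extern void xupl_abort(void *pl);   /* stand-in for ubc_upl_abort[_range] */
extern void xupl_commit(void *pl);  /* stand-in for ubc_upl_commit_range */

static int
pagein_skeleton(void *pl, int flags, long long f_offset, long long n_size)
{
    int nofreeupl = flags & XUPL_NOCOMMIT;

    if (f_offset < 0 || f_offset >= n_size) {
        if (!nofreeupl)
            xupl_abort(pl);         /* error path still frees the pages */
        return -1;                  /* EINVAL in the real code */
    }
    /* ... read the data in ... */
    if (!nofreeupl)
        xupl_commit(pl);            /* success: hand the pages to the VM */
    return 0;
}
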
- */ - if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { - if (np->n_flag & NMODIFIED) { - np->n_attrstamp = 0; - error = VOP_GETATTR(vp, &vattr, cred, p); - if (error) { - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); - return (error); - } - np->n_mtime = vattr.va_mtime.tv_sec; - } else { - error = VOP_GETATTR(vp, &vattr, cred, p); - if (error){ - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); - return (error); - } - if (np->n_mtime != vattr.va_mtime.tv_sec) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error){ - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); - return (error); - } - np->n_mtime = vattr.va_mtime.tv_sec; - } - } - } -#endif 0 /* Why bother? */ - ubc_upl_map(pl, &ioaddr); ioaddr += pl_offset; xsize = size; @@ -4219,102 +4211,43 @@ nfs_pagein(ap) aiov.iov_len = uio->uio_resid; aiov.iov_base = (caddr_t)ioaddr; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE, - (int)uio->uio_offset, uio->uio_resid, ioaddr, xsize, 0); - -#warning nfs_pagein does not support NQNFS yet. -#if 0 /* why bother? */ -/* NO RESOURCES TO FIX NQNFS CASE */ -/* We need to deal with this later -- Umesh */ - /* - * Get a valid lease. If cached data is stale, flush it. - */ - if (nmp->nm_flag & NFSMNT_NQNFS) { - if (NQNFS_CKINVALID(vp, np, ND_READ)) { - do { - error = nqnfs_getlease(vp, ND_READ, cred, p); - } while (error == NQNFS_EXPIRED); - if (error){ - ubc_upl_unmap(pl); - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); - - return (error); - } - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE)) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - ubc_upl_unmap(pl); - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); - return (error); - } - np->n_brev = np->n_lrev; - } - } - } -#endif 0 /* why bother? */ - - if (np->n_flag & NQNFSNONCACHE) { - error = nfs_readrpc(vp, uio, cred); - ubc_upl_unmap(pl); - - if (!nocommit) { - if(error) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); - else - ubc_upl_commit_range(pl, pl_offset, size, - UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY); - } - return (error); - } - + FSDBG(322, uio->uio_offset, uio->uio_resid, ioaddr, xsize); +#warning our nfs_pagein does not support NQNFS /* * With UBC we get here only when the file data is not in the VM * page cache, so go ahead and read in. */ #ifdef UBC_DEBUG - upl_ubc_alias_set(pl, ioaddr, 2); + upl_ubc_alias_set(pl, current_act(), 2); #endif /* UBC_DEBUG */ nfsstats.pageins++; error = nfs_readrpc(vp, uio, cred); if (!error) { - int zoff; - int zcnt; - if (uio->uio_resid) { /* - * If uio_resid > 0, there is a hole in the file and - * no writes after the hole have been pushed to - * the server yet... or we're at the EOF + * If uio_resid > 0, there is a hole in the file + * and no writes after the hole have been pushed + * to the server yet... or we're at the EOF * Just zero fill the rest of the valid area. 
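
/*
 * The zero fill mentioned above, reduced to its arithmetic: when the
 * server returns fewer bytes than the block size (a hole in the file, or
 * EOF), the tail of the mapped page run must be cleared before the pages
 * enter the VM page cache, or stale mapping contents would show through.
 */

#include <string.h>

static void
zero_fill_tail(char *ioaddr, int biosize, int resid)
{
    if (resid > 0) {                /* uio_resid left after the read */
        int zoff = biosize - resid; /* where the valid data ends */
        memset(ioaddr + zoff, 0, resid);
    }
}
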
*/ - zcnt = uio->uio_resid; - zoff = biosize - zcnt; + int zcnt = uio->uio_resid; + int zoff = biosize - zcnt; bzero((char *)ioaddr + zoff, zcnt); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 324)) | DBG_FUNC_NONE, - (int)uio->uio_offset, zoff, zcnt, ioaddr, 0); - + FSDBG(324, uio->uio_offset, zoff, zcnt, ioaddr); uio->uio_offset += zcnt; } ioaddr += biosize; xsize -= biosize; } else - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 322)) | DBG_FUNC_NONE, - (int)uio->uio_offset, uio->uio_resid, error, -1, 0); - + FSDBG(322, uio->uio_offset, uio->uio_resid, error, -1); if (p && (vp->v_flag & VTEXT) && - (((nmp->nm_flag & NFSMNT_NQNFS) && - NQNFS_CKINVALID(vp, np, ND_READ) && - np->n_lrev != np->n_brev) || - (!(nmp->nm_flag & NFSMNT_NQNFS) && - np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { + ((nmp->nm_flag & NFSMNT_NQNFS && + NQNFS_CKINVALID(vp, np, ND_READ) && + np->n_lrev != np->n_brev) || + (!(nmp->nm_flag & NFSMNT_NQNFS) && + np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { uprintf("Process killed due to text file modification\n"); psignal(p, SIGKILL); p->p_flag |= P_NOSWAP; @@ -4324,20 +4257,20 @@ nfs_pagein(ap) ubc_upl_unmap(pl); - if (!nocommit) { + if (!nofreeupl) { if (error) ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_ERROR | UPL_ABORT_FREE_ON_EMPTY); + UPL_ABORT_ERROR | + UPL_ABORT_FREE_ON_EMPTY); else ubc_upl_commit_range(pl, pl_offset, size, - UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY); + UPL_COMMIT_CLEAR_DIRTY | + UPL_COMMIT_FREE_ON_EMPTY); } - return (error); } - /* * Vnode op for pageout using UPL * Derived from nfs_write() @@ -4369,33 +4302,34 @@ nfs_pageout(ap) struct buf *bp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn; - int bufsize; int n = 0, on, error = 0, iomode, must_commit, s; off_t off; vm_offset_t ioaddr; struct uio auio; struct iovec aiov; struct uio * uio = &auio; - int nocommit = flags & UPL_NOCOMMIT; + int nofreeupl = flags & UPL_NOCOMMIT; int iosize; int pgsize; - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 323)) | DBG_FUNC_NONE, - (int)f_offset, size, pl, pl_offset, 0); + FSDBG(323, f_offset, size, pl, pl_offset); + + if (pl == (upl_t)NULL) + panic("nfs_pageout: no upl"); if (UBCINVALID(vp)) { -#if DIAGNOSTIC - panic("nfs_pageout: invalid vnode"); -#endif + printf("nfs_pageout: invalid vnode 0x%x", (int)vp); + if (!nofreeupl) + (void) ubc_upl_abort(pl, NULL); return (EIO); } UBCINFOCHECK("nfs_pageout", vp); - if (size <= 0) + if (size <= 0) { + printf("nfs_pageout: invalid size %d", size); + if (!nofreeupl) + (void) ubc_upl_abort(pl, NULL); return (EINVAL); - - if (pl == (upl_t)NULL) { - panic("nfs_pageout: no upl"); } /* @@ -4408,7 +4342,6 @@ nfs_pageout(ap) if (biosize & PAGE_MASK) panic("nfs_pageout(%x): biosize not page aligned", biosize); - /* * Check to see whether the buffer is incore * If incore and not busy invalidate it from the cache @@ -4419,15 +4352,15 @@ nfs_pageout(ap) lbn = f_offset / PAGE_SIZE; /* to match the size getblk uses */ for (iosize = size; iosize > 0; iosize -= PAGE_SIZE, lbn++) { - s = splbio(); if (bp = incore(vp, lbn)) { + FSDBG(323, lbn*PAGE_SIZE, 1, bp, bp->b_flags); if (ISSET(bp->b_flags, B_BUSY)) { - /* don't panic incore. just tell vm we are busy */ - (void) ubc_upl_abort(pl, NULL); + /* no panic. 
just tell vm we are busy */ + if (!nofreeupl) + (void) ubc_upl_abort(pl, NULL); return(EBUSY); - }; - + } bremfree(bp); SET(bp->b_flags, (B_BUSY | B_INVAL)); brelse(bp); @@ -4441,23 +4374,25 @@ nfs_pageout(ap) if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY); + if (!nofreeupl) + ubc_upl_abort_range(pl, pl_offset, size, + UPL_ABORT_FREE_ON_EMPTY); return (np->n_error); } if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); if (f_offset < 0 || f_offset >= np->n_size || - (f_offset & PAGE_MASK_64) || (size & PAGE_MASK)) { - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, UPL_ABORT_FREE_ON_EMPTY); + f_offset & PAGE_MASK_64 || size & PAGE_MASK) { + if (!nofreeupl) + ubc_upl_abort_range(pl, pl_offset, size, + UPL_ABORT_FREE_ON_EMPTY); return (EINVAL); } ubc_upl_map(pl, &ioaddr); - if ((f_offset + size) > np->n_size) + if (f_offset + size > np->n_size) iosize = np->n_size - f_offset; else iosize = size; @@ -4465,9 +4400,10 @@ nfs_pageout(ap) pgsize = (iosize + (PAGE_SIZE - 1)) & ~PAGE_MASK; if (size > pgsize) { - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset + pgsize, size - pgsize, - UPL_ABORT_FREE_ON_EMPTY); + if (!nofreeupl) + ubc_upl_abort_range(pl, pl_offset + pgsize, + size - pgsize, + UPL_ABORT_FREE_ON_EMPTY); } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; @@ -4479,158 +4415,41 @@ nfs_pageout(ap) aiov.iov_len = iosize; aiov.iov_base = (caddr_t)ioaddr + pl_offset; - /* * check for partial page and clear the * contents past end of the file before * releasing it in the VM page cache */ - if ((f_offset < np->n_size) && (f_offset + size) > np->n_size) { + if (f_offset < np->n_size && f_offset + size > np->n_size) { size_t io = np->n_size - f_offset; bzero((caddr_t)(ioaddr + pl_offset + io), size - io); - KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 321)) | DBG_FUNC_NONE, - (int)np->n_size, (int)f_offset, (int)f_offset + io, size - io, 0); + FSDBG(321, np->n_size, f_offset, f_offset + io, size - io); } do { - -#warning nfs_pageout does not support NQNFS yet. -#if 0 /* why bother? */ -/* NO RESOURCES TO FIX NQNFS CASE */ -/* We need to deal with this later -- Umesh */ - - /* - * Check for a valid write lease. - */ - if ((nmp->nm_flag & NFSMNT_NQNFS) && - NQNFS_CKINVALID(vp, np, ND_WRITE)) { - do { - error = nqnfs_getlease(vp, ND_WRITE, cred, p); - } while (error == NQNFS_EXPIRED); - if (error) { - ubc_upl_unmap(pl); - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_FREE_ON_EMPTY); - return (error); - } - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE)) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - ubc_upl_unmap(pl); - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_FREE_ON_EMPTY); - return (error); - } - np->n_brev = np->n_lrev; - } - } -#endif 0 /* why bother? */ - - if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { - iomode = NFSV3WRITE_FILESYNC; - error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); - if (must_commit) - nfs_clearcommit(vp->v_mount); - ubc_upl_unmap(pl); - - /* copied from non-nqnfs case below. 
see there for comments */ - if (!nocommit) { - if (error) { - int abortflags; - short action = nfs_pageouterrorhandler(error); - - switch (action) { - case DUMP: - abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY; - break; - case DUMPANDLOG: - abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY; - if ((error <= ELAST) && (errorcount[error] % 100 == 0)) - printf("nfs_pageout: unexpected error %d. dumping vm page\n", error); - errorcount[error]++; - break; - case RETRY: - abortflags = UPL_ABORT_FREE_ON_EMPTY; - break; - case RETRYWITHSLEEP: - abortflags = UPL_ABORT_FREE_ON_EMPTY; - (void) tsleep(&lbolt, PSOCK, "nfspageout", 0); /* pri unused. PSOCK for placeholder. */ - break; - case SEVER: /* not implemented */ - default: - printf("nfs_pageout: action %d not expected\n", action); - break; - } - - ubc_upl_abort_range(pl, pl_offset, size, abortflags); - /* return error in all cases above */ - - } else - ubc_upl_commit_range(pl, - pl_offset, size, - UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY); - } - return (error); /* note this early return */ - } - +#warning our nfs_pageout does not support NQNFS nfsstats.pageouts++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); again: - bufsize = biosize; #if 0 + /* (removed for UBC) */ + bufsize = biosize; if ((lbn + 1) * biosize > np->n_size) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } #endif vp->v_numoutput++; - - np->n_flag |= NMODIFIED; - -#if 0 /* why bother? */ -/* NO RESOURCES TO FIX NQNFS CASE */ -/* We need to deal with this later -- Umesh */ - /* - * Check for valid write lease and get one as required. - * In case getblk() and/or bwrite() delayed us. - */ - if ((nmp->nm_flag & NFSMNT_NQNFS) && - NQNFS_CKINVALID(vp, np, ND_WRITE)) { - do { - error = nqnfs_getlease(vp, ND_WRITE, cred, p); - } while (error == NQNFS_EXPIRED); - if (error) - goto cleanup; - - if (np->n_lrev != np->n_brev || - (np->n_flag & NQNFSNONCACHE)) { - error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); - if (error) { - ubc_upl_unmap(pl); - if (!nocommit) - ubc_upl_abort_range(pl, pl_offset, size, - UPL_ABORT_FREE_ON_EMPTY); - - return (error); - } - np->n_brev = np->n_lrev; - goto again; - } - } -#endif 0 /* why bother? */ - + /* NMODIFIED would be set here if doing unstable writes */ iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); if (must_commit) nfs_clearcommit(vp->v_mount); - vp->v_numoutput--; + vpwakeup(vp); if (error) goto cleanup; @@ -4656,8 +4475,8 @@ cleanup: * the server is telling us that the file is no longer the same. We * would not want to keep paging out to that. We also saw some 151 * errors from Auspex server and NFSv3 can return errors higher than - * ELAST. Those along with NFS known server errors we will "dump" from vm. - * Errors we don't expect to occur, we dump and log for further + * ELAST. Those along with NFS known server errors we will "dump" from + * vm. Errors we don't expect to occur, we dump and log for further * analysis. Errors that could be transient, networking ones, * we let vm "retry". Lastly, errors that we retry, but may have potential * to storm the network, we "retrywithsleep". "sever" will be used in @@ -4666,10 +4485,10 @@ cleanup: * error handling. Tweaking expected as more statistics are gathered. 
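
/*
 * The disposition switch deleted above survives in the common cleanup
 * path below; its policy reduces to a small table mapping the class of
 * pageout error to the fate of the VM pages.  Flag values here are
 * stand-ins for the UPL_ABORT_* constants:
 */

#define PG_ABORT_FREE 0x1   /* stand-in for UPL_ABORT_FREE_ON_EMPTY */
#define PG_ABORT_DUMP 0x2   /* stand-in for UPL_ABORT_DUMP_PAGES */

enum pg_action { PG_DUMP, PG_DUMPANDLOG, PG_RETRY, PG_RETRYWITHSLEEP };

static int
abort_flags_for(enum pg_action action)
{
    switch (action) {
    case PG_DUMP:           /* server says the file changed: drop pages */
        return PG_ABORT_DUMP | PG_ABORT_FREE;
    case PG_DUMPANDLOG:     /* unexpected error: drop, log every 100th */
        return PG_ABORT_DUMP | PG_ABORT_FREE;
    case PG_RETRY:          /* transient network error: let the VM retry */
        return PG_ABORT_FREE;
    case PG_RETRYWITHSLEEP: /* retry, after sleeping a tick so repeated
                             * failures do not storm the network */
        return PG_ABORT_FREE;
    }
    return PG_ABORT_FREE;
}
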
* Note, in the long run we may need another more robust solution to * have some kind of persistant store when the vm cannot dump nor keep - * retrying as a solution, but this would be a file architectural change. + * retrying as a solution, but this would be a file architectural change */ - if (!nocommit) { /* otherwise stacked file system has to handle this */ + if (!nofreeupl) { /* otherwise stacked file system has to handle this */ if (error) { int abortflags; short action = nfs_pageouterrorhandler(error); @@ -4680,7 +4499,8 @@ cleanup: break; case DUMPANDLOG: abortflags = UPL_ABORT_DUMP_PAGES|UPL_ABORT_FREE_ON_EMPTY; - if ((error <= ELAST) && (errorcount[error] % 100 == 0)) + if (error <= ELAST && + (errorcount[error] % 100 == 0)) printf("nfs_pageout: unexpected error %d. dumping vm page\n", error); errorcount[error]++; break; @@ -4689,7 +4509,9 @@ cleanup: break; case RETRYWITHSLEEP: abortflags = UPL_ABORT_FREE_ON_EMPTY; - (void) tsleep(&lbolt, PSOCK, "nfspageout", 0); /* pri unused. PSOCK for placeholder. */ + /* pri unused. PSOCK for placeholder. */ + (void) tsleep(&lbolt, PSOCK, + "nfspageout", 0); break; case SEVER: /* not implemented */ default: @@ -4702,7 +4524,8 @@ cleanup: } else ubc_upl_commit_range(pl, pl_offset, pgsize, - UPL_COMMIT_CLEAR_DIRTY | UPL_COMMIT_FREE_ON_EMPTY); + UPL_COMMIT_CLEAR_DIRTY | + UPL_COMMIT_FREE_ON_EMPTY); } return (error); } @@ -4726,7 +4549,6 @@ nfs_blktooff(ap) return (0); } -/* Blktooff derives file offset given a logical block number */ static int nfs_offtoblk(ap) struct vop_offtoblk_args /* { diff --git a/bsd/nfs/nfsm_subs.h b/bsd/nfs/nfsm_subs.h index 75f019ce0..0f31ebb57 100644 --- a/bsd/nfs/nfsm_subs.h +++ b/bsd/nfs/nfsm_subs.h @@ -170,7 +170,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, bcopy((caddr_t)(f), (caddr_t)tl, NFSX_V3FH); \ } -#define nfsm_mtofh(d, v, v3, f) \ +#define nfsm_mtofh(d, v, v3, f, x) \ { struct nfsnode *ttnp; nfsfh_t *ttfhp; int ttfhsize; \ if (v3) { \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ @@ -195,7 +195,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, nfsm_adv(NFSX_V3FATTR); \ } \ if (f) \ - nfsm_loadattr((v), (struct vattr *)0); \ + nfsm_loadattr((v), (struct vattr *)0, (x)); \ } #define nfsm_getfh(f, s, v3) \ @@ -211,26 +211,29 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, (s) = NFSX_V2FH; \ nfsm_dissect((f), nfsfh_t *, nfsm_rndup(s)); } -#define nfsm_loadattr(v, a) \ +#define nfsm_loadattr(v, a, x) \ { struct vnode *ttvp = (v); \ - if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a)))) { \ + if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, (a), 0, \ + (x)))) { \ error = t1; \ m_freem(mrep); \ goto nfsmout; \ } \ (v) = ttvp; } -#define nfsm_postop_attr(v, f) \ +#define nfsm_postop_attr(v, f, x) \ { struct vnode *ttvp = (v); \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (((f) = fxdr_unsigned(int, *tl))) { \ if ((t1 = nfs_loadattrcache(&ttvp, &md, &dpos, \ - (struct vattr *)0))) { \ + (struct vattr *)0, 1, (x)))) { \ error = t1; \ (f) = 0; \ m_freem(mrep); \ goto nfsmout; \ } \ + if (*(x) == 0) \ + (f) = 0; \ (v) = ttvp; \ } } @@ -238,7 +241,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, #define NFSV3_WCCRATTR 0 #define NFSV3_WCCCHK 1 -#define nfsm_wcc_data(v, f) \ +#define nfsm_wcc_data(v, f, x) \ { int ttattrf, ttretf = 0; \ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); \ if (*tl == nfs_true) { \ @@ -247,7 +250,7 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, ttretf 
= (VTONFS(v)->n_mtime == \ fxdr_unsigned(u_long, *(tl + 2))); \ } \ - nfsm_postop_attr((v), ttattrf); \ + nfsm_postop_attr((v), ttattrf, (x)); \ if (f) { \ (f) = ttretf; \ } else { \ @@ -324,11 +327,11 @@ struct mbuf *nfsm_rpchead __P((struct ucred *cr, int nmflag, int procid, * m_freem(mrep). Wondering if some of our freeing problems could be * due to nfsv3 calling nfsm_reqdone unlike nfsv2. Separate problem. */ -#define nfsm_request(v, t, p, c) \ +#define nfsm_request(v, t, p, c, x) \ { \ int nfsv3 = (VFSTONFS((v)->v_mount))->nm_flag & NFSMNT_NFSV3; \ if ((error = nfs_request((v), mreq, (t), (p), \ - (c), &mrep, &md, &dpos))) { \ + (c), &mrep, &md, &dpos, (x)))) { \ if (error & NFSERR_RETERR) \ error &= ~NFSERR_RETERR; \ else \ diff --git a/bsd/nfs/nfsnode.h b/bsd/nfs/nfsnode.h index 829508a14..cf37ec795 100644 --- a/bsd/nfs/nfsnode.h +++ b/bsd/nfs/nfsnode.h @@ -66,6 +66,8 @@ #ifndef _NFS_NFS_H_ #include #endif +#include + /* * Silly rename structure that hangs off the nfsnode until the name @@ -108,6 +110,7 @@ struct nfsdmap { * be well aligned and, therefore, tightly packed. */ struct nfsnode { + struct lock__bsd__ n_lock; /* the vnode lock */ LIST_ENTRY(nfsnode) n_hash; /* Hash chain */ CIRCLEQ_ENTRY(nfsnode) n_timer; /* Nqnfs timer chain */ u_quad_t n_size; /* Current size of file */ @@ -140,6 +143,7 @@ struct nfsnode { short n_fhsize; /* size in bytes, of fh */ short n_flag; /* Flag for locking.. */ nfsfh_t n_fh; /* Small File Handle */ + u_int64_t n_xid; /* last xid to loadattr */ }; #define n_atim n_un1.nf_atim @@ -179,6 +183,7 @@ extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; #if defined(KERNEL) + typedef int vop_t __P((void *)); extern vop_t **fifo_nfsv2nodeop_p; extern vop_t **nfsv2_vnodeop_p; @@ -196,9 +201,10 @@ int nqnfs_vop_lease_check __P((struct vop_lease_args *)); int nfs_abortop __P((struct vop_abortop_args *)); int nfs_inactive __P((struct vop_inactive_args *)); int nfs_reclaim __P((struct vop_reclaim_args *)); -#define nfs_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) -#define nfs_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) -#define nfs_islocked ((int (*) __P((struct vop_islocked_args *)))vop_noislocked) +int nfs_lock __P((struct vop_lock_args *)); +int nfs_unlock __P((struct vop_unlock_args *)); +int nfs_islocked __P((struct vop_islocked_args *)); + #define nfs_reallocblks \ ((int (*) __P((struct vop_reallocblks_args *)))eopnotsupp) diff --git a/bsd/sys/mbuf.h b/bsd/sys/mbuf.h index 9638bc398..ba1bf3bcd 100644 --- a/bsd/sys/mbuf.h +++ b/bsd/sys/mbuf.h @@ -286,6 +286,7 @@ extern simple_lock_data_t mbuf_slock; _MINTGET(m, type); \ if (m) { \ (m)->m_next = (m)->m_nextpkt = 0; \ + (m)->m_len = 0; \ (m)->m_type = (type); \ (m)->m_data = (m)->m_dat; \ (m)->m_flags = 0; \ @@ -300,6 +301,7 @@ extern simple_lock_data_t mbuf_slock; (m)->m_type = (type); \ (m)->m_data = (m)->m_pktdat; \ (m)->m_flags = M_PKTHDR; \ + (m)->m_pkthdr.len = 0; \ (m)->m_pkthdr.rcvif = NULL; \ (m)->m_pkthdr.header = NULL; \ (m)->m_pkthdr.csum_flags = 0; \ diff --git a/bsd/ufs/ufs/ufs_inode.c b/bsd/ufs/ufs/ufs_inode.c index ea004e531..b51ee0824 100644 --- a/bsd/ufs/ufs/ufs_inode.c +++ b/bsd/ufs/ufs/ufs_inode.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2001 Apple Computer, Inc. All rights reserved. 
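
/*
 * The mbuf.h hunk just above makes MGET/MGETHDR hand back mbufs with a
 * zero length.  Previously m_len and m_pkthdr.len held whatever the
 * prior occupant of the memory left there, so any consumer that appended
 * data before explicitly setting the length could fabricate an
 * arbitrarily long packet.  The hazard, reduced to a stand-in structure:
 */

#include <string.h>

struct mini_mbuf {
    int  m_len;             /* valid bytes in m_dat */
    char m_dat[256];
};

static void
mini_mget(struct mini_mbuf *m)
{
    m->m_len = 0;           /* the fix: start from a known length */
}

static int
append(struct mini_mbuf *m, const void *p, int n)
{
    if (n < 0 || m->m_len + n > (int)sizeof(m->m_dat))
        return -1;
    memcpy(m->m_dat + m->m_len, p, n);  /* trusts m_len implicitly */
    m->m_len += n;
    return 0;
}
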
* * @APPLE_LICENSE_HEADER_START@ * @@ -157,8 +157,9 @@ ufs_reclaim(vp, p) */ cache_purge(vp); if (ip->i_devvp) { - vrele(ip->i_devvp); - ip->i_devvp = 0; + struct vnode *tvp = ip->i_devvp; + ip->i_devvp = NULL; + vrele(tvp); } #if QUOTA for (i = 0; i < MAXQUOTAS; i++) { diff --git a/bsd/vfs/vfs_bio.c b/bsd/vfs/vfs_bio.c index 3e9b0a09b..5579b8c9c 100644 --- a/bsd/vfs/vfs_bio.c +++ b/bsd/vfs/vfs_bio.c @@ -64,6 +64,7 @@ * @(#)vfs_bio.c 8.6 (Berkeley) 1/11/94 */ + /* * Some references: * Bach: The Design of the UNIX Operating System (Prentice Hall, 1986) @@ -99,7 +100,7 @@ extern void bufq_balance_thread_init(); extern void reassignbuf(struct buf *, struct vnode *); static struct buf *getnewbuf(int slpflag, int slptimeo, int *queue); -extern int niobuf; /* The number of IO buffer headers for cluster IO */ +extern int niobuf; /* The number of IO buffer headers for cluster IO */ int blaundrycnt; #if TRACE @@ -632,7 +633,8 @@ brelse(bp) long whichq; KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_START, - bp->b_lblkno * PAGE_SIZE, bp, bp->b_data, bp->b_flags, 0); + bp->b_lblkno * PAGE_SIZE, (int)bp, (int)bp->b_data, + bp->b_flags, 0); trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); @@ -674,7 +676,9 @@ brelse(bp) upl_flags = 0; ubc_upl_abort(upl, upl_flags); } else { - if (ISSET(bp->b_flags, (B_DELWRI | B_WASDIRTY))) + if (ISSET(bp->b_flags, B_NEEDCOMMIT)) + upl_flags = UPL_COMMIT_CLEAR_DIRTY ; + else if (ISSET(bp->b_flags, B_DELWRI | B_WASDIRTY)) upl_flags = UPL_COMMIT_SET_DIRTY ; else upl_flags = UPL_COMMIT_CLEAR_DIRTY ; @@ -758,7 +762,7 @@ brelse(bp) splx(s); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 388)) | DBG_FUNC_END, - bp, bp->b_data, bp->b_flags, 0, 0); + (int)bp, (int)bp->b_data, bp->b_flags, 0, 0); } /* @@ -791,7 +795,8 @@ incore(vp, blkno) return (0); } -/* XXX FIXME -- Update the comment to reflect the UBC changes -- */ + +/* XXX FIXME -- Update the comment to reflect the UBC changes (please) -- */ /* * Get a block of requested size that is associated with * a given vnode and block offset. 
If it is found in the @@ -889,8 +894,11 @@ start: SET(bp->b_flags, B_PAGELIST); bp->b_pagelist = upl; - if ( !upl_valid_page(pl, 0)) - panic("getblk: incore buffer without valid page"); + if (!upl_valid_page(pl, 0)) { + if (vp->v_tag != VT_NFS) + panic("getblk: incore buffer without valid page"); + CLR(bp->b_flags, B_CACHE); + } if (upl_dirty_page(pl, 0)) SET(bp->b_flags, B_WASDIRTY); @@ -1112,7 +1120,7 @@ start: } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 386)) | DBG_FUNC_END, - bp, bp->b_data, bp->b_flags, 3, 0); + (int)bp, (int)bp->b_data, bp->b_flags, 3, 0); return (bp); } @@ -1639,7 +1647,7 @@ biodone(bp) funnel_state = thread_funnel_set(kernel_flock, TRUE); KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_START, - bp, bp->b_data, bp->b_flags, 0, 0); + (int)bp, (int)bp->b_data, bp->b_flags, 0, 0); if (ISSET(bp->b_flags, B_DONE)) panic("biodone already"); @@ -1664,7 +1672,7 @@ biodone(bp) } KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 387)) | DBG_FUNC_END, - bp, bp->b_data, bp->b_flags, 0, 0); + (int)bp, (int)bp->b_data, bp->b_flags, 0, 0); thread_funnel_set(kernel_flock, funnel_state); } diff --git a/bsd/vfs/vfs_subr.c b/bsd/vfs/vfs_subr.c index fd99cca8d..be3a4d40c 100644 --- a/bsd/vfs/vfs_subr.c +++ b/bsd/vfs/vfs_subr.c @@ -635,6 +635,21 @@ insmntque(vp, mp) simple_unlock(&mntvnode_slock); } +__inline void +vpwakeup(struct vnode *vp) +{ + if (vp) { + if (--vp->v_numoutput < 0) + panic("vpwakeup: neg numoutput"); + if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { + if (vp->v_numoutput < 0) + panic("vpwakeup: neg numoutput 2"); + vp->v_flag &= ~VBWAIT; + wakeup((caddr_t)&vp->v_numoutput); + } + } +} + /* * Update outstanding I/O count and do wakeup if requested. */ @@ -645,16 +660,7 @@ vwakeup(bp) register struct vnode *vp; CLR(bp->b_flags, B_WRITEINPROG); - if (vp = bp->b_vp) { - if (--vp->v_numoutput < 0) - panic("vwakeup: neg numoutput"); - if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { - if (vp->v_numoutput < 0) - panic("vwakeup: neg numoutput 2"); - vp->v_flag &= ~VBWAIT; - wakeup((caddr_t)&vp->v_numoutput); - } - } + vpwakeup(bp->b_vp); } /* diff --git a/bsd/vfs/vfs_syscalls.c b/bsd/vfs/vfs_syscalls.c index 42470db69..d70d27453 100644 --- a/bsd/vfs/vfs_syscalls.c +++ b/bsd/vfs/vfs_syscalls.c @@ -357,6 +357,7 @@ checkdirs(olddp) struct filedesc *fdp; struct vnode *newdp; struct proc *p; + struct vnode *tvp; if (olddp->v_usecount == 1) return; @@ -365,20 +366,23 @@ checkdirs(olddp) for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); VREF(newdp); + tvp = fdp->fd_cdir; fdp->fd_cdir = newdp; + vrele(tvp); } if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); VREF(newdp); + tvp = fdp->fd_rdir; fdp->fd_rdir = newdp; + vrele(tvp); } } if (rootvnode == olddp) { - vrele(rootvnode); VREF(newdp); + tvp = rootvnode; rootvnode = newdp; + vrele(tvp); } vput(newdp); } @@ -775,7 +779,7 @@ fchdir(p, uap, retval) register_t *retval; { register struct filedesc *fdp = p->p_fd; - struct vnode *vp, *tdp; + struct vnode *vp, *tdp, *tvp; struct mount *mp; struct file *fp; int error; @@ -804,8 +808,9 @@ fchdir(p, uap, retval) return (error); } VOP_UNLOCK(vp, 0, p); - vrele(fdp->fd_cdir); + tvp = fdp->fd_cdir; fdp->fd_cdir = vp; + vrele(tvp); return (0); } @@ -825,13 +830,15 @@ chdir(p, uap, retval) register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; + struct vnode *tvp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); if (error = change_dir(&nd, p)) return (error); - vrele(fdp->fd_cdir); + tvp 
= fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; + vrele(tvp); return (0); } @@ -851,6 +858,7 @@ chroot(p, uap, retval) register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; + struct vnode *tvp; if (error = suser(p->p_ucred, &p->p_acflag)) return (error); @@ -865,9 +873,10 @@ chroot(p, uap, retval) return (error); } - if (fdp->fd_rdir != NULL) - vrele(fdp->fd_rdir); + tvp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; + if (tvp != NULL) + vrele(tvp); return (0); } diff --git a/bsd/vfs/vfs_vnops.c b/bsd/vfs/vfs_vnops.c index a16dfb167..b351d715b 100644 --- a/bsd/vfs/vfs_vnops.c +++ b/bsd/vfs/vfs_vnops.c @@ -431,6 +431,7 @@ vn_ioctl(fp, com, data, p) register struct vnode *vp = ((struct vnode *)fp->f_data); struct vattr vattr; int error; + struct vnode *ttyvp; switch (vp->v_type) { @@ -454,10 +455,11 @@ vn_ioctl(fp, com, data, p) case VBLK: error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); if (error == 0 && com == TIOCSCTTY) { - if (p->p_session->s_ttyvp) - vrele(p->p_session->s_ttyvp); - p->p_session->s_ttyvp = vp; VREF(vp); + ttyvp = p->p_session->s_ttyvp; + p->p_session->s_ttyvp = vp; + if (ttyvp) + vrele(ttyvp); } return (error); } diff --git a/bsd/vm/vnode_pager.c b/bsd/vm/vnode_pager.c index dede2616d..5cdbf447f 100644 --- a/bsd/vm/vnode_pager.c +++ b/bsd/vm/vnode_pager.c @@ -89,22 +89,17 @@ vnode_pageout(struct vnode *vp, int result = PAGER_SUCCESS; struct proc *p = current_proc(); int error = 0; - int vp_size = 0; int blkno=0, s; int cnt, isize; int pg_index; int offset; struct buf *bp; boolean_t funnel_state; - int haveupl=0; upl_page_info_t *pl; upl_t vpupl = NULL; funnel_state = thread_funnel_set(kernel_flock, TRUE); - if (upl != (upl_t)NULL) { - haveupl = 1; - } isize = (int)size; if (isize < 0) @@ -117,54 +112,44 @@ vnode_pageout(struct vnode *vp, if (UBCINVALID(vp)) { result = PAGER_ERROR; error = PAGER_ERROR; + if (upl && !(flags & UPL_NOCOMMIT)) + ubc_upl_abort(upl, 0); goto out; } - if (haveupl) { + if (upl) { /* - * This is a pageout form the Default pager, + * This is a pageout from the Default pager, * just go ahead and call VOP_PAGEOUT */ dp_pgouts++; - if (error = VOP_PAGEOUT(vp, upl, upl_offset, - (off_t)f_offset,(size_t)size, p->p_ucred, flags)) { - result = PAGER_ERROR; - error = PAGER_ERROR; - } + if (error = VOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset, + (size_t)size, p->p_ucred, flags)) + result = error = PAGER_ERROR; goto out; } - ubc_create_upl( vp, - f_offset, - isize, - &vpupl, - &pl, - UPL_COPYOUT_FROM); + ubc_create_upl(vp, f_offset, isize, &vpupl, &pl, UPL_COPYOUT_FROM); if (vpupl == (upl_t) 0) return PAGER_ABSENT; - vp_size = ubc_getsize(vp); - if (vp_size == 0) { - - while (isize) { + if (ubc_getsize(vp) == 0) { + for (offset = 0; isize; isize -= PAGE_SIZE, + offset += PAGE_SIZE) { blkno = ubc_offtoblk(vp, (off_t)f_offset); -start0: - if (bp = incore(vp, blkno)) { - if (ISSET(bp->b_flags, B_BUSY)) { - SET(bp->b_flags, B_WANTED); - error = tsleep(bp, (PRIBIO + 1), "vnpgout", 0); - goto start0; - } else { - bremfree(bp); - SET(bp->b_flags, (B_BUSY|B_INVAL)); - } - } - if (bp) - brelse(bp); f_offset += PAGE_SIZE; - isize -= PAGE_SIZE; + if ((bp = incore(vp, blkno)) && + ISSET(bp->b_flags, B_BUSY)) { + ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, + UPL_ABORT_FREE_ON_EMPTY); + result = error = PAGER_ERROR; + continue; + } else if (bp) { + bremfree(bp); + SET(bp->b_flags, B_BUSY | B_INVAL); + brelse(bp); + } + ubc_upl_commit_range(vpupl, offset, PAGE_SIZE, + UPL_COMMIT_FREE_ON_EMPTY); } - ubc_upl_commit_range(vpupl, 0, size, 
UPL_COMMIT_FREE_ON_EMPTY); - - error = 0; goto out; } pg_index = 0; @@ -176,8 +161,7 @@ start0: if ( !upl_valid_page(pl, pg_index)) { ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, - UPL_ABORT_FREE_ON_EMPTY); - + UPL_ABORT_FREE_ON_EMPTY); offset += PAGE_SIZE; isize -= PAGE_SIZE; pg_index++; @@ -192,28 +176,32 @@ start0: * We also get here from vm_object_terminate() * So all you need to do in these * cases is to invalidate incore buffer if it is there + * Note we must not sleep here if B_BUSY - that is + * a lock inversion which causes deadlock. */ blkno = ubc_offtoblk(vp, (off_t)(f_offset + offset)); s = splbio(); vp_pgoclean++; -start: - if (bp = incore(vp, blkno)) { - if (ISSET(bp->b_flags, B_BUSY)) { - SET(bp->b_flags, B_WANTED); - error = tsleep(bp, (PRIBIO + 1), "vnpgout", 0); - goto start; - } else { - bremfree(bp); - SET(bp->b_flags, (B_BUSY|B_INVAL)); - } - } - splx(s); - if (bp) + if ((bp = incore(vp, blkno)) && + ISSET(bp->b_flags, B_BUSY | B_NEEDCOMMIT)) { + splx(s); + ubc_upl_abort_range(vpupl, offset, PAGE_SIZE, + UPL_ABORT_FREE_ON_EMPTY); + result = error = PAGER_ERROR; + offset += PAGE_SIZE; + isize -= PAGE_SIZE; + pg_index++; + continue; + } else if (bp) { + bremfree(bp); + SET(bp->b_flags, B_BUSY | B_INVAL ); + splx(s); brelse(bp); + } else + splx(s); ubc_upl_commit_range(vpupl, offset, PAGE_SIZE, - UPL_COMMIT_FREE_ON_EMPTY); - + UPL_COMMIT_FREE_ON_EMPTY); offset += PAGE_SIZE; isize -= PAGE_SIZE; pg_index++; @@ -236,12 +224,10 @@ start: xsize = num_of_pages * PAGE_SIZE; /* By defn callee will commit or abort upls */ - if (error = VOP_PAGEOUT(vp, vpupl, (vm_offset_t) offset, - (off_t)(f_offset + offset), - xsize, p->p_ucred, flags & ~UPL_NOCOMMIT)) { - result = PAGER_ERROR; - error = PAGER_ERROR; - } + if (error = VOP_PAGEOUT(vp, vpupl, (vm_offset_t)offset, + (off_t)(f_offset + offset), xsize, + p->p_ucred, flags & ~UPL_NOCOMMIT)) + result = error = PAGER_ERROR; offset += xsize; isize -= xsize; pg_index += num_of_pages; @@ -271,52 +257,42 @@ vnode_pagein( int error = 0; int xfer_size; boolean_t funnel_state; - int haveupl=0; upl_t vpupl = NULL; off_t local_offset; unsigned int ioaddr; funnel_state = thread_funnel_set(kernel_flock, TRUE); -#if 0 - if(pl->page_list.npages >1 ) - panic("vnode_pageout: Can't handle more than one page"); -#endif /* 0 */ - - if (pl != (upl_t)NULL) { - haveupl = 1; - } UBCINFOCHECK("vnode_pagein", vp); if (UBCINVALID(vp)) { result = PAGER_ERROR; error = PAGER_ERROR; + if (pl && !(flags & UPL_NOCOMMIT)) { + ubc_upl_abort(pl, 0); + } goto out; } - if (haveupl) { + if (pl) { dp_pgins++; if (error = VOP_PAGEIN(vp, pl, pl_offset, (off_t)f_offset, - size,p->p_ucred, flags)) { + size, p->p_ucred, flags)) { result = PAGER_ERROR; } } else { local_offset = 0; while (size) { - if((size > 4096) && (vp->v_tag == VT_NFS)) { + if(size > 4096 && vp->v_tag == VT_NFS) { xfer_size = 4096; size = size - xfer_size; } else { xfer_size = size; size = 0; } - ubc_create_upl( vp, - f_offset+local_offset, - xfer_size, - &vpupl, - NULL, - UPL_FLAGS_NONE); + ubc_create_upl(vp, f_offset + local_offset, xfer_size, + &vpupl, NULL, UPL_FLAGS_NONE); if (vpupl == (upl_t) 0) { result = PAGER_ABSENT; error = PAGER_ABSENT; @@ -327,7 +303,9 @@ vnode_pagein( /* By defn callee will commit or abort upls */ if (error = VOP_PAGEIN(vp, vpupl, (vm_offset_t) 0, - (off_t)f_offset+local_offset, xfer_size,p->p_ucred, flags & ~UPL_NOCOMMIT)) { + (off_t)f_offset + local_offset, + xfer_size, p->p_ucred, + flags & ~UPL_NOCOMMIT)) { result = PAGER_ERROR; error = PAGER_ERROR; } @@ -336,7 +314,7 @@ 
vnode_pagein( } out: if (errorp) - *errorp = result; + *errorp = result; thread_funnel_set(kernel_flock, funnel_state); return (error); diff --git a/iokit/Drivers/platform/drvAppleRootDomain/RootDomain.cpp b/iokit/Drivers/platform/drvAppleRootDomain/RootDomain.cpp index 7fa5e3067..ebc0a8b23 100644 --- a/iokit/Drivers/platform/drvAppleRootDomain/RootDomain.cpp +++ b/iokit/Drivers/platform/drvAppleRootDomain/RootDomain.cpp @@ -813,8 +813,6 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, case kIOMessageSystemWillSleep: rootDomain->powerOverrideOnPriv(); // start ignoring children's requests // (fall through to other cases) - case kIOMessageSystemWillPowerOff: - case kIOMessageSystemWillRestart: // Interested applications have been notified of an impending power // change and have acked (when applicable). @@ -841,6 +839,11 @@ IOReturn IOPMrootDomain::sysPowerDownHandler( void * target, void * refCon, thread_call_enter1(rootDomain->diskSyncCalloutEntry, (thread_call_param_t)params->powerRef); ret = kIOReturnSuccess; break; + + case kIOMessageSystemWillPowerOff: + case kIOMessageSystemWillRestart: + break; + default: ret = kIOReturnUnsupported; break; diff --git a/iokit/Kernel/IODeviceTreeSupport.cpp b/iokit/Kernel/IODeviceTreeSupport.cpp index de364b87c..3c75dceb2 100644 --- a/iokit/Kernel/IODeviceTreeSupport.cpp +++ b/iokit/Kernel/IODeviceTreeSupport.cpp @@ -616,6 +616,7 @@ bool IODTMapInterrupts( IORegistryEntry * regEntry ) map = OSData::withData( local, mapped->getCount() * sizeof( UInt32), sizeof( UInt32)); controller = gIODTDefaultInterruptController; + controller->retain(); } localBits += skip; @@ -734,34 +735,41 @@ bool IODTMatchNubWithKeys( IORegistryEntry * regEntry, OSCollectionIterator * IODTFindMatchingEntries( IORegistryEntry * from, IOOptionBits options, const char * keys ) { - OSSet *result; + OSSet *result = 0; IORegistryEntry *next; IORegistryIterator *iter; OSCollectionIterator *cIter; bool cmp; bool minus = options & kIODTExclusive; - result = OSSet::withCapacity( 3 ); - if( !result) - return( 0); iter = IORegistryIterator::iterateOver( from, gIODTPlane, (options & kIODTRecursive) ? 
kIORegistryIterateRecursively : 0 ); if( iter) { - while( (next = iter->getNextObject())) { + do { - // Look for existence of a debug property to skip - if( next->getProperty("AAPL,ignore")) - continue; + if( result) + result->release(); + result = OSSet::withCapacity( 3 ); + if( !result) + break; - if( keys) { - cmp = IODTMatchNubWithKeys( next, keys ); - if( (minus && (false == cmp)) - || ((false == minus) && (false != cmp)) ) + iter->reset(); + while( (next = iter->getNextObject())) { + + // Look for existence of a debug property to skip + if( next->getProperty("AAPL,ignore")) + continue; + + if( keys) { + cmp = IODTMatchNubWithKeys( next, keys ); + if( (minus && (false == cmp)) + || ((false == minus) && (false != cmp)) ) + result->setObject( next); + } else result->setObject( next); - } else - result->setObject( next); - } + } + } while( !iter->isValid()); iter->release(); } diff --git a/iokit/Kernel/IONVRAM.cpp b/iokit/Kernel/IONVRAM.cpp index e4bc666b2..703dc94f4 100644 --- a/iokit/Kernel/IONVRAM.cpp +++ b/iokit/Kernel/IONVRAM.cpp @@ -280,8 +280,7 @@ IOReturn IODTNVRAM::setProperties(OSObject *properties) IOReturn IODTNVRAM::readXPRAM(IOByteCount offset, UInt8 *buffer, IOByteCount length) { - if ((_nvramImage == 0) || (_xpramPartitionOffset == 0)) - return kIOReturnNotReady; + if (_xpramImage == 0) return kIOReturnUnsupported; if ((buffer == 0) || (length <= 0) || (offset < 0) || (offset + length > kIODTNVRAMXPRAMSize)) @@ -295,8 +294,7 @@ IOReturn IODTNVRAM::readXPRAM(IOByteCount offset, UInt8 *buffer, IOReturn IODTNVRAM::writeXPRAM(IOByteCount offset, UInt8 *buffer, IOByteCount length) { - if ((_nvramImage == 0) || (_xpramPartitionOffset == 0)) - return kIOReturnNotReady; + if (_xpramImage == 0) return kIOReturnUnsupported; if ((buffer == 0) || (length <= 0) || (offset < 0) || (offset + length > kIODTNVRAMXPRAMSize)) diff --git a/iokit/Kernel/IOPlatformExpert.cpp b/iokit/Kernel/IOPlatformExpert.cpp index 7faa16986..0f07f4e75 100644 --- a/iokit/Kernel/IOPlatformExpert.cpp +++ b/iokit/Kernel/IOPlatformExpert.cpp @@ -32,7 +32,7 @@ #include #include #include - +#include #include @@ -64,6 +64,8 @@ OSMetaClassDefineReservedUnused(IOPlatformExpert, 10); OSMetaClassDefineReservedUnused(IOPlatformExpert, 11); static IOPlatformExpert * gIOPlatform; +static OSDictionary * gIOInterruptControllers; +static IOLock * gIOInterruptControllersLock; OSSymbol * gPlatformInterruptControllerName; @@ -86,6 +88,9 @@ bool IOPlatformExpert::start( IOService * provider ) if (!super::start(provider)) return false; + gIOInterruptControllers = OSDictionary::withCapacity(1); + gIOInterruptControllersLock = IOLockAlloc(); + // Correct the bus frequency in the device tree. 
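
/*
 * The IOPlatformExpert hunk that follows replaces publishResource()/
 * waitForService() with the private dictionary allocated above: a
 * registration stores the controller under its name and wakes any
 * waiters; a lookup blocks until the name appears.  A userland analog of
 * the same rendezvous, with pthreads standing in for IOLock and
 * assert_wait()/thread_block(), and a one-slot "dictionary":
 */

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t reg_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  reg_cv   = PTHREAD_COND_INITIALIZER;
static void           *registered;

static void
register_controller(void *controller)
{
    pthread_mutex_lock(&reg_lock);
    registered = controller;
    pthread_cond_broadcast(&reg_cv);    /* thread_wakeup() analog */
    pthread_mutex_unlock(&reg_lock);
}

static void *
lookup_controller(void)
{
    void *c;

    pthread_mutex_lock(&reg_lock);
    while ((c = registered) == NULL)    /* assert_wait + thread_block */
        pthread_cond_wait(&reg_cv, &reg_lock);
    pthread_mutex_unlock(&reg_lock);
    return c;
}
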
busFrequency = OSData::withBytesNoCopy((void *)&gPEClockFrequencyInfo.bus_clock_rate_hz, 4); provider->setProperty("clock-frequency", busFrequency); @@ -255,21 +260,36 @@ IOReturn IOPlatformExpert::setConsoleInfo( PE_Video * consoleInfo, IOReturn IOPlatformExpert::registerInterruptController(OSSymbol *name, IOInterruptController *interruptController) { - publishResource(name, interruptController); + IOLockLock(gIOInterruptControllersLock); + + gIOInterruptControllers->setObject(name, interruptController); + + thread_wakeup(gIOInterruptControllers); + + IOLockUnlock(gIOInterruptControllersLock); return kIOReturnSuccess; } IOInterruptController *IOPlatformExpert::lookUpInterruptController(OSSymbol *name) { - IOInterruptController *interruptController; - IOService *service; - - service = waitForService(resourceMatching(name)); + OSObject *object; - interruptController = OSDynamicCast(IOInterruptController, service->getProperty(name)); + while (1) { + IOLockLock(gIOInterruptControllersLock); + + object = gIOInterruptControllers->getObject(name); + + if (object == 0) assert_wait(gIOInterruptControllers, THREAD_UNINT); + + IOLockUnlock(gIOInterruptControllersLock); + + if (object != 0) break; + + thread_block(0); + } - return interruptController; + return OSDynamicCast(IOInterruptController, object); } @@ -628,6 +648,18 @@ static void getCStringForObject (OSObject * inObj, char * outStr) } } +/* IOPMPanicOnShutdownHang + * - Called from a timer installed by PEHaltRestart + */ +static void IOPMPanicOnShutdownHang(thread_call_param_t p0, thread_call_param_t p1) +{ + int type = (int)p0; + + /* 30 seconds has elapsed - resume shutdown */ + gIOPlatform->haltRestart(type); +} + + extern "C" { /* @@ -660,6 +692,35 @@ int PEGetPlatformEpoch(void) int PEHaltRestart(unsigned int type) { + IOPMrootDomain *pmRootDomain = IOService::getPMRootDomain(); + bool noWaitForResponses; + AbsoluteTime deadline; + thread_call_t shutdown_hang; + + /* Notify IOKit PM clients of shutdown/restart + Clients subscribe to this message with a call to + IOService::registerInterest() + */ + + /* Spawn a thread that will panic in 30 seconds. + If all goes well the machine will be off by the time + the timer expires. + */ + shutdown_hang = thread_call_allocate( &IOPMPanicOnShutdownHang, (thread_call_param_t) type); + clock_interval_to_deadline( 30, kSecondScale, &deadline ); + thread_call_enter1_delayed( shutdown_hang, 0, deadline ); + + noWaitForResponses = pmRootDomain->tellChangeDown2(type); + /* This notification should have few clients who all do + their work synchronously. + + In this "shutdown notification" context we don't give + drivers the option of working asynchronously and responding + later. PM internals make it very hard to wait for asynchronous + replies. In fact, it's a bad idea to even be calling + tellChangeDown2 from here at all. 
diff --git a/iokit/Kernel/IORegistryEntry.cpp b/iokit/Kernel/IORegistryEntry.cpp
index e309047d4..4040b3cd0 100644
--- a/iokit/Kernel/IORegistryEntry.cpp
+++ b/iokit/Kernel/IORegistryEntry.cpp
@@ -440,12 +440,15 @@ bool IORegistryEntry::init( IORegistryEntry * old,
     WLOCK;
 
     fPropertyTable = old->getPropertyTable();
-    old->fPropertyTable = 0;
+    fPropertyTable->retain();
 #ifdef IOREGSPLITTABLES
     fRegistryTable = old->fRegistryTable;
-    old->fRegistryTable = 0;
+    old->fRegistryTable = OSDictionary::withDictionary( fRegistryTable );
 #endif /* IOREGSPLITTABLES */
 
+    old->registryTable()->removeObject( plane->keys[ kParentSetIndex ] );
+    old->registryTable()->removeObject( plane->keys[ kChildSetIndex ] );
+
     all = getParentSetReference( plane );
     if( all) for( index = 0;
               (next = (IORegistryEntry *) all->getObject(index));
@@ -1815,6 +1818,8 @@ unsigned int IORegistryEntry::getDepth( const IORegistryPlane * plane ) const
 
 OSDefineMetaClassAndStructors(IORegistryIterator, OSIterator)
 
+enum { kIORegistryIteratorInvalidFlag = 0x80000000 };
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 IORegistryIterator *
@@ -1838,7 +1843,7 @@ IORegistryIterator::iterateOver( IORegistryEntry * root,
 	create->where = &create->start;
 	create->start.current = root;
 	create->plane = plane;
-	create->options = options;
+	create->options = options & ~kIORegistryIteratorInvalidFlag;
 
     } else {
 	create->release();
@@ -1860,10 +1865,12 @@ bool IORegistryIterator::isValid( void )
     bool		ok;
     IORegCursor *	next;
 
-    ok = true;
     next = where;
 
     RLOCK;
+
+    ok = (0 == (kIORegistryIteratorInvalidFlag & options));
+
     while( ok && next) {
 	if( where->iter)
 	    ok = where->iter->isValid();
@@ -1927,6 +1934,7 @@ void IORegistryIterator::reset( void )
     }
 
     where->current = root;
+    options &= ~kIORegistryIteratorInvalidFlag;
 }
 
 void IORegistryIterator::free( void )
@@ -1962,11 +1970,15 @@ IORegistryEntry * IORegistryIterator::getNextObjectFlat( void )
     if( where->current)
 	where->current->release();
 
-    if( where->iter)
+    if( where->iter) {
+
 	next = (IORegistryEntry *) where->iter->getNextObject();
-    if( next)
-	next->retain();
+
+	if( next)
+	    next->retain();
+	else if( !where->iter->isValid())
+	    options |= kIORegistryIteratorInvalidFlag;
+    }
 
     where->current = next;
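The IORegistryEntry::init hunk above stops the new entry from gutting the donor: the property table is now shared via an extra retain, the registry table is duplicated with OSDictionary::withDictionary, and only the donor's parent/child links are removed, so both objects stay internally consistent instead of `old` being left with NULL tables. A rough analogue using C++ reference counting; Entry, initFrom, and the "parents"/"children" keys are assumptions standing in for the real types and plane key symbols:

    #include <map>
    #include <memory>
    #include <string>

    using Table = std::map<std::string, std::string>;

    struct Entry {
        std::shared_ptr<Table> propertyTable;   // refcounted, may be shared
        Table                  registryTable;   // holds this entry's links
    };

    // Initialize `fresh` from `old` the way the patched init() does:
    // share the property table (retain, don't steal), deep-copy the
    // registry table, then strip only the donor's parent/child links so
    // two entries never claim the same position in the plane.
    void initFrom(Entry &fresh, Entry &old) {
        fresh.propertyTable = old.propertyTable;  // shared_ptr copy == retain
        fresh.registryTable = old.registryTable;  // OSDictionary::withDictionary
        old.registryTable.erase("parents");       // plane->keys[kParentSetIndex]
        old.registryTable.erase("children");      // plane->keys[kChildSetIndex]
    }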
diff --git a/iokit/Kernel/IOService.cpp b/iokit/Kernel/IOService.cpp
index e84f9d9c0..4bd1a2afa 100644
--- a/iokit/Kernel/IOService.cpp
+++ b/iokit/Kernel/IOService.cpp
@@ -4138,20 +4138,14 @@ void IOService::setDeviceMemory( OSArray * array )
 IOReturn IOService::resolveInterrupt(IOService *nub, int source)
 {
   IOInterruptController *interruptController;
-  OSDictionary          *propTable;
   OSArray               *array;
   OSData                *data;
   OSSymbol              *interruptControllerName;
   long                  numSources;
   IOInterruptSource     *interruptSources;
 
-  // Get the property table from the nub.
-  propTable = nub->getPropertyTable();
-  if (propTable == 0) return kIOReturnNoResources;
-
-  // Get the parents list from the property table.
-  array = OSDynamicCast(OSArray,
-                        propTable->getObject(gIOInterruptControllersKey));
+  // Get the parents list from the nub.
+  array = OSDynamicCast(OSArray, nub->getProperty(gIOInterruptControllersKey));
   if (array == 0) return kIOReturnNoResources;
 
   // Allocate space for the IOInterruptSources if needed... then return early.
@@ -4173,9 +4167,8 @@ IOReturn IOService::resolveInterrupt(IOService *nub, int source)
   interruptController = getPlatform()->lookUpInterruptController(interruptControllerName);
   if (interruptController == 0) return kIOReturnNoResources;
 
-  // Get the interrupt numbers from the property table.
-  array = OSDynamicCast(OSArray,
-                        propTable->getObject(gIOInterruptSpecifiersKey));
+  // Get the interrupt numbers from the nub.
+  array = OSDynamicCast(OSArray, nub->getProperty(gIOInterruptSpecifiersKey));
   if (array == 0) return kIOReturnNoResources;
 
   data = OSDynamicCast(OSData, array->getObject(source));
   if (data == 0) return kIOReturnNoResources;
diff --git a/iokit/KernelConfigTables.cpp b/iokit/KernelConfigTables.cpp
index deff7b74e..33c8a75b9 100644
--- a/iokit/KernelConfigTables.cpp
+++ b/iokit/KernelConfigTables.cpp
@@ -28,11 +28,11 @@
  */
 
 const char * gIOKernelKmods = "{
-   'com.apple.kernel'         = '5.2';
-   'com.apple.kernel.bsd'     = '5.2';
-   'com.apple.kernel.iokit'   = '5.2';
-   'com.apple.kernel.libkern' = '5.2';
-   'com.apple.kernel.mach'    = '5.2';
+   'com.apple.kernel'         = '5.3';
+   'com.apple.kernel.bsd'     = '5.3';
+   'com.apple.kernel.iokit'   = '5.3';
+   'com.apple.kernel.libkern' = '5.3';
+   'com.apple.kernel.mach'    = '5.3';
    'com.apple.iokit.IOADBFamily' = '1.1';
    'com.apple.iokit.IOSystemManagementFamily' = '1.1';
 }";
diff --git a/iokit/conf/version.minor b/iokit/conf/version.minor
index 0cfbf0888..00750edc0 100644
--- a/iokit/conf/version.minor
+++ b/iokit/conf/version.minor
@@ -1 +1 @@
-2
+3
diff --git a/iokit/conf/version.variant b/iokit/conf/version.variant
index e69de29bb..8b1378917 100644
--- a/iokit/conf/version.variant
+++ b/iokit/conf/version.variant
@@ -0,0 +1 @@
+
diff --git a/libkern/conf/version.minor b/libkern/conf/version.minor
index 0cfbf0888..00750edc0 100644
--- a/libkern/conf/version.minor
+++ b/libkern/conf/version.minor
@@ -1 +1 @@
-2
+3
diff --git a/libkern/ppc/OSAtomic.s b/libkern/ppc/OSAtomic.s
index 39481085d..a2aeb032d 100644
--- a/libkern/ppc/OSAtomic.s
+++ b/libkern/ppc/OSAtomic.s
@@ -55,8 +55,6 @@ int OSCompareAndSwap( UInt32 oldVal, UInt32 newVal, UInt32 * addr )
 */
 
 ENTRY _OSCompareAndSwap
-
-	lwarx	r6, 0,r5	/* CEMV10 */
 .L_CASretry:
 	lwarx	r6, 0,r5
 	cmpw	r6, r3
@@ -94,14 +92,9 @@ SInt32 OSAddAtomic(SInt32 amount, SInt32 * value)
 
 ENTRY _OSAddAtomic
 	mr	r5,r3	/* Save the increment */
-	lwarx	r3,0,r4	/* CEMV10 */
-
 .L_AAretry:
 	lwarx	r3, 0, r4	/* Grab the area value */
 	add	r6, r3, r5	/* Add the value */
 	stwcx.	r6, 0, r4	/* Try to save the new value */
 	bne-	.L_AAretry	/* Didn't get it, try again... */
 	blr	/* Return the original value */
-
-
-
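The OSAtomic.s hunk above drops a stray lwarx that sat before each retry label (the '/* CEMV10 */' lines); its reservation was immediately superseded by the lwarx at the top of the loop, which is the one that must pair with stwcx. on every attempt. In portable terms both routines are compare-and-swap retry loops; here is a sketch of OSAddAtomic's contract (add, then return the prior value) in C++11 terms, with compare_exchange_weak playing the stwcx. role (osAddAtomicSketch is an illustrative name):

    #include <atomic>
    #include <cstdint>

    // Same contract as OSAddAtomic: atomically add `amount` to *value and
    // return the value observed before the add. compare_exchange_weak may
    // fail spuriously, like a failed stwcx., so it lives in a retry loop
    // that re-reads the current value each time around.
    std::int32_t osAddAtomicSketch(std::int32_t amount,
                                   std::atomic<std::int32_t> *value) {
        std::int32_t old = value->load(std::memory_order_relaxed);
        while (!value->compare_exchange_weak(old, old + amount,
                                             std::memory_order_relaxed)) {
            // on failure `old` now holds the fresh value; just retry
        }
        return old;
    }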
diff --git a/libsa/conf/version.minor b/libsa/conf/version.minor
index 0cfbf0888..00750edc0 100644
--- a/libsa/conf/version.minor
+++ b/libsa/conf/version.minor
@@ -1 +1 @@
-2
+3
diff --git a/osfmk/conf/kernelversion.minor b/osfmk/conf/kernelversion.minor
index 0cfbf0888..00750edc0 100644
--- a/osfmk/conf/kernelversion.minor
+++ b/osfmk/conf/kernelversion.minor
@@ -1 +1 @@
-2
+3
diff --git a/osfmk/conf/version.minor b/osfmk/conf/version.minor
index 0cfbf0888..00750edc0 100644
--- a/osfmk/conf/version.minor
+++ b/osfmk/conf/version.minor
@@ -1 +1 @@
-2
+3
diff --git a/osfmk/default_pager/dp_backing_store.c b/osfmk/default_pager/dp_backing_store.c
index effec727c..34f601059 100644
--- a/osfmk/default_pager/dp_backing_store.c
+++ b/osfmk/default_pager/dp_backing_store.c
@@ -3499,9 +3499,6 @@ vs_cluster_transfer(
 		if (error == KERN_SUCCESS) {
 			error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
 					size, &residual, 0);
-			if(error)
-				upl_commit(upl, NULL);
-			upl_deallocate(upl);
 		}
 
 #else
diff --git a/pexpert/conf/version.minor b/pexpert/conf/version.minor
index 0cfbf0888..00750edc0 100644
--- a/pexpert/conf/version.minor
+++ b/pexpert/conf/version.minor
@@ -1 +1 @@
-2
+3
-- 
2.45.2