/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
*
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
/*
#ifndef _SYS_BUF_H_
#define _SYS_BUF_H_
-#include <sys/queue.h>
-#include <sys/errno.h>
-#include <sys/vm.h>
+#include <sys/cdefs.h>
+#include <sys/kernel_types.h>
+#include <mach/memory_object_types.h>
-#define NOLIST ((struct buf *)0x87654321)
-#include <sys/cdefs.h>
+#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
+#define B_READ 0x00000001 /* Read buffer. */
+#define B_ASYNC 0x00000002 /* Start I/O, do not wait. */
+#define B_NOCACHE 0x00000004 /* Do not cache block after use. */
+#define B_DELWRI 0x00000008 /* Delay I/O until buffer reused. */
+#define B_LOCKED 0x00000010 /* Locked in core (not reusable). */
+#define B_PHYS 0x00000020 /* I/O to user memory. */
+#define B_CLUSTER 0x00000040 /* UPL based I/O generated by cluster layer */
+#define B_PAGEIO 0x00000080 /* Page in/out */
+#define B_META 0x00000100 /* buffer contains meta-data. */
+#define B_RAW 0x00000200 /* Set by physio for raw transfers. */
+#define B_FUA 0x00000400 /* Write-through disk cache(if supported) */
+#define B_PASSIVE 0x00000800 /* PASSIVE I/Os are ignored by THROTTLE I/O */
+/*
+ * make sure to check when adding flags that
+ * the new flags don't overlap the definitions
+ * in buf_internal.h
+ */
+
+__BEGIN_DECLS
/*
- * The buffer header describes an I/O operation in the kernel.
- */
-struct buf {
- LIST_ENTRY(buf) b_hash; /* Hash chain. */
- LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
- TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
- struct proc *b_proc; /* Associated proc; NULL if kernel. */
- volatile long b_flags; /* B_* flags. */
- int b_error; /* Errno value. */
- long b_bufsize; /* Allocated buffer size. */
- long b_bcount; /* Valid bytes in buffer. */
- long b_resid; /* Remaining I/O. */
- dev_t b_dev; /* Device associated with buffer. */
- struct {
- caddr_t b_addr; /* Memory, superblocks, indirect etc.*/
- } b_un;
- void *b_saveaddr; /* Original b_addr for physio. */
- daddr_t b_lblkno; /* Logical block number. */
- daddr_t b_blkno; /* Underlying physical block number. */
- /* Function to call upon completion. */
- void (*b_iodone) __P((struct buf *));
- struct vnode *b_vp; /* Device vnode. */
- int b_dirtyoff; /* Offset in buffer of dirty region. */
- int b_dirtyend; /* Offset of end of dirty region. */
- int b_validoff; /* Offset in buffer of valid region. */
- int b_validend; /* Offset of end of valid region. */
- struct ucred *b_rcred; /* Read credentials reference. */
- struct ucred *b_wcred; /* Write credentials reference. */
- int b_timestamp; /* timestamp for queuing operation */
- long b_vectorcount; /* number of vectors in b_vectorlist */
- void *b_vectorlist; /* vector list for I/O */
- void *b_pagelist; /* to save pagelist info */
- long b_vects[2]; /* vectorlist when b_vectorcount is 1 */
- long b_whichq; /* the free list the buffer belongs to */
- TAILQ_ENTRY(buf) b_act; /* Device driver queue when active */
- void *b_drvdata; /* Device driver private use */
-};
-
-/*
- * For portability with historic industry practice, the cylinder number has
- * to be maintained in the `b_resid' field.
- */
-#define b_cylinder b_resid /* Cylinder number for disksort(). */
-
-/* Device driver compatibility definitions. */
-#define b_active b_bcount /* Driver queue head: drive active. */
-#define b_data b_un.b_addr /* b_un.b_addr is not changeable. */
-#define b_errcnt b_resid /* Retry count while I/O in progress. */
-#define iodone biodone /* Old name for biodone. */
-#define iowait biowait /* Old name for biowait. */
-
-/* cluster_io definitions for use with io bufs */
-#define b_uploffset b_bufsize
-#define b_trans_head b_freelist.tqe_prev
-#define b_trans_next b_freelist.tqe_next
-#define b_real_bp b_saveaddr
-
-/*
- * These flags are kept in b_flags.
- */
-#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
-#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
-#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
-#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
-#define B_BUSY 0x00000010 /* I/O in progress. */
-#define B_CACHE 0x00000020 /* Bread found us in the cache. */
-#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
-#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
-#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */
-#define B_DONE 0x00000200 /* I/O completed. */
-#define B_EINTR 0x00000400 /* I/O was interrupted */
-#define B_ERROR 0x00000800 /* I/O error occurred. */
-#define B_WASDIRTY 0x00001000 /* page was found dirty in the VM cache */
-#define B_INVAL 0x00002000 /* Does not contain valid info. */
-#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
-#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
-#define B_PAGEOUT 0x00010000 /* Page out indicator... */
-#define B_PGIN 0x00020000 /* Pagein op, so swap() can count it. */
-#define B_PHYS 0x00040000 /* I/O to user memory. */
-#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
-#define B_READ 0x00100000 /* Read buffer. */
-#define B_TAPE 0x00200000 /* Magnetic tape I/O. */
-#define B_PAGELIST 0x00400000 /* Buffer describes pagelist I/O. */
-#define B_WANTED 0x00800000 /* Process wants this buffer. */
-#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
-#define B_WRITEINPROG 0x01000000 /* Write in progress. */
-#define B_UNUSED0 0x02000000 /* Unused bit */
-#define B_UNUSED1 0x04000000 /* Unused bit */
-#define B_NEED_IODONE 0x08000000
- /* need to do a biodone on the */
- /* real_bp associated with a cluster_io */
-#define B_COMMIT_UPL 0x10000000
- /* commit pages in upl when */
- /* I/O completes/fails */
-#define B_ZALLOC 0x20000000 /* b_data is zalloc()ed */
-#define B_META 0x40000000 /* buffer contains meta-data. */
-#define B_VECTORLIST 0x80000000 /* Used by device drivers. */
-
-
-/*
- * Zero out the buffer's data area.
- */
-#define clrbuf(bp) { \
- bzero((bp)->b_data, (u_int)(bp)->b_bcount); \
- (bp)->b_resid = 0; \
-}
-
-/* Flags to low-level allocation routines. */
-#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
-#define B_SYNC 0x02 /* Do all allocations synchronously. */
-#define B_NOBUFF 0x04 /* Do not allocate struct buf */
-
-/* Flags for operation type in getblk() */
-#define BLK_READ 0x01 /* buffer for read */
-#define BLK_WRITE 0x02 /* buffer for write */
-#define BLK_PAGEIN 0x04 /* buffer for pagein */
-#define BLK_PAGEOUT 0x08 /* buffer for pageout */
-#define BLK_META 0x10 /* buffer for metadata */
-#define BLK_CLREAD 0x20 /* buffer for cluster read */
-#define BLK_CLWRITE 0x40 /* buffer for cluster write */
+ * mark the buffer associated with buf_t
+ * as AGED with respect to the LRU cache
+ */
+void buf_markaged(buf_t);
-#ifdef KERNEL
-extern int nbuf; /* The number of buffer headers */
-extern struct buf *buf; /* The buffer headers. */
+/*
+ * mark the buffer associated with buf_t
+ * as invalid... on release, it will go
+ * directly to the free list
+ */
+void buf_markinvalid(buf_t);
-/* Macros to clear/set/test flags. */
-#define SET(t, f) (t) |= (f)
-#define CLR(t, f) (t) &= ~(f)
-#define ISSET(t, f) ((t) & (f))
+/*
+ * mark the buffer associated with buf_t
+ * as a delayed write...
+ */
+void buf_markdelayed(buf_t);
/*
- * Definitions for the buffer free lists.
+ * mark the buffer associated with buf_t
+ * as having been interrupted... EINTR
*/
-#define BQUEUES 5 /* number of free buffer queues */
+void buf_markeintr(buf_t);
-#define BQ_LOCKED 0 /* super-blocks &c */
-#define BQ_LRU 1 /* lru, useful buffers */
-#define BQ_AGE 2 /* rubbish */
-#define BQ_EMPTY 3 /* buffer headers with no memory */
-#define BQ_META 4 /* buffer containing metadata */
+/*
+ * mark the buffer associated with buf_t
+ * for write through disk cache if device supports
+ */
+void buf_markfua(buf_t);
-__BEGIN_DECLS
-int allocbuf __P((struct buf *, int));
-void bawrite __P((struct buf *));
-void bdwrite __P((struct buf *));
-void biodone __P((struct buf *));
-int biowait __P((struct buf *));
-int bread __P((struct vnode *, daddr_t, int,
- struct ucred *, struct buf **));
-int meta_bread __P((struct vnode *, daddr_t, int,
- struct ucred *, struct buf **));
-int breada __P((struct vnode *, daddr_t, int, daddr_t, int,
- struct ucred *, struct buf **));
-int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
- struct ucred *, struct buf **));
-void brelse __P((struct buf *));
-void bremfree __P((struct buf *));
-void bufinit __P((void));
-int bwrite __P((struct buf *));
-struct buf *getblk __P((struct vnode *, daddr_t, int, int, int, int));
-struct buf *geteblk __P((int));
-struct buf *incore __P((struct vnode *, daddr_t));
-u_int minphys __P((struct buf *bp));
-int physio __P((void (*)(struct buf *), struct buf *, dev_t, int , u_int (*)(struct buf *), struct uio *, int ));
-int count_busy_buffers __P((void));
-struct buf *alloc_io_buf __P((struct vnode *, int));
-void free_io_buf __P((struct buf *));
-__END_DECLS
+/*
+ * returns 1 if the buffer associated with buf_t
+ * is set for write through disk cache... 0 if it does not
+ */
+int buf_fua(buf_t);
+
+/*
+ * returns 1 if the buffer associated with buf_t
+ * contains valid data... 0 if it does not
+ */
+int buf_valid(buf_t);
+
+/*
+ * returns 1 if the buffer was already valid
+ * in the cache... i.e. no I/O was performed
+ * returns 0 otherwise
+ */
+int buf_fromcache(buf_t);
+
+/*
+ * returns the UPL associated with buf_t
+ */
+void * buf_upl(buf_t);
+
+/*
+ * returns the offset into the UPL
+ * associated with buf_t which is to be
+ * used as the base offset for this I/O
+ */
+uint32_t buf_uploffset(buf_t);
+
+/*
+ * returns read credential associated with buf_t
+ * a reference is taken which must be explicitly dropped
+ */
+ucred_t buf_rcred(buf_t);
+
+/*
+ * returns write credential associated with buf_t
+ * a reference is taken which must be explicitly dropped
+ */
+ucred_t buf_wcred(buf_t);
+
+/*
+ * returns process handle associated with buf_t
+ * i.e. identity of task that issued the I/O
+ */
+proc_t buf_proc(buf_t);
+
+uint32_t buf_dirtyoff(buf_t);
+uint32_t buf_dirtyend(buf_t);
+void buf_setdirtyoff(buf_t, uint32_t);
+void buf_setdirtyend(buf_t, uint32_t);
+
+/*
+ * return the errno value associated with buf_t
+ */
+errno_t buf_error(buf_t);
+
+/*
+ * set errno on buf_t
+ */
+void buf_seterror(buf_t, errno_t);
+
+/*
+ * set specified flags on buf_t
+ * B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA
+ */
+void buf_setflags(buf_t, int32_t);
+
+/*
+ * clear specified flags on buf_t
+ * B_LOCKED/B_NOCACHE/B_ASYNC/B_READ/B_WRITE/B_PAGEIO/B_FUA
+ */
+void buf_clearflags(buf_t, int32_t);
+
+/*
+ * return external flags associated with buf_t
+ * B_CLUSTER/B_PHYS/B_LOCKED/B_DELWRI/B_ASYNC/B_READ/B_WRITE/B_META/B_PAGEIO/B_FUA
+ */
+int32_t buf_flags(buf_t);
+
+/*
+ * clears I/O related flags (both internal and
+ * external) associated with buf_t and allows
+ * the following to be set...
+ * B_READ/B_WRITE/B_ASYNC/B_NOCACHE
+ */
+void buf_reset(buf_t, int32_t);
+
+/*
+ * ensure that the data storage associated with buf_t
+ * is addressable
+ */
+errno_t buf_map(buf_t, caddr_t *);
+
+/*
+ * release our need to have the storage associated
+ * with buf_t in an addressable state
+ */
+errno_t buf_unmap(buf_t);
+
+/*
+ * set driver specific data for buf_t
+ */
+void buf_setdrvdata(buf_t, void *);
+
+/*
+ * retrieve driver specific data associated with buf_t
+ */
+void * buf_drvdata(buf_t);
+
+/*
+ * set fs specific data for buf_t
+ */
+void buf_setfsprivate(buf_t, void *);
+
+/*
+ * retrieve fs specific data associated with buf_t
+ */
+void * buf_fsprivate(buf_t);
+
+/*
+ * retrieve the physical block number associated with buf_t
+ */
+daddr64_t buf_blkno(buf_t);
+
+/*
+ * retrieve the logical block number associated with buf_t
+ * i.e. the block number derived from the file offset
+ */
+daddr64_t buf_lblkno(buf_t);
+
+/*
+ * set the physical block number associated with buf_t
+ */
+void buf_setblkno(buf_t, daddr64_t);
/*
- * Stats on usefulness of the buffer cache
+ * set the logical block number associated with buf_t
+ * i.e. the block number derived from the file offset
*/
-struct bufstats {
- long bufs_incore; /* found incore */
- long bufs_busyincore; /* found incore. was busy */
- long bufs_vmhits; /* not incore. found in VM */
- long bufs_miss; /* not incore. not in VM */
- long bufs_sleeps; /* buffer starvation */
- long bufs_eblk; /* Calls to geteblk */
- long bufs_iobufmax; /* Max. number of IO buffers used */
- long bufs_iobufinuse; /* number of IO buffers in use */
- long bufs_iobufsleeps; /* IO buffer starvation */
-};
+void buf_setlblkno(buf_t, daddr64_t);
+
+/*
+ * retrieve the count of valid bytes associated with buf_t
+ */
+uint32_t buf_count(buf_t);
+
+/*
+ * retrieve the size of the data store associated with buf_t
+ */
+uint32_t buf_size(buf_t);
+
+/*
+ * retrieve the residual I/O count associated with buf_t
+ * i.e. number of bytes that have not yet been completed
+ */
+uint32_t buf_resid(buf_t);
+
+/*
+ * set the count of bytes associated with buf_t
+ * typically used to set the size of the I/O to be performed
+ */
+void buf_setcount(buf_t, uint32_t);
+
+/*
+ * set the size of the buffer store associated with buf_t
+ * typically used when providing private storage to buf_t
+ */
+void buf_setsize(buf_t, uint32_t);
+
+/*
+ * set the size in bytes of the unfinished I/O associated with buf_t
+ */
+void buf_setresid(buf_t, uint32_t);
+
+/*
+ * associate kernel addressable storage with buf_t
+ */
+void buf_setdataptr(buf_t, uintptr_t);
+
+/*
+ * retrieve pointer to buffer associated with buf_t
+ * if non-null, then guaranteed to be kernel addressable
+ * size of buffer can be retrieved via buf_size
+ * size of valid data can be retrieved via buf_count
+ * if NULL, then use buf_map/buf_unmap to manage access to the underlying storage
+ */
+uintptr_t buf_dataptr(buf_t);
+
+/*
+ * return the vnode_t associated with buf_t
+ */
+vnode_t buf_vnode(buf_t);
+
+/*
+ * assign vnode_t to buf_t... the
+ * device currently associated with
+ * buf_t is not changed.
+ */
+void buf_setvnode(buf_t, vnode_t);
+
+/*
+ * return the dev_t associated with buf_t
+ */
+dev_t buf_device(buf_t);
+
+/*
+ * assign the dev_t associated with vnode_t
+ * to buf_t
+ */
+errno_t buf_setdevice(buf_t, vnode_t);
+
+errno_t buf_strategy(vnode_t, void *);
+
+/*
+ * flags for buf_invalblkno
+ */
+#define BUF_WAIT 0x01
+
+errno_t buf_invalblkno(vnode_t, daddr64_t, int);
+
+
+/*
+ * return the callback function pointer
+ * if the callback is still valid
+ * returns NULL if a buffer that was not
+ * allocated via buf_alloc is specified
+ * or if a callback has not been set or
+ * it has already fired...
+ */
+void * buf_callback(buf_t);
+
+/*
+ * assign a one-shot callback function (driven from biodone)
+ * to a buf_t allocated via buf_alloc... a caller specified
+ * arg is passed to the callback function
+ */
+errno_t buf_setcallback(buf_t, void (*)(buf_t, void *), void *);
+
+/*
+ * add a upl_t to a buffer allocated via buf_alloc
+ * and set the offset into the upl_t (must be page
+ * aligned).
+ */
+errno_t buf_setupl(buf_t, upl_t, uint32_t);
+
+/*
+ * allocate a buf_t that is a clone of the buf_t
+ * passed in, but whose I/O range is a subset...
+ * if a callback routine is specified, it will
+ * be called from buf_biodone with the bp and
+ * arg specified.
+ * it must be freed via buf_free
+ */
+buf_t buf_clone(buf_t, int, int, void (*)(buf_t, void *), void *);
+
+/*
+ * allocate a buf_t associated with vnode_t
+ * that has NO storage associated with it
+ * but is suitable for use in issuing I/Os
+ * after storage has been assigned via buf_setdataptr
+ * or buf_setupl
+ */
+buf_t buf_alloc(vnode_t);
+
+/*
+ * free a buf_t that was allocated via buf_alloc
+ * any private storage associated with buf_t is the
+ * responsibility of the caller to release
+ */
+void buf_free(buf_t);
+
+/*
+ * flags for buf_invalidateblks
+ */
+#define BUF_WRITE_DATA 0x0001 /* write data blocks first */
+#define BUF_SKIP_META 0x0002 /* skip over metadata blocks */
+
+int buf_invalidateblks(vnode_t, int, int, int);
+/*
+ * flags for buf_flushdirtyblks and buf_iterate
+ */
+#define BUF_SKIP_NONLOCKED 0x01
+#define BUF_SKIP_LOCKED 0x02
+#define BUF_SCAN_CLEAN 0x04 /* scan only the clean buffers */
+#define BUF_SCAN_DIRTY 0x08 /* scan only the dirty buffers */
+#define BUF_NOTIFY_BUSY 0x10 /* notify the caller about the busy pages during the scan */
+
+void buf_flushdirtyblks(vnode_t, int, int, const char *);
+void buf_iterate(vnode_t, int (*)(buf_t, void *), int, void *);
+
+#define BUF_RETURNED 0
+#define BUF_RETURNED_DONE 1
+#define BUF_CLAIMED 2
+#define BUF_CLAIMED_DONE 3
+
+/*
+ * zero the storage associated with buf_t
+ */
+void buf_clear(buf_t);
+
+errno_t buf_bawrite(buf_t);
+errno_t buf_bdwrite(buf_t);
+errno_t buf_bwrite(buf_t);
+
+void buf_biodone(buf_t);
+errno_t buf_biowait(buf_t);
+void buf_brelse(buf_t);
+
+errno_t buf_bread(vnode_t, daddr64_t, int, ucred_t, buf_t *);
+errno_t buf_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, ucred_t, buf_t *);
+errno_t buf_meta_bread(vnode_t, daddr64_t, int, ucred_t, buf_t *);
+errno_t buf_meta_breadn(vnode_t, daddr64_t, int, daddr64_t *, int *, int, ucred_t, buf_t *);
+
+u_int minphys(buf_t bp);
+int physio(void (*)(buf_t), buf_t, dev_t, int , u_int (*)(buf_t), struct uio *, int );
+
+
+/*
+ * Flags for operation type in getblk()
+ */
+#define BLK_READ 0x01 /* buffer for read */
+#define BLK_WRITE 0x02 /* buffer for write */
+#define BLK_META 0x10 /* buffer for metadata */
+/*
+ * modifier for above flags... if set, getblk will only return
+ * a bp that is already valid... i.e. found in the cache
+ */
+#define BLK_ONLYVALID 0x80000000
+
+/* timeout is in msecs */
+buf_t buf_getblk(vnode_t, daddr64_t, int, int, int, int);
+buf_t buf_geteblk(int);
+
+__END_DECLS
+
+
+/* Macros to set/clear/test flag bits (SET and CLR modify t in place). */
+#define SET(t, f) (t) |= (f)
+#define CLR(t, f) (t) &= ~(f)
+#define ISSET(t, f) ((t) & (f))
+
-#endif /* KERNEL */
#endif /* !_SYS_BUF_H_ */