1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23/*
24 * Copyright (c) 1982, 1986, 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95
56 */
57#include <rev_endian_fs.h>
58#include <vm/vm_pager.h>
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/buf.h>
63#include <sys/proc.h>
64#include <sys/vnode.h>
65#include <sys/mount.h>
66#include <sys/kernel.h>
67#include <sys/syslog.h>
68#include <sys/quota.h>
69
70#include <sys/vm.h>
71
72#include <ufs/ufs/quota.h>
73#include <ufs/ufs/inode.h>
74
75#include <ufs/ffs/fs.h>
76#include <ufs/ffs/ffs_extern.h>
77
78#if REV_ENDIAN_FS
79#include <ufs/ufs/ufs_byte_order.h>
80#include <architecture/byte_order.h>
81#endif /* REV_ENDIAN_FS */
82
83extern u_long nextgennumber;
84
85static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
86static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t));
87static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
88 int));
89static ino_t ffs_dirpref __P((struct fs *));
90static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
91static void ffs_fserr __P((struct fs *, u_int, char *));
92static u_long ffs_hashalloc
93 __P((struct inode *, int, long, int, u_int32_t (*)()));
94static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
95static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
96 int));
97
98/*
99 * Allocate a block in the file system.
100 *
101 * The size of the requested block is given, which must be some
102 * multiple of fs_fsize and <= fs_bsize.
103 * A preference may be optionally specified. If a preference is given
104 * the following hierarchy is used to allocate a block:
105 * 1) allocate the requested block.
106 * 2) allocate a rotationally optimal block in the same cylinder.
107 * 3) allocate a block in the same cylinder group.
108 * 4) quadratically rehash into other cylinder groups, until an
109 * available block is located.
110 * If no block preference is given, the following hierarchy is used
111 * to allocate a block:
112 * 1) allocate a block in the cylinder group that contains the
113 * inode for the file.
114 * 2) quadratically rehash into other cylinder groups, until an
115 * available block is located.
116 */
117ffs_alloc(ip, lbn, bpref, size, cred, bnp)
118 register struct inode *ip;
119 ufs_daddr_t lbn, bpref;
120 int size;
121 struct ucred *cred;
122 ufs_daddr_t *bnp;
123{
124 register struct fs *fs;
125 ufs_daddr_t bno;
126 int cg, error;
127 int devBlockSize=0;
128 *bnp = 0;
129 fs = ip->i_fs;
130#if DIAGNOSTIC
131 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
132 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
133 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
134 panic("ffs_alloc: bad size");
135 }
136 if (cred == NOCRED)
137 panic("ffs_alloc: missing credential\n");
138#endif /* DIAGNOSTIC */
139 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
140 goto nospace;
141 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
142 goto nospace;
143 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
144#if QUOTA
145 if (error = chkdq(ip, (int64_t)size, cred, 0))
146 return (error);
147#endif /* QUOTA */
148 if (bpref >= fs->fs_size)
149 bpref = 0;
150 if (bpref == 0)
151 cg = ino_to_cg(fs, ip->i_number);
152 else
153 cg = dtog(fs, bpref);
154 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
155 (u_int32_t (*)())ffs_alloccg);
156 if (bno > 0) {
157 ip->i_blocks += btodb(size, devBlockSize);
158 ip->i_flag |= IN_CHANGE | IN_UPDATE;
159 *bnp = bno;
160 return (0);
161 }
162#if QUOTA
163 /*
164 * Restore user's disk quota because allocation failed.
165 */
166 (void) chkdq(ip, (int64_t)-size, cred, FORCE);
167#endif /* QUOTA */
168nospace:
169 ffs_fserr(fs, cred->cr_uid, "file system full");
170 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
171 return (ENOSPC);
172}
173
174/*
175 * Reallocate a fragment to a bigger size
176 *
177 * The number and size of the old block are given, and a preference
178 * and new size is also specified. The allocator attempts to extend
179 * the original block. Failing that, the regular block allocator is
180 * invoked to get an appropriate block.
181 */
182ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
183 register struct inode *ip;
184 ufs_daddr_t lbprev;
185 ufs_daddr_t bpref;
186 int osize, nsize;
187 struct ucred *cred;
188 struct buf **bpp;
189{
190 register struct fs *fs;
191 struct buf *bp;
192 int cg, request, error;
193 ufs_daddr_t bprev, bno;
194 int devBlockSize=0;
195
196 *bpp = 0;
197 fs = ip->i_fs;
198#if DIAGNOSTIC
199 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
200 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
201 printf(
202 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
203 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
204 panic("ffs_realloccg: bad size");
205 }
206 if (cred == NOCRED)
207 panic("ffs_realloccg: missing credential\n");
208#endif /* DIAGNOSTIC */
209 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
210 goto nospace;
211 if ((bprev = ip->i_db[lbprev]) == 0) {
212 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
213 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
214 panic("ffs_realloccg: bad bprev");
215 }
216 /*
217 * Allocate the extra space in the buffer.
218 */
219 if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
220 brelse(bp);
221 return (error);
222 }
223 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
224
225#if QUOTA
226 if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0))
227 {
228 brelse(bp);
229 return (error);
230 }
231#endif /* QUOTA */
232 /*
233 * Check for extension in the existing location.
234 */
235 cg = dtog(fs, bprev);
236 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
237 if (bp->b_blkno != fsbtodb(fs, bno))
238 panic("bad blockno");
239 ip->i_blocks += btodb(nsize - osize, devBlockSize);
240 ip->i_flag |= IN_CHANGE | IN_UPDATE;
241 allocbuf(bp, nsize);
242 bp->b_flags |= B_DONE;
243 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
244 *bpp = bp;
245 return (0);
246 }
247 /*
248 * Allocate a new disk location.
249 */
250 if (bpref >= fs->fs_size)
251 bpref = 0;
252 switch ((int)fs->fs_optim) {
253 case FS_OPTSPACE:
254 /*
255 * Allocate an exact sized fragment. Although this makes
256 * best use of space, we will waste time relocating it if
257 * the file continues to grow. If the fragmentation is
258 * less than half of the minimum free reserve, we choose
259 * to begin optimizing for time.
260 */
261 request = nsize;
262 if (fs->fs_minfree < 5 ||
263 fs->fs_cstotal.cs_nffree >
264 fs->fs_dsize * fs->fs_minfree / (2 * 100))
265 break;
266 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
267 fs->fs_fsmnt);
268 fs->fs_optim = FS_OPTTIME;
269 break;
270 case FS_OPTTIME:
271 /*
272 * At this point we have discovered a file that is trying to
273 * grow a small fragment to a larger fragment. To save time,
274 * we allocate a full sized block, then free the unused portion.
275 * If the file continues to grow, the `ffs_fragextend' call
276 * above will be able to grow it in place without further
277 * copying. If aberrant programs cause disk fragmentation to
278 * grow within 2% of the free reserve, we choose to begin
279 * optimizing for space.
280 */
281 request = fs->fs_bsize;
282 if (fs->fs_cstotal.cs_nffree <
283 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
284 break;
285 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
286 fs->fs_fsmnt);
287 fs->fs_optim = FS_OPTSPACE;
288 break;
289 default:
290 printf("dev = 0x%x, optim = %d, fs = %s\n",
291 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
292 panic("ffs_realloccg: bad optim");
293 /* NOTREACHED */
294 }
295 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
296 (u_int32_t (*)())ffs_alloccg);
297 if (bno > 0) {
298 bp->b_blkno = fsbtodb(fs, bno);
299 ffs_blkfree(ip, bprev, (long)osize);
300 if (nsize < request)
301 ffs_blkfree(ip, bno + numfrags(fs, nsize),
302 (long)(request - nsize));
303 ip->i_blocks += btodb(nsize - osize, devBlockSize);
304 ip->i_flag |= IN_CHANGE | IN_UPDATE;
305 allocbuf(bp, nsize);
306 bp->b_flags |= B_DONE;
307 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
308 *bpp = bp;
309 return (0);
310 }
311#if QUOTA
312 /*
313 * Restore user's disk quota because allocation failed.
314 */
315 (void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE);
316#endif /* QUOTA */
317 brelse(bp);
318nospace:
319 /*
320 * no space available
321 */
322 ffs_fserr(fs, cred->cr_uid, "file system full");
323 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
324 return (ENOSPC);
325}
326
327/*
328 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
329 *
330 * The vnode and an array of buffer pointers for a range of sequential
331 * logical blocks to be made contiguous are given. The allocator attempts
332 * to find a range of sequential blocks starting as close as possible to
333 * an fs_rotdelay offset from the end of the allocation for the logical
334 * block immediately preceding the current range. If successful, the
335 * physical block numbers in the buffer pointers and in the inode are
336 * changed to reflect the new allocation. If unsuccessful, the allocation
337 * is left unchanged. The success in doing the reallocation is returned.
338 * Note that the error return is not reflected back to the user. Rather
339 * the previous block allocation will be used.
340 */
341/* Note: This routine is unused in UBC cluster I/O */
342
343int doasyncfree = 1;
344int doreallocblks = 1;
345
346int
347ffs_reallocblks(ap)
348 struct vop_reallocblks_args *ap;
349{
350 return (ENOSPC);
351}
352
353/*
354 * Allocate an inode in the file system.
355 *
356 * If allocating a directory, use ffs_dirpref to select the inode.
357 * If allocating in a directory, the following hierarchy is followed:
358 * 1) allocate the preferred inode.
359 * 2) allocate an inode in the same cylinder group.
360 * 3) quadratically rehash into other cylinder groups, until an
361 * available inode is located.
362 * If no inode preference is given, the following hierarchy is used
363 * to allocate an inode:
364 * 1) allocate an inode in cylinder group 0.
365 * 2) quadratically rehash into other cylinder groups, until an
366 * available inode is located.
367 */
368int
369ffs_valloc(ap)
370 struct vop_valloc_args /* {
371 struct vnode *a_pvp;
372 int a_mode;
373 struct ucred *a_cred;
374 struct vnode **a_vpp;
375 } */ *ap;
376{
377 register struct vnode *pvp = ap->a_pvp;
378 register struct inode *pip;
379 register struct fs *fs;
380 register struct inode *ip;
381 mode_t mode = ap->a_mode;
382 ino_t ino, ipref;
383 int cg, error;
384
385 *ap->a_vpp = NULL;
386 pip = VTOI(pvp);
387 fs = pip->i_fs;
388 if (fs->fs_cstotal.cs_nifree == 0)
389 goto noinodes;
390
391 if ((mode & IFMT) == IFDIR)
392 ipref = ffs_dirpref(fs);
393 else
394 ipref = pip->i_number;
395 if (ipref >= fs->fs_ncg * fs->fs_ipg)
396 ipref = 0;
397 cg = ino_to_cg(fs, ipref);
398 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
399 if (ino == 0)
400 goto noinodes;
401 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
402 if (error) {
403 VOP_VFREE(pvp, ino, mode);
404 return (error);
405 }
406 ip = VTOI(*ap->a_vpp);
407 if (ip->i_mode) {
408 printf("mode = 0%o, inum = %d, fs = %s\n",
409 ip->i_mode, ip->i_number, fs->fs_fsmnt);
410 panic("ffs_valloc: dup alloc");
411 }
412 if (ip->i_blocks) { /* XXX */
413 printf("free inode %s/%d had %d blocks\n",
414 fs->fs_fsmnt, ino, ip->i_blocks);
415 ip->i_blocks = 0;
416 }
417 ip->i_flags = 0;
418 /*
419 * Set up a new generation number for this inode.
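	 * The counter is kept at or above the current time of day, so a
	 * reused inode number gets a generation value that no earlier
	 * incarnation of the inode ever had (useful, e.g., for detecting
	 * stale NFS file handles).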
420 */
421 if (++nextgennumber < (u_long)time.tv_sec)
422 nextgennumber = time.tv_sec;
423 ip->i_gen = nextgennumber;
424 return (0);
425noinodes:
426 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
427 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
428 return (ENOSPC);
429}
430
431/*
432 * Find a cylinder to place a directory.
433 *
434 * The policy implemented by this algorithm is to select, from
435 * among those cylinder groups with an above-average number of
436 * free inodes, the one with the smallest number of directories.
437 */
438static ino_t
439ffs_dirpref(fs)
440 register struct fs *fs;
441{
442 int cg, minndir, mincg, avgifree;
443
444 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
445 minndir = fs->fs_ipg;
446 mincg = 0;
447 for (cg = 0; cg < fs->fs_ncg; cg++)
448 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
449 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
450 mincg = cg;
451 minndir = fs->fs_cs(fs, cg).cs_ndir;
452 }
453 return ((ino_t)(fs->fs_ipg * mincg));
454}
455
456/*
457 * Select the desired position for the next block in a file. The file is
458 * logically divided into sections. The first section is composed of the
459 * direct blocks. Each additional section contains fs_maxbpg blocks.
460 *
461 * If no blocks have been allocated in the first section, the policy is to
462 * request a block in the same cylinder group as the inode that describes
463 * the file. If no blocks have been allocated in any other section, the
464 * policy is to place the section in a cylinder group with a greater than
465 * average number of free blocks. An appropriate cylinder group is found
466 * by using a rotor that sweeps the cylinder groups. When a new group of
467 * blocks is needed, the sweep begins in the cylinder group following the
468 * cylinder group from which the previous allocation was made. The sweep
469 * continues until a cylinder group with greater than the average number
470 * of free blocks is found. If the allocation is for the first block in an
471 * indirect block, the information on the previous allocation is unavailable;
472 * here a best guess is made based upon the logical block number being
473 * allocated.
474 *
475 * If a section is already partially allocated, the policy is to
476 * contiguously allocate fs_maxcontig blocks. The end of one of these
477 * contiguous blocks and the beginning of the next is physically separated
478 * so that the disk head will be in transit between them for at least
479 * fs_rotdelay milliseconds. This is to allow time for the processor to
480 * schedule another I/O transfer.
481 */
482ufs_daddr_t
483ffs_blkpref(ip, lbn, indx, bap)
484 struct inode *ip;
485 ufs_daddr_t lbn;
486 int indx;
487 ufs_daddr_t *bap;
488{
489 register struct fs *fs;
490 register int cg;
491 int avgbfree, startcg;
492 ufs_daddr_t nextblk;
493#if REV_ENDIAN_FS
494 daddr_t prev=0;
495 struct vnode *vp=ITOV(ip);
496 struct mount *mp=vp->v_mount;
497 int rev_endian=(mp->mnt_flag & MNT_REVEND);
498#endif /* REV_ENDIAN_FS */
499
500 fs = ip->i_fs;
501#if REV_ENDIAN_FS
502 if (indx && bap) {
503 if (rev_endian) {
504 if (bap != &ip->i_db[0])
505 prev = NXSwapLong(bap[indx - 1]);
506 else
507 prev = bap[indx - 1];
508 } else prev = bap[indx - 1];
509 }
510 if (indx % fs->fs_maxbpg == 0 || prev == 0)
511#else /* REV_ENDIAN_FS */
512 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0)
513#endif /* REV_ENDIAN_FS */
514 {
515 if (lbn < NDADDR) {
516 cg = ino_to_cg(fs, ip->i_number);
517 return (fs->fs_fpg * cg + fs->fs_frag);
518 }
519 /*
520 * Find a cylinder with greater than average number of
521 * unused data blocks.
522 */
523#if REV_ENDIAN_FS
524 if (indx == 0 || prev == 0)
525#else /* REV_ENDIAN_FS */
526 if (indx == 0 || bap[indx - 1] == 0)
527#endif /* REV_ENDIAN_FS */
528 startcg =
529 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
530 else
531#if REV_ENDIAN_FS
532 startcg = dtog(fs, prev) + 1;
533#else /* REV_ENDIAN_FS */
534 startcg = dtog(fs, bap[indx - 1]) + 1;
535#endif /* REV_ENDIAN_FS */
536 startcg %= fs->fs_ncg;
537 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
538 for (cg = startcg; cg < fs->fs_ncg; cg++)
539 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
540 fs->fs_cgrotor = cg;
541 return (fs->fs_fpg * cg + fs->fs_frag);
542 }
543 for (cg = 0; cg <= startcg; cg++)
544 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
545 fs->fs_cgrotor = cg;
546 return (fs->fs_fpg * cg + fs->fs_frag);
547 }
548 return (NULL);
549 }
550 /*
551 * One or more previous blocks have been laid out. If less
552 * than fs_maxcontig previous blocks are contiguous, the
553 * next block is requested contiguously, otherwise it is
554 * requested rotationally delayed by fs_rotdelay milliseconds.
555 */
556#if REV_ENDIAN_FS
557 if (rev_endian) {
558 nextblk = prev + fs->fs_frag;
559 if (indx < fs->fs_maxcontig) {
560 return (nextblk);
561 }
562 if (bap != &ip->i_db[0])
563 prev = NXSwapLong(bap[indx - fs->fs_maxcontig]);
564 else
565 prev = bap[indx - fs->fs_maxcontig];
566 if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk)
567 return (nextblk);
568 } else {
569#endif /* REV_ENDIAN_FS */
570 nextblk = bap[indx - 1] + fs->fs_frag;
571 if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
572 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
573 return (nextblk);
574#if REV_ENDIAN_FS
575 }
576#endif /* REV_ENDIAN_FS */
577 if (fs->fs_rotdelay != 0)
578 /*
579 * Here we convert ms of delay to frags as:
580 * (frags) = (ms) * (rev/sec) * (sect/rev) /
581 * ((sect/frag) * (ms/sec))
582 * then round up to the next block.
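	 * For example (assumed values): 4 ms * 60 rev/sec * 32 sect/rev /
	 * (2 sect/frag * 1000 ms/sec) = 3 frags in integer arithmetic,
	 * which roundup() then raises to the next multiple of fs_frag.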
583 */
584 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
585 (NSPF(fs) * 1000), fs->fs_frag);
586 return (nextblk);
587}
588
589/*
590 * Implement the cylinder overflow algorithm.
591 *
592 * The policy implemented by this algorithm is:
593 * 1) allocate the block in its requested cylinder group.
594 * 2) quadratically rehash on the cylinder group number.
595 * 3) brute force search for a free block.
596 */
597/*VARARGS5*/
598static u_long
599ffs_hashalloc(ip, cg, pref, size, allocator)
600 struct inode *ip;
601 int cg;
602 long pref;
603 int size; /* size for data blocks, mode for inodes */
604 u_int32_t (*allocator)();
605{
606 register struct fs *fs;
607 long result;
608 int i, icg = cg;
609
610 fs = ip->i_fs;
611 /*
612 * 1: preferred cylinder group
613 */
614 result = (*allocator)(ip, cg, pref, size);
615 if (result)
616 return (result);
617 /*
618 * 2: quadratic rehash
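	 *    The probe offset doubles on each pass, so after the preferred
	 *    group the groups tried are icg+1, icg+3, icg+7, ... (mod fs_ncg).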
619 */
620 for (i = 1; i < fs->fs_ncg; i *= 2) {
621 cg += i;
622 if (cg >= fs->fs_ncg)
623 cg -= fs->fs_ncg;
624 result = (*allocator)(ip, cg, 0, size);
625 if (result)
626 return (result);
627 }
628 /*
629 * 3: brute force search
630 * Note that we start at i == 2, since 0 was checked initially,
631 * and 1 is always checked in the quadratic rehash.
632 */
633 cg = (icg + 2) % fs->fs_ncg;
634 for (i = 2; i < fs->fs_ncg; i++) {
635 result = (*allocator)(ip, cg, 0, size);
636 if (result)
637 return (result);
638 cg++;
639 if (cg == fs->fs_ncg)
640 cg = 0;
641 }
642 return (NULL);
643}
644
645/*
646 * Determine whether a fragment can be extended.
647 *
648 * Check to see if the necessary fragments are available, and
649 * if they are, allocate them.
650 */
651static ufs_daddr_t
652ffs_fragextend(ip, cg, bprev, osize, nsize)
653 struct inode *ip;
654 int cg;
655 long bprev;
656 int osize, nsize;
657{
658 register struct fs *fs;
659 register struct cg *cgp;
660 struct buf *bp;
661 long bno;
662 int frags, bbase;
663 int i, error;
664#if REV_ENDIAN_FS
665 struct vnode *vp=ITOV(ip);
666 struct mount *mp=vp->v_mount;
667 int rev_endian=(mp->mnt_flag & MNT_REVEND);
668#endif /* REV_ENDIAN_FS */
669
670 fs = ip->i_fs;
671 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
672 return (NULL);
673 frags = numfrags(fs, nsize); /* number of fragments needed */
674 bbase = fragnum(fs, bprev); /* frag offset within the block (bprev mod fs_frag) */
675 if (bbase > fragnum(fs, (bprev + frags - 1))) {
676 /* cannot extend across a block boundary */
677 return (NULL);
678 }
679 /* read corresponding cylinder group info */
680 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
681 (int)fs->fs_cgsize, NOCRED, &bp);
682 if (error) {
683 brelse(bp);
684 return (NULL);
685 }
686 cgp = (struct cg *)bp->b_data;
687#if REV_ENDIAN_FS
688 if (rev_endian) {
689 byte_swap_cgin(cgp, fs);
690 }
691#endif /* REV_ENDIAN_FS */
692
693 if (!cg_chkmagic(cgp)) {
694#if REV_ENDIAN_FS
695 if (rev_endian)
696 byte_swap_cgout(cgp,fs);
697#endif /* REV_ENDIAN_FS */
698 brelse(bp);
699 return (NULL);
700 }
701 cgp->cg_time = time.tv_sec;
702 bno = dtogd(fs, bprev);
703 for (i = numfrags(fs, osize); i < frags; i++)
704 if (isclr(cg_blksfree(cgp), bno + i)) {
705#if REV_ENDIAN_FS
706 if (rev_endian)
707 byte_swap_cgout(cgp,fs);
708#endif /* REV_ENDIAN_FS */
709 brelse(bp);
710 return (NULL);
711 }
712 /*
713 * The current fragment can be extended:
714 * deduct the count on the fragment being extended into,
715 * increase the count on the remaining fragment (if any),
716 * and allocate the extended piece.
717 */
718 for (i = frags; i < fs->fs_frag - bbase; i++)
719 if (isclr(cg_blksfree(cgp), bno + i))
720 break;
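	/*
	 * i now marks the end of the free run that follows the old
	 * fragment: that run of i - numfrags(fs, osize) frags leaves the
	 * per-size summary, and if it is longer than needed the leftover
	 * i - frags frags are credited back as a smaller free run.
	 */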
721 cgp->cg_frsum[i - numfrags(fs, osize)]--;
722 if (i != frags)
723 cgp->cg_frsum[i - frags]++;
724 for (i = numfrags(fs, osize); i < frags; i++) {
725 clrbit(cg_blksfree(cgp), bno + i);
726 cgp->cg_cs.cs_nffree--;
727 fs->fs_cstotal.cs_nffree--;
728 fs->fs_cs(fs, cg).cs_nffree--;
729 }
730 fs->fs_fmod = 1;
731#if REV_ENDIAN_FS
732 if (rev_endian)
733 byte_swap_cgout(cgp,fs);
734#endif /* REV_ENDIAN_FS */
735 bdwrite(bp);
736 return (bprev);
737}
738
739/*
740 * Determine whether a block can be allocated.
741 *
742 * Check to see if a block of the appropriate size is available,
743 * and if it is, allocate it.
744 */
745static ufs_daddr_t
746ffs_alloccg(ip, cg, bpref, size)
747 struct inode *ip;
748 int cg;
749 ufs_daddr_t bpref;
750 int size;
751{
752 register struct fs *fs;
753 register struct cg *cgp;
754 struct buf *bp;
755 register int i;
756 int error, bno, frags, allocsiz;
757#if REV_ENDIAN_FS
758 struct vnode *vp=ITOV(ip);
759 struct mount *mp=vp->v_mount;
760 int rev_endian=(mp->mnt_flag & MNT_REVEND);
761#endif /* REV_ENDIAN_FS */
762
763 fs = ip->i_fs;
764 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
765 return (NULL);
766 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
767 (int)fs->fs_cgsize, NOCRED, &bp);
768 if (error) {
769 brelse(bp);
770 return (NULL);
771 }
772 cgp = (struct cg *)bp->b_data;
773#if REV_ENDIAN_FS
774 if (rev_endian)
775 byte_swap_cgin(cgp,fs);
776#endif /* REV_ENDIAN_FS */
777 if (!cg_chkmagic(cgp) ||
778 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
779#if REV_ENDIAN_FS
780 if (rev_endian)
781 byte_swap_cgout(cgp,fs);
782#endif /* REV_ENDIAN_FS */
783 brelse(bp);
784 return (NULL);
785 }
786 cgp->cg_time = time.tv_sec;
787 if (size == fs->fs_bsize) {
788 bno = ffs_alloccgblk(fs, cgp, bpref);
789#if REV_ENDIAN_FS
790 if (rev_endian)
791 byte_swap_cgout(cgp,fs);
792#endif /* REV_ENDIAN_FS */
793 bdwrite(bp);
794 return (bno);
795 }
796 /*
797 * check to see if any fragments are already available
798 * allocsiz is the size which will be allocated, hacking
799 * it down to a smaller size if necessary
800 */
801 frags = numfrags(fs, size);
802 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
803 if (cgp->cg_frsum[allocsiz] != 0)
804 break;
805 if (allocsiz == fs->fs_frag) {
806 /*
807 * no fragments were available, so a block will be
808 * allocated, and hacked up
809 */
810 if (cgp->cg_cs.cs_nbfree == 0) {
811#if REV_ENDIAN_FS
812 if (rev_endian)
813 byte_swap_cgout(cgp,fs);
814#endif /* REV_ENDIAN_FS */
815 brelse(bp);
816 return (NULL);
817 }
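		/*
		 * Take a whole block and give back the tail: the frags past
		 * what was requested are marked free again and recorded as a
		 * single free run of fs_frag - frags frags.
		 */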
818 bno = ffs_alloccgblk(fs, cgp, bpref);
819 bpref = dtogd(fs, bno);
820 for (i = frags; i < fs->fs_frag; i++)
821 setbit(cg_blksfree(cgp), bpref + i);
822 i = fs->fs_frag - frags;
823 cgp->cg_cs.cs_nffree += i;
824 fs->fs_cstotal.cs_nffree += i;
825 fs->fs_cs(fs, cg).cs_nffree += i;
826 fs->fs_fmod = 1;
827 cgp->cg_frsum[i]++;
828#if REV_ENDIAN_FS
829 if (rev_endian)
830 byte_swap_cgout(cgp,fs);
831#endif /* REV_ENDIAN_FS */
832 bdwrite(bp);
833 return (bno);
834 }
835 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
836 if (bno < 0) {
837#if REV_ENDIAN_FS
838 if (rev_endian)
839 byte_swap_cgout(cgp,fs);
840#endif /* REV_ENDIAN_FS */
841 brelse(bp);
842 return (NULL);
843 }
844 for (i = 0; i < frags; i++)
845 clrbit(cg_blksfree(cgp), bno + i);
846 cgp->cg_cs.cs_nffree -= frags;
847 fs->fs_cstotal.cs_nffree -= frags;
848 fs->fs_cs(fs, cg).cs_nffree -= frags;
849 fs->fs_fmod = 1;
850 cgp->cg_frsum[allocsiz]--;
851 if (frags != allocsiz)
852 cgp->cg_frsum[allocsiz - frags]++;
853#if REV_ENDIAN_FS
854 if (rev_endian)
855 byte_swap_cgout(cgp,fs);
856#endif /* REV_ENDIAN_FS */
857 bdwrite(bp);
858 return (cg * fs->fs_fpg + bno);
859}
860
861/*
862 * Allocate a block in a cylinder group.
863 *
864 * This algorithm implements the following policy:
865 * 1) allocate the requested block.
866 * 2) allocate a rotationally optimal block in the same cylinder.
867 * 3) allocate the next available block on the block rotor for the
868 * specified cylinder group.
869 * Note that this routine only allocates fs_bsize blocks; these
870 * blocks may be fragmented by the routine that allocates them.
871 */
872static ufs_daddr_t
873ffs_alloccgblk(fs, cgp, bpref)
874 register struct fs *fs;
875 register struct cg *cgp;
876 ufs_daddr_t bpref;
877{
878 ufs_daddr_t bno, blkno;
879 int cylno, pos, delta;
880 short *cylbp;
881 register int i;
882
883 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
884 bpref = cgp->cg_rotor;
885 goto norot;
886 }
887 bpref = blknum(fs, bpref);
888 bpref = dtogd(fs, bpref);
889 /*
890 * if the requested block is available, use it
891 */
892 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
893 bno = bpref;
894 goto gotit;
895 }
896 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
897 /*
898 * Block layout information is not available.
899 * Leaving bpref unchanged means we take the
900 * next available free block following the one
901 * we just allocated. Hopefully this will at
902 * least hit a track cache on drives of unknown
903 * geometry (e.g. SCSI).
904 */
905 goto norot;
906 }
907 /*
908 * check for a block available on the same cylinder
909 */
910 cylno = cbtocylno(fs, bpref);
911 if (cg_blktot(cgp)[cylno] == 0)
912 goto norot;
913 /*
914 * check the summary information to see if a block is
915 * available in the requested cylinder starting at the
916 * requested rotational position and proceeding around.
917 */
918 cylbp = cg_blks(fs, cgp, cylno);
919 pos = cbtorpos(fs, bpref);
920 for (i = pos; i < fs->fs_nrpos; i++)
921 if (cylbp[i] > 0)
922 break;
923 if (i == fs->fs_nrpos)
924 for (i = 0; i < pos; i++)
925 if (cylbp[i] > 0)
926 break;
927 if (cylbp[i] > 0) {
928 /*
929 * found a rotational position, now find the actual
930 * block; it is a panic if none is actually there.
931 */
932 pos = cylno % fs->fs_cpc;
933 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
934 if (fs_postbl(fs, pos)[i] == -1) {
935 printf("pos = %d, i = %d, fs = %s\n",
936 pos, i, fs->fs_fsmnt);
937 panic("ffs_alloccgblk: cyl groups corrupted");
938 }
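	/*
	 * fs_postbl gives the first block at this rotational position
	 * within the cylinder cycle; fs_rotbl chains to the next block at
	 * the same position, and a delta <= 0 ends the walk.
	 */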
939 for (i = fs_postbl(fs, pos)[i];; ) {
940 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
941 bno = blkstofrags(fs, (bno + i));
942 goto gotit;
943 }
944 delta = fs_rotbl(fs)[i];
945 if (delta <= 0 ||
946 delta + i > fragstoblks(fs, fs->fs_fpg))
947 break;
948 i += delta;
949 }
950 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
951 panic("ffs_alloccgblk: can't find blk in cyl");
952 }
953norot:
954 /*
955 * no blocks in the requested cylinder, so take next
956 * available one in this cylinder group.
957 */
958 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
959 if (bno < 0)
960 return (NULL);
961 cgp->cg_rotor = bno;
962gotit:
963 blkno = fragstoblks(fs, bno);
964 ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
965 ffs_clusteracct(fs, cgp, blkno, -1);
966 cgp->cg_cs.cs_nbfree--;
967 fs->fs_cstotal.cs_nbfree--;
968 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
969 cylno = cbtocylno(fs, bno);
970 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
971 cg_blktot(cgp)[cylno]--;
972 fs->fs_fmod = 1;
973 return (cgp->cg_cgx * fs->fs_fpg + bno);
974}
975
976/*
977 * Determine whether a cluster can be allocated.
978 *
979 * We do not currently check for optimal rotational layout if there
980 * are multiple choices in the same cylinder group. Instead we just
981 * take the first one that we find following bpref.
982 */
983static ufs_daddr_t
984ffs_clusteralloc(ip, cg, bpref, len)
985 struct inode *ip;
986 int cg;
987 ufs_daddr_t bpref;
988 int len;
989{
990 register struct fs *fs;
991 register struct cg *cgp;
992 struct buf *bp;
993 int i, got, run, bno, bit, map;
994 u_char *mapp;
995 int32_t *lp;
996#if REV_ENDIAN_FS
997 struct vnode *vp=ITOV(ip);
998 struct mount *mp=vp->v_mount;
999 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1000#endif /* REV_ENDIAN_FS */
1001
1002 fs = ip->i_fs;
1003 if (fs->fs_maxcluster[cg] < len)
1004 return (NULL);
1005 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1006 NOCRED, &bp))
1007 goto fail;
1008 cgp = (struct cg *)bp->b_data;
1009#if REV_ENDIAN_FS
1010 if (rev_endian)
1011 byte_swap_cgin(cgp,fs);
1012#endif /* REV_ENDIAN_FS */
1013 if (!cg_chkmagic(cgp)) {
1014#if REV_ENDIAN_FS
1015 if (rev_endian)
1016 byte_swap_cgout(cgp,fs);
1017#endif /* REV_ENDIAN_FS */
1018 goto fail;
1019 }
1020 /*
1021 * Check to see if a cluster of the needed size (or bigger) is
1022 * available in this cylinder group.
1023 */
1024 lp = &cg_clustersum(cgp)[len];
1025 for (i = len; i <= fs->fs_contigsumsize; i++)
1026 if (*lp++ > 0)
1027 break;
1028 if (i > fs->fs_contigsumsize) {
1029 /*
1030 * This is the first time looking for a cluster in this
1031 * cylinder group. Update the cluster summary information
1032 * to reflect the true maximum sized cluster so that
1033 * future cluster allocation requests can avoid reading
1034 * the cylinder group map only to find no clusters.
1035 */
1036 lp = &cg_clustersum(cgp)[len - 1];
1037 for (i = len - 1; i > 0; i--)
1038 if (*lp-- > 0)
1039 break;
1040 fs->fs_maxcluster[cg] = i;
1041#if REV_ENDIAN_FS
1042 if (rev_endian)
1043 byte_swap_cgout(cgp,fs);
1044#endif /* REV_ENDIAN_FS */
1045 goto fail;
1046 }
1047 /*
1048 * Search the cluster map to find a big enough cluster.
1049 * We take the first one that we find, even if it is larger
1050 * than we need as we prefer to get one close to the previous
1051 * block allocation. We do not search before the current
1052 * preference point as we do not want to allocate a block
1053 * that is allocated before the previous one (as we will
1054 * then have to wait for another pass of the elevator
1055 * algorithm before it will be read). We prefer to fail and
1056 * be recalled to try an allocation in the next cylinder group.
1057 */
1058 if (dtog(fs, bpref) != cg)
1059 bpref = 0;
1060 else
1061 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1062 mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1063 map = *mapp++;
1064 bit = 1 << (bpref % NBBY);
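	/*
	 * Walk the cluster free map one block at a time from the
	 * preference point, tracking the length of the current run of
	 * free blocks and stopping once a run of len blocks is found.
	 */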
1065 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
1066 if ((map & bit) == 0) {
1067 run = 0;
1068 } else {
1069 run++;
1070 if (run == len)
1071 break;
1072 }
1073 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1074 bit <<= 1;
1075 } else {
1076 map = *mapp++;
1077 bit = 1;
1078 }
1079 }
1080 if (got == cgp->cg_nclusterblks) {
1081#if REV_ENDIAN_FS
1082 if (rev_endian)
1083 byte_swap_cgout(cgp,fs);
1084#endif /* REV_ENDIAN_FS */
1085 goto fail;
1086 }
1087 /*
1088 * Allocate the cluster that we have found.
1089 */
1090 for (i = 1; i <= len; i++)
1091 if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
1092 panic("ffs_clusteralloc: map mismatch");
1093 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1094 if (dtog(fs, bno) != cg)
1095 panic("ffs_clusteralloc: allocated out of group");
1096 len = blkstofrags(fs, len);
1097 for (i = 0; i < len; i += fs->fs_frag)
1098 if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i)
1099 panic("ffs_clusteralloc: lost block");
1100#if REV_ENDIAN_FS
1101 if (rev_endian)
1102 byte_swap_cgout(cgp,fs);
1103#endif /* REV_ENDIAN_FS */
1104 bdwrite(bp);
1105 return (bno);
1106
1107fail:
1108 brelse(bp);
1109 return (0);
1110}
1111
1112/*
1113 * Determine whether an inode can be allocated.
1114 *
1115 * Check to see if an inode is available, and if it is,
1116 * allocate it using the following policy:
1117 * 1) allocate the requested inode.
1118 * 2) allocate the next available inode after the requested
1119 * inode in the specified cylinder group.
1120 */
1121static ino_t
1122ffs_nodealloccg(ip, cg, ipref, mode)
1123 struct inode *ip;
1124 int cg;
1125 ufs_daddr_t ipref;
1126 int mode;
1127{
1128 register struct fs *fs;
1129 register struct cg *cgp;
1130 struct buf *bp;
1131 int error, start, len, loc, map, i;
1132#if REV_ENDIAN_FS
1133 struct vnode *vp=ITOV(ip);
1134 struct mount *mp=vp->v_mount;
1135 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1136#endif /* REV_ENDIAN_FS */
1137
1138 fs = ip->i_fs;
1139 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1140 return (NULL);
1141 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1142 (int)fs->fs_cgsize, NOCRED, &bp);
1143 if (error) {
1144 brelse(bp);
1145 return (NULL);
1146 }
1147 cgp = (struct cg *)bp->b_data;
1148#if REV_ENDIAN_FS
1149 if (rev_endian)
1150 byte_swap_cgin(cgp,fs);
1151#endif /* REV_ENDIAN_FS */
1152 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1153#if REV_ENDIAN_FS
1154 if (rev_endian)
1155 byte_swap_cgout(cgp,fs);
1156#endif /* REV_ENDIAN_FS */
1157 brelse(bp);
1158 return (NULL);
1159 }
1160
1161 cgp->cg_time = time.tv_sec;
1162 if (ipref) {
1163 ipref %= fs->fs_ipg;
1164 if (isclr(cg_inosused(cgp), ipref))
1165 goto gotit;
1166 }
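	/*
	 * Starting at the inode rotor, scan the inode map a byte at a
	 * time for the first byte that is not 0xff (i.e. one that still
	 * has a free inode), wrapping around to the start of the map if
	 * necessary.
	 */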
1167 start = cgp->cg_irotor / NBBY;
1168 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1169 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
1170 if (loc == 0) {
1171 len = start + 1;
1172 start = 0;
1173 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
1174 if (loc == 0) {
1175 printf("cg = %d, irotor = %d, fs = %s\n",
1176 cg, cgp->cg_irotor, fs->fs_fsmnt);
1177 panic("ffs_nodealloccg: map corrupted");
1178 /* NOTREACHED */
1179 }
1180 }
1181 i = start + len - loc;
1182 map = cg_inosused(cgp)[i];
1183 ipref = i * NBBY;
1184 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1185 if ((map & i) == 0) {
1186 cgp->cg_irotor = ipref;
1187 goto gotit;
1188 }
1189 }
1190 printf("fs = %s\n", fs->fs_fsmnt);
1191 panic("ffs_nodealloccg: block not in map");
1192 /* NOTREACHED */
1193gotit:
1194 setbit(cg_inosused(cgp), ipref);
1195 cgp->cg_cs.cs_nifree--;
1196 fs->fs_cstotal.cs_nifree--;
1197 fs->fs_cs(fs, cg).cs_nifree--;
1198 fs->fs_fmod = 1;
1199 if ((mode & IFMT) == IFDIR) {
1200 cgp->cg_cs.cs_ndir++;
1201 fs->fs_cstotal.cs_ndir++;
1202 fs->fs_cs(fs, cg).cs_ndir++;
1203 }
1204#if REV_ENDIAN_FS
1205 if (rev_endian)
1206 byte_swap_cgout(cgp,fs);
1207#endif /* REV_ENDIAN_FS */
1208 bdwrite(bp);
1209 return (cg * fs->fs_ipg + ipref);
1210}
1211
1212/*
1213 * Free a block or fragment.
1214 *
1215 * The specified block or fragment is placed back in the
1216 * free map. If a fragment is deallocated, a possible
1217 * block reassembly is checked.
1218 */
1219ffs_blkfree(ip, bno, size)
1220 register struct inode *ip;
1221 ufs_daddr_t bno;
1222 long size;
1223{
1224 register struct fs *fs;
1225 register struct cg *cgp;
1226 struct buf *bp;
1227 ufs_daddr_t blkno;
1228 int i, error, cg, blk, frags, bbase;
1229#if REV_ENDIAN_FS
1230 struct vnode *vp=ITOV(ip);
1231 struct mount *mp=vp->v_mount;
1232 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1233#endif /* REV_ENDIAN_FS */
1234 fs = ip->i_fs;
1235 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1236 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
1237 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1238 panic("blkfree: bad size");
1239 }
1240 cg = dtog(fs, bno);
1241 if ((u_int)bno >= fs->fs_size) {
1242 printf("bad block %d, ino %d\n", bno, ip->i_number);
1243 ffs_fserr(fs, ip->i_uid, "bad block");
1244 return;
1245 }
1246 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1247 (int)fs->fs_cgsize, NOCRED, &bp);
1248 if (error) {
1249 brelse(bp);
1250 return;
1251 }
1252 cgp = (struct cg *)bp->b_data;
1253#if REV_ENDIAN_FS
1254 if (rev_endian)
1255 byte_swap_cgin(cgp,fs);
1256#endif /* REV_ENDIAN_FS */
1257 if (!cg_chkmagic(cgp)) {
1258#if REV_ENDIAN_FS
1259 if (rev_endian)
1260 byte_swap_cgout(cgp,fs);
1261#endif /* REV_ENDIAN_FS */
1262 brelse(bp);
1263 return;
1264 }
1265 cgp->cg_time = time.tv_sec;
1266 bno = dtogd(fs, bno);
1267 if (size == fs->fs_bsize) {
1268 blkno = fragstoblks(fs, bno);
1269 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1270 printf("dev = 0x%x, block = %d, fs = %s\n",
1271 ip->i_dev, bno, fs->fs_fsmnt);
1272 panic("blkfree: freeing free block");
1273 }
1274 ffs_setblock(fs, cg_blksfree(cgp), blkno);
1275 ffs_clusteracct(fs, cgp, blkno, 1);
1276 cgp->cg_cs.cs_nbfree++;
1277 fs->fs_cstotal.cs_nbfree++;
1278 fs->fs_cs(fs, cg).cs_nbfree++;
1279 i = cbtocylno(fs, bno);
1280 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
1281 cg_blktot(cgp)[i]++;
1282 } else {
1283 bbase = bno - fragnum(fs, bno);
1284 /*
1285 * decrement the counts associated with the old frags
1286 */
1287 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1288 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1289 /*
1290 * deallocate the fragment
1291 */
1292 frags = numfrags(fs, size);
1293 for (i = 0; i < frags; i++) {
1294 if (isset(cg_blksfree(cgp), bno + i)) {
1295 printf("dev = 0x%x, block = %d, fs = %s\n",
1296 ip->i_dev, bno + i, fs->fs_fsmnt);
1297 panic("blkfree: freeing free frag");
1298 }
1299 setbit(cg_blksfree(cgp), bno + i);
1300 }
1301 cgp->cg_cs.cs_nffree += i;
1302 fs->fs_cstotal.cs_nffree += i;
1303 fs->fs_cs(fs, cg).cs_nffree += i;
1304 /*
1305 * add back in counts associated with the new frags
1306 */
1307 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1308 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1309 /*
1310 * if a complete block has been reassembled, account for it
1311 */
1312 blkno = fragstoblks(fs, bbase);
1313 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1314 cgp->cg_cs.cs_nffree -= fs->fs_frag;
1315 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1316 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1317 ffs_clusteracct(fs, cgp, blkno, 1);
1318 cgp->cg_cs.cs_nbfree++;
1319 fs->fs_cstotal.cs_nbfree++;
1320 fs->fs_cs(fs, cg).cs_nbfree++;
1321 i = cbtocylno(fs, bbase);
1322 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
1323 cg_blktot(cgp)[i]++;
1324 }
1325 }
1326 fs->fs_fmod = 1;
1327#if REV_ENDIAN_FS
1328 if (rev_endian)
1329 byte_swap_cgout(cgp,fs);
1330#endif /* REV_ENDIAN_FS */
1331 bdwrite(bp);
1332}
1333
1334#if DIAGNOSTIC
1335/*
1336 * Verify allocation of a block or fragment. Returns true if block or
1337 * fragment is allocated, false if it is free.
1338 */
1339ffs_checkblk(ip, bno, size)
1340 struct inode *ip;
1341 ufs_daddr_t bno;
1342 long size;
1343{
1344 struct fs *fs;
1345 struct cg *cgp;
1346 struct buf *bp;
1347 int i, error, frags, free;
1348#if REV_ENDIAN_FS
1349 struct vnode *vp=ITOV(ip);
1350 struct mount *mp=vp->v_mount;
1351 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1352#endif /* REV_ENDIAN_FS */
1353
1354 fs = ip->i_fs;
1355 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1356 printf("bsize = %d, size = %d, fs = %s\n",
1357 fs->fs_bsize, size, fs->fs_fsmnt);
1358 panic("checkblk: bad size");
1359 }
1360 if ((u_int)bno >= fs->fs_size)
1361 panic("checkblk: bad block %d", bno);
1362 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1363 (int)fs->fs_cgsize, NOCRED, &bp);
1364 if (error) {
1365 brelse(bp);
1366 return;
1367 }
1368 cgp = (struct cg *)bp->b_data;
1369#if REV_ENDIAN_FS
1370 if (rev_endian)
1371 byte_swap_cgin(cgp,fs);
1372#endif /* REV_ENDIAN_FS */
1373 if (!cg_chkmagic(cgp)) {
1374#if REV_ENDIAN_FS
1375 if (rev_endian)
1376 byte_swap_cgout(cgp,fs);
1377#endif /* REV_ENDIAN_FS */
1378 brelse(bp);
1379 return;
1380 }
1381 bno = dtogd(fs, bno);
1382 if (size == fs->fs_bsize) {
1383 free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
1384 } else {
1385 frags = numfrags(fs, size);
1386 for (free = 0, i = 0; i < frags; i++)
1387 if (isset(cg_blksfree(cgp), bno + i))
1388 free++;
1389 if (free != 0 && free != frags)
1390 panic("checkblk: partially free fragment");
1391 }
1392#if REV_ENDIAN_FS
1393 if (rev_endian)
1394 byte_swap_cgout(cgp,fs);
1395#endif /* REV_ENDIAN_FS */
1396 brelse(bp);
1397 return (!free);
1398}
1399#endif /* DIAGNOSTIC */
1400
1401/*
1402 * Free an inode.
1403 *
1404 * The specified inode is placed back in the free map.
1405 */
1406int
1407ffs_vfree(ap)
1408 struct vop_vfree_args /* {
1409 struct vnode *a_pvp;
1410 ino_t a_ino;
1411 int a_mode;
1412 } */ *ap;
1413{
1414 register struct fs *fs;
1415 register struct cg *cgp;
1416 register struct inode *pip;
1417 ino_t ino = ap->a_ino;
1418 struct buf *bp;
1419 int error, cg;
1420#if REV_ENDIAN_FS
1421 struct vnode *vp=ap->a_pvp;
1422 struct mount *mp=vp->v_mount;
1423 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1424#endif /* REV_ENDIAN_FS */
1425
1426 pip = VTOI(ap->a_pvp);
1427 fs = pip->i_fs;
1428 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1429 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1430 pip->i_dev, ino, fs->fs_fsmnt);
1431 cg = ino_to_cg(fs, ino);
1432 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1433 (int)fs->fs_cgsize, NOCRED, &bp);
1434 if (error) {
1435 brelse(bp);
1436 return (0);
1437 }
1438 cgp = (struct cg *)bp->b_data;
1439#if REV_ENDIAN_FS
1440 if (rev_endian)
1441 byte_swap_cgin(cgp,fs);
1442#endif /* REV_ENDIAN_FS */
1443 if (!cg_chkmagic(cgp)) {
1444#if REV_ENDIAN_FS
1445 if (rev_endian)
1446 byte_swap_cgout(cgp,fs);
1447#endif /* REV_ENDIAN_FS */
1448 brelse(bp);
1449 return (0);
1450 }
1451 cgp->cg_time = time.tv_sec;
1452 ino %= fs->fs_ipg;
1453 if (isclr(cg_inosused(cgp), ino)) {
1454 printf("dev = 0x%x, ino = %d, fs = %s\n",
1455 pip->i_dev, ino, fs->fs_fsmnt);
1456 if (fs->fs_ronly == 0)
1457 panic("ifree: freeing free inode");
1458 }
1459 clrbit(cg_inosused(cgp), ino);
1460 if (ino < cgp->cg_irotor)
1461 cgp->cg_irotor = ino;
1462 cgp->cg_cs.cs_nifree++;
1463 fs->fs_cstotal.cs_nifree++;
1464 fs->fs_cs(fs, cg).cs_nifree++;
1465 if ((ap->a_mode & IFMT) == IFDIR) {
1466 cgp->cg_cs.cs_ndir--;
1467 fs->fs_cstotal.cs_ndir--;
1468 fs->fs_cs(fs, cg).cs_ndir--;
1469 }
1470 fs->fs_fmod = 1;
1471#if REV_ENDIAN_FS
1472 if (rev_endian)
1473 byte_swap_cgout(cgp,fs);
1474#endif /* REV_ENDIAN_FS */
1475 bdwrite(bp);
1476 return (0);
1477}
1478
1479/*
1480 * Find a block of the specified size in the specified cylinder group.
1481 *
1482 * It is a panic if a request is made to find a block when none are
1483 * available.
1484 */
1485static ufs_daddr_t
1486ffs_mapsearch(fs, cgp, bpref, allocsiz)
1487 register struct fs *fs;
1488 register struct cg *cgp;
1489 ufs_daddr_t bpref;
1490 int allocsiz;
1491{
1492 ufs_daddr_t bno;
1493 int start, len, loc, i;
1494 int blk, field, subfield, pos;
1495
1496 /*
1497 * find the fragment by searching through the free block
1498 * map for an appropriate bit pattern
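	 * (scanc() uses fragtbl[fs_frag] to summarize, per map byte, which
	 * free-fragment run sizes that byte contains, and stops at the
	 * first byte offering a run of allocsiz frags)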
1499 */
1500 if (bpref)
1501 start = dtogd(fs, bpref) / NBBY;
1502 else
1503 start = cgp->cg_frotor / NBBY;
1504 len = howmany(fs->fs_fpg, NBBY) - start;
1505 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
1506 (u_char *)fragtbl[fs->fs_frag],
1507 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1508 if (loc == 0) {
1509 len = start + 1;
1510 start = 0;
1511 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
1512 (u_char *)fragtbl[fs->fs_frag],
1513 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1514 if (loc == 0) {
1515 printf("start = %d, len = %d, fs = %s\n",
1516 start, len, fs->fs_fsmnt);
1517 panic("ffs_alloccg: map corrupted");
1518 /* NOTREACHED */
1519 }
1520 }
1521 bno = (start + len - loc) * NBBY;
1522 cgp->cg_frotor = bno;
1523 /*
1524 * found the byte in the map
1525 * sift through the bits to find the selected frag
1526 */
1527 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1528 blk = blkmap(fs, cg_blksfree(cgp), bno);
1529 blk <<= 1;
1530 field = around[allocsiz];
1531 subfield = inside[allocsiz];
1532 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1533 if ((blk & field) == subfield)
1534 return (bno + pos);
1535 field <<= 1;
1536 subfield <<= 1;
1537 }
1538 }
1539 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1540 panic("ffs_alloccg: block not in map");
1541 return (-1);
1542}
1543
1544/*
1545 * Update the cluster map because of an allocation or free.
1546 *
1547 * Cnt == 1 means free; cnt == -1 means allocating.
1548 */
1549ffs_clusteracct(fs, cgp, blkno, cnt)
1550 struct fs *fs;
1551 struct cg *cgp;
1552 ufs_daddr_t blkno;
1553 int cnt;
1554{
1555 int32_t *sump;
1556 int32_t *lp;
1557 u_char *freemapp, *mapp;
1558 int i, start, end, forw, back, map, bit;
1559
1560 if (fs->fs_contigsumsize <= 0)
1561 return;
1562 freemapp = cg_clustersfree(cgp);
1563 sump = cg_clustersum(cgp);
1564 /*
1565 * Allocate or clear the actual block.
1566 */
1567 if (cnt > 0)
1568 setbit(freemapp, blkno);
1569 else
1570 clrbit(freemapp, blkno);
1571 /*
1572 * Find the size of the cluster going forward.
1573 */
1574 start = blkno + 1;
1575 end = start + fs->fs_contigsumsize;
1576 if (end >= cgp->cg_nclusterblks)
1577 end = cgp->cg_nclusterblks;
1578 mapp = &freemapp[start / NBBY];
1579 map = *mapp++;
1580 bit = 1 << (start % NBBY);
1581 for (i = start; i < end; i++) {
1582 if ((map & bit) == 0)
1583 break;
1584 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1585 bit <<= 1;
1586 } else {
1587 map = *mapp++;
1588 bit = 1;
1589 }
1590 }
1591 forw = i - start;
1592 /*
1593 * Find the size of the cluster going backward.
1594 */
1595 start = blkno - 1;
1596 end = start - fs->fs_contigsumsize;
1597 if (end < 0)
1598 end = -1;
1599 mapp = &freemapp[start / NBBY];
1600 map = *mapp--;
1601 bit = 1 << (start % NBBY);
1602 for (i = start; i > end; i--) {
1603 if ((map & bit) == 0)
1604 break;
1605 if ((i & (NBBY - 1)) != 0) {
1606 bit >>= 1;
1607 } else {
1608 map = *mapp--;
1609 bit = 1 << (NBBY - 1);
1610 }
1611 }
1612 back = start - i;
1613 /*
1614 * Account for old cluster and the possibly new forward and
1615 * back clusters.
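	 * For example, on a free (cnt == 1) that joins a run of two free
	 * blocks ahead and one behind, sump[2]-- and sump[1]-- remove the
	 * old runs and sump[4]++ records the single merged run.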
1616 */
1617 i = back + forw + 1;
1618 if (i > fs->fs_contigsumsize)
1619 i = fs->fs_contigsumsize;
1620 sump[i] += cnt;
1621 if (back > 0)
1622 sump[back] -= cnt;
1623 if (forw > 0)
1624 sump[forw] -= cnt;
1625 /*
1626 * Update cluster summary information.
1627 */
1628 lp = &sump[fs->fs_contigsumsize];
1629 for (i = fs->fs_contigsumsize; i > 0; i--)
1630 if (*lp-- > 0)
1631 break;
1632 fs->fs_maxcluster[cgp->cg_cgx] = i;
1633}
1634
1635/*
1636 * Fserr prints the name of a file system with an error diagnostic.
1637 *
1638 * The form of the error message is:
1639 * fs: error message
1640 */
1641static void
1642ffs_fserr(fs, uid, cp)
1643 struct fs *fs;
1644 u_int uid;
1645 char *cp;
1646{
1647
1648 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1649}