1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26 /*
27 * Copyright (c) 1982, 1986, 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95
59 */
60 #include <rev_endian_fs.h>
61 #include <vm/vm_pager.h>
62
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/buf.h>
66 #include <sys/proc.h>
67 #include <sys/vnode.h>
68 #include <sys/mount.h>
69 #include <sys/kernel.h>
70 #include <sys/syslog.h>
71 #include <sys/quota.h>
72
73 #include <sys/vm.h>
74
75 #include <ufs/ufs/quota.h>
76 #include <ufs/ufs/inode.h>
77
78 #include <ufs/ffs/fs.h>
79 #include <ufs/ffs/ffs_extern.h>
80
81 #if REV_ENDIAN_FS
82 #include <ufs/ufs/ufs_byte_order.h>
83 #include <architecture/byte_order.h>
84 #endif /* REV_ENDIAN_FS */
85
86 extern u_long nextgennumber;
87
88 static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
89 static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t));
90 static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
91 int));
92 static ino_t ffs_dirpref __P((struct fs *));
93 static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
94 static void ffs_fserr __P((struct fs *, u_int, char *));
95 static u_long ffs_hashalloc
96 __P((struct inode *, int, long, int, u_int32_t (*)()));
97 static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
98 static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
99 int));
100
101 /*
102 * Allocate a block in the file system.
103 *
104 * The size of the requested block is given, which must be some
105 * multiple of fs_fsize and <= fs_bsize.
106 * A preference may be optionally specified. If a preference is given
107 * the following hierarchy is used to allocate a block:
108 * 1) allocate the requested block.
109 * 2) allocate a rotationally optimal block in the same cylinder.
110 * 3) allocate a block in the same cylinder group.
111  *   4) quadratically rehash into other cylinder groups, until an
112 * available block is located.
113  * If no block preference is given the following hierarchy is used
114 * to allocate a block:
115 * 1) allocate a block in the cylinder group that contains the
116 * inode for the file.
117  *   2) quadratically rehash into other cylinder groups, until an
118 * available block is located.
119 */
120 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
121 register struct inode *ip;
122 ufs_daddr_t lbn, bpref;
123 int size;
124 struct ucred *cred;
125 ufs_daddr_t *bnp;
126 {
127 register struct fs *fs;
128 ufs_daddr_t bno;
129 int cg, error;
130 int devBlockSize=0;
131 *bnp = 0;
132 fs = ip->i_fs;
133 #if DIAGNOSTIC
134 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
135 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
136 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
137 panic("ffs_alloc: bad size");
138 }
139 if (cred == NOCRED)
140 panic("ffs_alloc: missing credential\n");
141 #endif /* DIAGNOSTIC */
142 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
143 goto nospace;
144 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
145 goto nospace;
146 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
147 #if QUOTA
148 if (error = chkdq(ip, (int64_t)size, cred, 0))
149 return (error);
150 #endif /* QUOTA */
151 if (bpref >= fs->fs_size)
152 bpref = 0;
153 if (bpref == 0)
154 cg = ino_to_cg(fs, ip->i_number);
155 else
156 cg = dtog(fs, bpref);
157 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
158 (u_int32_t (*)())ffs_alloccg);
159 if (bno > 0) {
160 ip->i_blocks += btodb(size, devBlockSize);
161 ip->i_flag |= IN_CHANGE | IN_UPDATE;
162 *bnp = bno;
163 return (0);
164 }
165 #if QUOTA
166 /*
167 * Restore user's disk quota because allocation failed.
168 */
169 (void) chkdq(ip, (int64_t)-size, cred, FORCE);
170 #endif /* QUOTA */
171 nospace:
172 ffs_fserr(fs, cred->cr_uid, "file system full");
173 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
174 return (ENOSPC);
175 }
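
/*
 * A minimal caller-side sketch (hypothetical, not part of this file):
 * ffs_balloc() style callers typically compute a placement preference
 * with ffs_blkpref() and hand it in as bpref, e.g.
 *
 *	ufs_daddr_t newb;
 *	int error;
 *
 *	error = ffs_alloc(ip, lbn,
 *	    ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]),
 *	    fs->fs_bsize, cred, &newb);
 *	if (error)
 *		return (error);
 *
 * On success *bnp (newb above) holds the fragment number of the start
 * of the newly allocated block.
 */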
176
177 /*
178 * Reallocate a fragment to a bigger size
179 *
180 * The number and size of the old block is given, and a preference
181 * and new size is also specified. The allocator attempts to extend
182 * the original block. Failing that, the regular block allocator is
183 * invoked to get an appropriate block.
184 */
185 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
186 register struct inode *ip;
187 ufs_daddr_t lbprev;
188 ufs_daddr_t bpref;
189 int osize, nsize;
190 struct ucred *cred;
191 struct buf **bpp;
192 {
193 register struct fs *fs;
194 struct buf *bp;
195 int cg, request, error;
196 ufs_daddr_t bprev, bno;
197 int devBlockSize=0;
198
199 *bpp = 0;
200 fs = ip->i_fs;
201 #if DIAGNOSTIC
202 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
203 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
204 printf(
205 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
206 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
207 panic("ffs_realloccg: bad size");
208 }
209 if (cred == NOCRED)
210 panic("ffs_realloccg: missing credential\n");
211 #endif /* DIAGNOSTIC */
212 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
213 goto nospace;
214 if ((bprev = ip->i_db[lbprev]) == 0) {
215 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
216 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
217 panic("ffs_realloccg: bad bprev");
218 }
219 /*
220 * Allocate the extra space in the buffer.
221 */
222 if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
223 brelse(bp);
224 return (error);
225 }
226 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
227
228 #if QUOTA
229 if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0))
230 {
231 brelse(bp);
232 return (error);
233 }
234 #endif /* QUOTA */
235 /*
236 * Check for extension in the existing location.
237 */
238 cg = dtog(fs, bprev);
239 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
240 if (bp->b_blkno != fsbtodb(fs, bno))
241 panic("bad blockno");
242 ip->i_blocks += btodb(nsize - osize, devBlockSize);
243 ip->i_flag |= IN_CHANGE | IN_UPDATE;
244 allocbuf(bp, nsize);
245 bp->b_flags |= B_DONE;
246 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
247 *bpp = bp;
248 return (0);
249 }
250 /*
251 * Allocate a new disk location.
252 */
253 if (bpref >= fs->fs_size)
254 bpref = 0;
255 switch ((int)fs->fs_optim) {
256 case FS_OPTSPACE:
257 /*
258 * Allocate an exact sized fragment. Although this makes
259 * best use of space, we will waste time relocating it if
260 * the file continues to grow. If the fragmentation is
261 * less than half of the minimum free reserve, we choose
262 * to begin optimizing for time.
263 */
264 request = nsize;
265 if (fs->fs_minfree < 5 ||
266 fs->fs_cstotal.cs_nffree >
267 fs->fs_dsize * fs->fs_minfree / (2 * 100))
268 break;
269 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
270 fs->fs_fsmnt);
271 fs->fs_optim = FS_OPTTIME;
272 break;
273 case FS_OPTTIME:
274 /*
275 * At this point we have discovered a file that is trying to
276 * grow a small fragment to a larger fragment. To save time,
277 * we allocate a full sized block, then free the unused portion.
278 * If the file continues to grow, the `ffs_fragextend' call
279 * above will be able to grow it in place without further
280 * copying. If aberrant programs cause disk fragmentation to
281 * grow within 2% of the free reserve, we choose to begin
282 * optimizing for space.
283 */
284 request = fs->fs_bsize;
285 if (fs->fs_cstotal.cs_nffree <
286 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
287 break;
288 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
289 fs->fs_fsmnt);
290 fs->fs_optim = FS_OPTSPACE;
291 break;
292 default:
293 printf("dev = 0x%x, optim = %d, fs = %s\n",
294 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
295 panic("ffs_realloccg: bad optim");
296 /* NOTREACHED */
297 }
298 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
299 (u_int32_t (*)())ffs_alloccg);
300 if (bno > 0) {
301 bp->b_blkno = fsbtodb(fs, bno);
302 ffs_blkfree(ip, bprev, (long)osize);
303 if (nsize < request)
304 ffs_blkfree(ip, bno + numfrags(fs, nsize),
305 (long)(request - nsize));
306 ip->i_blocks += btodb(nsize - osize, devBlockSize);
307 ip->i_flag |= IN_CHANGE | IN_UPDATE;
308 allocbuf(bp, nsize);
309 bp->b_flags |= B_DONE;
310 bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
311 *bpp = bp;
312 return (0);
313 }
314 #if QUOTA
315 /*
316 * Restore user's disk quota because allocation failed.
317 */
318 (void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE);
319 #endif /* QUOTA */
320 brelse(bp);
321 nospace:
322 /*
323 * no space available
324 */
325 ffs_fserr(fs, cred->cr_uid, "file system full");
326 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
327 return (ENOSPC);
328 }
329
330 /*
331 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
332 *
333 * The vnode and an array of buffer pointers for a range of sequential
334 * logical blocks to be made contiguous is given. The allocator attempts
335 * to find a range of sequential blocks starting as close as possible to
336 * an fs_rotdelay offset from the end of the allocation for the logical
337  * block immediately preceding the current range. If successful, the
338 * physical block numbers in the buffer pointers and in the inode are
339 * changed to reflect the new allocation. If unsuccessful, the allocation
340 * is left unchanged. The success in doing the reallocation is returned.
341 * Note that the error return is not reflected back to the user. Rather
342 * the previous block allocation will be used.
343 */
344 /* Note: This routine is unused in UBC cluster I/O */
345
346 int doasyncfree = 1;
347 int doreallocblks = 1;
348
349 int
350 ffs_reallocblks(ap)
351 struct vop_reallocblks_args *ap;
352 {
353 return (ENOSPC);
354 }
355
356 /*
357 * Allocate an inode in the file system.
358 *
359 * If allocating a directory, use ffs_dirpref to select the inode.
360 * If allocating in a directory, the following hierarchy is followed:
361 * 1) allocate the preferred inode.
362 * 2) allocate an inode in the same cylinder group.
363  *   3) quadratically rehash into other cylinder groups, until an
364 * available inode is located.
365  * If no inode preference is given the following hierarchy is used
366 * to allocate an inode:
367 * 1) allocate an inode in cylinder group 0.
368  *   2) quadratically rehash into other cylinder groups, until an
369 * available inode is located.
370 */
371 int
372 ffs_valloc(ap)
373 struct vop_valloc_args /* {
374 struct vnode *a_pvp;
375 int a_mode;
376 struct ucred *a_cred;
377 struct vnode **a_vpp;
378 } */ *ap;
379 {
380 register struct vnode *pvp = ap->a_pvp;
381 register struct inode *pip;
382 register struct fs *fs;
383 register struct inode *ip;
384 mode_t mode = ap->a_mode;
385 ino_t ino, ipref;
386 int cg, error;
387
388 *ap->a_vpp = NULL;
389 pip = VTOI(pvp);
390 fs = pip->i_fs;
391 if (fs->fs_cstotal.cs_nifree == 0)
392 goto noinodes;
393
394 if ((mode & IFMT) == IFDIR)
395 ipref = ffs_dirpref(fs);
396 else
397 ipref = pip->i_number;
398 if (ipref >= fs->fs_ncg * fs->fs_ipg)
399 ipref = 0;
400 cg = ino_to_cg(fs, ipref);
401 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
402 if (ino == 0)
403 goto noinodes;
404 error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
405 if (error) {
406 VOP_VFREE(pvp, ino, mode);
407 return (error);
408 }
409 ip = VTOI(*ap->a_vpp);
410 if (ip->i_mode) {
411 printf("mode = 0%o, inum = %d, fs = %s\n",
412 ip->i_mode, ip->i_number, fs->fs_fsmnt);
413 panic("ffs_valloc: dup alloc");
414 }
415 if (ip->i_blocks) { /* XXX */
416 printf("free inode %s/%d had %d blocks\n",
417 fs->fs_fsmnt, ino, ip->i_blocks);
418 ip->i_blocks = 0;
419 }
420 ip->i_flags = 0;
421 /*
422 * Set up a new generation number for this inode.
423 */
424 if (++nextgennumber < (u_long)time.tv_sec)
425 nextgennumber = time.tv_sec;
426 ip->i_gen = nextgennumber;
427 return (0);
428 noinodes:
429 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
430 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
431 return (ENOSPC);
432 }
433
434 /*
435  * Find a cylinder group to place a directory.
436 *
437 * The policy implemented by this algorithm is to select from
438 * among those cylinder groups with above the average number of
439 * free inodes, the one with the smallest number of directories.
440 */
441 static ino_t
442 ffs_dirpref(fs)
443 register struct fs *fs;
444 {
445 int cg, minndir, mincg, avgifree;
446
447 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
448 minndir = fs->fs_ipg;
449 mincg = 0;
450 for (cg = 0; cg < fs->fs_ncg; cg++)
451 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
452 fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
453 mincg = cg;
454 minndir = fs->fs_cs(fs, cg).cs_ndir;
455 }
456 return ((ino_t)(fs->fs_ipg * mincg));
457 }
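
/*
 * Worked example (illustrative numbers only): with three cylinder
 * groups whose (cs_nifree, cs_ndir) are (100, 12), (250, 40) and
 * (300, 3), avgifree is 650 / 3 = 216, so only groups 1 and 2 qualify;
 * group 2 has the fewest directories, and the routine returns
 * fs_ipg * 2, the first inode of that group.
 */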
458
459 /*
460 * Select the desired position for the next block in a file. The file is
461 * logically divided into sections. The first section is composed of the
462 * direct blocks. Each additional section contains fs_maxbpg blocks.
463 *
464 * If no blocks have been allocated in the first section, the policy is to
465 * request a block in the same cylinder group as the inode that describes
466 * the file. If no blocks have been allocated in any other section, the
467 * policy is to place the section in a cylinder group with a greater than
468 * average number of free blocks. An appropriate cylinder group is found
469 * by using a rotor that sweeps the cylinder groups. When a new group of
470 * blocks is needed, the sweep begins in the cylinder group following the
471 * cylinder group from which the previous allocation was made. The sweep
472 * continues until a cylinder group with greater than the average number
473 * of free blocks is found. If the allocation is for the first block in an
474 * indirect block, the information on the previous allocation is unavailable;
475 * here a best guess is made based upon the logical block number being
476 * allocated.
477 *
478 * If a section is already partially allocated, the policy is to
479 * contiguously allocate fs_maxcontig blocks. The end of one of these
480 * contiguous blocks and the beginning of the next is physically separated
481 * so that the disk head will be in transit between them for at least
482 * fs_rotdelay milliseconds. This is to allow time for the processor to
483 * schedule another I/O transfer.
484 */
485 ufs_daddr_t
486 ffs_blkpref(ip, lbn, indx, bap)
487 struct inode *ip;
488 ufs_daddr_t lbn;
489 int indx;
490 ufs_daddr_t *bap;
491 {
492 register struct fs *fs;
493 register int cg;
494 int avgbfree, startcg;
495 ufs_daddr_t nextblk;
496 #if REV_ENDIAN_FS
497 daddr_t prev=0;
498 struct vnode *vp=ITOV(ip);
499 struct mount *mp=vp->v_mount;
500 int rev_endian=(mp->mnt_flag & MNT_REVEND);
501 #endif /* REV_ENDIAN_FS */
502
503 fs = ip->i_fs;
504 #if REV_ENDIAN_FS
505 if (indx && bap) {
506 if (rev_endian) {
507 if (bap != &ip->i_db[0])
508 prev = NXSwapLong(bap[indx - 1]);
509 else
510 prev = bap[indx - 1];
511 } else prev = bap[indx - 1];
512 }
513 if (indx % fs->fs_maxbpg == 0 || prev == 0)
514 #else /* REV_ENDIAN_FS */
515 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0)
516 #endif /* REV_ENDIAN_FS */
517 {
518 if (lbn < NDADDR) {
519 cg = ino_to_cg(fs, ip->i_number);
520 return (fs->fs_fpg * cg + fs->fs_frag);
521 }
522 /*
523          * Find a cylinder group with greater than average number of
524 * unused data blocks.
525 */
526 #if REV_ENDIAN_FS
527 if (indx == 0 || prev == 0)
528 #else /* REV_ENDIAN_FS */
529 if (indx == 0 || bap[indx - 1] == 0)
530 #endif /* REV_ENDIAN_FS */
531 startcg =
532 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
533 else
534 #if REV_ENDIAN_FS
535 startcg = dtog(fs, prev) + 1;
536 #else /* REV_ENDIAN_FS */
537 startcg = dtog(fs, bap[indx - 1]) + 1;
538 #endif /* REV_ENDIAN_FS */
539 startcg %= fs->fs_ncg;
540 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
541 for (cg = startcg; cg < fs->fs_ncg; cg++)
542 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
543 fs->fs_cgrotor = cg;
544 return (fs->fs_fpg * cg + fs->fs_frag);
545 }
546 for (cg = 0; cg <= startcg; cg++)
547 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
548 fs->fs_cgrotor = cg;
549 return (fs->fs_fpg * cg + fs->fs_frag);
550 }
551 return (NULL);
552 }
553 /*
554 * One or more previous blocks have been laid out. If less
555 * than fs_maxcontig previous blocks are contiguous, the
556 * next block is requested contiguously, otherwise it is
557 * requested rotationally delayed by fs_rotdelay milliseconds.
558 */
559 #if REV_ENDIAN_FS
560 if (rev_endian) {
561 nextblk = prev + fs->fs_frag;
562 if (indx < fs->fs_maxcontig) {
563 return (nextblk);
564 }
565 if (bap != &ip->i_db[0])
566 prev = NXSwapLong(bap[indx - fs->fs_maxcontig]);
567 else
568 prev = bap[indx - fs->fs_maxcontig];
569 if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk)
570 return (nextblk);
571 } else {
572 #endif /* REV_ENDIAN_FS */
573 nextblk = bap[indx - 1] + fs->fs_frag;
574 if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
575 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
576 return (nextblk);
577 #if REV_ENDIAN_FS
578 }
579 #endif /* REV_ENDIAN_FS */
580 if (fs->fs_rotdelay != 0)
581 /*
582 * Here we convert ms of delay to frags as:
583 * (frags) = (ms) * (rev/sec) * (sect/rev) /
584 * ((sect/frag) * (ms/sec))
585 * then round up to the next block.
586 */
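	/*
	 * Worked example (assumed geometry, for illustration only):
	 * fs_rotdelay = 4 ms, fs_rps = 60 rev/s, fs_nsect = 32 sectors
	 * per track and NSPF(fs) = 2 sectors per fragment give
	 * 4 * 60 * 32 / (2 * 1000) = 3 frags, which roundup() raises
	 * to fs_frag (here 8), so nextblk advances by one full block.
	 */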
587 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
588 (NSPF(fs) * 1000), fs->fs_frag);
589 return (nextblk);
590 }
591
592 /*
593 * Implement the cylinder overflow algorithm.
594 *
595 * The policy implemented by this algorithm is:
596 * 1) allocate the block in its requested cylinder group.
597  *   2) quadratically rehash on the cylinder group number.
598 * 3) brute force search for a free block.
599 */
600 /*VARARGS5*/
601 static u_long
602 ffs_hashalloc(ip, cg, pref, size, allocator)
603 struct inode *ip;
604 int cg;
605 long pref;
606 int size; /* size for data blocks, mode for inodes */
607 u_int32_t (*allocator)();
608 {
609 register struct fs *fs;
610 long result;
611 int i, icg = cg;
612
613 fs = ip->i_fs;
614 /*
615 * 1: preferred cylinder group
616 */
617 result = (*allocator)(ip, cg, pref, size);
618 if (result)
619 return (result);
620 /*
621 * 2: quadratic rehash
622 */
623 for (i = 1; i < fs->fs_ncg; i *= 2) {
624 cg += i;
625 if (cg >= fs->fs_ncg)
626 cg -= fs->fs_ncg;
627 result = (*allocator)(ip, cg, 0, size);
628 if (result)
629 return (result);
630 }
631 /*
632 * 3: brute force search
633 * Note that we start at i == 2, since 0 was checked initially,
634 * and 1 is always checked in the quadratic rehash.
635 */
636 cg = (icg + 2) % fs->fs_ncg;
637 for (i = 2; i < fs->fs_ncg; i++) {
638 result = (*allocator)(ip, cg, 0, size);
639 if (result)
640 return (result);
641 cg++;
642 if (cg == fs->fs_ncg)
643 cg = 0;
644 }
645 return (NULL);
646 }
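
/*
 * Probe-order example (hypothetical, fs_ncg = 8, starting cg = 3):
 * step 1 tries cg 3 itself; the quadratic rehash then tries
 * 3 + 1 = 4, 4 + 2 = 6 and 6 + 4 = 10 -> 2 (mod 8); the brute force
 * pass finally walks 5, 6, 7, 0, 1, 2.  No group is probed more than
 * twice before NULL is returned.
 */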
647
648 /*
649 * Determine whether a fragment can be extended.
650 *
651 * Check to see if the necessary fragments are available, and
652 * if they are, allocate them.
653 */
654 static ufs_daddr_t
655 ffs_fragextend(ip, cg, bprev, osize, nsize)
656 struct inode *ip;
657 int cg;
658 long bprev;
659 int osize, nsize;
660 {
661 register struct fs *fs;
662 register struct cg *cgp;
663 struct buf *bp;
664 long bno;
665 int frags, bbase;
666 int i, error;
667 #if REV_ENDIAN_FS
668 struct vnode *vp=ITOV(ip);
669 struct mount *mp=vp->v_mount;
670 int rev_endian=(mp->mnt_flag & MNT_REVEND);
671 #endif /* REV_ENDIAN_FS */
672
673 fs = ip->i_fs;
674 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
675 return (NULL);
676 frags = numfrags(fs, nsize); /* number of fragments needed */
677         bbase = fragnum(fs, bprev);     /* offset of bprev within its block (mod fs_frag) */
678 if (bbase > fragnum(fs, (bprev + frags - 1))) {
679 /* cannot extend across a block boundary */
680 return (NULL);
681 }
682 /* read corresponding cylinder group info */
683 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
684 (int)fs->fs_cgsize, NOCRED, &bp);
685 if (error) {
686 brelse(bp);
687 return (NULL);
688 }
689 cgp = (struct cg *)bp->b_data;
690 #if REV_ENDIAN_FS
691 if (rev_endian) {
692 byte_swap_cgin(cgp, fs);
693 }
694 #endif /* REV_ENDIAN_FS */
695
696 if (!cg_chkmagic(cgp)) {
697 #if REV_ENDIAN_FS
698 if (rev_endian)
699 byte_swap_cgout(cgp,fs);
700 #endif /* REV_ENDIAN_FS */
701 brelse(bp);
702 return (NULL);
703 }
704 cgp->cg_time = time.tv_sec;
705 bno = dtogd(fs, bprev);
706 for (i = numfrags(fs, osize); i < frags; i++)
707 if (isclr(cg_blksfree(cgp), bno + i)) {
708 #if REV_ENDIAN_FS
709 if (rev_endian)
710 byte_swap_cgout(cgp,fs);
711 #endif /* REV_ENDIAN_FS */
712 brelse(bp);
713 return (NULL);
714 }
715 /*
716 * the current fragment can be extended
717 * deduct the count on fragment being extended into
718 * increase the count on the remaining fragment (if any)
719 * allocate the extended piece
720 */
721 for (i = frags; i < fs->fs_frag - bbase; i++)
722 if (isclr(cg_blksfree(cgp), bno + i))
723 break;
724 cgp->cg_frsum[i - numfrags(fs, osize)]--;
725 if (i != frags)
726 cgp->cg_frsum[i - frags]++;
727 for (i = numfrags(fs, osize); i < frags; i++) {
728 clrbit(cg_blksfree(cgp), bno + i);
729 cgp->cg_cs.cs_nffree--;
730 fs->fs_cstotal.cs_nffree--;
731 fs->fs_cs(fs, cg).cs_nffree--;
732 }
733 fs->fs_fmod = 1;
734 #if REV_ENDIAN_FS
735 if (rev_endian)
736 byte_swap_cgout(cgp,fs);
737 #endif /* REV_ENDIAN_FS */
738 bdwrite(bp);
739 return (bprev);
740 }
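
/*
 * Extension example (illustrative, fs_frag = 8): growing a 2-frag
 * piece to 4 frags in place succeeds only if the next 2 frags of the
 * same block are still free; those bits are then cleared, the
 * cg_frsum[] bucket for the free run that followed the piece is moved
 * down to the shorter remaining run, and the original address bprev is
 * returned so no data has to be copied.
 */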
741
742 /*
743 * Determine whether a block can be allocated.
744 *
745 * Check to see if a block of the appropriate size is available,
746 * and if it is, allocate it.
747 */
748 static ufs_daddr_t
749 ffs_alloccg(ip, cg, bpref, size)
750 struct inode *ip;
751 int cg;
752 ufs_daddr_t bpref;
753 int size;
754 {
755 register struct fs *fs;
756 register struct cg *cgp;
757 struct buf *bp;
758 register int i;
759 int error, bno, frags, allocsiz;
760 #if REV_ENDIAN_FS
761 struct vnode *vp=ITOV(ip);
762 struct mount *mp=vp->v_mount;
763 int rev_endian=(mp->mnt_flag & MNT_REVEND);
764 #endif /* REV_ENDIAN_FS */
765
766 fs = ip->i_fs;
767 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
768 return (NULL);
769 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
770 (int)fs->fs_cgsize, NOCRED, &bp);
771 if (error) {
772 brelse(bp);
773 return (NULL);
774 }
775 cgp = (struct cg *)bp->b_data;
776 #if REV_ENDIAN_FS
777 if (rev_endian)
778 byte_swap_cgin(cgp,fs);
779 #endif /* REV_ENDIAN_FS */
780 if (!cg_chkmagic(cgp) ||
781 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
782 #if REV_ENDIAN_FS
783 if (rev_endian)
784 byte_swap_cgout(cgp,fs);
785 #endif /* REV_ENDIAN_FS */
786 brelse(bp);
787 return (NULL);
788 }
789 cgp->cg_time = time.tv_sec;
790 if (size == fs->fs_bsize) {
791 bno = ffs_alloccgblk(fs, cgp, bpref);
792 #if REV_ENDIAN_FS
793 if (rev_endian)
794 byte_swap_cgout(cgp,fs);
795 #endif /* REV_ENDIAN_FS */
796 bdwrite(bp);
797 return (bno);
798 }
799 /*
800 * check to see if any fragments are already available
801 * allocsiz is the size which will be allocated, hacking
802 * it down to a smaller size if necessary
803 */
804 frags = numfrags(fs, size);
805 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
806 if (cgp->cg_frsum[allocsiz] != 0)
807 break;
808 if (allocsiz == fs->fs_frag) {
809 /*
810 * no fragments were available, so a block will be
811 * allocated, and hacked up
812 */
813 if (cgp->cg_cs.cs_nbfree == 0) {
814 #if REV_ENDIAN_FS
815 if (rev_endian)
816 byte_swap_cgout(cgp,fs);
817 #endif /* REV_ENDIAN_FS */
818 brelse(bp);
819 return (NULL);
820 }
821 bno = ffs_alloccgblk(fs, cgp, bpref);
822 bpref = dtogd(fs, bno);
823 for (i = frags; i < fs->fs_frag; i++)
824 setbit(cg_blksfree(cgp), bpref + i);
825 i = fs->fs_frag - frags;
826 cgp->cg_cs.cs_nffree += i;
827 fs->fs_cstotal.cs_nffree += i;
828 fs->fs_cs(fs, cg).cs_nffree += i;
829 fs->fs_fmod = 1;
830 cgp->cg_frsum[i]++;
831 #if REV_ENDIAN_FS
832 if (rev_endian)
833 byte_swap_cgout(cgp,fs);
834 #endif /* REV_ENDIAN_FS */
835 bdwrite(bp);
836 return (bno);
837 }
838 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
839 if (bno < 0) {
840 #if REV_ENDIAN_FS
841 if (rev_endian)
842 byte_swap_cgout(cgp,fs);
843 #endif /* REV_ENDIAN_FS */
844 brelse(bp);
845 return (NULL);
846 }
847 for (i = 0; i < frags; i++)
848 clrbit(cg_blksfree(cgp), bno + i);
849 cgp->cg_cs.cs_nffree -= frags;
850 fs->fs_cstotal.cs_nffree -= frags;
851 fs->fs_cs(fs, cg).cs_nffree -= frags;
852 fs->fs_fmod = 1;
853 cgp->cg_frsum[allocsiz]--;
854 if (frags != allocsiz)
855 cgp->cg_frsum[allocsiz - frags]++;
856 #if REV_ENDIAN_FS
857 if (rev_endian)
858 byte_swap_cgout(cgp,fs);
859 #endif /* REV_ENDIAN_FS */
860 bdwrite(bp);
861 return (cg * fs->fs_fpg + bno);
862 }
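
/*
 * Fragment hack-up example (illustrative, fs_frag = 8): a request for
 * 2 frags when cg_frsum[] shows no suitable free runs allocates a full
 * block with ffs_alloccgblk(), keeps the first 2 frags, marks the
 * remaining 6 frags free again and bumps cg_frsum[6], so later small
 * requests can be satisfied from the leftover piece.
 */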
863
864 /*
865 * Allocate a block in a cylinder group.
866 *
867 * This algorithm implements the following policy:
868 * 1) allocate the requested block.
869 * 2) allocate a rotationally optimal block in the same cylinder.
870 * 3) allocate the next available block on the block rotor for the
871 * specified cylinder group.
872 * Note that this routine only allocates fs_bsize blocks; these
873 * blocks may be fragmented by the routine that allocates them.
874 */
875 static ufs_daddr_t
876 ffs_alloccgblk(fs, cgp, bpref)
877 register struct fs *fs;
878 register struct cg *cgp;
879 ufs_daddr_t bpref;
880 {
881 ufs_daddr_t bno, blkno;
882 int cylno, pos, delta;
883 short *cylbp;
884 register int i;
885
886 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
887 bpref = cgp->cg_rotor;
888 goto norot;
889 }
890 bpref = blknum(fs, bpref);
891 bpref = dtogd(fs, bpref);
892 /*
893 * if the requested block is available, use it
894 */
895 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
896 bno = bpref;
897 goto gotit;
898 }
899 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
900 /*
901 * Block layout information is not available.
902 * Leaving bpref unchanged means we take the
903 * next available free block following the one
904 * we just allocated. Hopefully this will at
905 * least hit a track cache on drives of unknown
906 * geometry (e.g. SCSI).
907 */
908 goto norot;
909 }
910 /*
911 * check for a block available on the same cylinder
912 */
913 cylno = cbtocylno(fs, bpref);
914 if (cg_blktot(cgp)[cylno] == 0)
915 goto norot;
916 /*
917 * check the summary information to see if a block is
918 * available in the requested cylinder starting at the
919 * requested rotational position and proceeding around.
920 */
921 cylbp = cg_blks(fs, cgp, cylno);
922 pos = cbtorpos(fs, bpref);
923 for (i = pos; i < fs->fs_nrpos; i++)
924 if (cylbp[i] > 0)
925 break;
926 if (i == fs->fs_nrpos)
927 for (i = 0; i < pos; i++)
928 if (cylbp[i] > 0)
929 break;
930 if (cylbp[i] > 0) {
931 /*
932 * found a rotational position, now find the actual
933                  * block; panic if none is actually there.
934 */
935 pos = cylno % fs->fs_cpc;
936 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
937 if (fs_postbl(fs, pos)[i] == -1) {
938 printf("pos = %d, i = %d, fs = %s\n",
939 pos, i, fs->fs_fsmnt);
940 panic("ffs_alloccgblk: cyl groups corrupted");
941 }
942 for (i = fs_postbl(fs, pos)[i];; ) {
943 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
944 bno = blkstofrags(fs, (bno + i));
945 goto gotit;
946 }
947 delta = fs_rotbl(fs)[i];
948 if (delta <= 0 ||
949 delta + i > fragstoblks(fs, fs->fs_fpg))
950 break;
951 i += delta;
952 }
953 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
954 panic("ffs_alloccgblk: can't find blk in cyl");
955 }
956 norot:
957 /*
958 * no blocks in the requested cylinder, so take next
959 * available one in this cylinder group.
960 */
961 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
962 if (bno < 0)
963 return (NULL);
964 cgp->cg_rotor = bno;
965 gotit:
966 blkno = fragstoblks(fs, bno);
967 ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
968 ffs_clusteracct(fs, cgp, blkno, -1);
969 cgp->cg_cs.cs_nbfree--;
970 fs->fs_cstotal.cs_nbfree--;
971 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
972 cylno = cbtocylno(fs, bno);
973 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
974 cg_blktot(cgp)[cylno]--;
975 fs->fs_fmod = 1;
976 return (cgp->cg_cgx * fs->fs_fpg + bno);
977 }
978
979 /*
980 * Determine whether a cluster can be allocated.
981 *
982 * We do not currently check for optimal rotational layout if there
983 * are multiple choices in the same cylinder group. Instead we just
984 * take the first one that we find following bpref.
985 */
986 static ufs_daddr_t
987 ffs_clusteralloc(ip, cg, bpref, len)
988 struct inode *ip;
989 int cg;
990 ufs_daddr_t bpref;
991 int len;
992 {
993 register struct fs *fs;
994 register struct cg *cgp;
995 struct buf *bp;
996 int i, got, run, bno, bit, map;
997 u_char *mapp;
998 int32_t *lp;
999 #if REV_ENDIAN_FS
1000 struct vnode *vp=ITOV(ip);
1001 struct mount *mp=vp->v_mount;
1002 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1003 #endif /* REV_ENDIAN_FS */
1004
1005 fs = ip->i_fs;
1006 if (fs->fs_maxcluster[cg] < len)
1007 return (NULL);
1008 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1009 NOCRED, &bp))
1010 goto fail;
1011 cgp = (struct cg *)bp->b_data;
1012 #if REV_ENDIAN_FS
1013 if (rev_endian)
1014 byte_swap_cgin(cgp,fs);
1015 #endif /* REV_ENDIAN_FS */
1016 if (!cg_chkmagic(cgp)) {
1017 #if REV_ENDIAN_FS
1018 if (rev_endian)
1019 byte_swap_cgout(cgp,fs);
1020 #endif /* REV_ENDIAN_FS */
1021 goto fail;
1022 }
1023 /*
1024 * Check to see if a cluster of the needed size (or bigger) is
1025 * available in this cylinder group.
1026 */
1027 lp = &cg_clustersum(cgp)[len];
1028 for (i = len; i <= fs->fs_contigsumsize; i++)
1029 if (*lp++ > 0)
1030 break;
1031 if (i > fs->fs_contigsumsize) {
1032 /*
1033 * This is the first time looking for a cluster in this
1034 * cylinder group. Update the cluster summary information
1035 * to reflect the true maximum sized cluster so that
1036 * future cluster allocation requests can avoid reading
1037 * the cylinder group map only to find no clusters.
1038 */
1039 lp = &cg_clustersum(cgp)[len - 1];
1040 for (i = len - 1; i > 0; i--)
1041 if (*lp-- > 0)
1042 break;
1043 fs->fs_maxcluster[cg] = i;
1044 #if REV_ENDIAN_FS
1045 if (rev_endian)
1046 byte_swap_cgout(cgp,fs);
1047 #endif /* REV_ENDIAN_FS */
1048 goto fail;
1049 }
1050 /*
1051 * Search the cluster map to find a big enough cluster.
1052 * We take the first one that we find, even if it is larger
1053 * than we need as we prefer to get one close to the previous
1054 * block allocation. We do not search before the current
1055 * preference point as we do not want to allocate a block
1056 * that is allocated before the previous one (as we will
1057 * then have to wait for another pass of the elevator
1058 * algorithm before it will be read). We prefer to fail and
1059 * be recalled to try an allocation in the next cylinder group.
1060 */
1061 if (dtog(fs, bpref) != cg)
1062 bpref = 0;
1063 else
1064 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1065 mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1066 map = *mapp++;
1067 bit = 1 << (bpref % NBBY);
1068 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
1069 if ((map & bit) == 0) {
1070 run = 0;
1071 } else {
1072 run++;
1073 if (run == len)
1074 break;
1075 }
1076 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1077 bit <<= 1;
1078 } else {
1079 map = *mapp++;
1080 bit = 1;
1081 }
1082 }
1083 if (got == cgp->cg_nclusterblks) {
1084 #if REV_ENDIAN_FS
1085 if (rev_endian)
1086 byte_swap_cgout(cgp,fs);
1087 #endif /* REV_ENDIAN_FS */
1088 goto fail;
1089 }
1090 /*
1091 * Allocate the cluster that we have found.
1092 */
1093 for (i = 1; i <= len; i++)
1094 if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
1095 panic("ffs_clusteralloc: map mismatch");
1096 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1097 if (dtog(fs, bno) != cg)
1098 panic("ffs_clusteralloc: allocated out of group");
1099 len = blkstofrags(fs, len);
1100 for (i = 0; i < len; i += fs->fs_frag)
1101 if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i)
1102 panic("ffs_clusteralloc: lost block");
1103 #if REV_ENDIAN_FS
1104 if (rev_endian)
1105 byte_swap_cgout(cgp,fs);
1106 #endif /* REV_ENDIAN_FS */
1107 bdwrite(bp);
1108 return (bno);
1109
1110 fail:
1111 brelse(bp);
1112 return (0);
1113 }
1114
1115 /*
1116 * Determine whether an inode can be allocated.
1117 *
1118 * Check to see if an inode is available, and if it is,
1119 * allocate it using the following policy:
1120 * 1) allocate the requested inode.
1121 * 2) allocate the next available inode after the requested
1122 * inode in the specified cylinder group.
1123 */
1124 static ino_t
1125 ffs_nodealloccg(ip, cg, ipref, mode)
1126 struct inode *ip;
1127 int cg;
1128 ufs_daddr_t ipref;
1129 int mode;
1130 {
1131 register struct fs *fs;
1132 register struct cg *cgp;
1133 struct buf *bp;
1134 int error, start, len, loc, map, i;
1135 #if REV_ENDIAN_FS
1136 struct vnode *vp=ITOV(ip);
1137 struct mount *mp=vp->v_mount;
1138 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1139 #endif /* REV_ENDIAN_FS */
1140
1141 fs = ip->i_fs;
1142 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1143 return (NULL);
1144 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1145 (int)fs->fs_cgsize, NOCRED, &bp);
1146 if (error) {
1147 brelse(bp);
1148 return (NULL);
1149 }
1150 cgp = (struct cg *)bp->b_data;
1151 #if REV_ENDIAN_FS
1152 if (rev_endian)
1153 byte_swap_cgin(cgp,fs);
1154 #endif /* REV_ENDIAN_FS */
1155 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1156 #if REV_ENDIAN_FS
1157 if (rev_endian)
1158 byte_swap_cgout(cgp,fs);
1159 #endif /* REV_ENDIAN_FS */
1160 brelse(bp);
1161 return (NULL);
1162 }
1163
1164 cgp->cg_time = time.tv_sec;
1165 if (ipref) {
1166 ipref %= fs->fs_ipg;
1167 if (isclr(cg_inosused(cgp), ipref))
1168 goto gotit;
1169 }
1170 start = cgp->cg_irotor / NBBY;
1171 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1172 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
1173 if (loc == 0) {
1174 len = start + 1;
1175 start = 0;
1176 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
1177 if (loc == 0) {
1178 printf("cg = %d, irotor = %d, fs = %s\n",
1179 cg, cgp->cg_irotor, fs->fs_fsmnt);
1180 panic("ffs_nodealloccg: map corrupted");
1181 /* NOTREACHED */
1182 }
1183 }
1184 i = start + len - loc;
1185 map = cg_inosused(cgp)[i];
1186 ipref = i * NBBY;
1187 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1188 if ((map & i) == 0) {
1189 cgp->cg_irotor = ipref;
1190 goto gotit;
1191 }
1192 }
1193 printf("fs = %s\n", fs->fs_fsmnt);
1194 panic("ffs_nodealloccg: block not in map");
1195 /* NOTREACHED */
1196 gotit:
1197 setbit(cg_inosused(cgp), ipref);
1198 cgp->cg_cs.cs_nifree--;
1199 fs->fs_cstotal.cs_nifree--;
1200 fs->fs_cs(fs, cg).cs_nifree--;
1201 fs->fs_fmod = 1;
1202 if ((mode & IFMT) == IFDIR) {
1203 cgp->cg_cs.cs_ndir++;
1204 fs->fs_cstotal.cs_ndir++;
1205 fs->fs_cs(fs, cg).cs_ndir++;
1206 }
1207 #if REV_ENDIAN_FS
1208 if (rev_endian)
1209 byte_swap_cgout(cgp,fs);
1210 #endif /* REV_ENDIAN_FS */
1211 bdwrite(bp);
1212 return (cg * fs->fs_ipg + ipref);
1213 }
1214
1215 /*
1216 * Free a block or fragment.
1217 *
1218 * The specified block or fragment is placed back in the
1219 * free map. If a fragment is deallocated, a possible
1220 * block reassembly is checked.
1221 */
1222 ffs_blkfree(ip, bno, size)
1223 register struct inode *ip;
1224 ufs_daddr_t bno;
1225 long size;
1226 {
1227 register struct fs *fs;
1228 register struct cg *cgp;
1229 struct buf *bp;
1230 ufs_daddr_t blkno;
1231 int i, error, cg, blk, frags, bbase;
1232 #if REV_ENDIAN_FS
1233 struct vnode *vp=ITOV(ip);
1234 struct mount *mp=vp->v_mount;
1235 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1236 #endif /* REV_ENDIAN_FS */
1237 fs = ip->i_fs;
1238 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1239 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
1240 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1241 panic("blkfree: bad size");
1242 }
1243 cg = dtog(fs, bno);
1244 if ((u_int)bno >= fs->fs_size) {
1245 printf("bad block %d, ino %d\n", bno, ip->i_number);
1246 ffs_fserr(fs, ip->i_uid, "bad block");
1247 return;
1248 }
1249 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1250 (int)fs->fs_cgsize, NOCRED, &bp);
1251 if (error) {
1252 brelse(bp);
1253 return;
1254 }
1255 cgp = (struct cg *)bp->b_data;
1256 #if REV_ENDIAN_FS
1257 if (rev_endian)
1258 byte_swap_cgin(cgp,fs);
1259 #endif /* REV_ENDIAN_FS */
1260 if (!cg_chkmagic(cgp)) {
1261 #if REV_ENDIAN_FS
1262 if (rev_endian)
1263 byte_swap_cgout(cgp,fs);
1264 #endif /* REV_ENDIAN_FS */
1265 brelse(bp);
1266 return;
1267 }
1268 cgp->cg_time = time.tv_sec;
1269 bno = dtogd(fs, bno);
1270 if (size == fs->fs_bsize) {
1271 blkno = fragstoblks(fs, bno);
1272 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1273 printf("dev = 0x%x, block = %d, fs = %s\n",
1274 ip->i_dev, bno, fs->fs_fsmnt);
1275 panic("blkfree: freeing free block");
1276 }
1277 ffs_setblock(fs, cg_blksfree(cgp), blkno);
1278 ffs_clusteracct(fs, cgp, blkno, 1);
1279 cgp->cg_cs.cs_nbfree++;
1280 fs->fs_cstotal.cs_nbfree++;
1281 fs->fs_cs(fs, cg).cs_nbfree++;
1282 i = cbtocylno(fs, bno);
1283 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
1284 cg_blktot(cgp)[i]++;
1285 } else {
1286 bbase = bno - fragnum(fs, bno);
1287 /*
1288 * decrement the counts associated with the old frags
1289 */
1290 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1291 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1292 /*
1293 * deallocate the fragment
1294 */
1295 frags = numfrags(fs, size);
1296 for (i = 0; i < frags; i++) {
1297 if (isset(cg_blksfree(cgp), bno + i)) {
1298 printf("dev = 0x%x, block = %d, fs = %s\n",
1299 ip->i_dev, bno + i, fs->fs_fsmnt);
1300 panic("blkfree: freeing free frag");
1301 }
1302 setbit(cg_blksfree(cgp), bno + i);
1303 }
1304 cgp->cg_cs.cs_nffree += i;
1305 fs->fs_cstotal.cs_nffree += i;
1306 fs->fs_cs(fs, cg).cs_nffree += i;
1307 /*
1308 * add back in counts associated with the new frags
1309 */
1310 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1311 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1312 /*
1313 * if a complete block has been reassembled, account for it
1314 */
1315 blkno = fragstoblks(fs, bbase);
1316 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1317 cgp->cg_cs.cs_nffree -= fs->fs_frag;
1318 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1319 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1320 ffs_clusteracct(fs, cgp, blkno, 1);
1321 cgp->cg_cs.cs_nbfree++;
1322 fs->fs_cstotal.cs_nbfree++;
1323 fs->fs_cs(fs, cg).cs_nbfree++;
1324 i = cbtocylno(fs, bbase);
1325 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
1326 cg_blktot(cgp)[i]++;
1327 }
1328 }
1329 fs->fs_fmod = 1;
1330 #if REV_ENDIAN_FS
1331 if (rev_endian)
1332 byte_swap_cgout(cgp,fs);
1333 #endif /* REV_ENDIAN_FS */
1334 bdwrite(bp);
1335 }
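
/*
 * Reassembly example (illustrative, fs_frag = 8): freeing the last 2
 * frags of a block whose other 6 frags are already free first credits
 * 2 to the nffree counts, then notices via ffs_isblock() that all 8
 * frags are now free, deducts the whole 8 from nffree and instead
 * counts one more free block (cs_nbfree and the cluster map via
 * ffs_clusteracct()).
 */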
1336
1337 #if DIAGNOSTIC
1338 /*
1339 * Verify allocation of a block or fragment. Returns true if block or
1340 * fragment is allocated, false if it is free.
1341 */
1342 ffs_checkblk(ip, bno, size)
1343 struct inode *ip;
1344 ufs_daddr_t bno;
1345 long size;
1346 {
1347 struct fs *fs;
1348 struct cg *cgp;
1349 struct buf *bp;
1350 int i, error, frags, free;
1351 #if REV_ENDIAN_FS
1352 struct vnode *vp=ITOV(ip);
1353 struct mount *mp=vp->v_mount;
1354 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1355 #endif /* REV_ENDIAN_FS */
1356
1357 fs = ip->i_fs;
1358 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1359 printf("bsize = %d, size = %d, fs = %s\n",
1360 fs->fs_bsize, size, fs->fs_fsmnt);
1361 panic("checkblk: bad size");
1362 }
1363 if ((u_int)bno >= fs->fs_size)
1364 panic("checkblk: bad block %d", bno);
1365 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1366 (int)fs->fs_cgsize, NOCRED, &bp);
1367 if (error) {
1368 brelse(bp);
1369 return;
1370 }
1371 cgp = (struct cg *)bp->b_data;
1372 #if REV_ENDIAN_FS
1373 if (rev_endian)
1374 byte_swap_cgin(cgp,fs);
1375 #endif /* REV_ENDIAN_FS */
1376 if (!cg_chkmagic(cgp)) {
1377 #if REV_ENDIAN_FS
1378 if (rev_endian)
1379 byte_swap_cgout(cgp,fs);
1380 #endif /* REV_ENDIAN_FS */
1381 brelse(bp);
1382 return;
1383 }
1384 bno = dtogd(fs, bno);
1385 if (size == fs->fs_bsize) {
1386 free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
1387 } else {
1388 frags = numfrags(fs, size);
1389 for (free = 0, i = 0; i < frags; i++)
1390 if (isset(cg_blksfree(cgp), bno + i))
1391 free++;
1392 if (free != 0 && free != frags)
1393 panic("checkblk: partially free fragment");
1394 }
1395 #if REV_ENDIAN_FS
1396 if (rev_endian)
1397 byte_swap_cgout(cgp,fs);
1398 #endif /* REV_ENDIAN_FS */
1399 brelse(bp);
1400 return (!free);
1401 }
1402 #endif /* DIAGNOSTIC */
1403
1404 /*
1405 * Free an inode.
1406 *
1407 * The specified inode is placed back in the free map.
1408 */
1409 int
1410 ffs_vfree(ap)
1411 struct vop_vfree_args /* {
1412 struct vnode *a_pvp;
1413 ino_t a_ino;
1414 int a_mode;
1415 } */ *ap;
1416 {
1417 register struct fs *fs;
1418 register struct cg *cgp;
1419 register struct inode *pip;
1420 ino_t ino = ap->a_ino;
1421 struct buf *bp;
1422 int error, cg;
1423 #if REV_ENDIAN_FS
1424 struct vnode *vp=ap->a_pvp;
1425 struct mount *mp=vp->v_mount;
1426 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1427 #endif /* REV_ENDIAN_FS */
1428
1429 pip = VTOI(ap->a_pvp);
1430 fs = pip->i_fs;
1431 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1432 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1433 pip->i_dev, ino, fs->fs_fsmnt);
1434 cg = ino_to_cg(fs, ino);
1435 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1436 (int)fs->fs_cgsize, NOCRED, &bp);
1437 if (error) {
1438 brelse(bp);
1439 return (0);
1440 }
1441 cgp = (struct cg *)bp->b_data;
1442 #if REV_ENDIAN_FS
1443 if (rev_endian)
1444 byte_swap_cgin(cgp,fs);
1445 #endif /* REV_ENDIAN_FS */
1446 if (!cg_chkmagic(cgp)) {
1447 #if REV_ENDIAN_FS
1448 if (rev_endian)
1449 byte_swap_cgout(cgp,fs);
1450 #endif /* REV_ENDIAN_FS */
1451 brelse(bp);
1452 return (0);
1453 }
1454 cgp->cg_time = time.tv_sec;
1455 ino %= fs->fs_ipg;
1456 if (isclr(cg_inosused(cgp), ino)) {
1457 printf("dev = 0x%x, ino = %d, fs = %s\n",
1458 pip->i_dev, ino, fs->fs_fsmnt);
1459 if (fs->fs_ronly == 0)
1460 panic("ifree: freeing free inode");
1461 }
1462 clrbit(cg_inosused(cgp), ino);
1463 if (ino < cgp->cg_irotor)
1464 cgp->cg_irotor = ino;
1465 cgp->cg_cs.cs_nifree++;
1466 fs->fs_cstotal.cs_nifree++;
1467 fs->fs_cs(fs, cg).cs_nifree++;
1468 if ((ap->a_mode & IFMT) == IFDIR) {
1469 cgp->cg_cs.cs_ndir--;
1470 fs->fs_cstotal.cs_ndir--;
1471 fs->fs_cs(fs, cg).cs_ndir--;
1472 }
1473 fs->fs_fmod = 1;
1474 #if REV_ENDIAN_FS
1475 if (rev_endian)
1476 byte_swap_cgout(cgp,fs);
1477 #endif /* REV_ENDIAN_FS */
1478 bdwrite(bp);
1479 return (0);
1480 }
1481
1482 /*
1483 * Find a block of the specified size in the specified cylinder group.
1484 *
1485  * It is a panic if a request is made to find a block when none are
1486 * available.
1487 */
1488 static ufs_daddr_t
1489 ffs_mapsearch(fs, cgp, bpref, allocsiz)
1490 register struct fs *fs;
1491 register struct cg *cgp;
1492 ufs_daddr_t bpref;
1493 int allocsiz;
1494 {
1495 ufs_daddr_t bno;
1496 int start, len, loc, i;
1497 int blk, field, subfield, pos;
1498
1499 /*
1500 * find the fragment by searching through the free block
1501 * map for an appropriate bit pattern
1502 */
1503 if (bpref)
1504 start = dtogd(fs, bpref) / NBBY;
1505 else
1506 start = cgp->cg_frotor / NBBY;
1507 len = howmany(fs->fs_fpg, NBBY) - start;
1508 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
1509 (u_char *)fragtbl[fs->fs_frag],
1510 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1511 if (loc == 0) {
1512 len = start + 1;
1513 start = 0;
1514 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
1515 (u_char *)fragtbl[fs->fs_frag],
1516 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1517 if (loc == 0) {
1518 printf("start = %d, len = %d, fs = %s\n",
1519 start, len, fs->fs_fsmnt);
1520 panic("ffs_alloccg: map corrupted");
1521 /* NOTREACHED */
1522 }
1523 }
1524 bno = (start + len - loc) * NBBY;
1525 cgp->cg_frotor = bno;
1526 /*
1527 * found the byte in the map
1528 * sift through the bits to find the selected frag
1529 */
1530 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1531 blk = blkmap(fs, cg_blksfree(cgp), bno);
1532 blk <<= 1;
1533 field = around[allocsiz];
1534 subfield = inside[allocsiz];
1535 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1536 if ((blk & field) == subfield)
1537 return (bno + pos);
1538 field <<= 1;
1539 subfield <<= 1;
1540 }
1541 }
1542 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1543 panic("ffs_alloccg: block not in map");
1544 return (-1);
1545 }
1546
1547 /*
1548 * Update the cluster map because of an allocation or free.
1549 *
1550 * Cnt == 1 means free; cnt == -1 means allocating.
1551 */
1552 ffs_clusteracct(fs, cgp, blkno, cnt)
1553 struct fs *fs;
1554 struct cg *cgp;
1555 ufs_daddr_t blkno;
1556 int cnt;
1557 {
1558 int32_t *sump;
1559 int32_t *lp;
1560 u_char *freemapp, *mapp;
1561 int i, start, end, forw, back, map, bit;
1562
1563 if (fs->fs_contigsumsize <= 0)
1564 return;
1565 freemapp = cg_clustersfree(cgp);
1566 sump = cg_clustersum(cgp);
1567 /*
1568 * Allocate or clear the actual block.
1569 */
1570 if (cnt > 0)
1571 setbit(freemapp, blkno);
1572 else
1573 clrbit(freemapp, blkno);
1574 /*
1575 * Find the size of the cluster going forward.
1576 */
1577 start = blkno + 1;
1578 end = start + fs->fs_contigsumsize;
1579 if (end >= cgp->cg_nclusterblks)
1580 end = cgp->cg_nclusterblks;
1581 mapp = &freemapp[start / NBBY];
1582 map = *mapp++;
1583 bit = 1 << (start % NBBY);
1584 for (i = start; i < end; i++) {
1585 if ((map & bit) == 0)
1586 break;
1587 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1588 bit <<= 1;
1589 } else {
1590 map = *mapp++;
1591 bit = 1;
1592 }
1593 }
1594 forw = i - start;
1595 /*
1596 * Find the size of the cluster going backward.
1597 */
1598 start = blkno - 1;
1599 end = start - fs->fs_contigsumsize;
1600 if (end < 0)
1601 end = -1;
1602 mapp = &freemapp[start / NBBY];
1603 map = *mapp--;
1604 bit = 1 << (start % NBBY);
1605 for (i = start; i > end; i--) {
1606 if ((map & bit) == 0)
1607 break;
1608 if ((i & (NBBY - 1)) != 0) {
1609 bit >>= 1;
1610 } else {
1611 map = *mapp--;
1612 bit = 1 << (NBBY - 1);
1613 }
1614 }
1615 back = start - i;
1616 /*
1617 * Account for old cluster and the possibly new forward and
1618 * back clusters.
1619 */
1620 i = back + forw + 1;
1621 if (i > fs->fs_contigsumsize)
1622 i = fs->fs_contigsumsize;
1623 sump[i] += cnt;
1624 if (back > 0)
1625 sump[back] -= cnt;
1626 if (forw > 0)
1627 sump[forw] -= cnt;
1628 /*
1629 * Update cluster summary information.
1630 */
1631 lp = &sump[fs->fs_contigsumsize];
1632 for (i = fs->fs_contigsumsize; i > 0; i--)
1633 if (*lp-- > 0)
1634 break;
1635 fs->fs_maxcluster[cgp->cg_cgx] = i;
1636 }
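
/*
 * Summary-update example (illustrative): freeing block b when the two
 * blocks before it and the one after it are already free joins a run
 * of 2 and a run of 1 into one cluster of 4, so with cnt == 1 the code
 * does sump[4]++, sump[2]-- and sump[1]--, and fs_maxcluster[] for the
 * group is recomputed from the highest non-zero summary bucket.
 */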
1637
1638 /*
1639 * Fserr prints the name of a file system with an error diagnostic.
1640 *
1641 * The form of the error message is:
1642 * fs: error message
1643 */
1644 static void
1645 ffs_fserr(fs, uid, cp)
1646 struct fs *fs;
1647 u_int uid;
1648 char *cp;
1649 {
1650
1651 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1652 }