1 /*
2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
23 /*
24 * Copyright (c) 1982, 1986, 1989, 1993
25 * The Regents of the University of California. All rights reserved.
26 *
27 * Redistribution and use in source and binary forms, with or without
28 * modification, are permitted provided that the following conditions
29 * are met:
30 * 1. Redistributions of source code must retain the above copyright
31 * notice, this list of conditions and the following disclaimer.
32 * 2. Redistributions in binary form must reproduce the above copyright
33 * notice, this list of conditions and the following disclaimer in the
34 * documentation and/or other materials provided with the distribution.
35 * 3. All advertising materials mentioning features or use of this software
36 * must display the following acknowledgement:
37 * This product includes software developed by the University of
38 * California, Berkeley and its contributors.
39 * 4. Neither the name of the University nor the names of its contributors
40 * may be used to endorse or promote products derived from this software
41 * without specific prior written permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 *
55 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95
56 */
57 #include <rev_endian_fs.h>
58 #include <vm/vm_pager.h>
59
60 #include <sys/param.h>
61 #include <sys/systm.h>
62 #include <sys/buf.h>
63 #include <sys/proc.h>
64 #include <sys/vnode.h>
65 #include <sys/mount.h>
66 #include <sys/kernel.h>
67 #include <sys/syslog.h>
68 #include <sys/quota.h>
69
70 #include <sys/vm.h>
71
72 #include <ufs/ufs/quota.h>
73 #include <ufs/ufs/inode.h>
74
75 #include <ufs/ffs/fs.h>
76 #include <ufs/ffs/ffs_extern.h>
77
78 #if REV_ENDIAN_FS
79 #include <ufs/ufs/ufs_byte_order.h>
80 #include <architecture/byte_order.h>
81 #endif /* REV_ENDIAN_FS */
82
83 extern u_long nextgennumber;
84
85 static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
86 static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t));
87 static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
88 int));
89 static ino_t ffs_dirpref __P((struct inode *));
90 static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
91 static void ffs_fserr __P((struct fs *, u_int, char *));
92 static u_long ffs_hashalloc
93 __P((struct inode *, int, long, int, u_int32_t (*)()));
94 static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
95 static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
96 int));
97
98 /*
99 * Allocate a block in the file system.
100 *
101 * The size of the requested block is given, which must be some
102 * multiple of fs_fsize and <= fs_bsize.
103 * A preference may be optionally specified. If a preference is given
104 * the following hierarchy is used to allocate a block:
105 * 1) allocate the requested block.
106 * 2) allocate a rotationally optimal block in the same cylinder.
107 * 3) allocate a block in the same cylinder group.
108 * 4) quadratically rehash into other cylinder groups, until an
109 * available block is located.
110 * If no block preference is given the following hierarchy is used
111 * to allocate a block:
112 * 1) allocate a block in the cylinder group that contains the
113 * inode for the file.
114 * 2) quadratically rehash into other cylinder groups, until an
115 * available block is located.
116 */
117 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
118 register struct inode *ip;
119 ufs_daddr_t lbn, bpref;
120 int size;
121 struct ucred *cred;
122 ufs_daddr_t *bnp;
123 {
124 register struct fs *fs;
125 ufs_daddr_t bno;
126 int cg, error;
127 int devBlockSize=0;
128 *bnp = 0;
129 fs = ip->i_fs;
130 #if DIAGNOSTIC
131 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
132 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
133 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
134 panic("ffs_alloc: bad size");
135 }
136 if (cred == NOCRED)
137 panic("ffs_alloc: missing credential\n");
138 #endif /* DIAGNOSTIC */
139 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
140 goto nospace;
141 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
142 goto nospace;
143 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
144 #if QUOTA
145 if (error = chkdq(ip, (int64_t)size, cred, 0))
146 return (error);
147 #endif /* QUOTA */
148 if (bpref >= fs->fs_size)
149 bpref = 0;
150 if (bpref == 0)
151 cg = ino_to_cg(fs, ip->i_number);
152 else
153 cg = dtog(fs, bpref);
154 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
155 (u_int32_t (*)())ffs_alloccg);
156 if (bno > 0) {
157 ip->i_blocks += btodb(size, devBlockSize);
158 ip->i_flag |= IN_CHANGE | IN_UPDATE;
159 *bnp = bno;
160 return (0);
161 }
162 #if QUOTA
163 /*
164 * Restore user's disk quota because allocation failed.
165 */
166 (void) chkdq(ip, (int64_t)-size, cred, FORCE);
167 #endif /* QUOTA */
168 nospace:
169 ffs_fserr(fs, cred->cr_uid, "file system full");
170 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
171 return (ENOSPC);
172 }
173
174 /*
175 * Reallocate a fragment to a bigger size
176 *
177 * The number and size of the old block are given, and a preference
178 * and new size are also specified. The allocator attempts to extend
179 * the original block. Failing that, the regular block allocator is
180 * invoked to get an appropriate block.
181 */
182 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
183 register struct inode *ip;
184 ufs_daddr_t lbprev;
185 ufs_daddr_t bpref;
186 int osize, nsize;
187 struct ucred *cred;
188 struct buf **bpp;
189 {
190 register struct fs *fs;
191 struct buf *bp;
192 int cg, request, error;
193 ufs_daddr_t bprev, bno;
194 int devBlockSize=0;
195
196 *bpp = 0;
197 fs = ip->i_fs;
198 #if DIAGNOSTIC
199 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
200 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
201 printf(
202 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
203 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
204 panic("ffs_realloccg: bad size");
205 }
206 if (cred == NOCRED)
207 panic("ffs_realloccg: missing credential\n");
208 #endif /* DIAGNOSTIC */
209 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
210 goto nospace;
211 if ((bprev = ip->i_db[lbprev]) == 0) {
212 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
213 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
214 panic("ffs_realloccg: bad bprev");
215 }
216 /*
217 * Allocate the extra space in the buffer.
218 */
219 if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
220 brelse(bp);
221 return (error);
222 }
223 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
224
225 #if QUOTA
226 if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0))
227 {
228 brelse(bp);
229 return (error);
230 }
231 #endif /* QUOTA */
232 /*
233 * Check for extension in the existing location.
234 */
235 cg = dtog(fs, bprev);
236 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
237 if (bp->b_blkno != fsbtodb(fs, bno))
238 panic("bad blockno");
239 ip->i_blocks += btodb(nsize - osize, devBlockSize);
240 ip->i_flag |= IN_CHANGE | IN_UPDATE;
241 allocbuf(bp, nsize);
242 bp->b_flags |= B_DONE;
243 bzero((char *)bp->b_data + osize, (u_int)bp->b_bufsize - osize);
244 *bpp = bp;
245 return (0);
246 }
247 /*
248 * Allocate a new disk location.
249 */
250 if (bpref >= fs->fs_size)
251 bpref = 0;
252 switch ((int)fs->fs_optim) {
253 case FS_OPTSPACE:
254 /*
255 * Allocate an exact sized fragment. Although this makes
256 * best use of space, we will waste time relocating it if
257 * the file continues to grow. If the fragmentation is
258 * less than half of the minimum free reserve, we choose
259 * to begin optimizing for time.
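 * (As an illustration with made-up numbers: with fs_minfree = 8%,
 * the change to FS_OPTTIME happens once cs_nffree has fallen to
 * 4% of fs_dsize or less.)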
260 */
261 request = nsize;
262 if (fs->fs_minfree < 5 ||
263 fs->fs_cstotal.cs_nffree >
264 fs->fs_dsize * fs->fs_minfree / (2 * 100))
265 break;
266 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
267 fs->fs_fsmnt);
268 fs->fs_optim = FS_OPTTIME;
269 break;
270 case FS_OPTTIME:
271 /*
272 * At this point we have discovered a file that is trying to
273 * grow a small fragment to a larger fragment. To save time,
274 * we allocate a full sized block, then free the unused portion.
275 * If the file continues to grow, the `ffs_fragextend' call
276 * above will be able to grow it in place without further
277 * copying. If aberrant programs cause disk fragmentation to
278 * grow within 2% of the free reserve, we choose to begin
279 * optimizing for space.
280 */
281 request = fs->fs_bsize;
282 if (fs->fs_cstotal.cs_nffree <
283 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
284 break;
285 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
286 fs->fs_fsmnt);
287 fs->fs_optim = FS_OPTSPACE;
288 break;
289 default:
290 printf("dev = 0x%x, optim = %d, fs = %s\n",
291 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
292 panic("ffs_realloccg: bad optim");
293 /* NOTREACHED */
294 }
295 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
296 (u_int32_t (*)())ffs_alloccg);
297 if (bno > 0) {
298 bp->b_blkno = fsbtodb(fs, bno);
299 ffs_blkfree(ip, bprev, (long)osize);
300 if (nsize < request)
301 ffs_blkfree(ip, bno + numfrags(fs, nsize),
302 (long)(request - nsize));
303 ip->i_blocks += btodb(nsize - osize, devBlockSize);
304 ip->i_flag |= IN_CHANGE | IN_UPDATE;
305 allocbuf(bp, nsize);
306 bp->b_flags |= B_DONE;
307 bzero((char *)bp->b_data + osize, (u_int)bp->b_bufsize - osize);
308 *bpp = bp;
309 return (0);
310 }
311 #if QUOTA
312 /*
313 * Restore user's disk quota because allocation failed.
314 */
315 (void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE);
316 #endif /* QUOTA */
317 brelse(bp);
318 nospace:
319 /*
320 * no space available
321 */
322 ffs_fserr(fs, cred->cr_uid, "file system full");
323 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
324 return (ENOSPC);
325 }
326
327 /*
328 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
329 *
330 * The vnode and an array of buffer pointers for a range of sequential
331 * logical blocks to be made contiguous is given. The allocator attempts
332 * to find a range of sequential blocks starting as close as possible to
333 * an fs_rotdelay offset from the end of the allocation for the logical
334 * block immediately preceding the current range. If successful, the
335 * physical block numbers in the buffer pointers and in the inode are
336 * changed to reflect the new allocation. If unsuccessful, the allocation
337 * is left unchanged. The success in doing the reallocation is returned.
338 * Note that the error return is not reflected back to the user. Rather
339 * the previous block allocation will be used.
340 */
341 /* Note: This routine is unused in UBC cluster I/O */
342
343 int doasyncfree = 1;
344 int doreallocblks = 1;
345
346 int
347 ffs_reallocblks(ap)
348 struct vop_reallocblks_args *ap;
349 {
350 return (ENOSPC);
351 }
352
353 /*
354 * Allocate an inode in the file system.
355 *
356 * If allocating a directory, use ffs_dirpref to select the inode.
357 * If allocating in a directory, the following hierarchy is followed:
358 * 1) allocate the preferred inode.
359 * 2) allocate an inode in the same cylinder group.
360 * 3) quadratically rehash into other cylinder groups, until an
361 * available inode is located.
362 * If no inode preference is given the following hierarchy is used
363 * to allocate an inode:
364 * 1) allocate an inode in cylinder group 0.
365 * 2) quadratically rehash into other cylinder groups, until an
366 * available inode is located.
367 */
368 int
369 ffs_valloc(ap)
370 struct vop_valloc_args /* {
371 struct vnode *a_pvp;
372 int a_mode;
373 struct ucred *a_cred;
374 struct vnode **a_vpp;
375 } */ *ap;
376 {
377 register struct vnode *pvp = ap->a_pvp;
378 register struct inode *pip;
379 register struct fs *fs;
380 register struct inode *ip;
381 mode_t mode = ap->a_mode;
382 ino_t ino, ipref;
383 int cg, error;
384
385 *ap->a_vpp = NULL;
386 pip = VTOI(pvp);
387 fs = pip->i_fs;
388 if (fs->fs_cstotal.cs_nifree == 0)
389 goto noinodes;
390
391 if ((mode & IFMT) == IFDIR)
392 ipref = ffs_dirpref(pip);
393 else
394 ipref = pip->i_number;
395 if (ipref >= fs->fs_ncg * fs->fs_ipg)
396 ipref = 0;
397 cg = ino_to_cg(fs, ipref);
398 /*
399 * Track the number of dirs created one after another
400 * in a cg without intervening files.
401 */
402 if ((mode & IFMT) == IFDIR) {
403 if (fs->fs_contigdirs[cg] < 255)
404 fs->fs_contigdirs[cg]++;
405 } else {
406 if (fs->fs_contigdirs[cg] > 0)
407 fs->fs_contigdirs[cg]--;
408 }
409 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
410 if (ino == 0)
411 goto noinodes;
412 error = VFS_VGET(pvp->v_mount, (void *)ino, ap->a_vpp);
413 if (error) {
414 VOP_VFREE(pvp, ino, mode);
415 return (error);
416 }
417 ip = VTOI(*ap->a_vpp);
418 if (ip->i_mode) {
419 printf("mode = 0%o, inum = %d, fs = %s\n",
420 ip->i_mode, ip->i_number, fs->fs_fsmnt);
421 panic("ffs_valloc: dup alloc");
422 }
423 if (ip->i_blocks) { /* XXX */
424 printf("free inode %s/%d had %d blocks\n",
425 fs->fs_fsmnt, ino, ip->i_blocks);
426 ip->i_blocks = 0;
427 }
428 ip->i_flags = 0;
429 /*
430 * Set up a new generation number for this inode.
431 */
432 if (++nextgennumber < (u_long)time.tv_sec)
433 nextgennumber = time.tv_sec;
434 ip->i_gen = nextgennumber;
435 return (0);
436 noinodes:
437 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
438 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
439 return (ENOSPC);
440 }
441
442 /*
443 * Find a cylinder group to place a directory.
444 *
445 * The policy implemented by this algorithm is to allocate a
446 * directory inode in the same cylinder group as its parent
447 * directory, but also to reserve space for its files' inodes
448 * and data. Restrict the number of directories which may be
449 * allocated one after another in the same cylinder group
450 * without intervening allocation of files.
451 */
452 static ino_t
453 ffs_dirpref(pip)
454 struct inode *pip;
455 {
456 register struct fs *fs;
457 int cg, prefcg, dirsize, cgsize;
458 int avgifree, avgbfree, avgndir, curdirsize;
459 int minifree, minbfree, maxndir;
460 int mincg, minndir;
461 int maxcontigdirs;
462
463 fs = pip->i_fs;
464 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
465 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
466 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
467
468 /*
469 * Force allocation in another cg if creating a first level dir.
470 */
471 if (ITOV(pip)->v_flag & VROOT) {
472 #ifdef __APPLE__
473 prefcg = random() % fs->fs_ncg;
474 #else
475 prefcg = arc4random() % fs->fs_ncg;
476 #endif
477 mincg = prefcg;
478 minndir = fs->fs_ipg;
479 for (cg = prefcg; cg < fs->fs_ncg; cg++)
480 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
481 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
482 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
483 mincg = cg;
484 minndir = fs->fs_cs(fs, cg).cs_ndir;
485 }
486 for (cg = 0; cg < prefcg; cg++)
487 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
488 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
489 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
490 mincg = cg;
491 minndir = fs->fs_cs(fs, cg).cs_ndir;
492 }
493 return ((ino_t)(fs->fs_ipg * mincg));
494 }
495
496 /*
497 * Count various limits which are used for
498 * optimal allocation of a directory inode.
499 */
500 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
501 minifree = avgifree - fs->fs_ipg / 4;
502 if (minifree < 0)
503 minifree = 0;
504 minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
505 if (minbfree < 0)
506 minbfree = 0;
507 cgsize = fs->fs_fsize * fs->fs_fpg;
508 dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
509 curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
510 if (dirsize < curdirsize)
511 dirsize = curdirsize;
512 maxcontigdirs = min(cgsize / dirsize, 255);
513 if (fs->fs_avgfpdir > 0)
514 maxcontigdirs = min(maxcontigdirs,
515 fs->fs_ipg / fs->fs_avgfpdir);
516 if (maxcontigdirs == 0)
517 maxcontigdirs = 1;
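/*
 * Illustrative example (made-up parameters): with fs_fsize = 1024 and
 * fs_fpg = 16384 (cgsize = 16 MB), fs_avgfilesize = 16384 and
 * fs_avgfpdir = 64 (dirsize = 1 MB, assuming curdirsize is not larger),
 * and fs_ipg = 4096, this yields
 * maxcontigdirs = min(min(16, 255), 4096 / 64) = 16.
 */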
518
519 /*
520 * Limit number of dirs in one cg and reserve space for
521 * regular files, but only if we have no deficit in
522 * inodes or space.
523 */
524 prefcg = ino_to_cg(fs, pip->i_number);
525 for (cg = prefcg; cg < fs->fs_ncg; cg++)
526 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
527 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
528 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
529 if (fs->fs_contigdirs[cg] < maxcontigdirs)
530 return ((ino_t)(fs->fs_ipg * cg));
531 }
532 for (cg = 0; cg < prefcg; cg++)
533 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
534 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
535 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
536 if (fs->fs_contigdirs[cg] < maxcontigdirs)
537 return ((ino_t)(fs->fs_ipg * cg));
538 }
539 /*
540 * This is a backstop when we have deficit in space.
541 */
542 for (cg = prefcg; cg < fs->fs_ncg; cg++)
543 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
544 return ((ino_t)(fs->fs_ipg * cg));
545 for (cg = 0; cg < prefcg; cg++)
546 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
547 break;
548 return ((ino_t)(fs->fs_ipg * cg));
549 }
550
551 /*
552 * Select the desired position for the next block in a file. The file is
553 * logically divided into sections. The first section is composed of the
554 * direct blocks. Each additional section contains fs_maxbpg blocks.
555 *
556 * If no blocks have been allocated in the first section, the policy is to
557 * request a block in the same cylinder group as the inode that describes
558 * the file. If no blocks have been allocated in any other section, the
559 * policy is to place the section in a cylinder group with a greater than
560 * average number of free blocks. An appropriate cylinder group is found
561 * by using a rotor that sweeps the cylinder groups. When a new group of
562 * blocks is needed, the sweep begins in the cylinder group following the
563 * cylinder group from which the previous allocation was made. The sweep
564 * continues until a cylinder group with greater than the average number
565 * of free blocks is found. If the allocation is for the first block in an
566 * indirect block, the information on the previous allocation is unavailable;
567 * here a best guess is made based upon the logical block number being
568 * allocated.
569 *
570 * If a section is already partially allocated, the policy is to
571 * contiguously allocate fs_maxcontig blocks. The end of one of these
572 * contiguous blocks and the beginning of the next is physically separated
573 * so that the disk head will be in transit between them for at least
574 * fs_rotdelay milliseconds. This is to allow time for the processor to
575 * schedule another I/O transfer.
576 */
577 ufs_daddr_t
578 ffs_blkpref(ip, lbn, indx, bap)
579 struct inode *ip;
580 ufs_daddr_t lbn;
581 int indx;
582 ufs_daddr_t *bap;
583 {
584 register struct fs *fs;
585 register int cg;
586 int avgbfree, startcg;
587 ufs_daddr_t nextblk;
588 #if REV_ENDIAN_FS
589 daddr_t prev=0;
590 struct vnode *vp=ITOV(ip);
591 struct mount *mp=vp->v_mount;
592 int rev_endian=(mp->mnt_flag & MNT_REVEND);
593 #endif /* REV_ENDIAN_FS */
594
595 fs = ip->i_fs;
596 #if REV_ENDIAN_FS
597 if (indx && bap) {
598 if (rev_endian) {
599 if (bap != &ip->i_db[0])
600 prev = NXSwapLong(bap[indx - 1]);
601 else
602 prev = bap[indx - 1];
603 } else prev = bap[indx - 1];
604 }
605 if (indx % fs->fs_maxbpg == 0 || prev == 0)
606 #else /* REV_ENDIAN_FS */
607 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0)
608 #endif /* REV_ENDIAN_FS */
609 {
610 if (lbn < NDADDR) {
611 cg = ino_to_cg(fs, ip->i_number);
612 return (fs->fs_fpg * cg + fs->fs_frag);
613 }
614 /*
615 * Find a cylinder with greater than average number of
616 * unused data blocks.
617 */
618 #if REV_ENDIAN_FS
619 if (indx == 0 || prev == 0)
620 #else /* REV_ENDIAN_FS */
621 if (indx == 0 || bap[indx - 1] == 0)
622 #endif /* REV_ENDIAN_FS */
623 startcg =
624 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
625 else
626 #if REV_ENDIAN_FS
627 startcg = dtog(fs, prev) + 1;
628 #else /* REV_ENDIAN_FS */
629 startcg = dtog(fs, bap[indx - 1]) + 1;
630 #endif /* REV_ENDIAN_FS */
631 startcg %= fs->fs_ncg;
632 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
633 for (cg = startcg; cg < fs->fs_ncg; cg++)
634 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
635 fs->fs_cgrotor = cg;
636 return (fs->fs_fpg * cg + fs->fs_frag);
637 }
638 for (cg = 0; cg <= startcg; cg++)
639 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
640 fs->fs_cgrotor = cg;
641 return (fs->fs_fpg * cg + fs->fs_frag);
642 }
643 return (NULL);
644 }
645 /*
646 * One or more previous blocks have been laid out. If less
647 * than fs_maxcontig previous blocks are contiguous, the
648 * next block is requested contiguously, otherwise it is
649 * requested rotationally delayed by fs_rotdelay milliseconds.
650 */
651 #if REV_ENDIAN_FS
652 if (rev_endian) {
653 nextblk = prev + fs->fs_frag;
654 if (indx < fs->fs_maxcontig) {
655 return (nextblk);
656 }
657 if (bap != &ip->i_db[0])
658 prev = NXSwapLong(bap[indx - fs->fs_maxcontig]);
659 else
660 prev = bap[indx - fs->fs_maxcontig];
661 if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk)
662 return (nextblk);
663 } else {
664 #endif /* REV_ENDIAN_FS */
665 nextblk = bap[indx - 1] + fs->fs_frag;
666 if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
667 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
668 return (nextblk);
669 #if REV_ENDIAN_FS
670 }
671 #endif /* REV_ENDIAN_FS */
672 if (fs->fs_rotdelay != 0)
673 /*
674 * Here we convert ms of delay to frags as:
675 * (frags) = (ms) * (rev/sec) * (sect/rev) /
676 * ((sect/frag) * (ms/sec))
677 * then round up to the next block.
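 * For example (illustrative values only, not from any particular
 * drive): with fs_rotdelay = 4 ms, fs_rps = 60, fs_nsect = 32,
 * NSPF(fs) = 2 and fs_frag = 8, this adds
 * roundup(7680 / 2000, 8) = roundup(3, 8) = 8 frags to nextblk.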
678 */
679 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
680 (NSPF(fs) * 1000), fs->fs_frag);
681 return (nextblk);
682 }
683
684 /*
685 * Implement the cylinder overflow algorithm.
686 *
687 * The policy implemented by this algorithm is:
688 * 1) allocate the block in its requested cylinder group.
689 * 2) quadratically rehash on the cylinder group number.
690 * 3) brute force search for a free block.
691 */
692 /*VARARGS5*/
693 static u_long
694 ffs_hashalloc(ip, cg, pref, size, allocator)
695 struct inode *ip;
696 int cg;
697 long pref;
698 int size; /* size for data blocks, mode for inodes */
699 u_int32_t (*allocator)();
700 {
701 register struct fs *fs;
702 long result;
703 int i, icg = cg;
704
705 fs = ip->i_fs;
706 /*
707 * 1: preferred cylinder group
708 */
709 result = (*allocator)(ip, cg, pref, size);
710 if (result)
711 return (result);
712 /*
713 * 2: quadratic rehash
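 * (e.g. starting from cylinder group icg, successive probes visit
 * icg + 1, icg + 3, icg + 7, ... (mod fs_ncg))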
714 */
715 for (i = 1; i < fs->fs_ncg; i *= 2) {
716 cg += i;
717 if (cg >= fs->fs_ncg)
718 cg -= fs->fs_ncg;
719 result = (*allocator)(ip, cg, 0, size);
720 if (result)
721 return (result);
722 }
723 /*
724 * 3: brute force search
725 * Note that we start at i == 2, since 0 was checked initially,
726 * and 1 is always checked in the quadratic rehash.
727 */
728 cg = (icg + 2) % fs->fs_ncg;
729 for (i = 2; i < fs->fs_ncg; i++) {
730 result = (*allocator)(ip, cg, 0, size);
731 if (result)
732 return (result);
733 cg++;
734 if (cg == fs->fs_ncg)
735 cg = 0;
736 }
737 return (NULL);
738 }
739
740 /*
741 * Determine whether a fragment can be extended.
742 *
743 * Check to see if the necessary fragments are available, and
744 * if they are, allocate them.
745 */
746 static ufs_daddr_t
747 ffs_fragextend(ip, cg, bprev, osize, nsize)
748 struct inode *ip;
749 int cg;
750 long bprev;
751 int osize, nsize;
752 {
753 register struct fs *fs;
754 register struct cg *cgp;
755 struct buf *bp;
756 long bno;
757 int frags, bbase;
758 int i, error;
759 #if REV_ENDIAN_FS
760 struct vnode *vp=ITOV(ip);
761 struct mount *mp=vp->v_mount;
762 int rev_endian=(mp->mnt_flag & MNT_REVEND);
763 #endif /* REV_ENDIAN_FS */
764
765 fs = ip->i_fs;
766 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
767 return (NULL);
768 frags = numfrags(fs, nsize); /* number of fragments needed */
769 bbase = fragnum(fs, bprev); /* fragment offset within the block (bprev mod fs_frag) */
770 if (bbase > fragnum(fs, (bprev + frags - 1))) {
771 /* cannot extend across a block boundary */
772 return (NULL);
773 }
774 /* read corresponding cylinder group info */
775 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
776 (int)fs->fs_cgsize, NOCRED, &bp);
777 if (error) {
778 brelse(bp);
779 return (NULL);
780 }
781 cgp = (struct cg *)bp->b_data;
782 #if REV_ENDIAN_FS
783 if (rev_endian) {
784 byte_swap_cgin(cgp, fs);
785 }
786 #endif /* REV_ENDIAN_FS */
787
788 if (!cg_chkmagic(cgp)) {
789 #if REV_ENDIAN_FS
790 if (rev_endian)
791 byte_swap_cgout(cgp,fs);
792 #endif /* REV_ENDIAN_FS */
793 brelse(bp);
794 return (NULL);
795 }
796 cgp->cg_time = time.tv_sec;
797 bno = dtogd(fs, bprev);
798 for (i = numfrags(fs, osize); i < frags; i++)
799 if (isclr(cg_blksfree(cgp), bno + i)) {
800 #if REV_ENDIAN_FS
801 if (rev_endian)
802 byte_swap_cgout(cgp,fs);
803 #endif /* REV_ENDIAN_FS */
804 brelse(bp);
805 return (NULL);
806 }
807 /*
808 * The current fragment can be extended:
809 * deduct the count on the fragment run being extended into,
810 * increase the count on the remaining fragment (if any),
811 * and allocate the extended piece.
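 * (e.g. extending a 2-frag piece into an adjacent free run of 3
 * frags: cg_frsum[3]--, and the single leftover frag gives
 * cg_frsum[1]++)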
812 */
813 for (i = frags; i < fs->fs_frag - bbase; i++)
814 if (isclr(cg_blksfree(cgp), bno + i))
815 break;
816 cgp->cg_frsum[i - numfrags(fs, osize)]--;
817 if (i != frags)
818 cgp->cg_frsum[i - frags]++;
819 for (i = numfrags(fs, osize); i < frags; i++) {
820 clrbit(cg_blksfree(cgp), bno + i);
821 cgp->cg_cs.cs_nffree--;
822 fs->fs_cstotal.cs_nffree--;
823 fs->fs_cs(fs, cg).cs_nffree--;
824 }
825 fs->fs_fmod = 1;
826 #if REV_ENDIAN_FS
827 if (rev_endian)
828 byte_swap_cgout(cgp,fs);
829 #endif /* REV_ENDIAN_FS */
830 bdwrite(bp);
831 return (bprev);
832 }
833
834 /*
835 * Determine whether a block can be allocated.
836 *
837 * Check to see if a block of the appropriate size is available,
838 * and if it is, allocate it.
839 */
840 static ufs_daddr_t
841 ffs_alloccg(ip, cg, bpref, size)
842 struct inode *ip;
843 int cg;
844 ufs_daddr_t bpref;
845 int size;
846 {
847 register struct fs *fs;
848 register struct cg *cgp;
849 struct buf *bp;
850 register int i;
851 int error, bno, frags, allocsiz;
852 #if REV_ENDIAN_FS
853 struct vnode *vp=ITOV(ip);
854 struct mount *mp=vp->v_mount;
855 int rev_endian=(mp->mnt_flag & MNT_REVEND);
856 #endif /* REV_ENDIAN_FS */
857
858 fs = ip->i_fs;
859 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
860 return (NULL);
861 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
862 (int)fs->fs_cgsize, NOCRED, &bp);
863 if (error) {
864 brelse(bp);
865 return (NULL);
866 }
867 cgp = (struct cg *)bp->b_data;
868 #if REV_ENDIAN_FS
869 if (rev_endian)
870 byte_swap_cgin(cgp,fs);
871 #endif /* REV_ENDIAN_FS */
872 if (!cg_chkmagic(cgp) ||
873 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
874 #if REV_ENDIAN_FS
875 if (rev_endian)
876 byte_swap_cgout(cgp,fs);
877 #endif /* REV_ENDIAN_FS */
878 brelse(bp);
879 return (NULL);
880 }
881 cgp->cg_time = time.tv_sec;
882 if (size == fs->fs_bsize) {
883 bno = ffs_alloccgblk(fs, cgp, bpref);
884 #if REV_ENDIAN_FS
885 if (rev_endian)
886 byte_swap_cgout(cgp,fs);
887 #endif /* REV_ENDIAN_FS */
888 bdwrite(bp);
889 return (bno);
890 }
891 /*
892 * check to see if any fragments are already available
893 * allocsiz is the size which will be allocated, hacking
894 * it down to a smaller size if necessary
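 * (e.g. a request for 2 frags may be satisfied from a free run of 3:
 * cg_frsum[3] is decremented and the leftover frag is credited back
 * to cg_frsum[1] below)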
895 */
896 frags = numfrags(fs, size);
897 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
898 if (cgp->cg_frsum[allocsiz] != 0)
899 break;
900 if (allocsiz == fs->fs_frag) {
901 /*
902 * no fragments were available, so a block will be
903 * allocated, and hacked up
904 */
905 if (cgp->cg_cs.cs_nbfree == 0) {
906 #if REV_ENDIAN_FS
907 if (rev_endian)
908 byte_swap_cgout(cgp,fs);
909 #endif /* REV_ENDIAN_FS */
910 brelse(bp);
911 return (NULL);
912 }
913 bno = ffs_alloccgblk(fs, cgp, bpref);
914 bpref = dtogd(fs, bno);
915 for (i = frags; i < fs->fs_frag; i++)
916 setbit(cg_blksfree(cgp), bpref + i);
917 i = fs->fs_frag - frags;
918 cgp->cg_cs.cs_nffree += i;
919 fs->fs_cstotal.cs_nffree += i;
920 fs->fs_cs(fs, cg).cs_nffree += i;
921 fs->fs_fmod = 1;
922 cgp->cg_frsum[i]++;
923 #if REV_ENDIAN_FS
924 if (rev_endian)
925 byte_swap_cgout(cgp,fs);
926 #endif /* REV_ENDIAN_FS */
927 bdwrite(bp);
928 return (bno);
929 }
930 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
931 if (bno < 0) {
932 #if REV_ENDIAN_FS
933 if (rev_endian)
934 byte_swap_cgout(cgp,fs);
935 #endif /* REV_ENDIAN_FS */
936 brelse(bp);
937 return (NULL);
938 }
939 for (i = 0; i < frags; i++)
940 clrbit(cg_blksfree(cgp), bno + i);
941 cgp->cg_cs.cs_nffree -= frags;
942 fs->fs_cstotal.cs_nffree -= frags;
943 fs->fs_cs(fs, cg).cs_nffree -= frags;
944 fs->fs_fmod = 1;
945 cgp->cg_frsum[allocsiz]--;
946 if (frags != allocsiz)
947 cgp->cg_frsum[allocsiz - frags]++;
948 #if REV_ENDIAN_FS
949 if (rev_endian)
950 byte_swap_cgout(cgp,fs);
951 #endif /* REV_ENDIAN_FS */
952 bdwrite(bp);
953 return (cg * fs->fs_fpg + bno);
954 }
955
956 /*
957 * Allocate a block in a cylinder group.
958 *
959 * This algorithm implements the following policy:
960 * 1) allocate the requested block.
961 * 2) allocate a rotationally optimal block in the same cylinder.
962 * 3) allocate the next available block on the block rotor for the
963 * specified cylinder group.
964 * Note that this routine only allocates fs_bsize blocks; these
965 * blocks may be fragmented by the routine that allocates them.
966 */
967 static ufs_daddr_t
968 ffs_alloccgblk(fs, cgp, bpref)
969 register struct fs *fs;
970 register struct cg *cgp;
971 ufs_daddr_t bpref;
972 {
973 ufs_daddr_t bno, blkno;
974 int cylno, pos, delta;
975 short *cylbp;
976 register int i;
977
978 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
979 bpref = cgp->cg_rotor;
980 goto norot;
981 }
982 bpref = blknum(fs, bpref);
983 bpref = dtogd(fs, bpref);
984 /*
985 * if the requested block is available, use it
986 */
987 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
988 bno = bpref;
989 goto gotit;
990 }
991 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
992 /*
993 * Block layout information is not available.
994 * Leaving bpref unchanged means we take the
995 * next available free block following the one
996 * we just allocated. Hopefully this will at
997 * least hit a track cache on drives of unknown
998 * geometry (e.g. SCSI).
999 */
1000 goto norot;
1001 }
1002 /*
1003 * check for a block available on the same cylinder
1004 */
1005 cylno = cbtocylno(fs, bpref);
1006 if (cg_blktot(cgp)[cylno] == 0)
1007 goto norot;
1008 /*
1009 * check the summary information to see if a block is
1010 * available in the requested cylinder starting at the
1011 * requested rotational position and proceeding around.
1012 */
1013 cylbp = cg_blks(fs, cgp, cylno);
1014 pos = cbtorpos(fs, bpref);
1015 for (i = pos; i < fs->fs_nrpos; i++)
1016 if (cylbp[i] > 0)
1017 break;
1018 if (i == fs->fs_nrpos)
1019 for (i = 0; i < pos; i++)
1020 if (cylbp[i] > 0)
1021 break;
1022 if (cylbp[i] > 0) {
1023 /*
1024 * found a rotational position, now find the actual
1025 * block; panic if none is actually there.
1026 */
1027 pos = cylno % fs->fs_cpc;
1028 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1029 if (fs_postbl(fs, pos)[i] == -1) {
1030 printf("pos = %d, i = %d, fs = %s\n",
1031 pos, i, fs->fs_fsmnt);
1032 panic("ffs_alloccgblk: cyl groups corrupted");
1033 }
1034 for (i = fs_postbl(fs, pos)[i];; ) {
1035 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
1036 bno = blkstofrags(fs, (bno + i));
1037 goto gotit;
1038 }
1039 delta = fs_rotbl(fs)[i];
1040 if (delta <= 0 ||
1041 delta + i > fragstoblks(fs, fs->fs_fpg))
1042 break;
1043 i += delta;
1044 }
1045 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
1046 panic("ffs_alloccgblk: can't find blk in cyl");
1047 }
1048 norot:
1049 /*
1050 * no blocks in the requested cylinder, so take next
1051 * available one in this cylinder group.
1052 */
1053 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1054 if (bno < 0)
1055 return (NULL);
1056 cgp->cg_rotor = bno;
1057 gotit:
1058 blkno = fragstoblks(fs, bno);
1059 ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
1060 ffs_clusteracct(fs, cgp, blkno, -1);
1061 cgp->cg_cs.cs_nbfree--;
1062 fs->fs_cstotal.cs_nbfree--;
1063 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
1064 cylno = cbtocylno(fs, bno);
1065 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
1066 cg_blktot(cgp)[cylno]--;
1067 fs->fs_fmod = 1;
1068 return (cgp->cg_cgx * fs->fs_fpg + bno);
1069 }
1070
1071 /*
1072 * Determine whether a cluster can be allocated.
1073 *
1074 * We do not currently check for optimal rotational layout if there
1075 * are multiple choices in the same cylinder group. Instead we just
1076 * take the first one that we find following bpref.
1077 */
1078 static ufs_daddr_t
1079 ffs_clusteralloc(ip, cg, bpref, len)
1080 struct inode *ip;
1081 int cg;
1082 ufs_daddr_t bpref;
1083 int len;
1084 {
1085 register struct fs *fs;
1086 register struct cg *cgp;
1087 struct buf *bp;
1088 int i, got, run, bno, bit, map;
1089 u_char *mapp;
1090 int32_t *lp;
1091 #if REV_ENDIAN_FS
1092 struct vnode *vp=ITOV(ip);
1093 struct mount *mp=vp->v_mount;
1094 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1095 #endif /* REV_ENDIAN_FS */
1096
1097 fs = ip->i_fs;
1098 if (fs->fs_maxcluster[cg] < len)
1099 return (NULL);
1100 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1101 NOCRED, &bp))
1102 goto fail;
1103 cgp = (struct cg *)bp->b_data;
1104 #if REV_ENDIAN_FS
1105 if (rev_endian)
1106 byte_swap_cgin(cgp,fs);
1107 #endif /* REV_ENDIAN_FS */
1108 if (!cg_chkmagic(cgp)) {
1109 #if REV_ENDIAN_FS
1110 if (rev_endian)
1111 byte_swap_cgout(cgp,fs);
1112 #endif /* REV_ENDIAN_FS */
1113 goto fail;
1114 }
1115 /*
1116 * Check to see if a cluster of the needed size (or bigger) is
1117 * available in this cylinder group.
1118 */
1119 lp = &cg_clustersum(cgp)[len];
1120 for (i = len; i <= fs->fs_contigsumsize; i++)
1121 if (*lp++ > 0)
1122 break;
1123 if (i > fs->fs_contigsumsize) {
1124 /*
1125 * This is the first time looking for a cluster in this
1126 * cylinder group. Update the cluster summary information
1127 * to reflect the true maximum sized cluster so that
1128 * future cluster allocation requests can avoid reading
1129 * the cylinder group map only to find no clusters.
1130 */
1131 lp = &cg_clustersum(cgp)[len - 1];
1132 for (i = len - 1; i > 0; i--)
1133 if (*lp-- > 0)
1134 break;
1135 fs->fs_maxcluster[cg] = i;
1136 #if REV_ENDIAN_FS
1137 if (rev_endian)
1138 byte_swap_cgout(cgp,fs);
1139 #endif /* REV_ENDIAN_FS */
1140 goto fail;
1141 }
1142 /*
1143 * Search the cluster map to find a big enough cluster.
1144 * We take the first one that we find, even if it is larger
1145 * than we need as we prefer to get one close to the previous
1146 * block allocation. We do not search before the current
1147 * preference point as we do not want to allocate a block
1148 * that is allocated before the previous one (as we will
1149 * then have to wait for another pass of the elevator
1150 * algorithm before it will be read). We prefer to fail and
1151 * be recalled to try an allocation in the next cylinder group.
1152 */
1153 if (dtog(fs, bpref) != cg)
1154 bpref = 0;
1155 else
1156 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
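/*
 * Scan the free-cluster bitmap one bit per block from bpref on,
 * counting the current run of free blocks and stopping at the
 * first run of length len.
 */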
1157 mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1158 map = *mapp++;
1159 bit = 1 << (bpref % NBBY);
1160 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
1161 if ((map & bit) == 0) {
1162 run = 0;
1163 } else {
1164 run++;
1165 if (run == len)
1166 break;
1167 }
1168 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1169 bit <<= 1;
1170 } else {
1171 map = *mapp++;
1172 bit = 1;
1173 }
1174 }
1175 if (got == cgp->cg_nclusterblks) {
1176 #if REV_ENDIAN_FS
1177 if (rev_endian)
1178 byte_swap_cgout(cgp,fs);
1179 #endif /* REV_ENDIAN_FS */
1180 goto fail;
1181 }
1182 /*
1183 * Allocate the cluster that we have found.
1184 */
1185 for (i = 1; i <= len; i++)
1186 if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
1187 panic("ffs_clusteralloc: map mismatch");
1188 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1189 if (dtog(fs, bno) != cg)
1190 panic("ffs_clusteralloc: allocated out of group");
1191 len = blkstofrags(fs, len);
1192 for (i = 0; i < len; i += fs->fs_frag)
1193 if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i)
1194 panic("ffs_clusteralloc: lost block");
1195 #if REV_ENDIAN_FS
1196 if (rev_endian)
1197 byte_swap_cgout(cgp,fs);
1198 #endif /* REV_ENDIAN_FS */
1199 bdwrite(bp);
1200 return (bno);
1201
1202 fail:
1203 brelse(bp);
1204 return (0);
1205 }
1206
1207 /*
1208 * Determine whether an inode can be allocated.
1209 *
1210 * Check to see if an inode is available, and if it is,
1211 * allocate it using the following policy:
1212 * 1) allocate the requested inode.
1213 * 2) allocate the next available inode after the requested
1214 * inode in the specified cylinder group.
1215 */
1216 static ino_t
1217 ffs_nodealloccg(ip, cg, ipref, mode)
1218 struct inode *ip;
1219 int cg;
1220 ufs_daddr_t ipref;
1221 int mode;
1222 {
1223 register struct fs *fs;
1224 register struct cg *cgp;
1225 struct buf *bp;
1226 int error, start, len, loc, map, i;
1227 #if REV_ENDIAN_FS
1228 struct vnode *vp=ITOV(ip);
1229 struct mount *mp=vp->v_mount;
1230 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1231 #endif /* REV_ENDIAN_FS */
1232
1233 fs = ip->i_fs;
1234 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1235 return (NULL);
1236 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1237 (int)fs->fs_cgsize, NOCRED, &bp);
1238 if (error) {
1239 brelse(bp);
1240 return (NULL);
1241 }
1242 cgp = (struct cg *)bp->b_data;
1243 #if REV_ENDIAN_FS
1244 if (rev_endian)
1245 byte_swap_cgin(cgp,fs);
1246 #endif /* REV_ENDIAN_FS */
1247 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1248 #if REV_ENDIAN_FS
1249 if (rev_endian)
1250 byte_swap_cgout(cgp,fs);
1251 #endif /* REV_ENDIAN_FS */
1252 brelse(bp);
1253 return (NULL);
1254 }
1255
1256 cgp->cg_time = time.tv_sec;
1257 if (ipref) {
1258 ipref %= fs->fs_ipg;
1259 if (isclr(cg_inosused(cgp), ipref))
1260 goto gotit;
1261 }
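/*
 * Scan the inode-use bitmap a byte at a time starting at the rotor;
 * skpc() skips bytes equal to 0xff (all eight inodes in that byte in
 * use), so loc == 0 here means no free inode was found in the range
 * scanned.
 */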
1262 start = cgp->cg_irotor / NBBY;
1263 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1264 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
1265 if (loc == 0) {
1266 len = start + 1;
1267 start = 0;
1268 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
1269 if (loc == 0) {
1270 printf("cg = %d, irotor = %d, fs = %s\n",
1271 cg, cgp->cg_irotor, fs->fs_fsmnt);
1272 panic("ffs_nodealloccg: map corrupted");
1273 /* NOTREACHED */
1274 }
1275 }
1276 i = start + len - loc;
1277 map = cg_inosused(cgp)[i];
1278 ipref = i * NBBY;
1279 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1280 if ((map & i) == 0) {
1281 cgp->cg_irotor = ipref;
1282 goto gotit;
1283 }
1284 }
1285 printf("fs = %s\n", fs->fs_fsmnt);
1286 panic("ffs_nodealloccg: block not in map");
1287 /* NOTREACHED */
1288 gotit:
1289 setbit(cg_inosused(cgp), ipref);
1290 cgp->cg_cs.cs_nifree--;
1291 fs->fs_cstotal.cs_nifree--;
1292 fs->fs_cs(fs, cg).cs_nifree--;
1293 fs->fs_fmod = 1;
1294 if ((mode & IFMT) == IFDIR) {
1295 cgp->cg_cs.cs_ndir++;
1296 fs->fs_cstotal.cs_ndir++;
1297 fs->fs_cs(fs, cg).cs_ndir++;
1298 }
1299 #if REV_ENDIAN_FS
1300 if (rev_endian)
1301 byte_swap_cgout(cgp,fs);
1302 #endif /* REV_ENDIAN_FS */
1303 bdwrite(bp);
1304 return (cg * fs->fs_ipg + ipref);
1305 }
1306
1307 /*
1308 * Free a block or fragment.
1309 *
1310 * The specified block or fragment is placed back in the
1311 * free map. If a fragment is deallocated, a possible
1312 * block reassembly is checked.
1313 */
1314 ffs_blkfree(ip, bno, size)
1315 register struct inode *ip;
1316 ufs_daddr_t bno;
1317 long size;
1318 {
1319 register struct fs *fs;
1320 register struct cg *cgp;
1321 struct buf *bp;
1322 ufs_daddr_t blkno;
1323 int i, error, cg, blk, frags, bbase;
1324 #if REV_ENDIAN_FS
1325 struct vnode *vp=ITOV(ip);
1326 struct mount *mp=vp->v_mount;
1327 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1328 #endif /* REV_ENDIAN_FS */
1329 fs = ip->i_fs;
1330 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1331 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
1332 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1333 panic("blkfree: bad size");
1334 }
1335 cg = dtog(fs, bno);
1336 if ((u_int)bno >= fs->fs_size) {
1337 printf("bad block %d, ino %d\n", bno, ip->i_number);
1338 ffs_fserr(fs, ip->i_uid, "bad block");
1339 return;
1340 }
1341 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1342 (int)fs->fs_cgsize, NOCRED, &bp);
1343 if (error) {
1344 brelse(bp);
1345 return;
1346 }
1347 cgp = (struct cg *)bp->b_data;
1348 #if REV_ENDIAN_FS
1349 if (rev_endian)
1350 byte_swap_cgin(cgp,fs);
1351 #endif /* REV_ENDIAN_FS */
1352 if (!cg_chkmagic(cgp)) {
1353 #if REV_ENDIAN_FS
1354 if (rev_endian)
1355 byte_swap_cgout(cgp,fs);
1356 #endif /* REV_ENDIAN_FS */
1357 brelse(bp);
1358 return;
1359 }
1360 cgp->cg_time = time.tv_sec;
1361 bno = dtogd(fs, bno);
1362 if (size == fs->fs_bsize) {
1363 blkno = fragstoblks(fs, bno);
1364 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1365 printf("dev = 0x%x, block = %d, fs = %s\n",
1366 ip->i_dev, bno, fs->fs_fsmnt);
1367 panic("blkfree: freeing free block");
1368 }
1369 ffs_setblock(fs, cg_blksfree(cgp), blkno);
1370 ffs_clusteracct(fs, cgp, blkno, 1);
1371 cgp->cg_cs.cs_nbfree++;
1372 fs->fs_cstotal.cs_nbfree++;
1373 fs->fs_cs(fs, cg).cs_nbfree++;
1374 i = cbtocylno(fs, bno);
1375 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
1376 cg_blktot(cgp)[i]++;
1377 } else {
1378 bbase = bno - fragnum(fs, bno);
1379 /*
1380 * decrement the counts associated with the old frags
1381 */
1382 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1383 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1384 /*
1385 * deallocate the fragment
1386 */
1387 frags = numfrags(fs, size);
1388 for (i = 0; i < frags; i++) {
1389 if (isset(cg_blksfree(cgp), bno + i)) {
1390 printf("dev = 0x%x, block = %d, fs = %s\n",
1391 ip->i_dev, bno + i, fs->fs_fsmnt);
1392 panic("blkfree: freeing free frag");
1393 }
1394 setbit(cg_blksfree(cgp), bno + i);
1395 }
1396 cgp->cg_cs.cs_nffree += i;
1397 fs->fs_cstotal.cs_nffree += i;
1398 fs->fs_cs(fs, cg).cs_nffree += i;
1399 /*
1400 * add back in counts associated with the new frags
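 * (e.g. freeing 2 frags that adjoin a run of 3 already-free frags:
 * the net effect of the two ffs_fragacct calls is cg_frsum[3]-- and
 * cg_frsum[5]++)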
1401 */
1402 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1403 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1404 /*
1405 * if a complete block has been reassembled, account for it
1406 */
1407 blkno = fragstoblks(fs, bbase);
1408 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1409 cgp->cg_cs.cs_nffree -= fs->fs_frag;
1410 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1411 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1412 ffs_clusteracct(fs, cgp, blkno, 1);
1413 cgp->cg_cs.cs_nbfree++;
1414 fs->fs_cstotal.cs_nbfree++;
1415 fs->fs_cs(fs, cg).cs_nbfree++;
1416 i = cbtocylno(fs, bbase);
1417 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
1418 cg_blktot(cgp)[i]++;
1419 }
1420 }
1421 fs->fs_fmod = 1;
1422 #if REV_ENDIAN_FS
1423 if (rev_endian)
1424 byte_swap_cgout(cgp,fs);
1425 #endif /* REV_ENDIAN_FS */
1426 bdwrite(bp);
1427 }
1428
1429 #if DIAGNOSTIC
1430 /*
1431 * Verify allocation of a block or fragment. Returns true if block or
1432 * fragment is allocated, false if it is free.
1433 */
1434 ffs_checkblk(ip, bno, size)
1435 struct inode *ip;
1436 ufs_daddr_t bno;
1437 long size;
1438 {
1439 struct fs *fs;
1440 struct cg *cgp;
1441 struct buf *bp;
1442 int i, error, frags, free;
1443 #if REV_ENDIAN_FS
1444 struct vnode *vp=ITOV(ip);
1445 struct mount *mp=vp->v_mount;
1446 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1447 #endif /* REV_ENDIAN_FS */
1448
1449 fs = ip->i_fs;
1450 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1451 printf("bsize = %d, size = %d, fs = %s\n",
1452 fs->fs_bsize, size, fs->fs_fsmnt);
1453 panic("checkblk: bad size");
1454 }
1455 if ((u_int)bno >= fs->fs_size)
1456 panic("checkblk: bad block %d", bno);
1457 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1458 (int)fs->fs_cgsize, NOCRED, &bp);
1459 if (error) {
1460 brelse(bp);
1461 return;
1462 }
1463 cgp = (struct cg *)bp->b_data;
1464 #if REV_ENDIAN_FS
1465 if (rev_endian)
1466 byte_swap_cgin(cgp,fs);
1467 #endif /* REV_ENDIAN_FS */
1468 if (!cg_chkmagic(cgp)) {
1469 #if REV_ENDIAN_FS
1470 if (rev_endian)
1471 byte_swap_cgout(cgp,fs);
1472 #endif /* REV_ENDIAN_FS */
1473 brelse(bp);
1474 return;
1475 }
1476 bno = dtogd(fs, bno);
1477 if (size == fs->fs_bsize) {
1478 free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
1479 } else {
1480 frags = numfrags(fs, size);
1481 for (free = 0, i = 0; i < frags; i++)
1482 if (isset(cg_blksfree(cgp), bno + i))
1483 free++;
1484 if (free != 0 && free != frags)
1485 panic("checkblk: partially free fragment");
1486 }
1487 #if REV_ENDIAN_FS
1488 if (rev_endian)
1489 byte_swap_cgout(cgp,fs);
1490 #endif /* REV_ENDIAN_FS */
1491 brelse(bp);
1492 return (!free);
1493 }
1494 #endif /* DIAGNOSTIC */
1495
1496 /*
1497 * Free an inode.
1498 *
1499 * The specified inode is placed back in the free map.
1500 */
1501 int
1502 ffs_vfree(ap)
1503 struct vop_vfree_args /* {
1504 struct vnode *a_pvp;
1505 ino_t a_ino;
1506 int a_mode;
1507 } */ *ap;
1508 {
1509 register struct fs *fs;
1510 register struct cg *cgp;
1511 register struct inode *pip;
1512 ino_t ino = ap->a_ino;
1513 struct buf *bp;
1514 int error, cg;
1515 #if REV_ENDIAN_FS
1516 struct vnode *vp=ap->a_pvp;
1517 struct mount *mp=vp->v_mount;
1518 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1519 #endif /* REV_ENDIAN_FS */
1520
1521 pip = VTOI(ap->a_pvp);
1522 fs = pip->i_fs;
1523 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1524 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1525 pip->i_dev, ino, fs->fs_fsmnt);
1526 cg = ino_to_cg(fs, ino);
1527 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1528 (int)fs->fs_cgsize, NOCRED, &bp);
1529 if (error) {
1530 brelse(bp);
1531 return (0);
1532 }
1533 cgp = (struct cg *)bp->b_data;
1534 #if REV_ENDIAN_FS
1535 if (rev_endian)
1536 byte_swap_cgin(cgp,fs);
1537 #endif /* REV_ENDIAN_FS */
1538 if (!cg_chkmagic(cgp)) {
1539 #if REV_ENDIAN_FS
1540 if (rev_endian)
1541 byte_swap_cgout(cgp,fs);
1542 #endif /* REV_ENDIAN_FS */
1543 brelse(bp);
1544 return (0);
1545 }
1546 cgp->cg_time = time.tv_sec;
1547 ino %= fs->fs_ipg;
1548 if (isclr(cg_inosused(cgp), ino)) {
1549 printf("dev = 0x%x, ino = %d, fs = %s\n",
1550 pip->i_dev, ino, fs->fs_fsmnt);
1551 if (fs->fs_ronly == 0)
1552 panic("ifree: freeing free inode");
1553 }
1554 clrbit(cg_inosused(cgp), ino);
1555 if (ino < cgp->cg_irotor)
1556 cgp->cg_irotor = ino;
1557 cgp->cg_cs.cs_nifree++;
1558 fs->fs_cstotal.cs_nifree++;
1559 fs->fs_cs(fs, cg).cs_nifree++;
1560 if ((ap->a_mode & IFMT) == IFDIR) {
1561 cgp->cg_cs.cs_ndir--;
1562 fs->fs_cstotal.cs_ndir--;
1563 fs->fs_cs(fs, cg).cs_ndir--;
1564 }
1565 fs->fs_fmod = 1;
1566 #if REV_ENDIAN_FS
1567 if (rev_endian)
1568 byte_swap_cgout(cgp,fs);
1569 #endif /* REV_ENDIAN_FS */
1570 bdwrite(bp);
1571 return (0);
1572 }
1573
1574 /*
1575 * Find a block of the specified size in the specified cylinder group.
1576 *
1577 * It is a panic if a request is made to find a block when none are
1578 * available.
1579 */
1580 static ufs_daddr_t
1581 ffs_mapsearch(fs, cgp, bpref, allocsiz)
1582 register struct fs *fs;
1583 register struct cg *cgp;
1584 ufs_daddr_t bpref;
1585 int allocsiz;
1586 {
1587 ufs_daddr_t bno;
1588 int start, len, loc, i;
1589 int blk, field, subfield, pos;
1590
1591 /*
1592 * find the fragment by searching through the free block
1593 * map for an appropriate bit pattern
1594 */
1595 if (bpref)
1596 start = dtogd(fs, bpref) / NBBY;
1597 else
1598 start = cgp->cg_frotor / NBBY;
1599 len = howmany(fs->fs_fpg, NBBY) - start;
1600 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
1601 (u_char *)fragtbl[fs->fs_frag],
1602 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1603 if (loc == 0) {
1604 len = start + 1;
1605 start = 0;
1606 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
1607 (u_char *)fragtbl[fs->fs_frag],
1608 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1609 if (loc == 0) {
1610 printf("start = %d, len = %d, fs = %s\n",
1611 start, len, fs->fs_fsmnt);
1612 panic("ffs_alloccg: map corrupted");
1613 /* NOTREACHED */
1614 }
1615 }
1616 bno = (start + len - loc) * NBBY;
1617 cgp->cg_frotor = bno;
1618 /*
1619 * found the byte in the map
1620 * sift through the bits to find the selected frag
1621 */
1622 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1623 blk = blkmap(fs, cg_blksfree(cgp), bno);
1624 blk <<= 1;
1625 field = around[allocsiz];
1626 subfield = inside[allocsiz];
1627 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1628 if ((blk & field) == subfield)
1629 return (bno + pos);
1630 field <<= 1;
1631 subfield <<= 1;
1632 }
1633 }
1634 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1635 panic("ffs_alloccg: block not in map");
1636 return (-1);
1637 }
1638
1639 /*
1640 * Update the cluster map because of an allocation or free.
1641 *
1642 * Cnt == 1 means free; cnt == -1 means allocating.
1643 */
1644 ffs_clusteracct(fs, cgp, blkno, cnt)
1645 struct fs *fs;
1646 struct cg *cgp;
1647 ufs_daddr_t blkno;
1648 int cnt;
1649 {
1650 int32_t *sump;
1651 int32_t *lp;
1652 u_char *freemapp, *mapp;
1653 int i, start, end, forw, back, map, bit;
1654
1655 if (fs->fs_contigsumsize <= 0)
1656 return;
1657 freemapp = cg_clustersfree(cgp);
1658 sump = cg_clustersum(cgp);
1659 /*
1660 * Allocate or clear the actual block.
1661 */
1662 if (cnt > 0)
1663 setbit(freemapp, blkno);
1664 else
1665 clrbit(freemapp, blkno);
1666 /*
1667 * Find the size of the cluster going forward.
1668 */
1669 start = blkno + 1;
1670 end = start + fs->fs_contigsumsize;
1671 if (end >= cgp->cg_nclusterblks)
1672 end = cgp->cg_nclusterblks;
1673 mapp = &freemapp[start / NBBY];
1674 map = *mapp++;
1675 bit = 1 << (start % NBBY);
1676 for (i = start; i < end; i++) {
1677 if ((map & bit) == 0)
1678 break;
1679 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1680 bit <<= 1;
1681 } else {
1682 map = *mapp++;
1683 bit = 1;
1684 }
1685 }
1686 forw = i - start;
1687 /*
1688 * Find the size of the cluster going backward.
1689 */
1690 start = blkno - 1;
1691 end = start - fs->fs_contigsumsize;
1692 if (end < 0)
1693 end = -1;
1694 mapp = &freemapp[start / NBBY];
1695 map = *mapp--;
1696 bit = 1 << (start % NBBY);
1697 for (i = start; i > end; i--) {
1698 if ((map & bit) == 0)
1699 break;
1700 if ((i & (NBBY - 1)) != 0) {
1701 bit >>= 1;
1702 } else {
1703 map = *mapp--;
1704 bit = 1 << (NBBY - 1);
1705 }
1706 }
1707 back = start - i;
1708 /*
1709 * Account for old cluster and the possibly new forward and
1710 * back clusters.
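 * (e.g. freeing a block that joins a 2-block free cluster behind it
 * and a 3-block free cluster ahead of it: sump[6]++ (with the index
 * capped at fs_contigsumsize), sump[2]--, sump[3]--)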
1711 */
1712 i = back + forw + 1;
1713 if (i > fs->fs_contigsumsize)
1714 i = fs->fs_contigsumsize;
1715 sump[i] += cnt;
1716 if (back > 0)
1717 sump[back] -= cnt;
1718 if (forw > 0)
1719 sump[forw] -= cnt;
1720 /*
1721 * Update cluster summary information.
1722 */
1723 lp = &sump[fs->fs_contigsumsize];
1724 for (i = fs->fs_contigsumsize; i > 0; i--)
1725 if (*lp-- > 0)
1726 break;
1727 fs->fs_maxcluster[cgp->cg_cgx] = i;
1728 }
1729
1730 /*
1731 * Fserr prints the name of a file system with an error diagnostic.
1732 *
1733 * The form of the error message is:
1734 * fs: error message
1735 */
1736 static void
1737 ffs_fserr(fs, uid, cp)
1738 struct fs *fs;
1739 u_int uid;
1740 char *cp;
1741 {
1742
1743 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1744 }