1/*
 2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
 7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
26/*
27 * Copyright (c) 1982, 1986, 1989, 1993
28 * The Regents of the University of California. All rights reserved.
29 *
30 * Redistribution and use in source and binary forms, with or without
31 * modification, are permitted provided that the following conditions
32 * are met:
33 * 1. Redistributions of source code must retain the above copyright
34 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in the
37 * documentation and/or other materials provided with the distribution.
38 * 3. All advertising materials mentioning features or use of this software
39 * must display the following acknowledgement:
40 * This product includes software developed by the University of
41 * California, Berkeley and its contributors.
42 * 4. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 *
58 * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95
59 */
60#include <rev_endian_fs.h>
61#include <vm/vm_pager.h>
62
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/buf.h>
66#include <sys/proc.h>
67#include <sys/vnode.h>
68#include <sys/mount.h>
69#include <sys/kernel.h>
70#include <sys/syslog.h>
 71#include <sys/quota.h>
72
73#include <sys/vm.h>
74
75#include <ufs/ufs/quota.h>
76#include <ufs/ufs/inode.h>
77
78#include <ufs/ffs/fs.h>
79#include <ufs/ffs/ffs_extern.h>
80
81#if REV_ENDIAN_FS
82#include <ufs/ufs/ufs_byte_order.h>
83#include <architecture/byte_order.h>
84#endif /* REV_ENDIAN_FS */
85
86extern u_long nextgennumber;
87
88static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
89static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t));
90static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
91 int));
 92static ino_t ffs_dirpref __P((struct inode *));
93static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
94static void ffs_fserr __P((struct fs *, u_int, char *));
95static u_long ffs_hashalloc
96 __P((struct inode *, int, long, int, u_int32_t (*)()));
97static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
98static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
99 int));
100
101/*
102 * Allocate a block in the file system.
103 *
104 * The size of the requested block is given, which must be some
105 * multiple of fs_fsize and <= fs_bsize.
106 * A preference may be optionally specified. If a preference is given
107 * the following hierarchy is used to allocate a block:
108 * 1) allocate the requested block.
109 * 2) allocate a rotationally optimal block in the same cylinder.
110 * 3) allocate a block in the same cylinder group.
 111 * 4) quadratically rehash into other cylinder groups, until an
112 * available block is located.
 113 * If no block preference is given the following hierarchy is used
114 * to allocate a block:
115 * 1) allocate a block in the cylinder group that contains the
116 * inode for the file.
 117 * 2) quadratically rehash into other cylinder groups, until an
118 * available block is located.
119 */
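/*
 * Illustration with hypothetical geometry (not taken from this file):
 * with fs_fsize = 1024 and fs_bsize = 8192, legal values of `size' are
 * 1024, 2048, ..., 8192 bytes, i.e. one to fs_frag (8) fragments;
 * anything else trips the DIAGNOSTIC "bad size" panic below.
 */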
120ffs_alloc(ip, lbn, bpref, size, cred, bnp)
121 register struct inode *ip;
122 ufs_daddr_t lbn, bpref;
123 int size;
124 struct ucred *cred;
125 ufs_daddr_t *bnp;
126{
127 register struct fs *fs;
128 ufs_daddr_t bno;
129 int cg, error;
130 int devBlockSize=0;
131 *bnp = 0;
132 fs = ip->i_fs;
133#if DIAGNOSTIC
134 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
135 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
136 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
137 panic("ffs_alloc: bad size");
138 }
139 if (cred == NOCRED)
140 panic("ffs_alloc: missing credential\n");
141#endif /* DIAGNOSTIC */
142 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
143 goto nospace;
144 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
145 goto nospace;
146 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
147#if QUOTA
148	if (error = chkdq(ip, (int64_t)size, cred, 0))
149 return (error);
150#endif /* QUOTA */
151 if (bpref >= fs->fs_size)
152 bpref = 0;
153 if (bpref == 0)
154 cg = ino_to_cg(fs, ip->i_number);
155 else
156 cg = dtog(fs, bpref);
157 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
158 (u_int32_t (*)())ffs_alloccg);
159 if (bno > 0) {
160 ip->i_blocks += btodb(size, devBlockSize);
161 ip->i_flag |= IN_CHANGE | IN_UPDATE;
162 *bnp = bno;
163 return (0);
164 }
165#if QUOTA
166 /*
167 * Restore user's disk quota because allocation failed.
168 */
169	(void) chkdq(ip, (int64_t)-size, cred, FORCE);
170#endif /* QUOTA */
171nospace:
172 ffs_fserr(fs, cred->cr_uid, "file system full");
173 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
174 return (ENOSPC);
175}
176
177/*
178 * Reallocate a fragment to a bigger size
179 *
 180 * The number and size of the old block are given, and a preference
 181 * and new size are also specified. The allocator attempts to extend
182 * the original block. Failing that, the regular block allocator is
183 * invoked to get an appropriate block.
184 */
185ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
186 register struct inode *ip;
187 ufs_daddr_t lbprev;
188 ufs_daddr_t bpref;
189 int osize, nsize;
190 struct ucred *cred;
191 struct buf **bpp;
192{
193 register struct fs *fs;
194 struct buf *bp;
195 int cg, request, error;
196 ufs_daddr_t bprev, bno;
197 int devBlockSize=0;
198
199 *bpp = 0;
200 fs = ip->i_fs;
201#if DIAGNOSTIC
202 if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
203 (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
204 printf(
205 "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
206 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
207 panic("ffs_realloccg: bad size");
208 }
209 if (cred == NOCRED)
210 panic("ffs_realloccg: missing credential\n");
211#endif /* DIAGNOSTIC */
212 if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
213 goto nospace;
214 if ((bprev = ip->i_db[lbprev]) == 0) {
215 printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
216 ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
217 panic("ffs_realloccg: bad bprev");
218 }
219 /*
220 * Allocate the extra space in the buffer.
221 */
222 if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
223 brelse(bp);
224 return (error);
225 }
226 VOP_DEVBLOCKSIZE(ip->i_devvp,&devBlockSize);
227
228#if QUOTA
229	if (error = chkdq(ip, (int64_t)(nsize - osize), cred, 0))
230 {
231 brelse(bp);
232 return (error);
233 }
234#endif /* QUOTA */
235 /*
236 * Check for extension in the existing location.
237 */
238 cg = dtog(fs, bprev);
239 if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
240 if (bp->b_blkno != fsbtodb(fs, bno))
241 panic("bad blockno");
242 ip->i_blocks += btodb(nsize - osize, devBlockSize);
243 ip->i_flag |= IN_CHANGE | IN_UPDATE;
244 allocbuf(bp, nsize);
245 bp->b_flags |= B_DONE;
246	bzero((char *)bp->b_data + osize, (u_int)bp->b_bufsize - osize);
247 *bpp = bp;
248 return (0);
249 }
250 /*
251 * Allocate a new disk location.
252 */
253 if (bpref >= fs->fs_size)
254 bpref = 0;
255 switch ((int)fs->fs_optim) {
256 case FS_OPTSPACE:
257 /*
258 * Allocate an exact sized fragment. Although this makes
259 * best use of space, we will waste time relocating it if
260 * the file continues to grow. If the fragmentation is
261 * less than half of the minimum free reserve, we choose
262 * to begin optimizing for time.
263 */
264 request = nsize;
265 if (fs->fs_minfree < 5 ||
266 fs->fs_cstotal.cs_nffree >
267 fs->fs_dsize * fs->fs_minfree / (2 * 100))
268 break;
269 log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
270 fs->fs_fsmnt);
271 fs->fs_optim = FS_OPTTIME;
272 break;
273 case FS_OPTTIME:
274 /*
275 * At this point we have discovered a file that is trying to
276 * grow a small fragment to a larger fragment. To save time,
277 * we allocate a full sized block, then free the unused portion.
278 * If the file continues to grow, the `ffs_fragextend' call
279 * above will be able to grow it in place without further
280 * copying. If aberrant programs cause disk fragmentation to
281 * grow within 2% of the free reserve, we choose to begin
282 * optimizing for space.
283 */
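		/*
		 * Worked example of the two switch-over points (assuming the
		 * common fs_minfree = 5): SPACE changes to TIME once loose
		 * fragments (cs_nffree) fall to fs_dsize * 5 / 200, i.e. 2.5%
		 * of the data blocks, and TIME changes back to SPACE once
		 * they climb to fs_dsize * (5 - 2) / 100, i.e. 3%.
		 */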
284 request = fs->fs_bsize;
285 if (fs->fs_cstotal.cs_nffree <
286 fs->fs_dsize * (fs->fs_minfree - 2) / 100)
287 break;
288 log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
289 fs->fs_fsmnt);
290 fs->fs_optim = FS_OPTSPACE;
291 break;
292 default:
293 printf("dev = 0x%x, optim = %d, fs = %s\n",
294 ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
295 panic("ffs_realloccg: bad optim");
296 /* NOTREACHED */
297 }
298 bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
299 (u_int32_t (*)())ffs_alloccg);
300 if (bno > 0) {
301 bp->b_blkno = fsbtodb(fs, bno);
302 ffs_blkfree(ip, bprev, (long)osize);
303 if (nsize < request)
304 ffs_blkfree(ip, bno + numfrags(fs, nsize),
305 (long)(request - nsize));
306 ip->i_blocks += btodb(nsize - osize, devBlockSize);
307 ip->i_flag |= IN_CHANGE | IN_UPDATE;
308 allocbuf(bp, nsize);
309 bp->b_flags |= B_DONE;
310	bzero((char *)bp->b_data + osize, (u_int)bp->b_bufsize - osize);
311 *bpp = bp;
312 return (0);
313 }
314#if QUOTA
315 /*
316 * Restore user's disk quota because allocation failed.
317 */
318	(void) chkdq(ip, (int64_t)-(nsize - osize), cred, FORCE);
319#endif /* QUOTA */
320 brelse(bp);
321nospace:
322 /*
323 * no space available
324 */
325 ffs_fserr(fs, cred->cr_uid, "file system full");
326 uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
327 return (ENOSPC);
328}
329
330/*
331 * Reallocate a sequence of blocks into a contiguous sequence of blocks.
332 *
333 * The vnode and an array of buffer pointers for a range of sequential
334 * logical blocks to be made contiguous is given. The allocator attempts
335 * to find a range of sequential blocks starting as close as possible to
336 * an fs_rotdelay offset from the end of the allocation for the logical
 337 * block immediately preceding the current range. If successful, the
338 * physical block numbers in the buffer pointers and in the inode are
339 * changed to reflect the new allocation. If unsuccessful, the allocation
340 * is left unchanged. The success in doing the reallocation is returned.
341 * Note that the error return is not reflected back to the user. Rather
342 * the previous block allocation will be used.
343 */
344/* Note: This routine is unused in UBC cluster I/O */
345
346int doasyncfree = 1;
347int doreallocblks = 1;
348
349int
350ffs_reallocblks(ap)
351 struct vop_reallocblks_args *ap;
352{
353 return (ENOSPC);
354}
355
356/*
357 * Allocate an inode in the file system.
358 *
359 * If allocating a directory, use ffs_dirpref to select the inode.
360 * If allocating in a directory, the following hierarchy is followed:
361 * 1) allocate the preferred inode.
362 * 2) allocate an inode in the same cylinder group.
 363 * 3) quadratically rehash into other cylinder groups, until an
364 * available inode is located.
 365 * If no inode preference is given the following hierarchy is used
366 * to allocate an inode:
367 * 1) allocate an inode in cylinder group 0.
 368 * 2) quadratically rehash into other cylinder groups, until an
369 * available inode is located.
370 */
371int
372ffs_valloc(ap)
373 struct vop_valloc_args /* {
374 struct vnode *a_pvp;
375 int a_mode;
376 struct ucred *a_cred;
377 struct vnode **a_vpp;
378 } */ *ap;
379{
380 register struct vnode *pvp = ap->a_pvp;
381 register struct inode *pip;
382 register struct fs *fs;
383 register struct inode *ip;
384 mode_t mode = ap->a_mode;
385 ino_t ino, ipref;
386 int cg, error;
387
388 *ap->a_vpp = NULL;
389 pip = VTOI(pvp);
390 fs = pip->i_fs;
391 if (fs->fs_cstotal.cs_nifree == 0)
392 goto noinodes;
393
394 if ((mode & IFMT) == IFDIR)
395		ipref = ffs_dirpref(pip);
396 else
397 ipref = pip->i_number;
398 if (ipref >= fs->fs_ncg * fs->fs_ipg)
399 ipref = 0;
400 cg = ino_to_cg(fs, ipref);
401 /*
402 * Track the number of dirs created one after another
403 * in a cg without intervening files.
404 */
405 if ((mode & IFMT) == IFDIR) {
406 if (fs->fs_contigdirs[cg] < 255)
407 fs->fs_contigdirs[cg]++;
408 } else {
409 if (fs->fs_contigdirs[cg] > 0)
410 fs->fs_contigdirs[cg]--;
411 }
412 ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, ffs_nodealloccg);
413 if (ino == 0)
414 goto noinodes;
415	error = VFS_VGET(pvp->v_mount, (void *)ino, ap->a_vpp);
416 if (error) {
417 VOP_VFREE(pvp, ino, mode);
418 return (error);
419 }
420 ip = VTOI(*ap->a_vpp);
421 if (ip->i_mode) {
422 printf("mode = 0%o, inum = %d, fs = %s\n",
423 ip->i_mode, ip->i_number, fs->fs_fsmnt);
424 panic("ffs_valloc: dup alloc");
425 }
426 if (ip->i_blocks) { /* XXX */
427 printf("free inode %s/%d had %d blocks\n",
428 fs->fs_fsmnt, ino, ip->i_blocks);
429 ip->i_blocks = 0;
430 }
431 ip->i_flags = 0;
432 /*
433 * Set up a new generation number for this inode.
434 */
435 if (++nextgennumber < (u_long)time.tv_sec)
436 nextgennumber = time.tv_sec;
437 ip->i_gen = nextgennumber;
438 return (0);
439noinodes:
440 ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
441 uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
442 return (ENOSPC);
443}
444
445/*
 446 * Find a cylinder group to place a directory.
 447 *
448 * The policy implemented by this algorithm is to allocate a
449 * directory inode in the same cylinder group as its parent
 450 * directory, but also to reserve space for its files' inodes
451 * and data. Restrict the number of directories which may be
452 * allocated one after another in the same cylinder group
453 * without intervening allocation of files.
454 */
455static ino_t
456ffs_dirpref(pip)
457 struct inode *pip;
 458{
459 register struct fs *fs;
460 int cg, prefcg, dirsize, cgsize;
461 int avgifree, avgbfree, avgndir, curdirsize;
462 int minifree, minbfree, maxndir;
463 int mincg, minndir;
464 int maxcontigdirs;
 465
 466	fs = pip->i_fs;
 467	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
468 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
469 avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
470
471 /*
472 * Force allocation in another cg if creating a first level dir.
473 */
474 if (ITOV(pip)->v_flag & VROOT) {
475#ifdef __APPLE__
476 prefcg = random() % fs->fs_ncg;
477#else
478 prefcg = arc4random() % fs->fs_ncg;
479#endif
480 mincg = prefcg;
481 minndir = fs->fs_ipg;
482 for (cg = prefcg; cg < fs->fs_ncg; cg++)
483 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
484 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
485 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
486 mincg = cg;
487 minndir = fs->fs_cs(fs, cg).cs_ndir;
488 }
489 for (cg = 0; cg < prefcg; cg++)
490 if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
491 fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
492 fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
493 mincg = cg;
494 minndir = fs->fs_cs(fs, cg).cs_ndir;
495 }
496 return ((ino_t)(fs->fs_ipg * mincg));
497 }
498
499 /*
 500 * Count various limits which are used for
501 * optimal allocation of a directory inode.
502 */
503 maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
504 minifree = avgifree - fs->fs_ipg / 4;
505 if (minifree < 0)
506 minifree = 0;
507 minbfree = avgbfree - fs->fs_fpg / fs->fs_frag / 4;
508 if (minbfree < 0)
509 minbfree = 0;
510 cgsize = fs->fs_fsize * fs->fs_fpg;
511 dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
512 curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
513 if (dirsize < curdirsize)
514 dirsize = curdirsize;
515 maxcontigdirs = min(cgsize / dirsize, 255);
516 if (fs->fs_avgfpdir > 0)
517 maxcontigdirs = min(maxcontigdirs,
518 fs->fs_ipg / fs->fs_avgfpdir);
519 if (maxcontigdirs == 0)
520 maxcontigdirs = 1;
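	/*
	 * Illustration with hypothetical tuning values: fs_avgfilesize =
	 * 16384 and fs_avgfpdir = 64 give dirsize = 1 MB, so a 64 MB
	 * cylinder group yields cgsize / dirsize = 64 contiguous dirs,
	 * further capped by fs_ipg / fs_avgfpdir and by 255 (the range
	 * of a fs_contigdirs[] counter).
	 */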
521
522 /*
523 * Limit number of dirs in one cg and reserve space for
524 * regular files, but only if we have no deficit in
525 * inodes or space.
526 */
527 prefcg = ino_to_cg(fs, pip->i_number);
528 for (cg = prefcg; cg < fs->fs_ncg; cg++)
529 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
530 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
531 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
532 if (fs->fs_contigdirs[cg] < maxcontigdirs)
533 return ((ino_t)(fs->fs_ipg * cg));
534 }
535 for (cg = 0; cg < prefcg; cg++)
536 if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
537 fs->fs_cs(fs, cg).cs_nifree >= minifree &&
538 fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
539 if (fs->fs_contigdirs[cg] < maxcontigdirs)
540 return ((ino_t)(fs->fs_ipg * cg));
 541 }
542 /*
543 * This is a backstop when we have deficit in space.
544 */
545 for (cg = prefcg; cg < fs->fs_ncg; cg++)
546 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
547 return ((ino_t)(fs->fs_ipg * cg));
548 for (cg = 0; cg < prefcg; cg++)
549 if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
550 break;
551 return ((ino_t)(fs->fs_ipg * cg));
552}
553
554/*
555 * Select the desired position for the next block in a file. The file is
556 * logically divided into sections. The first section is composed of the
557 * direct blocks. Each additional section contains fs_maxbpg blocks.
558 *
559 * If no blocks have been allocated in the first section, the policy is to
560 * request a block in the same cylinder group as the inode that describes
561 * the file. If no blocks have been allocated in any other section, the
562 * policy is to place the section in a cylinder group with a greater than
563 * average number of free blocks. An appropriate cylinder group is found
564 * by using a rotor that sweeps the cylinder groups. When a new group of
565 * blocks is needed, the sweep begins in the cylinder group following the
566 * cylinder group from which the previous allocation was made. The sweep
567 * continues until a cylinder group with greater than the average number
568 * of free blocks is found. If the allocation is for the first block in an
569 * indirect block, the information on the previous allocation is unavailable;
570 * here a best guess is made based upon the logical block number being
571 * allocated.
572 *
573 * If a section is already partially allocated, the policy is to
574 * contiguously allocate fs_maxcontig blocks. The end of one of these
575 * contiguous blocks and the beginning of the next is physically separated
576 * so that the disk head will be in transit between them for at least
577 * fs_rotdelay milliseconds. This is to allow time for the processor to
578 * schedule another I/O transfer.
579 */
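/*
 * Example of the sectioning (hypothetical tuning): with 8 KB blocks and
 * fs_maxbpg = 2048, the first section is the NDADDR direct blocks and
 * every later section covers 2048 blocks (16 MB); each time indx crosses
 * a fs_maxbpg boundary, the rotor sweep below moves to a cylinder group
 * with an above-average number of free blocks.
 */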
580ufs_daddr_t
581ffs_blkpref(ip, lbn, indx, bap)
582 struct inode *ip;
583 ufs_daddr_t lbn;
584 int indx;
585 ufs_daddr_t *bap;
586{
587 register struct fs *fs;
588 register int cg;
589 int avgbfree, startcg;
590 ufs_daddr_t nextblk;
591#if REV_ENDIAN_FS
592 daddr_t prev=0;
593 struct vnode *vp=ITOV(ip);
594 struct mount *mp=vp->v_mount;
595 int rev_endian=(mp->mnt_flag & MNT_REVEND);
596#endif /* REV_ENDIAN_FS */
597
598 fs = ip->i_fs;
599#if REV_ENDIAN_FS
600 if (indx && bap) {
601 if (rev_endian) {
602 if (bap != &ip->i_db[0])
603 prev = NXSwapLong(bap[indx - 1]);
604 else
605 prev = bap[indx - 1];
606 } else prev = bap[indx - 1];
607 }
608 if (indx % fs->fs_maxbpg == 0 || prev == 0)
609#else /* REV_ENDIAN_FS */
610 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0)
611#endif /* REV_ENDIAN_FS */
612 {
613 if (lbn < NDADDR) {
614 cg = ino_to_cg(fs, ip->i_number);
615 return (fs->fs_fpg * cg + fs->fs_frag);
616 }
617 /*
618 * Find a cylinder with greater than average number of
619 * unused data blocks.
620 */
621#if REV_ENDIAN_FS
622 if (indx == 0 || prev == 0)
623#else /* REV_ENDIAN_FS */
624 if (indx == 0 || bap[indx - 1] == 0)
625#endif /* REV_ENDIAN_FS */
626 startcg =
627 ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
628 else
629#if REV_ENDIAN_FS
630 startcg = dtog(fs, prev) + 1;
631#else /* REV_ENDIAN_FS */
632 startcg = dtog(fs, bap[indx - 1]) + 1;
633#endif /* REV_ENDIAN_FS */
634 startcg %= fs->fs_ncg;
635 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
636 for (cg = startcg; cg < fs->fs_ncg; cg++)
637 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
638 fs->fs_cgrotor = cg;
639 return (fs->fs_fpg * cg + fs->fs_frag);
640 }
641 for (cg = 0; cg <= startcg; cg++)
642 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
643 fs->fs_cgrotor = cg;
644 return (fs->fs_fpg * cg + fs->fs_frag);
645 }
646 return (NULL);
647 }
648 /*
649 * One or more previous blocks have been laid out. If less
650 * than fs_maxcontig previous blocks are contiguous, the
651 * next block is requested contiguously, otherwise it is
652 * requested rotationally delayed by fs_rotdelay milliseconds.
653 */
654#if REV_ENDIAN_FS
655 if (rev_endian) {
656 nextblk = prev + fs->fs_frag;
657 if (indx < fs->fs_maxcontig) {
658 return (nextblk);
659 }
660 if (bap != &ip->i_db[0])
661 prev = NXSwapLong(bap[indx - fs->fs_maxcontig]);
662 else
663 prev = bap[indx - fs->fs_maxcontig];
664 if (prev + blkstofrags(fs, fs->fs_maxcontig) != nextblk)
665 return (nextblk);
666 } else {
667#endif /* REV_ENDIAN_FS */
668 nextblk = bap[indx - 1] + fs->fs_frag;
669 if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
670 blkstofrags(fs, fs->fs_maxcontig) != nextblk)
671 return (nextblk);
672#if REV_ENDIAN_FS
673 }
674#endif /* REV_ENDIAN_FS */
675 if (fs->fs_rotdelay != 0)
676 /*
677 * Here we convert ms of delay to frags as:
678 * (frags) = (ms) * (rev/sec) * (sect/rev) /
679 * ((sect/frag) * (ms/sec))
680 * then round up to the next block.
681 */
682 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
683 (NSPF(fs) * 1000), fs->fs_frag);
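	/*
	 * Worked example of the conversion above (hypothetical geometry):
	 * rotdelay = 4 ms, rps = 60, nsect = 32 and NSPF(fs) = 2 give
	 * 4 * 60 * 32 / (2 * 1000) = 3 frags, rounded up to the next full
	 * block, i.e. 8 frags when fs_frag is 8.
	 */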
684 return (nextblk);
685}
686
687/*
688 * Implement the cylinder overflow algorithm.
689 *
690 * The policy implemented by this algorithm is:
691 * 1) allocate the block in its requested cylinder group.
 692 * 2) quadratically rehash on the cylinder group number.
693 * 3) brute force search for a free block.
694 */
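/*
 * Probe order, for illustration: starting from cylinder group icg = 5 in
 * a 16-group file system, the quadratic rehash tries groups 6, 8, 12 and
 * 4 (offsets 1, 2, 4, 8 mod fs_ncg); if those all fail, the brute-force
 * pass scans the remaining groups starting at (icg + 2) % fs_ncg.
 */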
695/*VARARGS5*/
696static u_long
697ffs_hashalloc(ip, cg, pref, size, allocator)
698 struct inode *ip;
699 int cg;
700 long pref;
701 int size; /* size for data blocks, mode for inodes */
702 u_int32_t (*allocator)();
703{
704 register struct fs *fs;
705 long result;
706 int i, icg = cg;
707
708 fs = ip->i_fs;
709 /*
710 * 1: preferred cylinder group
711 */
712 result = (*allocator)(ip, cg, pref, size);
713 if (result)
714 return (result);
715 /*
716 * 2: quadratic rehash
717 */
718 for (i = 1; i < fs->fs_ncg; i *= 2) {
719 cg += i;
720 if (cg >= fs->fs_ncg)
721 cg -= fs->fs_ncg;
722 result = (*allocator)(ip, cg, 0, size);
723 if (result)
724 return (result);
725 }
726 /*
727 * 3: brute force search
728 * Note that we start at i == 2, since 0 was checked initially,
729 * and 1 is always checked in the quadratic rehash.
730 */
731 cg = (icg + 2) % fs->fs_ncg;
732 for (i = 2; i < fs->fs_ncg; i++) {
733 result = (*allocator)(ip, cg, 0, size);
734 if (result)
735 return (result);
736 cg++;
737 if (cg == fs->fs_ncg)
738 cg = 0;
739 }
740 return (NULL);
741}
742
743/*
744 * Determine whether a fragment can be extended.
745 *
746 * Check to see if the necessary fragments are available, and
747 * if they are, allocate them.
748 */
749static ufs_daddr_t
750ffs_fragextend(ip, cg, bprev, osize, nsize)
751 struct inode *ip;
752 int cg;
753 long bprev;
754 int osize, nsize;
755{
756 register struct fs *fs;
757 register struct cg *cgp;
758 struct buf *bp;
759 long bno;
760 int frags, bbase;
761 int i, error;
762#if REV_ENDIAN_FS
763 struct vnode *vp=ITOV(ip);
764 struct mount *mp=vp->v_mount;
765 int rev_endian=(mp->mnt_flag & MNT_REVEND);
766#endif /* REV_ENDIAN_FS */
767
768 fs = ip->i_fs;
769 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
770 return (NULL);
771 frags = numfrags(fs, nsize); /* number of fragments needed */
 772	bbase = fragnum(fs, bprev);	/* offset in a frag (it is mod fragsize) */
773 if (bbase > fragnum(fs, (bprev + frags - 1))) {
774 /* cannot extend across a block boundary */
775 return (NULL);
776 }
777 /* read corresponding cylinder group info */
778 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
779 (int)fs->fs_cgsize, NOCRED, &bp);
780 if (error) {
781 brelse(bp);
782 return (NULL);
783 }
784 cgp = (struct cg *)bp->b_data;
785#if REV_ENDIAN_FS
786 if (rev_endian) {
787 byte_swap_cgin(cgp, fs);
788 }
789#endif /* REV_ENDIAN_FS */
790
791 if (!cg_chkmagic(cgp)) {
792#if REV_ENDIAN_FS
793 if (rev_endian)
794 byte_swap_cgout(cgp,fs);
795#endif /* REV_ENDIAN_FS */
796 brelse(bp);
797 return (NULL);
798 }
799 cgp->cg_time = time.tv_sec;
800 bno = dtogd(fs, bprev);
801 for (i = numfrags(fs, osize); i < frags; i++)
802 if (isclr(cg_blksfree(cgp), bno + i)) {
803#if REV_ENDIAN_FS
804 if (rev_endian)
805 byte_swap_cgout(cgp,fs);
806#endif /* REV_ENDIAN_FS */
807 brelse(bp);
808 return (NULL);
809 }
810 /*
811 * the current fragment can be extended
812 * deduct the count on fragment being extended into
813 * increase the count on the remaining fragment (if any)
814 * allocate the extended piece
815 */
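	/*
	 * Example with 1 KB fragments (hypothetical sizes): growing a 2 KB
	 * piece to 6 KB claims the four fragments that follow the existing
	 * two; the cg_frsum count for the free run being consumed is
	 * decremented and, if that run extended past the new end, the count
	 * for its remainder is incremented.
	 */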
816 for (i = frags; i < fs->fs_frag - bbase; i++)
817 if (isclr(cg_blksfree(cgp), bno + i))
818 break;
819 cgp->cg_frsum[i - numfrags(fs, osize)]--;
820 if (i != frags)
821 cgp->cg_frsum[i - frags]++;
822 for (i = numfrags(fs, osize); i < frags; i++) {
823 clrbit(cg_blksfree(cgp), bno + i);
824 cgp->cg_cs.cs_nffree--;
825 fs->fs_cstotal.cs_nffree--;
826 fs->fs_cs(fs, cg).cs_nffree--;
827 }
828 fs->fs_fmod = 1;
829#if REV_ENDIAN_FS
830 if (rev_endian)
831 byte_swap_cgout(cgp,fs);
832#endif /* REV_ENDIAN_FS */
833 bdwrite(bp);
834 return (bprev);
835}
836
837/*
838 * Determine whether a block can be allocated.
839 *
840 * Check to see if a block of the appropriate size is available,
841 * and if it is, allocate it.
842 */
843static ufs_daddr_t
844ffs_alloccg(ip, cg, bpref, size)
845 struct inode *ip;
846 int cg;
847 ufs_daddr_t bpref;
848 int size;
849{
850 register struct fs *fs;
851 register struct cg *cgp;
852 struct buf *bp;
853 register int i;
854 int error, bno, frags, allocsiz;
855#if REV_ENDIAN_FS
856 struct vnode *vp=ITOV(ip);
857 struct mount *mp=vp->v_mount;
858 int rev_endian=(mp->mnt_flag & MNT_REVEND);
859#endif /* REV_ENDIAN_FS */
860
861 fs = ip->i_fs;
862 if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
863 return (NULL);
864 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
865 (int)fs->fs_cgsize, NOCRED, &bp);
866 if (error) {
867 brelse(bp);
868 return (NULL);
869 }
870 cgp = (struct cg *)bp->b_data;
871#if REV_ENDIAN_FS
872 if (rev_endian)
873 byte_swap_cgin(cgp,fs);
874#endif /* REV_ENDIAN_FS */
875 if (!cg_chkmagic(cgp) ||
876 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
877#if REV_ENDIAN_FS
878 if (rev_endian)
879 byte_swap_cgout(cgp,fs);
880#endif /* REV_ENDIAN_FS */
881 brelse(bp);
882 return (NULL);
883 }
884 cgp->cg_time = time.tv_sec;
885 if (size == fs->fs_bsize) {
886 bno = ffs_alloccgblk(fs, cgp, bpref);
887#if REV_ENDIAN_FS
888 if (rev_endian)
889 byte_swap_cgout(cgp,fs);
890#endif /* REV_ENDIAN_FS */
891 bdwrite(bp);
892 return (bno);
893 }
894 /*
895 * check to see if any fragments are already available
896 * allocsiz is the size which will be allocated, hacking
897 * it down to a smaller size if necessary
898 */
899 frags = numfrags(fs, size);
900 for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
901 if (cgp->cg_frsum[allocsiz] != 0)
902 break;
903 if (allocsiz == fs->fs_frag) {
904 /*
905 * no fragments were available, so a block will be
906 * allocated, and hacked up
907 */
908 if (cgp->cg_cs.cs_nbfree == 0) {
909#if REV_ENDIAN_FS
910 if (rev_endian)
911 byte_swap_cgout(cgp,fs);
912#endif /* REV_ENDIAN_FS */
913 brelse(bp);
914 return (NULL);
915 }
916 bno = ffs_alloccgblk(fs, cgp, bpref);
917 bpref = dtogd(fs, bno);
918 for (i = frags; i < fs->fs_frag; i++)
919 setbit(cg_blksfree(cgp), bpref + i);
920 i = fs->fs_frag - frags;
921 cgp->cg_cs.cs_nffree += i;
922 fs->fs_cstotal.cs_nffree += i;
923 fs->fs_cs(fs, cg).cs_nffree += i;
924 fs->fs_fmod = 1;
925 cgp->cg_frsum[i]++;
926#if REV_ENDIAN_FS
927 if (rev_endian)
928 byte_swap_cgout(cgp,fs);
929#endif /* REV_ENDIAN_FS */
930 bdwrite(bp);
931 return (bno);
932 }
933 bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
934 if (bno < 0) {
935#if REV_ENDIAN_FS
936 if (rev_endian)
937 byte_swap_cgout(cgp,fs);
938#endif /* REV_ENDIAN_FS */
939 brelse(bp);
940 return (NULL);
941 }
942 for (i = 0; i < frags; i++)
943 clrbit(cg_blksfree(cgp), bno + i);
944 cgp->cg_cs.cs_nffree -= frags;
945 fs->fs_cstotal.cs_nffree -= frags;
946 fs->fs_cs(fs, cg).cs_nffree -= frags;
947 fs->fs_fmod = 1;
948 cgp->cg_frsum[allocsiz]--;
949 if (frags != allocsiz)
950 cgp->cg_frsum[allocsiz - frags]++;
951#if REV_ENDIAN_FS
952 if (rev_endian)
953 byte_swap_cgout(cgp,fs);
954#endif /* REV_ENDIAN_FS */
955 bdwrite(bp);
956 return (cg * fs->fs_fpg + bno);
957}
958
959/*
960 * Allocate a block in a cylinder group.
961 *
962 * This algorithm implements the following policy:
963 * 1) allocate the requested block.
964 * 2) allocate a rotationally optimal block in the same cylinder.
965 * 3) allocate the next available block on the block rotor for the
966 * specified cylinder group.
967 * Note that this routine only allocates fs_bsize blocks; these
968 * blocks may be fragmented by the routine that allocates them.
969 */
970static ufs_daddr_t
971ffs_alloccgblk(fs, cgp, bpref)
972 register struct fs *fs;
973 register struct cg *cgp;
974 ufs_daddr_t bpref;
975{
976 ufs_daddr_t bno, blkno;
977 int cylno, pos, delta;
978 short *cylbp;
979 register int i;
980
981 if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
982 bpref = cgp->cg_rotor;
983 goto norot;
984 }
985 bpref = blknum(fs, bpref);
986 bpref = dtogd(fs, bpref);
987 /*
988 * if the requested block is available, use it
989 */
990 if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
991 bno = bpref;
992 goto gotit;
993 }
994 if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
995 /*
996 * Block layout information is not available.
997 * Leaving bpref unchanged means we take the
998 * next available free block following the one
999 * we just allocated. Hopefully this will at
1000 * least hit a track cache on drives of unknown
1001 * geometry (e.g. SCSI).
1002 */
1003 goto norot;
1004 }
1005 /*
1006 * check for a block available on the same cylinder
1007 */
1008 cylno = cbtocylno(fs, bpref);
1009 if (cg_blktot(cgp)[cylno] == 0)
1010 goto norot;
1011 /*
1012 * check the summary information to see if a block is
1013 * available in the requested cylinder starting at the
1014 * requested rotational position and proceeding around.
1015 */
1016 cylbp = cg_blks(fs, cgp, cylno);
1017 pos = cbtorpos(fs, bpref);
1018 for (i = pos; i < fs->fs_nrpos; i++)
1019 if (cylbp[i] > 0)
1020 break;
1021 if (i == fs->fs_nrpos)
1022 for (i = 0; i < pos; i++)
1023 if (cylbp[i] > 0)
1024 break;
1025 if (cylbp[i] > 0) {
1026 /*
1027 * found a rotational position, now find the actual
1028 * block. A panic if none is actually there.
1029 */
1030 pos = cylno % fs->fs_cpc;
1031 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1032 if (fs_postbl(fs, pos)[i] == -1) {
1033 printf("pos = %d, i = %d, fs = %s\n",
1034 pos, i, fs->fs_fsmnt);
1035 panic("ffs_alloccgblk: cyl groups corrupted");
1036 }
1037 for (i = fs_postbl(fs, pos)[i];; ) {
1038 if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
1039 bno = blkstofrags(fs, (bno + i));
1040 goto gotit;
1041 }
1042 delta = fs_rotbl(fs)[i];
1043 if (delta <= 0 ||
1044 delta + i > fragstoblks(fs, fs->fs_fpg))
1045 break;
1046 i += delta;
1047 }
1048 printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
1049 panic("ffs_alloccgblk: can't find blk in cyl");
1050 }
1051norot:
1052 /*
1053 * no blocks in the requested cylinder, so take next
1054 * available one in this cylinder group.
1055 */
1056 bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
1057 if (bno < 0)
1058 return (NULL);
1059 cgp->cg_rotor = bno;
1060gotit:
1061 blkno = fragstoblks(fs, bno);
1062 ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
1063 ffs_clusteracct(fs, cgp, blkno, -1);
1064 cgp->cg_cs.cs_nbfree--;
1065 fs->fs_cstotal.cs_nbfree--;
1066 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
1067 cylno = cbtocylno(fs, bno);
1068 cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
1069 cg_blktot(cgp)[cylno]--;
1070 fs->fs_fmod = 1;
1071 return (cgp->cg_cgx * fs->fs_fpg + bno);
1072}
1073
1074/*
1075 * Determine whether a cluster can be allocated.
1076 *
1077 * We do not currently check for optimal rotational layout if there
1078 * are multiple choices in the same cylinder group. Instead we just
1079 * take the first one that we find following bpref.
1080 */
1081static ufs_daddr_t
1082ffs_clusteralloc(ip, cg, bpref, len)
1083 struct inode *ip;
1084 int cg;
1085 ufs_daddr_t bpref;
1086 int len;
1087{
1088 register struct fs *fs;
1089 register struct cg *cgp;
1090 struct buf *bp;
1091 int i, got, run, bno, bit, map;
1092 u_char *mapp;
1093 int32_t *lp;
1094#if REV_ENDIAN_FS
1095 struct vnode *vp=ITOV(ip);
1096 struct mount *mp=vp->v_mount;
1097 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1098#endif /* REV_ENDIAN_FS */
1099
1100 fs = ip->i_fs;
1101 if (fs->fs_maxcluster[cg] < len)
1102 return (NULL);
1103 if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
1104 NOCRED, &bp))
1105 goto fail;
1106 cgp = (struct cg *)bp->b_data;
1107#if REV_ENDIAN_FS
1108 if (rev_endian)
1109 byte_swap_cgin(cgp,fs);
1110#endif /* REV_ENDIAN_FS */
1111 if (!cg_chkmagic(cgp)) {
1112#if REV_ENDIAN_FS
1113 if (rev_endian)
1114 byte_swap_cgout(cgp,fs);
1115#endif /* REV_ENDIAN_FS */
1116 goto fail;
1117 }
1118 /*
1119 * Check to see if a cluster of the needed size (or bigger) is
1120 * available in this cylinder group.
1121 */
1122 lp = &cg_clustersum(cgp)[len];
1123 for (i = len; i <= fs->fs_contigsumsize; i++)
1124 if (*lp++ > 0)
1125 break;
1126 if (i > fs->fs_contigsumsize) {
1127 /*
1128 * This is the first time looking for a cluster in this
1129 * cylinder group. Update the cluster summary information
1130 * to reflect the true maximum sized cluster so that
1131 * future cluster allocation requests can avoid reading
1132 * the cylinder group map only to find no clusters.
1133 */
1134 lp = &cg_clustersum(cgp)[len - 1];
1135 for (i = len - 1; i > 0; i--)
1136 if (*lp-- > 0)
1137 break;
1138 fs->fs_maxcluster[cg] = i;
1139#if REV_ENDIAN_FS
1140 if (rev_endian)
1141 byte_swap_cgout(cgp,fs);
1142#endif /* REV_ENDIAN_FS */
1143 goto fail;
1144 }
1145 /*
1146 * Search the cluster map to find a big enough cluster.
1147 * We take the first one that we find, even if it is larger
1148 * than we need as we prefer to get one close to the previous
1149 * block allocation. We do not search before the current
1150 * preference point as we do not want to allocate a block
1151 * that is allocated before the previous one (as we will
1152 * then have to wait for another pass of the elevator
1153 * algorithm before it will be read). We prefer to fail and
1154 * be recalled to try an allocation in the next cylinder group.
1155 */
1156 if (dtog(fs, bpref) != cg)
1157 bpref = 0;
1158 else
1159 bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
1160 mapp = &cg_clustersfree(cgp)[bpref / NBBY];
1161 map = *mapp++;
1162 bit = 1 << (bpref % NBBY);
1163 for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
1164 if ((map & bit) == 0) {
1165 run = 0;
1166 } else {
1167 run++;
1168 if (run == len)
1169 break;
1170 }
1171 if ((got & (NBBY - 1)) != (NBBY - 1)) {
1172 bit <<= 1;
1173 } else {
1174 map = *mapp++;
1175 bit = 1;
1176 }
1177 }
1178 if (got == cgp->cg_nclusterblks) {
1179#if REV_ENDIAN_FS
1180 if (rev_endian)
1181 byte_swap_cgout(cgp,fs);
1182#endif /* REV_ENDIAN_FS */
1183 goto fail;
1184 }
1185 /*
1186 * Allocate the cluster that we have found.
1187 */
1188 for (i = 1; i <= len; i++)
1189 if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
1190 panic("ffs_clusteralloc: map mismatch");
1191 bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
1192 if (dtog(fs, bno) != cg)
1193 panic("ffs_clusteralloc: allocated out of group");
1194 len = blkstofrags(fs, len);
1195 for (i = 0; i < len; i += fs->fs_frag)
1196 if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i)
1197 panic("ffs_clusteralloc: lost block");
1198#if REV_ENDIAN_FS
1199 if (rev_endian)
1200 byte_swap_cgout(cgp,fs);
1201#endif /* REV_ENDIAN_FS */
1202 bdwrite(bp);
1203 return (bno);
1204
1205fail:
1206 brelse(bp);
1207 return (0);
1208}
1209
1210/*
1211 * Determine whether an inode can be allocated.
1212 *
1213 * Check to see if an inode is available, and if it is,
1214 * allocate it using the following policy:
1215 * 1) allocate the requested inode.
1216 * 2) allocate the next available inode after the requested
1217 * inode in the specified cylinder group.
1218 */
1219static ino_t
1220ffs_nodealloccg(ip, cg, ipref, mode)
1221 struct inode *ip;
1222 int cg;
1223 ufs_daddr_t ipref;
1224 int mode;
1225{
1226 register struct fs *fs;
1227 register struct cg *cgp;
1228 struct buf *bp;
1229 int error, start, len, loc, map, i;
1230#if REV_ENDIAN_FS
1231 struct vnode *vp=ITOV(ip);
1232 struct mount *mp=vp->v_mount;
1233 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1234#endif /* REV_ENDIAN_FS */
1235
1236 fs = ip->i_fs;
1237 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1238 return (NULL);
1239 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1240 (int)fs->fs_cgsize, NOCRED, &bp);
1241 if (error) {
1242 brelse(bp);
1243 return (NULL);
1244 }
1245 cgp = (struct cg *)bp->b_data;
1246#if REV_ENDIAN_FS
1247 if (rev_endian)
1248 byte_swap_cgin(cgp,fs);
1249#endif /* REV_ENDIAN_FS */
1250 if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
1251#if REV_ENDIAN_FS
1252 if (rev_endian)
1253 byte_swap_cgout(cgp,fs);
1254#endif /* REV_ENDIAN_FS */
1255 brelse(bp);
1256 return (NULL);
1257 }
1258
1259 cgp->cg_time = time.tv_sec;
1260 if (ipref) {
1261 ipref %= fs->fs_ipg;
1262 if (isclr(cg_inosused(cgp), ipref))
1263 goto gotit;
1264 }
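	/*
	 * The scan below treats cg_inosused() as a byte map: skpc() skips
	 * bytes equal to 0xff (all eight inodes in that byte in use),
	 * starting at the rotor and wrapping around once, and the bit loop
	 * that follows picks the first clear bit in the byte it stops on.
	 */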
1265 start = cgp->cg_irotor / NBBY;
1266 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1267 loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
1268 if (loc == 0) {
1269 len = start + 1;
1270 start = 0;
1271 loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
1272 if (loc == 0) {
1273 printf("cg = %d, irotor = %d, fs = %s\n",
1274 cg, cgp->cg_irotor, fs->fs_fsmnt);
1275 panic("ffs_nodealloccg: map corrupted");
1276 /* NOTREACHED */
1277 }
1278 }
1279 i = start + len - loc;
1280 map = cg_inosused(cgp)[i];
1281 ipref = i * NBBY;
1282 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1283 if ((map & i) == 0) {
1284 cgp->cg_irotor = ipref;
1285 goto gotit;
1286 }
1287 }
1288 printf("fs = %s\n", fs->fs_fsmnt);
1289 panic("ffs_nodealloccg: block not in map");
1290 /* NOTREACHED */
1291gotit:
1292 setbit(cg_inosused(cgp), ipref);
1293 cgp->cg_cs.cs_nifree--;
1294 fs->fs_cstotal.cs_nifree--;
1295 fs->fs_cs(fs, cg).cs_nifree--;
1296 fs->fs_fmod = 1;
1297 if ((mode & IFMT) == IFDIR) {
1298 cgp->cg_cs.cs_ndir++;
1299 fs->fs_cstotal.cs_ndir++;
1300 fs->fs_cs(fs, cg).cs_ndir++;
1301 }
1302#if REV_ENDIAN_FS
1303 if (rev_endian)
1304 byte_swap_cgout(cgp,fs);
1305#endif /* REV_ENDIAN_FS */
1306 bdwrite(bp);
1307 return (cg * fs->fs_ipg + ipref);
1308}
1309
1310/*
1311 * Free a block or fragment.
1312 *
1313 * The specified block or fragment is placed back in the
1314 * free map. If a fragment is deallocated, a possible
1315 * block reassembly is checked.
1316 */
1317ffs_blkfree(ip, bno, size)
1318 register struct inode *ip;
1319 ufs_daddr_t bno;
1320 long size;
1321{
1322 register struct fs *fs;
1323 register struct cg *cgp;
1324 struct buf *bp;
1325 ufs_daddr_t blkno;
1326 int i, error, cg, blk, frags, bbase;
1327#if REV_ENDIAN_FS
1328 struct vnode *vp=ITOV(ip);
1329 struct mount *mp=vp->v_mount;
1330 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1331#endif /* REV_ENDIAN_FS */
1332 fs = ip->i_fs;
1333 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1334 printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
1335 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
1336 panic("blkfree: bad size");
1337 }
1338 cg = dtog(fs, bno);
1339 if ((u_int)bno >= fs->fs_size) {
1340 printf("bad block %d, ino %d\n", bno, ip->i_number);
1341 ffs_fserr(fs, ip->i_uid, "bad block");
1342 return;
1343 }
1344 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1345 (int)fs->fs_cgsize, NOCRED, &bp);
1346 if (error) {
1347 brelse(bp);
1348 return;
1349 }
1350 cgp = (struct cg *)bp->b_data;
1351#if REV_ENDIAN_FS
1352 if (rev_endian)
1353 byte_swap_cgin(cgp,fs);
1354#endif /* REV_ENDIAN_FS */
1355 if (!cg_chkmagic(cgp)) {
1356#if REV_ENDIAN_FS
1357 if (rev_endian)
1358 byte_swap_cgout(cgp,fs);
1359#endif /* REV_ENDIAN_FS */
1360 brelse(bp);
1361 return;
1362 }
1363 cgp->cg_time = time.tv_sec;
1364 bno = dtogd(fs, bno);
1365 if (size == fs->fs_bsize) {
1366 blkno = fragstoblks(fs, bno);
1367 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1368 printf("dev = 0x%x, block = %d, fs = %s\n",
1369 ip->i_dev, bno, fs->fs_fsmnt);
1370 panic("blkfree: freeing free block");
1371 }
1372 ffs_setblock(fs, cg_blksfree(cgp), blkno);
1373 ffs_clusteracct(fs, cgp, blkno, 1);
1374 cgp->cg_cs.cs_nbfree++;
1375 fs->fs_cstotal.cs_nbfree++;
1376 fs->fs_cs(fs, cg).cs_nbfree++;
1377 i = cbtocylno(fs, bno);
1378 cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
1379 cg_blktot(cgp)[i]++;
1380 } else {
1381 bbase = bno - fragnum(fs, bno);
1382 /*
1383 * decrement the counts associated with the old frags
1384 */
1385 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1386 ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
1387 /*
1388 * deallocate the fragment
1389 */
1390 frags = numfrags(fs, size);
1391 for (i = 0; i < frags; i++) {
1392 if (isset(cg_blksfree(cgp), bno + i)) {
1393 printf("dev = 0x%x, block = %d, fs = %s\n",
1394 ip->i_dev, bno + i, fs->fs_fsmnt);
1395 panic("blkfree: freeing free frag");
1396 }
1397 setbit(cg_blksfree(cgp), bno + i);
1398 }
1399 cgp->cg_cs.cs_nffree += i;
1400 fs->fs_cstotal.cs_nffree += i;
1401 fs->fs_cs(fs, cg).cs_nffree += i;
1402 /*
1403 * add back in counts associated with the new frags
1404 */
1405 blk = blkmap(fs, cg_blksfree(cgp), bbase);
1406 ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
1407 /*
1408 * if a complete block has been reassembled, account for it
1409 */
1410 blkno = fragstoblks(fs, bbase);
1411 if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
1412 cgp->cg_cs.cs_nffree -= fs->fs_frag;
1413 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
1414 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
1415 ffs_clusteracct(fs, cgp, blkno, 1);
1416 cgp->cg_cs.cs_nbfree++;
1417 fs->fs_cstotal.cs_nbfree++;
1418 fs->fs_cs(fs, cg).cs_nbfree++;
1419 i = cbtocylno(fs, bbase);
1420 cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
1421 cg_blktot(cgp)[i]++;
1422 }
1423 }
1424 fs->fs_fmod = 1;
1425#if REV_ENDIAN_FS
1426 if (rev_endian)
1427 byte_swap_cgout(cgp,fs);
1428#endif /* REV_ENDIAN_FS */
1429 bdwrite(bp);
1430}
1431
1432#if DIAGNOSTIC
1433/*
1434 * Verify allocation of a block or fragment. Returns true if block or
1435 * fragment is allocated, false if it is free.
1436 */
1437ffs_checkblk(ip, bno, size)
1438 struct inode *ip;
1439 ufs_daddr_t bno;
1440 long size;
1441{
1442 struct fs *fs;
1443 struct cg *cgp;
1444 struct buf *bp;
1445 int i, error, frags, free;
1446#if REV_ENDIAN_FS
1447 struct vnode *vp=ITOV(ip);
1448 struct mount *mp=vp->v_mount;
1449 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1450#endif /* REV_ENDIAN_FS */
1451
1452 fs = ip->i_fs;
1453 if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
1454 printf("bsize = %d, size = %d, fs = %s\n",
1455 fs->fs_bsize, size, fs->fs_fsmnt);
1456 panic("checkblk: bad size");
1457 }
1458 if ((u_int)bno >= fs->fs_size)
1459 panic("checkblk: bad block %d", bno);
1460 error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
1461 (int)fs->fs_cgsize, NOCRED, &bp);
1462 if (error) {
1463 brelse(bp);
1464 return;
1465 }
1466 cgp = (struct cg *)bp->b_data;
1467#if REV_ENDIAN_FS
1468 if (rev_endian)
1469 byte_swap_cgin(cgp,fs);
1470#endif /* REV_ENDIAN_FS */
1471 if (!cg_chkmagic(cgp)) {
1472#if REV_ENDIAN_FS
1473 if (rev_endian)
1474 byte_swap_cgout(cgp,fs);
1475#endif /* REV_ENDIAN_FS */
1476 brelse(bp);
1477 return;
1478 }
1479 bno = dtogd(fs, bno);
1480 if (size == fs->fs_bsize) {
1481 free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
1482 } else {
1483 frags = numfrags(fs, size);
1484 for (free = 0, i = 0; i < frags; i++)
1485 if (isset(cg_blksfree(cgp), bno + i))
1486 free++;
1487 if (free != 0 && free != frags)
1488 panic("checkblk: partially free fragment");
1489 }
1490#if REV_ENDIAN_FS
1491 if (rev_endian)
1492 byte_swap_cgout(cgp,fs);
1493#endif /* REV_ENDIAN_FS */
1494 brelse(bp);
1495 return (!free);
1496}
1497#endif /* DIAGNOSTIC */
1498
1499/*
1500 * Free an inode.
1501 *
1502 * The specified inode is placed back in the free map.
1503 */
1504int
1505ffs_vfree(ap)
1506 struct vop_vfree_args /* {
1507 struct vnode *a_pvp;
1508 ino_t a_ino;
1509 int a_mode;
1510 } */ *ap;
1511{
1512 register struct fs *fs;
1513 register struct cg *cgp;
1514 register struct inode *pip;
1515 ino_t ino = ap->a_ino;
1516 struct buf *bp;
1517 int error, cg;
1518#if REV_ENDIAN_FS
1519 struct vnode *vp=ap->a_pvp;
1520 struct mount *mp=vp->v_mount;
1521 int rev_endian=(mp->mnt_flag & MNT_REVEND);
1522#endif /* REV_ENDIAN_FS */
1523
1524 pip = VTOI(ap->a_pvp);
1525 fs = pip->i_fs;
1526 if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
1527 panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
1528 pip->i_dev, ino, fs->fs_fsmnt);
1529 cg = ino_to_cg(fs, ino);
1530 error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
1531 (int)fs->fs_cgsize, NOCRED, &bp);
1532 if (error) {
1533 brelse(bp);
1534 return (0);
1535 }
1536 cgp = (struct cg *)bp->b_data;
1537#if REV_ENDIAN_FS
1538 if (rev_endian)
1539 byte_swap_cgin(cgp,fs);
1540#endif /* REV_ENDIAN_FS */
1541 if (!cg_chkmagic(cgp)) {
1542#if REV_ENDIAN_FS
1543 if (rev_endian)
1544 byte_swap_cgout(cgp,fs);
1545#endif /* REV_ENDIAN_FS */
1546 brelse(bp);
1547 return (0);
1548 }
1549 cgp->cg_time = time.tv_sec;
1550 ino %= fs->fs_ipg;
1551 if (isclr(cg_inosused(cgp), ino)) {
1552 printf("dev = 0x%x, ino = %d, fs = %s\n",
1553 pip->i_dev, ino, fs->fs_fsmnt);
1554 if (fs->fs_ronly == 0)
1555 panic("ifree: freeing free inode");
1556 }
1557 clrbit(cg_inosused(cgp), ino);
1558 if (ino < cgp->cg_irotor)
1559 cgp->cg_irotor = ino;
1560 cgp->cg_cs.cs_nifree++;
1561 fs->fs_cstotal.cs_nifree++;
1562 fs->fs_cs(fs, cg).cs_nifree++;
1563 if ((ap->a_mode & IFMT) == IFDIR) {
1564 cgp->cg_cs.cs_ndir--;
1565 fs->fs_cstotal.cs_ndir--;
1566 fs->fs_cs(fs, cg).cs_ndir--;
1567 }
1568 fs->fs_fmod = 1;
1569#if REV_ENDIAN_FS
1570 if (rev_endian)
1571 byte_swap_cgout(cgp,fs);
1572#endif /* REV_ENDIAN_FS */
1573 bdwrite(bp);
1574 return (0);
1575}
1576
1577/*
1578 * Find a block of the specified size in the specified cylinder group.
1579 *
 1580 * It is a panic if a request is made to find a block when none are
1581 * available.
1582 */
1583static ufs_daddr_t
1584ffs_mapsearch(fs, cgp, bpref, allocsiz)
1585 register struct fs *fs;
1586 register struct cg *cgp;
1587 ufs_daddr_t bpref;
1588 int allocsiz;
1589{
1590 ufs_daddr_t bno;
1591 int start, len, loc, i;
1592 int blk, field, subfield, pos;
1593
1594 /*
1595 * find the fragment by searching through the free block
1596 * map for an appropriate bit pattern
1597 */
1598 if (bpref)
1599 start = dtogd(fs, bpref) / NBBY;
1600 else
1601 start = cgp->cg_frotor / NBBY;
1602 len = howmany(fs->fs_fpg, NBBY) - start;
1603 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
1604 (u_char *)fragtbl[fs->fs_frag],
1605 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1606 if (loc == 0) {
1607 len = start + 1;
1608 start = 0;
1609 loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
1610 (u_char *)fragtbl[fs->fs_frag],
1611 (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1612 if (loc == 0) {
1613 printf("start = %d, len = %d, fs = %s\n",
1614 start, len, fs->fs_fsmnt);
1615 panic("ffs_alloccg: map corrupted");
1616 /* NOTREACHED */
1617 }
1618 }
1619 bno = (start + len - loc) * NBBY;
1620 cgp->cg_frotor = bno;
1621 /*
1622 * found the byte in the map
1623 * sift through the bits to find the selected frag
1624 */
1625 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1626 blk = blkmap(fs, cg_blksfree(cgp), bno);
1627 blk <<= 1;
1628 field = around[allocsiz];
1629 subfield = inside[allocsiz];
1630 for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
1631 if ((blk & field) == subfield)
1632 return (bno + pos);
1633 field <<= 1;
1634 subfield <<= 1;
1635 }
1636 }
1637 printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
1638 panic("ffs_alloccg: block not in map");
1639 return (-1);
1640}
1641
1642/*
1643 * Update the cluster map because of an allocation or free.
1644 *
1645 * Cnt == 1 means free; cnt == -1 means allocating.
1646 */
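/*
 * Example (hypothetical map state): freeing a block with two free blocks
 * immediately before it and three immediately after joins them into one
 * six-block cluster, so sump[6]++ (capped at fs_contigsumsize), sump[2]--
 * and sump[3]--, and fs_maxcluster[] for the group is then recomputed
 * from the updated summary.
 */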
1647ffs_clusteracct(fs, cgp, blkno, cnt)
1648 struct fs *fs;
1649 struct cg *cgp;
1650 ufs_daddr_t blkno;
1651 int cnt;
1652{
1653 int32_t *sump;
1654 int32_t *lp;
1655 u_char *freemapp, *mapp;
1656 int i, start, end, forw, back, map, bit;
1657
1658 if (fs->fs_contigsumsize <= 0)
1659 return;
1660 freemapp = cg_clustersfree(cgp);
1661 sump = cg_clustersum(cgp);
1662 /*
1663 * Allocate or clear the actual block.
1664 */
1665 if (cnt > 0)
1666 setbit(freemapp, blkno);
1667 else
1668 clrbit(freemapp, blkno);
1669 /*
1670 * Find the size of the cluster going forward.
1671 */
1672 start = blkno + 1;
1673 end = start + fs->fs_contigsumsize;
1674 if (end >= cgp->cg_nclusterblks)
1675 end = cgp->cg_nclusterblks;
1676 mapp = &freemapp[start / NBBY];
1677 map = *mapp++;
1678 bit = 1 << (start % NBBY);
1679 for (i = start; i < end; i++) {
1680 if ((map & bit) == 0)
1681 break;
1682 if ((i & (NBBY - 1)) != (NBBY - 1)) {
1683 bit <<= 1;
1684 } else {
1685 map = *mapp++;
1686 bit = 1;
1687 }
1688 }
1689 forw = i - start;
1690 /*
1691 * Find the size of the cluster going backward.
1692 */
1693 start = blkno - 1;
1694 end = start - fs->fs_contigsumsize;
1695 if (end < 0)
1696 end = -1;
1697 mapp = &freemapp[start / NBBY];
1698 map = *mapp--;
1699 bit = 1 << (start % NBBY);
1700 for (i = start; i > end; i--) {
1701 if ((map & bit) == 0)
1702 break;
1703 if ((i & (NBBY - 1)) != 0) {
1704 bit >>= 1;
1705 } else {
1706 map = *mapp--;
1707 bit = 1 << (NBBY - 1);
1708 }
1709 }
1710 back = start - i;
1711 /*
1712 * Account for old cluster and the possibly new forward and
1713 * back clusters.
1714 */
1715 i = back + forw + 1;
1716 if (i > fs->fs_contigsumsize)
1717 i = fs->fs_contigsumsize;
1718 sump[i] += cnt;
1719 if (back > 0)
1720 sump[back] -= cnt;
1721 if (forw > 0)
1722 sump[forw] -= cnt;
1723 /*
1724 * Update cluster summary information.
1725 */
1726 lp = &sump[fs->fs_contigsumsize];
1727 for (i = fs->fs_contigsumsize; i > 0; i--)
1728 if (*lp-- > 0)
1729 break;
1730 fs->fs_maxcluster[cgp->cg_cgx] = i;
1731}
1732
1733/*
1734 * Fserr prints the name of a file system with an error diagnostic.
1735 *
1736 * The form of the error message is:
1737 * fs: error message
1738 */
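/*
 * For example (hypothetical mount point and uid), a full file system is
 * reported as:
 *	uid 501 on /mnt: file system full
 */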
1739static void
1740ffs_fserr(fs, uid, cp)
1741 struct fs *fs;
1742 u_int uid;
1743 char *cp;
1744{
1745
1746 log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
1747}