]>
Commit | Line | Data |
---|---|---|
9bccf70c A |
1 | .\" $NetBSD: fs.5,v 1.3 1994/11/30 19:31:17 jtc Exp $ |
2 | .\" | |
3 | .\" Copyright (c) 1983, 1991, 1993 | |
4 | .\" The Regents of the University of California. All rights reserved. | |
5 | .\" | |
6 | .\" Redistribution and use in source and binary forms, with or without | |
7 | .\" modification, are permitted provided that the following conditions | |
8 | .\" are met: | |
9 | .\" 1. Redistributions of source code must retain the above copyright | |
10 | .\" notice, this list of conditions and the following disclaimer. | |
11 | .\" 2. Redistributions in binary form must reproduce the above copyright | |
12 | .\" notice, this list of conditions and the following disclaimer in the | |
13 | .\" documentation and/or other materials provided with the distribution. | |
14 | .\" 3. All advertising materials mentioning features or use of this software | |
15 | .\" must display the following acknowledgement: | |
16 | .\" This product includes software developed by the University of | |
17 | .\" California, Berkeley and its contributors. | |
18 | .\" 4. Neither the name of the University nor the names of its contributors | |
19 | .\" may be used to endorse or promote products derived from this software | |
20 | .\" without specific prior written permission. | |
21 | .\" | |
22 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
23 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
24 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
25 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
26 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
27 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
28 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
29 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
30 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
31 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
32 | .\" SUCH DAMAGE. | |
33 | .\" | |
34 | .\" @(#)fs.5 8.2 (Berkeley) 4/19/94 | |
35 | .\" | |
36 | .Dd April 19, 1994 | |
37 | .Dt FS 5 | |
38 | .Os BSD 4.2 | |
39 | .Sh NAME | |
40 | .Nm fs , | |
41 | .Nm inode | |
42 | .Nd format of file system volume | |
43 | .Sh SYNOPSIS | |
44 | .Fd #include <sys/types.h> | |
45 | .Fd #include <ufs/fs.h> | |
46 | .Fd #include <ufs/inode.h> | |
47 | .Sh DESCRIPTION | |
48 | The files | |
49 | .Aq Pa fs.h | |
50 | and | |
51 | .Aq Pa inode.h | |
52 | declare several structures, defined variables and macros | |
53 | which are used to create and manage the underlying format of | |
54 | file system objects on random access devices (disks). | |
55 | .Pp | |
56 | The block size and number of blocks which | |
57 | comprise a file system are parameters of the file system. | |
58 | Sectors beginning at | |
59 | .Dv BBLOCK | |
60 | and continuing for | |
61 | .Dv BBSIZE | |
62 | are used | |
63 | for a disklabel and for some hardware primary | |
64 | and secondary bootstrapping programs. | |
65 | .Pp | |
66 | The actual file system begins at sector | |
67 | .Dv SBLOCK | |
68 | with the | |
69 | .Em super-block | |
70 | that is of size | |
71 | .Dv SBSIZE . | |
72 | The following structure describes the super-block and is | |
73 | from the file | |
74 | .Aq Pa ufs/fs.h : | |
75 | .Bd -literal | |
76 | #define FS_MAGIC 0x011954 | |
77 | struct fs { | |
78 | struct fs *fs_link; /* linked list of file systems */ | |
79 | struct fs *fs_rlink; /* used for incore super blocks */ | |
80 | daddr_t fs_sblkno; /* addr of super-block in filesys */ | |
81 | daddr_t fs_cblkno; /* offset of cyl-block in filesys */ | |
82 | daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ | |
83 | daddr_t fs_dblkno; /* offset of first data after cg */ | |
84 | long fs_cgoffset; /* cylinder group offset in cylinder */ | |
85 | long fs_cgmask; /* used to calc mod fs_ntrak */ | |
86 | time_t fs_time; /* last time written */ | |
87 | long fs_size; /* number of blocks in fs */ | |
88 | long fs_dsize; /* number of data blocks in fs */ | |
89 | long fs_ncg; /* number of cylinder groups */ | |
90 | long fs_bsize; /* size of basic blocks in fs */ | |
91 | long fs_fsize; /* size of frag blocks in fs */ | |
92 | long fs_frag; /* number of frags in a block in fs */ | |
93 | /* these are configuration parameters */ | |
94 | long fs_minfree; /* minimum percentage of free blocks */ | |
95 | long fs_rotdelay; /* num of ms for optimal next block */ | |
96 | long fs_rps; /* disk revolutions per second */ | |
97 | /* these fields can be computed from the others */ | |
98 | long fs_bmask; /* ``blkoff'' calc of blk offsets */ | |
99 | long fs_fmask; /* ``fragoff'' calc of frag offsets */ | |
100 | long fs_bshift; /* ``lblkno'' calc of logical blkno */ | |
101 | long fs_fshift; /* ``numfrags'' calc number of frags */ | |
102 | /* these are configuration parameters */ | |
103 | long fs_maxcontig; /* max number of contiguous blks */ | |
104 | long fs_maxbpg; /* max number of blks per cyl group */ | |
105 | /* these fields can be computed from the others */ | |
106 | long fs_fragshift; /* block to frag shift */ | |
107 | long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ | |
108 | long fs_sbsize; /* actual size of super block */ | |
109 | long fs_csmask; /* csum block offset */ | |
110 | long fs_csshift; /* csum block number */ | |
111 | long fs_nindir; /* value of NINDIR */ | |
112 | long fs_inopb; /* value of INOPB */ | |
113 | long fs_nspf; /* value of NSPF */ | |
114 | /* yet another configuration parameter */ | |
115 | long fs_optim; /* optimization preference, see below */ | |
116 | /* these fields are derived from the hardware */ | |
117 | long fs_npsect; /* # sectors/track including spares */ | |
118 | long fs_interleave; /* hardware sector interleave */ | |
119 | long fs_trackskew; /* sector 0 skew, per track */ | |
120 | long fs_headswitch; /* head switch time, usec */ | |
121 | long fs_trkseek; /* track-to-track seek, usec */ | |
122 | /* sizes determined by number of cylinder groups and their sizes */ | |
123 | daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ | |
124 | long fs_cssize; /* size of cyl grp summary area */ | |
125 | long fs_cgsize; /* cylinder group size */ | |
126 | /* these fields are derived from the hardware */ | |
127 | long fs_ntrak; /* tracks per cylinder */ | |
128 | long fs_nsect; /* sectors per track */ | |
129 | long fs_spc; /* sectors per cylinder */ | |
130 | /* this comes from the disk driver partitioning */ | |
131 | long fs_ncyl; /* cylinders in file system */ | |
132 | /* these fields can be computed from the others */ | |
133 | long fs_cpg; /* cylinders per group */ | |
134 | long fs_ipg; /* inodes per group */ | |
135 | long fs_fpg; /* blocks per group * fs_frag */ | |
136 | /* this data must be re-computed after crashes */ | |
137 | struct csum fs_cstotal; /* cylinder summary information */ | |
138 | /* these fields are cleared at mount time */ | |
139 | char fs_fmod; /* super block modified flag */ | |
140 | char fs_clean; /* file system is clean flag */ | |
141 | char fs_ronly; /* mounted read-only flag */ | |
142 | char fs_flags; /* currently unused flag */ | |
143 | char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ | |
144 | /* these fields retain the current block allocation info */ | |
145 | long fs_cgrotor; /* last cg searched */ | |
146 | struct csum *fs_csp[MAXCSBUFS]; /* list of fs_cs info buffers */ | |
147 | long fs_cpc; /* cyl per cycle in postbl */ | |
148 | short fs_opostbl[16][8]; /* old rotation block list head */ | |
149 | long fs_sparecon[56]; /* reserved for future constants */ | |
150 | quad fs_qbmask; /* ~fs_bmask - for use with quad size */ | |
151 | quad fs_qfmask; /* ~fs_fmask - for use with quad size */ | |
152 | long fs_postblformat; /* format of positional layout tables */ | |
153 | long fs_nrpos; /* number of rotational positions */ | |
154 | long fs_postbloff; /* (short) rotation block list head */ | |
155 | long fs_rotbloff; /* (u_char) blocks for each rotation */ | |
156 | long fs_magic; /* magic number */ | |
157 | u_char fs_space[1]; /* list of blocks for each rotation */ | |
158 | /* actually longer */ | |
159 | }; | |
160 | .Ed | |
161 | .Pp | |
162 | Each disk drive contains some number of file systems. | |
163 | A file system consists of a number of cylinder groups. | |
164 | Each cylinder group has inodes and data. | |
165 | .Pp | |
166 | A file system is described by its super-block, which in turn | |
167 | describes the cylinder groups. The super-block is critical | |
168 | data and is replicated in each cylinder group to protect against | |
169 | catastrophic loss. This is done at file system creation | |
170 | time and the critical | |
171 | super-block data does not change, so the copies need not be | |
172 | referenced further unless disaster strikes. | |
173 | .Pp | |
174 | Addresses stored in inodes are capable of addressing fragments | |
175 | of `blocks'. File system blocks of at most size | |
176 | .Dv MAXBSIZE | |
177 | can | |
178 | be optionally broken into 2, 4, or 8 pieces, each of which is | |
179 | addressable; these pieces may be | |
180 | .Dv DEV_BSIZE , | |
181 | or some multiple of | |
182 | a | |
183 | .Dv DEV_BSIZE | |
184 | unit. | |
185 | .Pp | |
186 | Large files consist of exclusively large data blocks. To avoid | |
187 | undue wasted disk space, the last data block of a small file is | |
188 | allocated as only as many fragments of a large block as are | |
189 | necessary. The file system format retains only a single pointer | |
190 | to such a fragment, which is a piece of a single large block that | |
191 | has been divided. The size of such a fragment is determinable from | |
192 | information in the inode, using the | |
193 | .Fn blksize fs ip lbn | |
194 | macro. | |
195 | .Pp | |
196 | The file system records space availability at the fragment level; | |
197 | to determine block availability, aligned fragments are examined. | |
198 | .Pp | |
199 | The root inode is the root of the file system. | |
200 | Inode 0 can't be used for normal purposes and | |
201 | historically bad blocks were linked to inode 1, | |
202 | thus the root inode is 2 (inode 1 is no longer used for | |
203 | this purpose, however numerous dump tapes make this | |
204 | assumption, so we are stuck with it). | |
205 | .Pp | |
206 | The | |
207 | .Fa fs_minfree | |
208 | element gives the minimum acceptable percentage of file system | |
209 | blocks that may be free. If the freelist drops below this level | |
210 | only the super-user may continue to allocate blocks. | |
211 | The | |
212 | .Fa fs_minfree | |
213 | element | |
214 | may be set to 0 if no reserve of free blocks is deemed necessary, | |
215 | however severe performance degradations will be observed if the | |
216 | file system is run at greater than 90% full; thus the default | |
217 | value of | |
218 | .Fa fs_minfree | |
219 | is 10%. | |
220 | .Pp | |
221 | Empirically the best trade-off between block fragmentation and | |
222 | overall disk utilization at a loading of 90% comes with a | |
223 | fragmentation of 8, thus the default fragment size is an eighth | |
224 | of the block size. | |
225 | .Pp | |
226 | The element | |
227 | .Fa fs_optim | |
228 | specifies whether the file system should try to minimize the time spent | |
229 | allocating blocks, or if it should attempt to minimize the space | |
230 | fragmentation on the disk. | |
231 | If the value of fs_minfree (see above) is less than 10%, | |
232 | then the file system defaults to optimizing for space to avoid | |
233 | running out of full sized blocks. | |
234 | If the value of fs_minfree is greater than or equal to 10%, | |
235 | fragmentation is unlikely to be problematical, and | |
236 | the file system defaults to optimizing for time. | |
237 | .Pp | |
238 | .Em Cylinder group related limits : | |
239 | Each cylinder keeps track of the availability of blocks at different | |
240 | rotational positions, so that sequential blocks can be laid out | |
241 | with minimum rotational latency. With the default of 8 distinguished | |
242 | rotational positions, the resolution of the | |
243 | summary information is 2ms for a typical 3600 rpm drive. | |
244 | .Pp | |
245 | The element | |
246 | .Fa fs_rotdelay | |
247 | gives the minimum number of milliseconds to initiate | |
248 | another disk transfer on the same cylinder. | |
249 | It is used in determining the rotationally optimal | |
250 | layout for disk blocks within a file; | |
251 | the default value for | |
252 | .Fa fs_rotdelay | |
253 | is 2ms. | |
254 | .Pp | |
255 | Each file system has a statically allocated number of inodes. | |
256 | An inode is allocated for each | |
257 | .Dv NBPI | |
258 | bytes of disk space. | |
259 | The inode allocation strategy is extremely conservative. | |
260 | .Pp | |
261 | .Dv MINBSIZE | |
262 | is the smallest allowable block size. | |
263 | With a | |
264 | .Dv MINBSIZE | |
265 | of 4096 | |
266 | it is possible to create files of size | |
267 | 2^32 with only two levels of indirection. | |
268 | .Dv MINBSIZE | |
269 | must be big enough to hold a cylinder group block, | |
270 | thus changes to | |
271 | .Pq Fa struct cg | |
272 | must keep its size within | |
273 | .Dv MINBSIZE . | |
274 | Note that super-blocks are never more than size | |
275 | .Dv SBSIZE . | |
276 | .Pp | |
277 | The path name on which the file system is mounted is maintained in | |
278 | .Fa fs_fsmnt . | |
279 | .Dv MAXMNTLEN | |
280 | defines the amount of space allocated in | |
281 | the super-block for this name. | |
282 | The limit on the amount of summary information per file system | |
283 | is defined by | |
284 | .Dv MAXCSBUFS . | |
285 | For a 4096 byte block size, it is currently parameterized for a | |
286 | maximum of two million cylinders. | |
287 | .Pp | |
288 | Per cylinder group information is summarized in blocks allocated | |
289 | from the first cylinder group's data blocks. | |
290 | These blocks are read in from | |
291 | .Fa fs_csaddr | |
292 | (size | |
293 | .Fa fs_cssize ) | |
294 | in addition to the super-block. | |
295 | .Pp | |
296 | .Sy N.B.: | |
297 | .Fn sizeof "struct csum" | |
298 | must be a power of two in order for | |
299 | the | |
300 | .Fn fs_cs | |
301 | macro to work. | |
302 | .Pp | |
303 | The | |
304 | .Em "Super-block for a file system" : | |
305 | The size of the rotational layout tables | |
306 | is limited by the fact that the super-block is of size | |
307 | .Dv SBSIZE . | |
308 | The size of these tables is | |
309 | .Em inversely | |
310 | proportional to the block | |
311 | size of the file system. The size of the tables is | |
312 | increased when sector sizes are not powers of two, | |
313 | as this increases the number of cylinders | |
314 | included before the rotational pattern repeats | |
315 | .Pq Fa fs_cpc . | |
316 | The size of the rotational layout | |
317 | tables is derived from the number of bytes remaining in | |
318 | .Pq Fa struct fs . | |
319 | .Pp | |
320 | The number of blocks of data per cylinder group | |
321 | is limited because cylinder groups are at most one block. | |
322 | The inode and free block tables | |
323 | must fit into a single block after deducting space for | |
324 | the cylinder group structure | |
325 | .Pq Fa struct cg . | |
326 | .Pp | |
327 | The | |
328 | .Em Inode : | |
329 | The inode is the focus of all file activity in the | |
330 | file system. | |
331 | There is a unique inode allocated | |
332 | for each active file, | |
333 | each current directory, each mounted-on file, | |
334 | text file, and the root. | |
335 | An inode is `named' by its device/i-number pair. | |
336 | For further information, see the include file | |
337 | .Aq Pa ufs/inode.h . | |
338 | .Sh HISTORY | |
339 | A super-block structure named filsys appeared in | |
340 | .At v6 . | |
341 | The file system described in this manual appeared | |
342 | in | |
343 | .Bx 4.2 . |