]> git.saurik.com Git - apple/hfs.git/blob - core/hfs_cnode.h
dd426b8a7d4ae5dd7b0b891f1311f14dcaa88efa
[apple/hfs.git] / core / hfs_cnode.h
1 /*
2 * Copyright (c) 2002-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #ifndef _HFS_CNODE_H_
29 #define _HFS_CNODE_H_
30
31 #include <sys/appleapiopts.h>
32
33 #ifdef KERNEL
34 #ifdef __APPLE_API_PRIVATE
35 #include <stdbool.h>
36 #include <sys/types.h>
37 #include <sys/queue.h>
38 #include <sys/stat.h>
39 #include <sys/vnode.h>
40 #include <sys/quota.h>
41 #if HFS_COMPRESSION
42 #include <sys/decmpfs.h>
43 #endif
44 #if CONFIG_PROTECT
45 #include <sys/cprotect.h>
46 #endif
47 #include <kern/locks.h>
48
49 #include "hfs_catalog.h"
50 #include "rangelist.h"
51 #include "hfs_dbg.h"
52
53 /*
54 * The filefork is used to represent an HFS file fork (data or resource).
55 * Reading or writing any of these fields requires holding cnode lock.
56 */
57 struct filefork {
58 struct cnode *ff_cp; /* cnode associated with this fork (what FTOC() returns) */
59 struct rl_head ff_invalidranges; /* Areas of disk that should read back as zeroes */
60 union {
61 void *ffu_sysfileinfo; /* additional info for system files */
62 char *ffu_symlinkptr; /* symbolic link pathname */
63 } ff_union; /* the two pointers overlay each other; reached via ff_sysfileinfo / ff_symlinkptr */
64 struct cat_fork ff_data; /* fork data (size, extents); the ff_size, ff_blocks, ... aliases index into this */
65 };
66 typedef struct filefork filefork_t; /* shorthand for struct filefork */
67
68
69 #define HFS_TEMPLOOKUP_NAMELEN 32 /* size in bytes of cat_lookup_buffer.lookup_name */
70
71 /*
72 * Catalog Lookup struct (runtime)
73 *
74 * This is used so that when we need to malloc a container for a catalog
75 * lookup operation, we can acquire memory for everything in one fell swoop
76 * as opposed to putting many of these objects on the stack. The cat_fork
77 * data structure can easily take up 100+ bytes, and that can add to stack
78 * overhead.
79 *
80 * As a result, we use this to easily pass around the memory needed for a
81 * lookup operation.
82 */
83 struct cat_lookup_buffer {
84 struct cat_desc lookup_desc; /* catalog descriptor */
85 struct cat_attr lookup_attr; /* catalog attributes */
86 struct filefork lookup_fork; /* a whole filefork, which embeds the cat_fork mentioned above */
87 struct componentname lookup_cn; /* component name */
88 char lookup_name[HFS_TEMPLOOKUP_NAMELEN]; /* for open-unlinked paths only */
89 };
90
91
92 /* Aliases for common filefork fields; each one names a member of the embedded struct cat_fork (ff_data) */
/*
 * These are plain object-like macros, so the identifiers ff_size,
 * ff_blocks, etc. are rewritten wherever this header is included, not
 * only in filefork member accesses.
 */
93 #define ff_size ff_data.cf_size
94 #define ff_new_size ff_data.cf_new_size
95 #define ff_clumpsize ff_data.cf_clump
96 #define ff_bytesread ff_data.cf_bytesread
97 #define ff_extents ff_data.cf_extents
98
99 /*
100 * Note that the blocks fields are protected by the cnode lock, *not*
101 * the truncate lock.
102 */
103 #define ff_blocks ff_data.cf_blocks
104 #define ff_unallocblocks ff_data.cf_vblocks /* note: maps to cf_vblocks, not a cf_unalloc* member */
105 static inline uint32_t ff_allocblocks(filefork_t *ff)
106 {
107 hfs_assert(ff->ff_blocks >= ff->ff_unallocblocks);
108 return ff->ff_blocks - ff->ff_unallocblocks;
109 }
110
/* Shorthand for the two members of filefork.ff_union (they share storage) */
111 #define ff_symlinkptr ff_union.ffu_symlinkptr
112 #define ff_sysfileinfo ff_union.ffu_sysfileinfo
113
114
115 /* The btree code still needs these... (legacy fcb* names mapped onto the ff_* aliases) */
116 #define fcbEOF ff_size
117 #define fcbExtents ff_extents
118 #define fcbBTCBPtr ff_sysfileinfo
119
/*
 * Type of the c_touch_* members of struct cnode.  It is a plain 8-bit
 * integer; nothing in this header makes accesses to it atomic, despite
 * the name.
 */
120 typedef u_int8_t atomicflag_t;
121
122
123 /*
124 * Hardlink Origin (for hardlinked directories).
125 */
126 struct linkorigin {
127 TAILQ_ENTRY(linkorigin) lo_link; /* chain (entries of this type are queued on cnode.c_originlist) */
128 void * lo_thread; /* thread that performed the lookup */
129 cnid_t lo_cnid; /* hardlink's cnid */
130 cnid_t lo_parentcnid; /* hardlink's parent cnid */
131 };
132 typedef struct linkorigin linkorigin_t; /* shorthand for struct linkorigin */
133
/*
 * NOTE(review): presumably the maximum number of linkorigin entries kept
 * per cnode, for directories and files respectively; the users are not in
 * this header, so confirm there.
 */
134 #define MAX_CACHED_ORIGINS 10
135 #define MAX_CACHED_FILE_ORIGINS 8
136
137 /*
138 * The cnode is used to represent each active (or recently active)
139 * file or directory in the HFS filesystem.
140 *
141 * Reading or writing these fields requires holding the cnode lock (c_rwlock) unless a field's own comment names a different lock.
142 */
143 struct cnode {
144 lck_rw_t c_rwlock; /* cnode's lock */
145 thread_t c_lockowner; /* cnode's lock owner (exclusive case only) */
146 lck_rw_t c_truncatelock; /* protects file from truncation during read/write */
147 thread_t c_truncatelockowner; /* truncate lock owner (exclusive case only) */
148 LIST_ENTRY(cnode) c_hash; /* cnode's hash chain */
149 u_int32_t c_flag; /* cnode's runtime flags (C_* values) */
150 u_int32_t c_hflag; /* cnode's flags for maintaining hash (H_* values) - protected by global hash lock */
151 struct vnode *c_vp; /* vnode for data fork or dir */
152 struct vnode *c_rsrc_vp; /* vnode for resource fork */
153 struct dquot *c_dquot[MAXQUOTAS]; /* cnode's quota info */
154 u_int32_t c_childhint; /* catalog hint for children (small dirs only); also reachable as c_zftimeout */
155 u_int32_t c_dirthreadhint; /* catalog hint for directory's thread rec */
156 struct cat_desc c_desc; /* cnode's descriptor */
157 struct cat_attr c_attr; /* cnode's attributes */
158 TAILQ_HEAD(hfs_originhead, linkorigin) c_originlist; /* hardlink origin cache */
159 TAILQ_HEAD(hfs_hinthead, directoryhint) c_hintlist; /* readdir directory hint list */
160 int16_t c_dirhinttag; /* directory hint tag */
161 union {
162 int16_t cu_dirhintcnt; /* directory hint count */
163 int16_t cu_syslockcount; /* system file use only */
164 } c_union; /* members share storage; reached via c_dirhintcnt / c_syslockcount */
165 u_int32_t c_dirchangecnt; /* changes each insert/delete (in-core only) */
166 struct filefork *c_datafork; /* cnode's data fork */
167 struct filefork *c_rsrcfork; /* cnode's rsrc fork */
168 atomicflag_t c_touch_acctime; /* nonzero: access time is pending an update (see hfs_touchtimes) */
169 atomicflag_t c_touch_chgtime; /* nonzero: change time is pending an update (see hfs_touchtimes) */
170 atomicflag_t c_touch_modtime; /* nonzero: modification time is pending an update (see hfs_touchtimes) */
171
172 // The following flags are protected by the truncate lock
173 union {
174 struct {
175 bool c_need_dvnode_put_after_truncate_unlock : 1;
176 bool c_need_rvnode_put_after_truncate_unlock : 1;
177 #if HFS_COMPRESSION
178 bool c_need_decmpfs_reset : 1;
179 #endif
180 };
181 uint8_t c_tflags; /* overlays the one-bit flags above so they can be accessed as a single byte */
182 };
183
184 /*
185 * Where we're using a journal, we keep track of the last
186 * transaction that we did an update in. If a minor modification
187 * is made, we'll still push it if we're still on the same
188 * transaction.
189 */
190 uint32_t c_update_txn;
191
192 #if HFS_COMPRESSION
193 struct decmpfs_cnode *c_decmp; /* what VTOCMP() returns; may be NULL (see the VTOCMP comment) */
194 #endif /* HFS_COMPRESSION */
195 #if CONFIG_PROTECT
196 struct cprotect *c_cpentry; /* content protection data */
197 #endif
198
199 #if HFS_MALLOC_DEBUG
200 // N.B. — *must* always be last
201 uint64_t magic;
202 #endif
203 };
204 typedef struct cnode cnode_t; /* shorthand for struct cnode */
205
206 /* Aliases for common cnode fields: members of the embedded c_desc (struct cat_desc) ... */
207 #define c_cnid c_desc.cd_cnid
208 #define c_hint c_desc.cd_hint
209 #define c_parentcnid c_desc.cd_parentcnid
210 #define c_encoding c_desc.cd_encoding
211
/* ... and members of the embedded c_attr (struct cat_attr) */
212 #define c_fileid c_attr.ca_fileid
213 #define c_mode c_attr.ca_mode
214 #define c_linkcount c_attr.ca_linkcount
215 #define c_uid c_attr.ca_uid
216 #define c_gid c_attr.ca_gid
217 #define c_rdev c_attr.ca_union1.cau_rdev
218 #define c_atime c_attr.ca_atime
219 #define c_mtime c_attr.ca_mtime
220 #define c_ctime c_attr.ca_ctime
221 #define c_itime c_attr.ca_itime
222 #define c_btime c_attr.ca_btime
223 #define c_bsdflags c_attr.ca_flags
224 #define c_finderinfo c_attr.ca_finderinfo
/* c_blocks and c_entries both live in ca_union2, so they share storage */
225 #define c_blocks c_attr.ca_union2.cau_blocks
226 #define c_entries c_attr.ca_union2.cau_entries
/* c_zftimeout is a second name for the c_childhint member, not a separate field */
227 #define c_zftimeout c_childhint
228
/* Shorthand for the two members of cnode.c_union (they share storage) */
229 #define c_dirhintcnt c_union.cu_dirhintcnt
230 #define c_syslockcount c_union.cu_syslockcount
231
232
233 /* hash maintenance flags kept in c_hflag and protected by hfs_chash_mutex */
234 #define H_ALLOC 0x00001 /* CNode is being allocated */
235 #define H_ATTACH 0x00002 /* CNode is being attached to by another vnode */
236 #define H_TRANSIT 0x00004 /* CNode is getting recycled */
237 #define H_WAITING 0x00008 /* CNode is being waited for */
238
239
240 /*
241 * Runtime cnode flags (kept in c_flag)
242 */
243 #define C_NEED_RVNODE_PUT 0x0000001 /* Need to do a vnode_put on c_rsrc_vp after the unlock */
244 #define C_NEED_DVNODE_PUT 0x0000002 /* Need to do a vnode_put on c_vp after the unlock */
245 #define C_ZFWANTSYNC 0x0000004 /* fsync requested and file has holes */
246 #define C_FROMSYNC 0x0000008 /* fsync was called from sync */
247
248 #define C_MODIFIED 0x0000010 /* CNode has been modified */
249 #define C_NOEXISTS 0x0000020 /* CNode has been deleted, catalog entry is gone */
250 #define C_DELETED 0x0000040 /* CNode has been marked to be deleted */
251 #define C_HARDLINK 0x0000080 /* CNode is a hard link (file or dir) */
252
253 /*
254 * A minor modification is one where the volume would not be inconsistent if
255 * the change was not pushed to disk. For example, changes to times.
256 */
257 #define C_MINOR_MOD 0x0000100 /* CNode has a minor modification */
258
259 #define C_HASXATTRS 0x0000200 /* cnode has extended attributes */
260 #define C_NEG_ENTRIES 0x0000400 /* directory has negative name entries */
261 /*
262 * For C_SSD_STATIC: SSDs may want to deal with the file payload data in a
263 * different manner knowing that the content is not likely to be modified. This is
264 * purely advisory at the HFS level, and is not maintained after the cnode goes out of core.
265 */
266 #define C_SSD_STATIC 0x0000800 /* Assume future writes contain static content */
267
/*
 * NOTE(review): the comments on the next two flags previously named the
 * opposite vnode (DATA -> c_rsrc_vp, RSRC -> c_vp).  They now follow the
 * flag names, c_vp being the data fork vnode; confirm against the code
 * that consumes these flags.
 */
268 #define C_NEED_DATA_SETSIZE 0x0001000 /* Do a ubc_setsize(0) on c_vp after the unlock */
269 #define C_NEED_RSRC_SETSIZE 0x0002000 /* Do a ubc_setsize(0) on c_rsrc_vp after the unlock */
270 #define C_DIR_MODIFICATION 0x0004000 /* Directory is being modified, wait for lookups */
271 #define C_ALWAYS_ZEROFILL 0x0008000 /* Always zero-fill the file on an fsync */
272
273 #define C_RENAMED 0x0010000 /* cnode was deleted as part of rename; C_DELETED should also be set */
274 #define C_NEEDS_DATEADDED 0x0020000 /* cnode needs date-added written to the finderinfo bit */
275 #define C_BACKINGSTORE 0x0040000 /* cnode is a backing store for an existing or currently-mounting filesystem */
276
277 /*
278 * This flag indicates the cnode might be dirty because it
279 * was mapped writable so if we get any page-outs, update
280 * the modification and change times.
281 */
282 #define C_MIGHT_BE_DIRTY_FROM_MAPPING 0x0080000
283
284 /*
285 * For C_SSD_GREEDY_MODE: SSDs may want to write the file payload data using the greedy mode knowing
286 * that the content needs to be written out to the disk quicker than normal at the expense of storage efficiency.
287 * This is purely advisory at the HFS level, and is not maintained after the cnode goes out of core.
288 */
289 #define C_SSD_GREEDY_MODE 0x0100000 /* Assume future writes are recommended to be written in SLC mode */
290
291 /* 0x0200000 is currently unused */
292
293 #define C_IO_ISOCHRONOUS 0x0400000 /* device-specific isochronous throughput I/O */
/* No c_flag bit above 0x0400000 is assigned in this header. */
294
295 #define ZFTIMELIMIT (5 * 60) /* NOTE(review): five minutes if the unit is seconds; the unit is not stated here, confirm at the c_zftimeout users */
296
297 /*
298 * The following is the "invisible" bit from the fdFlags field
299 * in the FndrFileInfo.
300 */
enum {
	kFinderInvisibleMask = 0x4000	/* bit 14, i.e. 1 << 14 */
};
302
303
304 /*
305 * Convert between cnode pointers and vnode pointers
306 */
/* VTOC: the vnode's fsnode pointer (vnode_fsnode()), cast to struct cnode * */
307 #define VTOC(vp) ((struct cnode *)vnode_fsnode((vp)))
308
/*
 * CTOV: yields c_rsrc_vp only when rsrc is non-zero and the cnode's mode
 * is a regular file; in every other case it yields c_vp.  The cp argument
 * is evaluated more than once.
 */
309 #define CTOV(cp,rsrc) (((rsrc) && S_ISREG((cp)->c_mode)) ? \
310 (cp)->c_rsrc_vp : (cp)->c_vp)
311
312 /*
313 * Convert between vnode pointers and file forks
314 *
315 * Note: no CTOF since that is ambiguous
316 *
 * VTOF, VCTOF and FTOV compare their argument against the cnode's
 * resource-fork member and fall back to the data-fork member for anything
 * else.  Each evaluates its vp/fp argument more than once, so do not pass
 * expressions with side effects.  VCTOF is VTOF for callers that already
 * have the cnode in hand.
 */
317
318 #define FTOC(fp) ((fp)->ff_cp)
319
320 #define VTOF(vp) ((vp) == VTOC((vp))->c_rsrc_vp ? \
321 VTOC((vp))->c_rsrcfork : \
322 VTOC((vp))->c_datafork)
323
324 #define VCTOF(vp, cp) ((vp) == (cp)->c_rsrc_vp ? \
325 (cp)->c_rsrcfork : \
326 (cp)->c_datafork)
327
328 #define FTOV(fp) ((fp) == FTOC(fp)->c_rsrcfork ? \
329 FTOC(fp)->c_rsrc_vp : \
330 FTOC(fp)->c_vp)
331
332 /*
333 * This is a helper function used for determining whether or not a cnode has become open
334 * unlinked in between the time we acquired its vnode and the time we acquire the cnode lock
335 * to start manipulating it. Due to the SMP nature of VFS, it is probably necessary to
336 * call this function every time we acquire a cnode lock, as the content of the cnode may have
337 * been modified in between the lookup and a VNOP. Whether or not to call this is dependent
338 * upon the VNOP in question. Sometimes it is OK to use an open-unlinked file, for example, when
339 * reading. But other times, such as on the source of a VNOP_RENAME, it should be disallowed.
 *
 * NOTE(review): the return convention is not visible in this header;
 * presumably 0 when the cnode is still usable and an errno value
 * otherwise. Confirm against the definition.
340 */
341 int hfs_checkdeleted(struct cnode *cp);
342
343 /*
344 * Test for a resource fork: true when the fork (or vnode) is the one recorded in the owning cnode's c_rsrcfork (or c_rsrc_vp). The argument is evaluated twice.
345 */
346 #define FORK_IS_RSRC(fp) ((fp) == FTOC(fp)->c_rsrcfork)
347
348 #define VNODE_IS_RSRC(vp) ((vp) == VTOC((vp))->c_rsrc_vp)
349
350 #if HFS_COMPRESSION
351 /*
352 * VTOCMP(vp) returns a pointer to vp's decmpfs_cnode; this could be NULL
353 * if the file is not compressed or if hfs_file_is_compressed() hasn't
354 * yet been called on this file.
355 */
356 #define VTOCMP(vp) (VTOC((vp))->c_decmp)
/*
 * NOTE(review): the skiplock parameters below presumably mean "the caller
 * already holds the cnode lock, do not take it again"; that is not
 * established by this header, so confirm against the definitions.
 */
357 int hfs_file_is_compressed(struct cnode *cp, int skiplock);
358 int hfs_uncompressed_size_of_compressed_file(struct hfsmount *hfsmp, struct vnode *vp, cnid_t fid, off_t *size, int skiplock);
359 int hfs_hides_rsrc(vfs_context_t ctx, struct cnode *cp, int skiplock);
360 int hfs_hides_xattr(vfs_context_t ctx, struct cnode *cp, const char *name, int skiplock);
361 #endif /* HFS_COMPRESSION */
362
363 #define ATIME_ONDISK_ACCURACY 300
364
365 static inline bool hfs_should_save_atime(cnode_t *cp)
366 {
367 /*
368 * We only write atime updates to disk if the delta is greater
369 * than ATIME_ONDISK_ACCURACY.
370 */
371 return (cp->c_atime < cp->c_attr.ca_atimeondisk
372 || cp->c_atime - cp->c_attr.ca_atimeondisk > ATIME_ONDISK_ACCURACY);
373 }
374
/* Result of hfs_is_dirty(): how much of a cnode is waiting to be written. */
typedef enum {
	HFS_NOT_DIRTY	= 0,	/* nothing pending, or the cnode no longer exists */
	HFS_DIRTY	= 1,	/* modified, or change/modification time pending */
	HFS_DIRTY_ATIME	= 2	/* only the access time is pending */
} hfs_dirty_t;
380
381 static inline hfs_dirty_t hfs_is_dirty(cnode_t *cp)
382 {
383 if (ISSET(cp->c_flag, C_NOEXISTS))
384 return HFS_NOT_DIRTY;
385
386 if (ISSET(cp->c_flag, C_MODIFIED | C_MINOR_MOD | C_NEEDS_DATEADDED)
387 || cp->c_touch_chgtime || cp->c_touch_modtime) {
388 return HFS_DIRTY;
389 }
390
391 if (cp->c_touch_acctime || hfs_should_save_atime(cp))
392 return HFS_DIRTY_ATIME;
393
394 return HFS_NOT_DIRTY;
395 }
396
397 /* This overlays the FileID portion of NFS file handles, so the member order and sizes (two 32-bit words) are part of an external format. */
398 struct hfsfid {
399 u_int32_t hfsfid_cnid; /* Catalog node ID. */
400 u_int32_t hfsfid_gen; /* Generation number (create date). */
401 };
402
403
404 /* Get new default vnode; 'flags' takes the GNV_* input flags below, '*out_flags' receives the GNV_* output flags, and the vnode travels through '*vpp' */
405 extern int hfs_getnewvnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
406 struct cat_desc *descp, int flags, struct cat_attr *attrp,
407 struct cat_fork *forkp, struct vnode **vpp, int *out_flags);
408
409 /* Input flags for hfs_getnewvnode */
410
411 #define GNV_WANTRSRC 0x01 /* Request the resource fork vnode. */
412 #define GNV_SKIPLOCK 0x02 /* Skip taking the cnode lock (when getting resource fork). */
413 #define GNV_CREATE 0x04 /* The vnode is for a newly created item. */
414 #define GNV_NOCACHE 0x08 /* Delay entering this item in the name cache */
415 #define GNV_USE_VP 0x10 /* Use the vnode provided in *vpp instead of creating a new one */
416
417 /* Output flags for hfs_getnewvnode; these reuse the bit values of the input flags above, so the two sets must never be mixed in one word */
418 #define GNV_CHASH_RENAMED 0x01 /* The cnode was renamed in-flight */
419 #define GNV_CAT_DELETED 0x02 /* The cnode was deleted from the catalog */
420 #define GNV_NEW_CNODE 0x04 /* We are vending out a newly initialized cnode */
421 #define GNV_CAT_ATTRCHANGED 0x08 /* Something in struct cat_attr changed in between cat_lookups */
422
423
424 /* Touch cnode times based on c_touch_xxx flags (the c_touch_acctime/chgtime/modtime members of struct cnode) */
425 extern void hfs_touchtimes(struct hfsmount *, struct cnode *);
/* Date-added accessors; the _from_blob variant takes raw finderinfo bytes plus the file mode instead of a cnode */
426 extern void hfs_write_dateadded (struct cat_attr *cattrp, u_int32_t dateadded);
427 extern u_int32_t hfs_get_dateadded (struct cnode *cp);
428 extern u_int32_t hfs_get_dateadded_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */);
429
430 /* Gen counter methods */
431 extern void hfs_write_gencount(struct cat_attr *cattrp, uint32_t gencount);
432 extern uint32_t hfs_get_gencount(struct cnode *cp);
433 extern uint32_t hfs_incr_gencount (struct cnode *cp);
434 extern uint32_t hfs_get_gencount_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */);
435
436 /* Document id methods */
437 extern uint32_t hfs_get_document_id(struct cnode * /* cp */);
438 extern uint32_t hfs_get_document_id_from_blob(const uint8_t * /* finderinfo */, mode_t /* mode */);
439
440 /* Zero-fill file and push regions out to disk */
441 enum {
442 // Use this flag if you're going to sync later
443 HFS_FILE_DONE_NO_SYNC = 1,
444 };
445 typedef uint32_t hfs_file_done_opts_t; /* holds the HFS_FILE_DONE_* values above (presumably as a bit mask) */
446 extern int hfs_filedone(struct vnode *vp, vfs_context_t context,
447 hfs_file_done_opts_t opts);
448
449 /*
450 * HFS cnode hash functions.
 *
 * The locking-order comment in this header lists the cnode hash lock as
 * always taken last, with no new locks acquired while it is held.
451 */
452 extern void hfs_chashinit(void);
453 extern void hfs_chashinit_finish(struct hfsmount *hfsmp);
454 extern void hfs_delete_chash(struct hfsmount *hfsmp);
455 extern int hfs_chashremove(struct hfsmount *hfsmp, struct cnode *cp);
456 extern void hfs_chash_abort(struct hfsmount *hfsmp, struct cnode *cp);
457 extern void hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct cnode *cp2);
458 extern void hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int flags);
459 extern void hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp);
460
461 extern struct vnode * hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc,
462 int skiplock, int allow_deleted);
463 extern struct cnode * hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp,
464 int wantrsrc, int skiplock, int *out_flags, int *hflags);
/* Parameters are unnamed here; the callback takes a const cnode_t * and a void *, presumably the trailing void * argument passed through (confirm at the definition) */
465 extern int hfs_chash_snoop(struct hfsmount *, ino_t, int, int (*)(const cnode_t *, void *), void *);
466 extern int hfs_valid_cnode(struct hfsmount *hfsmp, struct vnode *dvp, struct componentname *cnp,
467 cnid_t cnid, struct cat_attr *cattr, int *error);
468
469 extern int hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid);
470
471 /*
472 * HFS cnode lock functions.
473 *
474 * HFS Locking Order:
475 *
476 * 1. cnode truncate lock (if needed) -- see below for more on this
477 *
478 * + hfs_vnop_pagein/out handles recursive use of this lock (by
479 * using flag option HFS_LOCK_SKIP_IF_EXCLUSIVE) although there
480 * are issues with this (see #16620278).
481 *
482 * + If locking multiple cnodes then the truncate lock must be taken on
483 * all (in address order), before taking the cnode locks.
484 *
485 * 2. Hot Files stage mutex (grabbed before manipulating individual vnodes/cnodes)
486 *
487 * 3. cnode locks in address order (if needed)
488 *
489 * 4. journal (if needed)
490 *
491 * 5. Hot Files B-Tree lock (not treated as a system file)
492 *
493 * 6. system files (as needed)
494 *
495 * A. Catalog B-tree file
496 * B. Attributes B-tree file
497 * C. Startup file (if there is one)
498 * D. Allocation Bitmap file (always exclusive, supports recursion)
499 * E. Overflow Extents B-tree file (always exclusive, supports recursion)
500 *
501 * 7. hfs mount point (always last)
502 *
503 *
504 * I. HFS cnode hash lock (must not acquire any new locks while holding this lock, always taken last)
505 */
506
507 /*
508 * -- The Truncate Lock --
509 *
510 * The truncate lock is used for a few purposes (more than its name
511 * might suggest). The first thing to note is that the cnode lock
512 * cannot be held whilst issuing any I/O other than metadata changes,
513 * so the truncate lock, in either shared or exclusive form, must
514 * usually be held in these cases. This includes calls to ubc_setsize
515 * where the new size is less than the current size known to the VM
516 * subsystem (for two reasons: a) because reaping pages can block
517 * (e.g. on pages that are busy or being cleaned); b) reaping pages
518 * might require page-in for tasks that have that region mapped
519 * privately). The same applies to other calls into the VM subsystem.
520 *
521 * Here are some (but not necessarily all) cases that the truncate
522 * lock protects for:
523 *
524 * + When reading and writing a file, we hold the truncate lock
525 * shared to ensure that the underlying blocks cannot be deleted
526 * and on systems that use content protection, this also ensures
527 * the keys remain valid (which might be being used by the
528 * underlying layers).
529 *
530 * + We need to protect against the following sequence of events:
531 *
532 * A file is initially size X. A thread issues an append to that
533 * file. Another thread truncates the file and then extends it
534 * to a new size Y. Now the append can be applied at offset X
535 * and then the data is lost when the file is truncated; or it
536 * could be applied after the truncate, i.e. at offset 0; or it
537 * can be applied at offset Y. What we *cannot* do is apply the
538 * append at offset X and for the data to be visible at the end.
539 * (Note that we are free to choose when we apply the append
540 * operation.)
541 *
542 * To solve this, we keep things simple and take the truncate lock
543 * exclusively in order to sequence the append with other size
544 * changes. Therefore any size change must take the truncate lock
545 * exclusively.
546 *
547 * (N.B. we could do better and allow readers to run concurrently
548 * during the append and other size changes.)
549 *
550 * So here are the rules:
551 *
552 * + If you plan to change ff_size, you must take the truncate lock
553 * exclusively, *but* be careful what I/O you do whilst you have
554 * the truncate lock exclusively and try and avoid it if you can:
555 * if the VM subsystem tries to do something with some pages on a
556 * different thread and you try and do some I/O with those same
557 * pages, we will deadlock. (See #16620278.)
558 *
559 * + If you do anything that requires blocks to not be deleted or
560 * encryption keys to remain valid, you must take the truncate lock
561 * shared.
562 *
563 * + And it follows therefore, that if you want to delete blocks or
564 * delete keys, you must take the truncate lock exclusively. Note
565 * that for asynchronous writes, the truncate lock will be dropped
566 * after issuing I/O but before the I/O has completed which means
567 * that before manipulating keys, you *must* issue
568 * vnode_wait_for_writes in addition to holding the truncate lock.
569 *
570 * N.B. ff_size is actually protected by the cnode lock and so you
571 * must hold the cnode lock exclusively to change it and shared to
572 * read it.
573 *
574 */
575
/* How a cnode lock or truncate lock is to be held; passed to the hfs_lock* functions */
576 enum hfs_locktype {
577 HFS_SHARED_LOCK = 1,
578 HFS_EXCLUSIVE_LOCK = 2
579 };
580
581 /* Option flags for cnode and truncate lock functions */
582 enum hfs_lockflags {
583 HFS_LOCK_DEFAULT = 0x0, /* Default flag, no options provided */
584 HFS_LOCK_ALLOW_NOEXISTS = 0x1, /* Allow locking of all cnodes, including cnode marked deleted with no catalog entry */
585 HFS_LOCK_SKIP_IF_EXCLUSIVE = 0x2, /* Skip locking if the current thread already holds the lock exclusive */
586
587 // Used when you do not want to check return from hfs_lock
// (same value as HFS_LOCK_ALLOW_NOEXISTS, not a separate bit)
588 HFS_LOCK_ALWAYS = HFS_LOCK_ALLOW_NOEXISTS,
589 };
/*
 * Sentinel "owner" value that is not a real thread pointer.
 * NOTE(review): presumably stored in c_lockowner / c_truncatelockowner
 * while the corresponding lock is held shared (those fields are documented
 * as meaningful in the exclusive case only); confirm in the lock code.
 *
 * The replacement list is fully parenthesized so the macro behaves as a
 * single operand in any expression (e.g. `sizeof HFS_SHARED_OWNER`).
 */
#define HFS_SHARED_OWNER	((void *)0xffffffff)
591
/* cnode lock functions; hfs_lock_always returns nothing, for callers that do not want to check a result (see HFS_LOCK_ALWAYS) */
592 void hfs_lock_always(cnode_t *cnode, enum hfs_locktype);
593 int hfs_lock(struct cnode *, enum hfs_locktype, enum hfs_lockflags);
594 bool hfs_lock_upgrade(cnode_t *cp);
595 int hfs_lockpair(struct cnode *, struct cnode *, enum hfs_locktype);
596 int hfs_lockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *,
597 enum hfs_locktype, struct cnode **);
598 void hfs_unlock(struct cnode *);
599 void hfs_unlockpair(struct cnode *, struct cnode *);
600 void hfs_unlockfour(struct cnode *, struct cnode *, struct cnode *, struct cnode *);
601
/* Truncate lock functions; see "The Truncate Lock" comment in this header for when each mode is required */
602 void hfs_lock_truncate(struct cnode *, enum hfs_locktype, enum hfs_lockflags);
603 bool hfs_truncate_lock_upgrade(struct cnode *cp);
604 void hfs_truncate_lock_downgrade(struct cnode *cp);
605 void hfs_unlock_truncate(struct cnode *, enum hfs_lockflags);
606 int hfs_try_trunclock(struct cnode *, enum hfs_locktype, enum hfs_lockflags);
607
/* System file locks (step 6 of the locking order); NOTE(review): the int argument presumably selects which system files, confirm at the definition */
608 extern int hfs_systemfile_lock(struct hfsmount *, int, enum hfs_locktype);
609 extern void hfs_systemfile_unlock(struct hfsmount *, int);
610
/* NOTE(review): presumably clears C_MIGHT_BE_DIRTY_FROM_MAPPING in c_flag; confirm at the definition */
611 void hfs_clear_might_be_dirty_flag(cnode_t *cp);
612
613 // cnode must be locked
614 static inline __attribute__((pure))
615 bool hfs_has_rsrc(const cnode_t *cp)
616 {
617 if (cp->c_rsrcfork)
618 return cp->c_rsrcfork->ff_blocks > 0;
619 else
620 return cp->c_datafork && cp->c_blocks > cp->c_datafork->ff_blocks;
621 }
622
623 #endif /* __APPLE_API_PRIVATE */
624 #endif /* KERNEL */
625
626 #endif /* ! _HFS_CNODE_H_ */