]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/shadow.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / dev / vn / shadow.c
1 /*
2 * Copyright (c) 2001-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /*
30 * shadow.c
31 *
32 * Implement copy-on-write shadow map to allow a disk image to be
33 * mounted read-only, yet be writable by transferring writes to a
34 * "shadow" file. Subsequent reads from blocks that have been
35 * written will then go the "shadow" file.
36 *
37 * The map has two parts:
38 * 1) a bit map to track which blocks have been written
39 * 2) a band map to map a "band" within the original file to a corresponding
40 * "band" in the shadow file. Each band has the same size.
41 *
42 * The band map is used to ensure that blocks that are contiguous in the
43 * original file will remain contiguous in the shadow file.
44 *
45 * For debugging purposes, this file can be compiled standalone using:
46 * cc -o shadow shadow.c -DTEST_SHADOW
47 */
48
49 /*
50 * Modification History
51 *
52 * December 21, 2001 Dieter Siegmund (dieter@apple.com)
53 * - initial revision
54 */
55 #include <sys/param.h>
56 #include <sys/types.h>
57 #include <mach/boolean.h>
58
59 #include <string.h>
60
61 #ifdef TEST_SHADOW
62 #include <unistd.h>
63 #include <stdlib.h>
64 #define my_malloc(a) malloc(a)
65 #define my_free(a) free(a)
66 #else /* !TEST_SHADOW */
67 #include <sys/malloc.h>
68 #define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK)
69 #define my_free(a) FREE(a, M_TEMP)
70 #include <libkern/libkern.h>
71 #endif /* TEST_SHADOW */
72
73 #include "shadow.h"
74
75 #define UINT32_ALL_ONES ((uint32_t)(-1))
76 #define USHORT_ALL_ONES ((u_short)(-1))
77 #define UCHAR_ALL_ONES ((u_char)(-1))
78
79 #define my_trunc(value, divisor) ((value) / (divisor) * (divisor))
80
81 /* a band size of 128K can represent a file up to 8GB */
82 #define BAND_SIZE_DEFAULT_POWER_2 17
83 #define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2)
84
85 typedef u_short band_number_t;
86 #define BAND_ZERO ((band_number_t)0)
87 #define BAND_MAX ((band_number_t)65535)
88
89 struct shadow_map {
90 uint32_t blocks_per_band;/* size in blocks */
91 uint32_t block_size;
92 u_char * block_bitmap; /* 1 bit per block; 1=written */
93 band_number_t * bands; /* band map array */
94 uint32_t file_size_blocks; /* size of file in bands */
95 uint32_t shadow_size_bands; /* size of shadow in bands */
96 uint32_t next_band; /* next free band */
97 uint32_t zeroth_band; /* special-case 0th band */
98 };
99
100
101 typedef struct {
102 uint64_t byte;
103 uint32_t bit;
104 } bitmap_offset_t;
105
106 static __inline__ u_char
107 bit(int b)
108 {
109 return (u_char)(1 << b);
110 }
111
112 /*
113 * Function: bits_lower
114 * Purpose:
115 * Return a byte value in which bits numbered lower than 'b' are set.
116 */
117 static __inline__ u_char
118 bits_lower(int b)
119 {
120 return (u_char)(bit(b) - 1);
121 }
122
123 /*
124 * Function: byte_set_bits
125 * Purpose:
126 * Set the given range of bits within a byte.
127 */
128 static __inline__ u_char
129 byte_set_bits(int start, int end)
130 {
131 return (u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end)));
132 }
133
134 static __inline__ bitmap_offset_t
135 bitmap_offset(off_t where)
136 {
137 bitmap_offset_t b;
138
139 b.byte = where / NBBY;
140 b.bit = where % NBBY;
141 return b;
142 }
143
144 /*
145 * Function: bitmap_set
146 *
147 * Purpose:
148 * Set the given range of bits.
149 *
150 * This algorithm tries to set the extents using the biggest
151 * units, using longs, then a short, then a byte, then bits.
152 */
153 static void
154 bitmap_set(u_char * map, off_t start_bit, size_t bit_count)
155 {
156 bitmap_offset_t start;
157 bitmap_offset_t end;
158
159 start = bitmap_offset(start_bit);
160 end = bitmap_offset(start_bit + bit_count);
161 if (start.byte < end.byte) {
162 uint64_t n_bytes;
163
164 if (start.bit) {
165 map[start.byte] |= byte_set_bits(start.bit, NBBY - 1);
166 start.bit = 0;
167 start.byte++;
168 if (start.byte == end.byte) {
169 goto end;
170 }
171 }
172
173 n_bytes = end.byte - start.byte;
174
175 while (n_bytes >= (sizeof(uint32_t))) {
176 *((uint32_t *)(map + start.byte)) = UINT32_ALL_ONES;
177 start.byte += sizeof(uint32_t);
178 n_bytes -= sizeof(uint32_t);
179 }
180 if (n_bytes >= sizeof(u_short)) {
181 *((u_short *)(map + start.byte)) = USHORT_ALL_ONES;
182 start.byte += sizeof(u_short);
183 n_bytes -= sizeof(u_short);
184 }
185 if (n_bytes == 1) {
186 map[start.byte] = UCHAR_ALL_ONES;
187 start.byte++;
188 n_bytes = 0;
189 }
190 }
191
192 end:
193 if (end.bit > start.bit) {
194 map[start.byte] |= byte_set_bits(start.bit, end.bit - 1);
195 }
196
197 return;
198 }
199
200 /*
201 * Function: bitmap_get
202 *
203 * Purpose:
204 * Return the number of bits in the range that are the same e.g.
205 * 11101 returns 3 because the first 3 bits are the same (1's), whereas
206 * 001100 returns 2 because the first 2 bits are the same.
207 * This algorithm tries to count things in as big a chunk as possible,
208 * first aligning to a byte offset, then trying to count longs, a short,
209 * a byte, then any remaining bits to find the bit that is different.
210 */
211
212 static uint32_t
213 bitmap_get(u_char * map, off_t start_bit, size_t bit_count,
214 boolean_t * ret_is_set)
215 {
216 uint32_t count;
217 int i;
218 boolean_t is_set;
219 bitmap_offset_t start;
220 bitmap_offset_t end;
221
222 start = bitmap_offset(start_bit);
223 end = bitmap_offset(start_bit + bit_count);
224
225 is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE;
226 count = 0;
227
228 if (start.byte < end.byte) {
229 uint64_t n_bytes;
230
231 if (start.bit) { /* try to align to a byte */
232 for (i = start.bit; i < NBBY; i++) {
233 boolean_t this_is_set;
234
235 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
236 if (this_is_set != is_set) {
237 goto done; /* found bit that was different, we're done */
238 }
239 count++;
240 }
241 start.bit = 0; /* made it to the next byte */
242 start.byte++;
243 if (start.byte == end.byte) {
244 goto end; /* no more bytes, check for any leftover bits */
245 }
246 }
247 /* calculate how many bytes are left in the range */
248 n_bytes = end.byte - start.byte;
249
250 /* check for 4 bytes of the same bits */
251 while (n_bytes >= sizeof(uint32_t)) {
252 uint32_t * valPtr = (uint32_t *)(map + start.byte);
253 if ((is_set && *valPtr == UINT32_ALL_ONES)
254 || (!is_set && *valPtr == 0)) {
255 count += sizeof(*valPtr) * NBBY;
256 start.byte += sizeof(*valPtr);
257 n_bytes -= sizeof(*valPtr);
258 } else {
259 break; /* bits differ */
260 }
261 }
262 /* check for 2 bytes of the same bits */
263 if (n_bytes >= sizeof(u_short)) {
264 u_short * valPtr = (u_short *)(map + start.byte);
265
266 if ((is_set && *valPtr == USHORT_ALL_ONES)
267 || (!is_set && (*valPtr == 0))) {
268 count += sizeof(*valPtr) * NBBY;
269 start.byte += sizeof(*valPtr);
270 n_bytes -= sizeof(*valPtr);
271 }
272 }
273
274 /* check for 1 byte of the same bits */
275 if (n_bytes) {
276 if ((is_set && map[start.byte] == UCHAR_ALL_ONES)
277 || (!is_set && map[start.byte] == 0)) {
278 count += NBBY;
279 start.byte++;
280 n_bytes--;
281 }
282 /* we found bits that were different, find the first one */
283 if (n_bytes) {
284 for (i = 0; i < NBBY; i++) {
285 boolean_t this_is_set;
286
287 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
288 if (this_is_set != is_set) {
289 break;
290 }
291 count++;
292 }
293 goto done;
294 }
295 }
296 }
297
298 end:
299 for (i = start.bit; i < (int)end.bit; i++) {
300 boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
301
302 if (this_is_set != is_set) {
303 break;
304 }
305 count++;
306 }
307
308 done:
309 *ret_is_set = is_set;
310 return count;
311 }
312
313 static __inline__ band_number_t
314 shadow_map_block_to_band(shadow_map_t * map, off_t block)
315 {
316 return (band_number_t)(block / map->blocks_per_band);
317 }
318
319 /*
320 * Function: shadow_map_mapped_band
321 * Purpose:
322 * Return the mapped band for the given band.
323 * If map_it is FALSE, and the band is not mapped, return FALSE.
324 * If map_it is TRUE, then this function will always return TRUE.
325 */
326 static boolean_t
327 shadow_map_mapped_band(shadow_map_t * map, band_number_t band,
328 boolean_t map_it, band_number_t * mapped_band)
329 {
330 boolean_t is_mapped = FALSE;
331
332 if (band == map->zeroth_band) {
333 *mapped_band = BAND_ZERO;
334 is_mapped = TRUE;
335 } else {
336 *mapped_band = map->bands[band];
337 if (*mapped_band == BAND_ZERO) {
338 if (map_it) {
339 /* grow the file */
340 if (map->next_band == 0) {
341 /* remember the zero'th band */
342 map->zeroth_band = band;
343 }
344 *mapped_band = map->bands[band] = (band_number_t)map->next_band++;
345 is_mapped = TRUE;
346 }
347 } else {
348 is_mapped = TRUE;
349 }
350 }
351 return is_mapped;
352 }
353
354 /*
355 * Function: shadow_map_contiguous
356 *
357 * Purpose:
358 * Return the first offset within the range position..(position + count)
359 * that is not a contiguous mapped band.
360 *
361 * If called with is_write = TRUE, this function will map bands as it goes.
362 */
363 static off_t
364 shadow_map_contiguous(shadow_map_t * map, off_t start_block,
365 size_t num_blocks, boolean_t is_write)
366 {
367 band_number_t band = shadow_map_block_to_band(map, start_block);
368 off_t end_block = start_block + num_blocks;
369 boolean_t is_mapped;
370 band_number_t mapped_band;
371 off_t ret_end_block = end_block;
372 off_t p;
373
374 is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band);
375 if (is_write == FALSE && is_mapped == FALSE) {
376 static int happened = 0;
377 /* this can't happen */
378 if (happened == 0) {
379 printf("shadow_map_contiguous: this can't happen!\n");
380 happened = 1;
381 }
382 return start_block;
383 }
384 for (p = my_trunc(start_block + map->blocks_per_band,
385 map->blocks_per_band);
386 p < end_block; p += map->blocks_per_band) {
387 band_number_t next_mapped_band;
388
389 band++;
390 is_mapped = shadow_map_mapped_band(map, band, is_write,
391 &next_mapped_band);
392 if (is_write == FALSE && is_mapped == FALSE) {
393 return p;
394 }
395 if ((mapped_band + 1) != next_mapped_band) {
396 /* not contiguous */
397 ret_end_block = p;
398 break;
399 }
400 mapped_band = next_mapped_band;
401 }
402 return ret_end_block;
403 }
404
405
406 /*
407 * Function: block_bitmap_size
408 * Purpose:
409 * The number of bytes required in a block bitmap to represent a file of size
410 * file_size.
411 *
412 * The bytes required is the number of blocks in the file,
413 * divided by the number of bits per byte.
414 * Note:
415 * An 8GB file requires (assuming 512 byte block):
416 * 2^33 / 2^9 / 2^3 = 2^21 = 2MB
417 * of bitmap space. This is a non-trival amount of memory,
418 * particularly since most of the bits will be zero.
419 * A sparse bitmap would really help in this case.
420 */
421 static __inline__ size_t
422 block_bitmap_size(off_t file_size, uint32_t block_size)
423 {
424 off_t blocks = howmany(file_size, block_size);
425 return howmany(blocks, NBBY);
426 }
427
428 /*
429 * Function: shadow_map_read
430 *
431 * Purpose:
432 * Calculate the block offset within the shadow to read, and the number
433 * blocks to read. The input values (block_offset, block_count) refer
434 * to the original file.
435 *
436 * The output values (*incr_block_offset, *incr_block_count) refer to the
437 * shadow file if the return value is TRUE. They refer to the original
438 * file if the return value is FALSE.
439 *
440 * Blocks within a band may or may not have been written, in addition,
441 * Bands are not necessarily contiguous, therefore:
442 * *incr_block_count <= block_count
443 * The caller must be prepared to call this function interatively
444 * to complete the whole i/o.
445 * Returns:
446 * TRUE if the shadow file should be read, FALSE if the original file
447 * should be read.
448 */
449 boolean_t
450 shadow_map_read(shadow_map_t * map, off_t block_offset, size_t block_count,
451 off_t * incr_block_offset, size_t * incr_block_count)
452 {
453 boolean_t written = FALSE;
454 uint32_t n_blocks;
455
456 if (block_offset >= map->file_size_blocks
457 || (block_offset + block_count) > map->file_size_blocks) {
458 printf("shadow_map_read: request (%lld, %lu) exceeds file size %d\n",
459 block_offset, block_count, map->file_size_blocks);
460 *incr_block_count = 0;
461 }
462 n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count,
463 &written);
464 if (written == FALSE) {
465 *incr_block_count = n_blocks;
466 *incr_block_offset = block_offset;
467 } else { /* start has been written, and therefore mapped */
468 band_number_t mapped_band;
469 off_t band_limit;
470
471 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
472 *incr_block_offset = mapped_band * map->blocks_per_band
473 + (block_offset % map->blocks_per_band);
474 band_limit
475 = shadow_map_contiguous(map, block_offset, block_count, FALSE);
476 *incr_block_count = band_limit - block_offset;
477 if (*incr_block_count > n_blocks) {
478 *incr_block_count = n_blocks;
479 }
480 }
481 return written;
482 }
483
484 /*
485 * Function: shadow_map_write
486 *
487 * Purpose:
488 * Calculate the block offset within the shadow to write, and the number
489 * blocks to write. The input values (block_offset, block_count) refer
490 * to the original file. The output values
491 * (*incr_block_offset, *incr_block_count) refer to the shadow file.
492 *
493 * Bands are not necessarily contiguous, therefore:
494 * *incr_block_count <= block_count
495 * The caller must be prepared to call this function interatively
496 * to complete the whole i/o.
497 * Returns:
498 * TRUE if the shadow file was grown, FALSE otherwise.
499 */
500 boolean_t
501 shadow_map_write(shadow_map_t * map, off_t block_offset,
502 size_t block_count, off_t * incr_block_offset,
503 size_t * incr_block_count)
504 {
505 off_t band_limit;
506 band_number_t mapped_band;
507 boolean_t shadow_grew = FALSE;
508
509 if (block_offset >= map->file_size_blocks
510 || (block_offset + block_count) > map->file_size_blocks) {
511 printf("shadow_map_write: request (%lld, %zu) exceeds file size %d\n",
512 block_offset, block_count, map->file_size_blocks);
513 *incr_block_count = 0;
514 }
515
516 band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE);
517 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
518 *incr_block_offset = mapped_band * map->blocks_per_band
519 + (block_offset % map->blocks_per_band);
520 *incr_block_count = band_limit - block_offset;
521
522 /* mark these blocks as written */
523 bitmap_set(map->block_bitmap, block_offset, *incr_block_count);
524
525 if (map->next_band > map->shadow_size_bands) {
526 map->shadow_size_bands = map->next_band;
527 shadow_grew = TRUE;
528 }
529 return shadow_grew;
530 }
531
532 boolean_t
533 shadow_map_is_written(shadow_map_t * map, off_t block_offset)
534 {
535 bitmap_offset_t b;
536
537 b = bitmap_offset(block_offset);
538 return (map->block_bitmap[b.byte] & bit(b.bit)) ? TRUE : FALSE;
539 }
540
541 /*
542 * Function: shadow_map_shadow_size
543 *
544 * Purpose:
545 * To return the size of the shadow file in blocks.
546 */
547 uint32_t
548 shadow_map_shadow_size(shadow_map_t * map)
549 {
550 return map->shadow_size_bands * map->blocks_per_band;
551 }
552
553 /*
554 * Function: shadow_map_create
555 *
556 * Purpose:
557 * Allocate the dynamic data for keeping track of the shadow dirty blocks
558 * and the band mapping table.
559 * Returns:
560 * NULL if an error occurred.
561 */
562 shadow_map_t *
563 shadow_map_create(off_t file_size, off_t shadow_size,
564 uint32_t band_size, uint32_t block_size)
565 {
566 void * block_bitmap = NULL;
567 size_t bitmap_size;
568 band_number_t * bands = NULL;
569 shadow_map_t * map;
570 uint32_t n_bands = 0;
571
572 if (band_size == 0) {
573 band_size = BAND_SIZE_DEFAULT;
574 }
575
576 off_t many = howmany(file_size, band_size);
577 if (many > (BAND_MAX + 1)) {
578 printf("file is too big: %lld > %d\n",
579 many, BAND_MAX);
580 goto failure;
581 }
582 n_bands = (uint32_t)many;
583
584 /* create a block bitmap, one bit per block */
585 bitmap_size = block_bitmap_size(file_size, block_size);
586 block_bitmap = my_malloc(bitmap_size);
587 if (block_bitmap == NULL) {
588 printf("failed to allocate bitmap\n");
589 goto failure;
590 }
591 bzero(block_bitmap, bitmap_size);
592
593 /* get the band map */
594 bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t));
595 if (bands == NULL) {
596 printf("failed to allocate bands\n");
597 goto failure;
598 }
599 bzero(bands, n_bands * sizeof(band_number_t));
600
601 map = my_malloc(sizeof(*map));
602 if (map == NULL) {
603 printf("failed to allocate map\n");
604 goto failure;
605 }
606 map->blocks_per_band = band_size / block_size;
607 map->block_bitmap = block_bitmap;
608 map->bands = bands;
609 map->file_size_blocks = n_bands * map->blocks_per_band;
610 map->next_band = 0;
611 map->zeroth_band = -1;
612 map->shadow_size_bands = (uint32_t)howmany(shadow_size, band_size);
613 map->block_size = block_size;
614 return map;
615
616 failure:
617 if (block_bitmap) {
618 my_free(block_bitmap);
619 }
620 if (bands) {
621 my_free(bands);
622 }
623 return NULL;
624 }
625
626 /*
627 * Function: shadow_map_free
628 * Purpose:
629 * Frees the data structure to deal with the shadow map.
630 */
631 void
632 shadow_map_free(shadow_map_t * map)
633 {
634 if (map->block_bitmap) {
635 my_free(map->block_bitmap);
636 }
637 if (map->bands) {
638 my_free(map->bands);
639 }
640 map->block_bitmap = NULL;
641 map->bands = NULL;
642 my_free(map);
643 return;
644 }
645
646 #ifdef TEST_SHADOW
647 #define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512)
648
649 enum {
650 ReadRequest,
651 WriteRequest,
652 };
653
654 typedef struct {
655 int type;
656 uint32_t offset;
657 uint32_t count;
658 } block_request_t;
659
660 int
661 main()
662 {
663 shadow_map_t * map;
664 int i;
665 block_request_t requests[] = {
666 { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 },
667 { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 },
668 { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3},
669 { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 },
670 { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1),
671 BAND_SIZE_BLOCKS * 2},
672 { 0, 0 },
673 };
674
675 map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512);
676 if (map == NULL) {
677 printf("shadow_map_create failed\n");
678 exit(1);
679 }
680 for (i = 0; TRUE; i++) {
681 uint32_t offset;
682 uint32_t resid;
683 boolean_t shadow_grew;
684 boolean_t read_shadow;
685
686 if (requests[i].count == 0) {
687 break;
688 }
689 offset = requests[i].offset;
690 resid = requests[i].count;
691 printf("\n%s REQUEST (%ld, %ld)\n",
692 requests[i].type == WriteRequest ? "WRITE" : "READ",
693 offset, resid);
694 switch (requests[i].type) {
695 case WriteRequest:
696 while (resid > 0) {
697 uint32_t this_offset;
698 uint32_t this_count;
699
700 shadow_grew = shadow_map_write(map, offset,
701 resid,
702 &this_offset,
703 &this_count);
704 printf("\t(%ld, %ld) => (%ld, %ld)",
705 offset, resid, this_offset, this_count);
706 resid -= this_count;
707 offset += this_count;
708 if (shadow_grew) {
709 printf(" shadow grew to %ld", shadow_map_shadow_size(map));
710 }
711 printf("\n");
712 }
713 break;
714 case ReadRequest:
715 while (resid > 0) {
716 uint32_t this_offset;
717 uint32_t this_count;
718
719 read_shadow = shadow_map_read(map, offset,
720 resid,
721 &this_offset,
722 &this_count);
723 printf("\t(%ld, %ld) => (%ld, %ld)%s\n",
724 offset, resid, this_offset, this_count,
725 read_shadow ? " from shadow" : "");
726 if (this_count == 0) {
727 printf("this_count is 0, aborting\n");
728 break;
729 }
730 resid -= this_count;
731 offset += this_count;
732 }
733 break;
734 default:
735 break;
736 }
737 }
738 if (map) {
739 shadow_map_free(map);
740 }
741 exit(0);
742 return 0;
743 }
744 #endif