]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/vn/shadow.c
20b78be748f5af50873ca6939ecf02d80df9edee
[apple/xnu.git] / bsd / dev / vn / shadow.c
1
2 /*
3 * Copyright (c) 2001 Apple Computer, Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 *
9 * This file contains Original Code and/or Modifications of Original Code
10 * as defined in and that are subject to the Apple Public Source License
11 * Version 2.0 (the 'License'). You may not use this file except in
12 * compliance with the License. Please obtain a copy of the License at
13 * http://www.opensource.apple.com/apsl/ and read it before using this
14 * file.
15 *
16 * The Original Code and all software distributed under the License are
17 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
18 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
19 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
21 * Please see the License for the specific language governing rights and
22 * limitations under the License.
23 *
24 * @APPLE_LICENSE_HEADER_END@
25 */
26
27 /*
28 * shadow.c
29 *
30 * Implement copy-on-write shadow map to allow a disk image to be
31 * mounted read-only, yet be writable by transferring writes to a
32 * "shadow" file. Subsequent reads from blocks that have been
33 * written will then go the "shadow" file.
34 *
35 * The map has two parts:
36 * 1) a bit map to track which blocks have been written
37 * 2) a band map to map a "band" within the original file to a corresponding
38 * "band" in the shadow file. Each band has the same size.
39 *
40 * The band map is used to ensure that blocks that are contiguous in the
41 * original file will remain contiguous in the shadow file.
42 *
43 * For debugging purposes, this file can be compiled standalone using:
44 * cc -o shadow shadow.c -DTEST_SHADOW
45 */
46
47 /*
48 * Modification History
49 *
50 * December 21, 2001 Dieter Siegmund (dieter@apple.com)
51 * - initial revision
52 */
53 #include <sys/param.h>
54 #include <sys/types.h>
55 #include <mach/boolean.h>
56
57 #include <string.h>
58
59 #ifdef TEST_SHADOW
60 #include <unistd.h>
61 #include <stdlib.h>
62 #define my_malloc(a) malloc(a)
63 #define my_free(a) free(a)
64 #else TEST_SHADOW
65 #include <sys/malloc.h>
66 #define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK)
67 #define my_free(a) FREE(a, M_TEMP)
68 #endif TEST_SHADOW
69
70 #include "shadow.h"
71
72 #define ULONG_ALL_ONES ((u_long)(-1))
73 #define USHORT_ALL_ONES ((u_short)(-1))
74 #define UCHAR_ALL_ONES ((u_char)(-1))
75
76 #define my_trunc(value, divisor) ((value) / (divisor) * (divisor))
77
78 /* a band size of 128K can represent a file up to 8GB */
79 #define BAND_SIZE_DEFAULT_POWER_2 17
80 #define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2)
81
82 typedef u_short band_number_t;
83 #define BAND_ZERO ((band_number_t)0)
84 #define BAND_MAX ((band_number_t)65535)
85
86 struct shadow_map {
87 u_long blocks_per_band;/* size in blocks */
88 u_long block_size;
89 u_char * block_bitmap; /* 1 bit per block; 1=written */
90 band_number_t * bands; /* band map array */
91 u_long file_size_blocks; /* size of file in bands */
92 u_long shadow_size_bands; /* size of shadow in bands */
93 u_long next_band; /* next free band */
94 u_long zeroth_band; /* special-case 0th band */
95 };
96
97
98 typedef struct {
99 u_long byte;
100 u_long bit;
101 } bitmap_offset_t;
102
103 static __inline__ u_char
104 bit(int b)
105 {
106 return ((u_char)(1 << b));
107 }
108
109 /*
110 * Function: bits_lower
111 * Purpose:
112 * Return a byte value in which bits numbered lower than 'b' are set.
113 */
114 static __inline__ u_char
115 bits_lower(int b)
116 {
117 return ((u_char)(bit(b) - 1));
118 }
119
120 /*
121 * Function: byte_set_bits
122 * Purpose:
123 * Set the given range of bits within a byte.
124 */
125 static __inline__ u_char
126 byte_set_bits(int start, int end)
127 {
128 return ((u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end))));
129 }
130
131 static __inline__ bitmap_offset_t
132 bitmap_offset(off_t where)
133 {
134 bitmap_offset_t b;
135
136 b.byte = where / NBBY;
137 b.bit = where % NBBY;
138 return (b);
139 }
140
141 /*
142 * Function: bitmap_set
143 *
144 * Purpose:
145 * Set the given range of bits.
146 *
147 * This algorithm tries to set the extents using the biggest
148 * units, using longs, then a short, then a byte, then bits.
149 */
150 static void
151 bitmap_set(u_char * map, u_long start_bit, u_long bit_count)
152 {
153 bitmap_offset_t start;
154 bitmap_offset_t end;
155
156 start = bitmap_offset(start_bit);
157 end = bitmap_offset(start_bit + bit_count);
158 if (start.byte < end.byte) {
159 u_long n_bytes;
160
161 if (start.bit) {
162 map[start.byte] |= byte_set_bits(start.bit, NBBY - 1);
163 start.bit = 0;
164 start.byte++;
165 if (start.byte == end.byte)
166 goto end;
167 }
168
169 n_bytes = end.byte - start.byte;
170
171 while (n_bytes >= (sizeof(u_long))) {
172 *((u_long *)(map + start.byte)) = ULONG_ALL_ONES;
173 start.byte += sizeof(u_long);
174 n_bytes -= sizeof(u_long);
175 }
176 if (n_bytes >= sizeof(u_short)) {
177 *((u_short *)(map + start.byte)) = USHORT_ALL_ONES;
178 start.byte += sizeof(u_short);
179 n_bytes -= sizeof(u_short);
180 }
181 if (n_bytes == 1) {
182 map[start.byte] = UCHAR_ALL_ONES;
183 start.byte++;
184 n_bytes = 0;
185 }
186 }
187
188 end:
189 if (end.bit > start.bit) {
190 map[start.byte] |= byte_set_bits(start.bit, end.bit - 1);
191 }
192
193 return;
194 }
195
196 /*
197 * Function: bitmap_get
198 *
199 * Purpose:
200 * Return the number of bits in the range that are the same e.g.
201 * 11101 returns 3 because the first 3 bits are the same (1's), whereas
202 * 001100 returns 2 because the first 2 bits are the same.
203 * This algorithm tries to count things in as big a chunk as possible,
204 * first aligning to a byte offset, then trying to count longs, a short,
205 * a byte, then any remaining bits to find the bit that is different.
206 */
207
208 static u_long
209 bitmap_get(u_char * map, u_long start_bit, u_long bit_count,
210 boolean_t * ret_is_set)
211 {
212 u_long count;
213 int i;
214 boolean_t is_set;
215 bitmap_offset_t start;
216 bitmap_offset_t end;
217
218 start = bitmap_offset(start_bit);
219 end = bitmap_offset(start_bit + bit_count);
220
221 is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE;
222 count = 0;
223
224 if (start.byte < end.byte) {
225 u_long n_bytes;
226
227 if (start.bit) { /* try to align to a byte */
228 for (i = start.bit; i < NBBY; i++) {
229 boolean_t this_is_set;
230
231 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
232 if (this_is_set != is_set) {
233 goto done; /* found bit that was different, we're done */
234 }
235 count++;
236 }
237 start.bit = 0; /* made it to the next byte */
238 start.byte++;
239 if (start.byte == end.byte)
240 goto end; /* no more bytes, check for any leftover bits */
241 }
242 /* calculate how many bytes are left in the range */
243 n_bytes = end.byte - start.byte;
244
245 /* check for 4 bytes of the same bits */
246 while (n_bytes >= sizeof(u_long)) {
247 u_long * valPtr = (u_long *)(map + start.byte);
248 if ((is_set && *valPtr == ULONG_ALL_ONES)
249 || (!is_set && *valPtr == 0)) {
250 count += sizeof(*valPtr) * NBBY;
251 start.byte += sizeof(*valPtr);
252 n_bytes -= sizeof(*valPtr);
253 }
254 else
255 break; /* bits differ */
256
257 }
258 /* check for 2 bytes of the same bits */
259 if (n_bytes >= sizeof(u_short)) {
260 u_short * valPtr = (u_short *)(map + start.byte);
261
262 if ((is_set && *valPtr == USHORT_ALL_ONES)
263 || (!is_set && (*valPtr == 0))) {
264 count += sizeof(*valPtr) * NBBY;
265 start.byte += sizeof(*valPtr);
266 n_bytes -= sizeof(*valPtr);
267 }
268 }
269
270 /* check for 1 byte of the same bits */
271 if (n_bytes) {
272 if ((is_set && map[start.byte] == UCHAR_ALL_ONES)
273 || (!is_set && map[start.byte] == 0)) {
274 count += NBBY;
275 start.byte++;
276 n_bytes--;
277 }
278 /* we found bits that were different, find the first one */
279 if (n_bytes) {
280 for (i = 0; i < NBBY; i++) {
281 boolean_t this_is_set;
282
283 this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
284 if (this_is_set != is_set) {
285 break;
286 }
287 count++;
288 }
289 goto done;
290 }
291 }
292 }
293
294 end:
295 for (i = start.bit; i < end.bit; i++) {
296 boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE;
297
298 if (this_is_set != is_set) {
299 break;
300 }
301 count++;
302 }
303
304 done:
305 *ret_is_set = is_set;
306 return (count);
307 }
308
309 static __inline__ band_number_t
310 shadow_map_block_to_band(shadow_map_t * map, unsigned long block)
311 {
312 return (block / map->blocks_per_band);
313 }
314
315 /*
316 * Function: shadow_map_mapped_band
317 * Purpose:
318 * Return the mapped band for the given band.
319 * If map_it is FALSE, and the band is not mapped, return FALSE.
320 * If map_it is TRUE, then this function will always return TRUE.
321 */
322 static boolean_t
323 shadow_map_mapped_band(shadow_map_t * map, band_number_t band,
324 boolean_t map_it, band_number_t * mapped_band)
325 {
326 boolean_t is_mapped = FALSE;
327
328 if (band == map->zeroth_band) {
329 *mapped_band = BAND_ZERO;
330 is_mapped = TRUE;
331 }
332 else {
333 *mapped_band = map->bands[band];
334 if (*mapped_band == BAND_ZERO) {
335 if (map_it) {
336 /* grow the file */
337 if (map->next_band == 0) {
338 /* remember the zero'th band */
339 map->zeroth_band = band;
340 }
341 *mapped_band = map->bands[band] = map->next_band++;
342 is_mapped = TRUE;
343 }
344 }
345 else {
346 is_mapped = TRUE;
347 }
348 }
349 return (is_mapped);
350 }
351
352 /*
353 * Function: shadow_map_contiguous
354 *
355 * Purpose:
356 * Return the first offset within the range position..(position + count)
357 * that is not a contiguous mapped band.
358 *
359 * If called with is_write = TRUE, this function will map bands as it goes.
360 */
361 static u_long
362 shadow_map_contiguous(shadow_map_t * map, u_long start_block,
363 u_long num_blocks, boolean_t is_write)
364 {
365 band_number_t band = shadow_map_block_to_band(map, start_block);
366 u_long end_block = start_block + num_blocks;
367 boolean_t is_mapped;
368 band_number_t mapped_band;
369 u_long ret_end_block = end_block;
370 u_long p;
371
372 is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band);
373 if (is_write == FALSE && is_mapped == FALSE) {
374 static int happened = 0;
375 /* this can't happen */
376 if (happened == 0) {
377 printf("shadow_map_contiguous: this can't happen!\n");
378 happened = 1;
379 }
380 return (start_block);
381 }
382 for (p = my_trunc(start_block + map->blocks_per_band,
383 map->blocks_per_band);
384 p < end_block; p += map->blocks_per_band) {
385 band_number_t next_mapped_band;
386
387 band++;
388 is_mapped = shadow_map_mapped_band(map, band, is_write,
389 &next_mapped_band);
390 if (is_write == FALSE && is_mapped == FALSE) {
391 return (p);
392 }
393 if ((mapped_band + 1) != next_mapped_band) {
394 /* not contiguous */
395 ret_end_block = p;
396 break;
397 }
398 mapped_band = next_mapped_band;
399 }
400 return (ret_end_block);
401 }
402
403
404 /*
405 * Function: block_bitmap_size
406 * Purpose:
407 * The number of bytes required in a block bitmap to represent a file of size
408 * file_size.
409 *
410 * The bytes required is the number of blocks in the file,
411 * divided by the number of bits per byte.
412 * Note:
413 * An 8GB file requires (assuming 512 byte block):
414 * 2^33 / 2^9 / 2^3 = 2^21 = 2MB
415 * of bitmap space. This is a non-trival amount of memory,
416 * particularly since most of the bits will be zero.
417 * A sparse bitmap would really help in this case.
418 */
419 static __inline__ u_long
420 block_bitmap_size(off_t file_size, u_long block_size)
421 {
422 off_t blocks = howmany(file_size, block_size);
423 return (howmany(blocks, NBBY));
424 }
425
426 /*
427 * Function: shadow_map_read
428 *
429 * Purpose:
430 * Calculate the block offset within the shadow to read, and the number
431 * blocks to read. The input values (block_offset, block_count) refer
432 * to the original file.
433 *
434 * The output values (*incr_block_offset, *incr_block_count) refer to the
435 * shadow file if the return value is TRUE. They refer to the original
436 * file if the return value is FALSE.
437
438 * Blocks within a band may or may not have been written, in addition,
439 * Bands are not necessarily contiguous, therefore:
440 * *incr_block_count <= block_count
441 * The caller must be prepared to call this function interatively
442 * to complete the whole i/o.
443 * Returns:
444 * TRUE if the shadow file should be read, FALSE if the original file
445 * should be read.
446 */
447 boolean_t
448 shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count,
449 u_long * incr_block_offset, u_long * incr_block_count)
450 {
451 boolean_t written = FALSE;
452 u_long n_blocks;
453
454 if (block_offset >= map->file_size_blocks
455 || (block_offset + block_count) > map->file_size_blocks) {
456 printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n",
457 block_offset, block_count, map->file_size_blocks);
458 *incr_block_count = 0;
459 }
460 n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count,
461 &written);
462 if (written == FALSE) {
463 *incr_block_count = n_blocks;
464 *incr_block_offset = block_offset;
465 }
466 else { /* start has been written, and therefore mapped */
467 band_number_t mapped_band;
468 u_long band_limit;
469
470 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
471 *incr_block_offset = mapped_band * map->blocks_per_band
472 + (block_offset % map->blocks_per_band);
473 band_limit
474 = shadow_map_contiguous(map, block_offset, block_count, FALSE);
475 *incr_block_count = band_limit - block_offset;
476 if (*incr_block_count > n_blocks) {
477 *incr_block_count = n_blocks;
478 }
479 }
480 return (written);
481 }
482
483 /*
484 * Function: shadow_map_write
485 *
486 * Purpose:
487 * Calculate the block offset within the shadow to write, and the number
488 * blocks to write. The input values (block_offset, block_count) refer
489 * to the original file. The output values
490 * (*incr_block_offset, *incr_block_count) refer to the shadow file.
491 *
492 * Bands are not necessarily contiguous, therefore:
493 * *incr_block_count <= block_count
494 * The caller must be prepared to call this function interatively
495 * to complete the whole i/o.
496 * Returns:
497 * TRUE if the shadow file was grown, FALSE otherwise.
498 */
499 boolean_t
500 shadow_map_write(shadow_map_t * map, u_long block_offset,
501 u_long block_count, u_long * incr_block_offset,
502 u_long * incr_block_count)
503 {
504 u_long band_limit;
505 band_number_t mapped_band;
506 boolean_t shadow_grew = FALSE;
507
508 if (block_offset >= map->file_size_blocks
509 || (block_offset + block_count) > map->file_size_blocks) {
510 printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n",
511 block_offset, block_count, map->file_size_blocks);
512 *incr_block_count = 0;
513 }
514
515 band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE);
516 mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)];
517 *incr_block_offset = mapped_band * map->blocks_per_band
518 + (block_offset % map->blocks_per_band);
519 *incr_block_count = band_limit - block_offset;
520
521 /* mark these blocks as written */
522 bitmap_set(map->block_bitmap, block_offset, *incr_block_count);
523
524 if (map->next_band > map->shadow_size_bands) {
525 map->shadow_size_bands = map->next_band;
526 shadow_grew = TRUE;
527 }
528 return (shadow_grew);
529 }
530
531 /*
532 * Function: shadow_map_shadow_size
533 *
534 * Purpose:
535 * To return the size of the shadow file in blocks.
536 */
537 u_long
538 shadow_map_shadow_size(shadow_map_t * map)
539 {
540 return (map->shadow_size_bands * map->blocks_per_band);
541 }
542
543 /*
544 * Function: shadow_map_create
545 *
546 * Purpose:
547 * Allocate the dynamic data for keeping track of the shadow dirty blocks
548 * and the band mapping table.
549 * Returns:
550 * NULL if an error occurred.
551 */
552 shadow_map_t *
553 shadow_map_create(off_t file_size, off_t shadow_size,
554 u_long band_size, u_long block_size)
555 {
556 void * block_bitmap = 0;
557 u_long bitmap_size;
558 band_number_t * bands = 0;
559 shadow_map_t * map;
560 u_long n_bands = 0;
561
562 if (band_size == 0) {
563 band_size = BAND_SIZE_DEFAULT;
564 }
565
566 n_bands = howmany(file_size, band_size);
567 if (n_bands > (BAND_MAX + 1)) {
568 printf("file is too big: %ld > %d\n",
569 n_bands, BAND_MAX);
570 goto failure;
571 }
572
573 /* create a block bitmap, one bit per block */
574 bitmap_size = block_bitmap_size(file_size, block_size);
575 block_bitmap = my_malloc(bitmap_size);
576 if (block_bitmap == NULL) {
577 printf("failed to allocate bitmap\n");
578 goto failure;
579 }
580 bzero(block_bitmap, bitmap_size);
581
582 /* get the band map */
583 bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t));
584 if (bands == NULL) {
585 printf("failed to allocate bands\n");
586 goto failure;
587 }
588 bzero(bands, n_bands * sizeof(band_number_t));
589
590 map = my_malloc(sizeof(*map));
591 if (map == NULL) {
592 printf("failed to allocate map\n");
593 goto failure;
594 }
595 map->blocks_per_band = band_size / block_size;
596 map->block_bitmap = block_bitmap;
597 map->bands = bands;
598 map->file_size_blocks = n_bands * map->blocks_per_band;
599 map->next_band = 0;
600 map->zeroth_band = -1;
601 map->shadow_size_bands = howmany(shadow_size, band_size);
602 map->block_size = block_size;
603 return (map);
604
605 failure:
606 if (block_bitmap)
607 my_free(block_bitmap);
608 if (bands)
609 my_free(bands);
610 return (NULL);
611 }
612
613 /*
614 * Function: shadow_map_free
615 * Purpose:
616 * Frees the data structure to deal with the shadow map.
617 */
618 void
619 shadow_map_free(shadow_map_t * map)
620 {
621 if (map->block_bitmap)
622 my_free(map->block_bitmap);
623 if (map->bands)
624 my_free(map->bands);
625 map->block_bitmap = 0;
626 map->bands = 0;
627 my_free(map);
628 return;
629 }
630
631 #ifdef TEST_SHADOW
632 #define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512)
633
634 enum {
635 ReadRequest,
636 WriteRequest,
637 };
638
639 typedef struct {
640 int type;
641 u_long offset;
642 u_long count;
643 } block_request_t;
644
645 int
646 main()
647 {
648 shadow_map_t * map;
649 int i;
650 block_request_t requests[] = {
651 { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 },
652 { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 },
653 { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3},
654 { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 },
655 { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1),
656 BAND_SIZE_BLOCKS * 2},
657 { 0, 0 },
658 };
659
660 map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512);
661 if (map == NULL) {
662 printf("shadow_map_create failed\n");
663 exit(1);
664 }
665 for (i = 0; TRUE; i++) {
666 u_long offset;
667 u_long resid;
668 boolean_t shadow_grew;
669 boolean_t read_shadow;
670
671 if (requests[i].count == 0) {
672 break;
673 }
674 offset = requests[i].offset;
675 resid = requests[i].count;
676 printf("\n%s REQUEST (%ld, %ld)\n",
677 requests[i].type == WriteRequest ? "WRITE" : "READ",
678 offset, resid);
679 switch (requests[i].type) {
680 case WriteRequest:
681 while (resid > 0) {
682 u_long this_offset;
683 u_long this_count;
684
685 shadow_grew = shadow_map_write(map, offset,
686 resid,
687 &this_offset,
688 &this_count);
689 printf("\t(%ld, %ld) => (%ld, %ld)",
690 offset, resid, this_offset, this_count);
691 resid -= this_count;
692 offset += this_count;
693 if (shadow_grew) {
694 printf(" shadow grew to %ld", shadow_map_shadow_size(map));
695 }
696 printf("\n");
697 }
698 break;
699 case ReadRequest:
700 while (resid > 0) {
701 u_long this_offset;
702 u_long this_count;
703
704 read_shadow = shadow_map_read(map, offset,
705 resid,
706 &this_offset,
707 &this_count);
708 printf("\t(%ld, %ld) => (%ld, %ld)%s\n",
709 offset, resid, this_offset, this_count,
710 read_shadow ? " from shadow" : "");
711 if (this_count == 0) {
712 printf("this_count is 0, aborting\n");
713 break;
714 }
715 resid -= this_count;
716 offset += this_count;
717 }
718 break;
719 default:
720 break;
721 }
722 }
723 if (map) {
724 shadow_map_free(map);
725 }
726 exit(0);
727 return (0);
728 }
729 #endif