X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/d52fe63fc81f7e44faaae711812a211a78434976..9bccf70c0258c7cac2dcb80011b2a964d884c552:/bsd/dev/vn/shadow.c diff --git a/bsd/dev/vn/shadow.c b/bsd/dev/vn/shadow.c new file mode 100644 index 000000000..aef372aae --- /dev/null +++ b/bsd/dev/vn/shadow.c @@ -0,0 +1,726 @@ + +/* + * Copyright (c) 2001 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +/* + * shadow.c + * + * Implement copy-on-write shadow map to allow a disk image to be + * mounted read-only, yet be writable by transferring writes to a + * "shadow" file. Subsequent reads from blocks that have been + * written will then go the "shadow" file. + * + * The map has two parts: + * 1) a bit map to track which blocks have been written + * 2) a band map to map a "band" within the original file to a corresponding + * "band" in the shadow file. Each band has the same size. + * + * The band map is used to ensure that blocks that are contiguous in the + * original file will remain contiguous in the shadow file. + * + * For debugging purposes, this file can be compiled standalone using: + * cc -o shadow shadow.c -DTEST_SHADOW + */ + +/* + * Modification History + * + * December 21, 2001 Dieter Siegmund (dieter@apple.com) + * - initial revision + */ +#include +#include +#include + +#include + +#ifdef TEST_SHADOW +#include +#include +#define my_malloc(a) malloc(a) +#define my_free(a) free(a) +#else TEST_SHADOW +#include +#define my_malloc(a) _MALLOC(a, M_TEMP, M_WAITOK) +#define my_free(a) FREE(a, M_TEMP) +#endif TEST_SHADOW + +#include "shadow.h" + +#define ULONG_ALL_ONES ((u_long)(-1)) +#define USHORT_ALL_ONES ((u_short)(-1)) +#define UCHAR_ALL_ONES ((u_char)(-1)) + +#define my_trunc(value, divisor) ((value) / (divisor) * (divisor)) + +/* a band size of 128K can represent a file up to 8GB */ +#define BAND_SIZE_DEFAULT_POWER_2 17 +#define BAND_SIZE_DEFAULT (1 << BAND_SIZE_DEFAULT_POWER_2) + +typedef u_short band_number_t; +#define BAND_ZERO ((band_number_t)0) +#define BAND_MAX ((band_number_t)65535) + +struct shadow_map { + u_long blocks_per_band;/* size in blocks */ + u_long block_size; + u_char * block_bitmap; /* 1 bit per block; 1=written */ + band_number_t * bands; /* band map array */ + u_long file_size_blocks; /* size of file in bands */ + u_long shadow_size_bands; /* size of shadow in bands */ + u_long next_band; /* next free band */ + u_long zeroth_band; /* special-case 0th band */ +}; + + +typedef struct { + u_long byte; + u_long bit; +} bitmap_offset_t; + +static __inline__ u_char +bit(int b) +{ + return ((u_char)(1 << b)); +} + +/* + * Function: bits_lower + * Purpose: + * Return a byte value in which bits numbered lower than 'b' are set. + */ +static __inline__ u_char +bits_lower(int b) +{ + return ((u_char)(bit(b) - 1)); +} + +/* + * Function: byte_set_bits + * Purpose: + * Set the given range of bits within a byte. + */ +static __inline__ u_char +byte_set_bits(int start, int end) +{ + return ((u_char)((~bits_lower(start)) & (bits_lower(end) | bit(end)))); +} + +static __inline__ bitmap_offset_t +bitmap_offset(off_t where) +{ + bitmap_offset_t b; + + b.byte = where / NBBY; + b.bit = where % NBBY; + return (b); +} + +/* + * Function: bitmap_set + * + * Purpose: + * Set the given range of bits. + * + * This algorithm tries to set the extents using the biggest + * units, using longs, then a short, then a byte, then bits. + */ +static void +bitmap_set(u_char * map, u_long start_bit, u_long bit_count) +{ + bitmap_offset_t start; + bitmap_offset_t end; + + start = bitmap_offset(start_bit); + end = bitmap_offset(start_bit + bit_count); + if (start.byte < end.byte) { + u_long n_bytes; + + if (start.bit) { + map[start.byte] |= byte_set_bits(start.bit, NBBY - 1); + start.bit = 0; + start.byte++; + if (start.byte == end.byte) + goto end; + } + + n_bytes = end.byte - start.byte; + + while (n_bytes >= (sizeof(u_long))) { + *((u_long *)(map + start.byte)) = ULONG_ALL_ONES; + start.byte += sizeof(u_long); + n_bytes -= sizeof(u_long); + } + if (n_bytes >= sizeof(u_short)) { + *((u_short *)(map + start.byte)) = USHORT_ALL_ONES; + start.byte += sizeof(u_short); + n_bytes -= sizeof(u_short); + } + if (n_bytes == 1) { + map[start.byte] = UCHAR_ALL_ONES; + start.byte++; + n_bytes = 0; + } + } + + end: + if (end.bit > start.bit) { + map[start.byte] |= byte_set_bits(start.bit, end.bit - 1); + } + + return; +} + +/* + * Function: bitmap_get + * + * Purpose: + * Return the number of bits in the range that are the same e.g. + * 11101 returns 3 because the first 3 bits are the same (1's), whereas + * 001100 returns 2 because the first 2 bits are the same. + * This algorithm tries to count things in as big a chunk as possible, + * first aligning to a byte offset, then trying to count longs, a short, + * a byte, then any remaining bits to find the bit that is different. + */ + +static u_long +bitmap_get(u_char * map, u_long start_bit, u_long bit_count, + boolean_t * ret_is_set) +{ + u_long count; + int i; + boolean_t is_set; + bitmap_offset_t start; + bitmap_offset_t end; + + start = bitmap_offset(start_bit); + end = bitmap_offset(start_bit + bit_count); + + is_set = (map[start.byte] & bit(start.bit)) ? TRUE : FALSE; + count = 0; + + if (start.byte < end.byte) { + u_long n_bytes; + + if (start.bit) { /* try to align to a byte */ + for (i = start.bit; i < NBBY; i++) { + boolean_t this_is_set; + + this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE; + if (this_is_set != is_set) { + goto done; /* found bit that was different, we're done */ + } + count++; + } + start.bit = 0; /* made it to the next byte */ + start.byte++; + if (start.byte == end.byte) + goto end; /* no more bytes, check for any leftover bits */ + } + /* calculate how many bytes are left in the range */ + n_bytes = end.byte - start.byte; + + /* check for 4 bytes of the same bits */ + while (n_bytes >= sizeof(u_long)) { + u_long * valPtr = (u_long *)(map + start.byte); + if ((is_set && *valPtr == ULONG_ALL_ONES) + || (!is_set && *valPtr == 0)) { + count += sizeof(*valPtr) * NBBY; + start.byte += sizeof(*valPtr); + n_bytes -= sizeof(*valPtr); + } + else + break; /* bits differ */ + + } + /* check for 2 bytes of the same bits */ + if (n_bytes >= sizeof(u_short)) { + u_short * valPtr = (u_short *)(map + start.byte); + + if ((is_set && *valPtr == USHORT_ALL_ONES) + || (!is_set && (*valPtr == 0))) { + count += sizeof(*valPtr) * NBBY; + start.byte += sizeof(*valPtr); + n_bytes -= sizeof(*valPtr); + } + } + + /* check for 1 byte of the same bits */ + if (n_bytes) { + if ((is_set && map[start.byte] == UCHAR_ALL_ONES) + || (!is_set && map[start.byte] == 0)) { + count += NBBY; + start.byte++; + n_bytes--; + } + /* we found bits that were different, find the first one */ + if (n_bytes) { + for (i = 0; i < NBBY; i++) { + boolean_t this_is_set; + + this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE; + if (this_is_set != is_set) { + break; + } + count++; + } + goto done; + } + } + } + + end: + for (i = start.bit; i < end.bit; i++) { + boolean_t this_is_set = (map[start.byte] & bit(i)) ? TRUE : FALSE; + + if (this_is_set != is_set) { + break; + } + count++; + } + + done: + *ret_is_set = is_set; + return (count); +} + +static __inline__ band_number_t +shadow_map_block_to_band(shadow_map_t * map, unsigned long block) +{ + return (block / map->blocks_per_band); +} + +/* + * Function: shadow_map_mapped_band + * Purpose: + * Return the mapped band for the given band. + * If map_it is FALSE, and the band is not mapped, return FALSE. + * If map_it is TRUE, then this function will always return TRUE. + */ +static boolean_t +shadow_map_mapped_band(shadow_map_t * map, band_number_t band, + boolean_t map_it, band_number_t * mapped_band) +{ + boolean_t is_mapped = FALSE; + + if (band == map->zeroth_band) { + *mapped_band = BAND_ZERO; + is_mapped = TRUE; + } + else { + *mapped_band = map->bands[band]; + if (*mapped_band == BAND_ZERO) { + if (map_it) { + /* grow the file */ + if (map->next_band == 0) { + /* remember the zero'th band */ + map->zeroth_band = band; + } + *mapped_band = map->bands[band] = map->next_band++; + is_mapped = TRUE; + } + } + else { + is_mapped = TRUE; + } + } + return (is_mapped); +} + +/* + * Function: shadow_map_contiguous + * + * Purpose: + * Return the first offset within the range position..(position + count) + * that is not a contiguous mapped band. + * + * If called with is_write = TRUE, this function will map bands as it goes. + */ +static u_long +shadow_map_contiguous(shadow_map_t * map, u_long start_block, + u_long num_blocks, boolean_t is_write) +{ + band_number_t band = shadow_map_block_to_band(map, start_block); + u_long end_block = start_block + num_blocks; + boolean_t is_mapped; + band_number_t mapped_band; + u_long ret_end_block = end_block; + u_long p; + + is_mapped = shadow_map_mapped_band(map, band, is_write, &mapped_band); + if (is_write == FALSE && is_mapped == FALSE) { + static int happened = 0; + /* this can't happen */ + if (happened == 0) { + printf("shadow_map_contiguous: this can't happen!\n"); + happened = 1; + } + return (start_block); + } + for (p = my_trunc(start_block + map->blocks_per_band, + map->blocks_per_band); + p < end_block; p += map->blocks_per_band) { + band_number_t next_mapped_band; + + band++; + is_mapped = shadow_map_mapped_band(map, band, is_write, + &next_mapped_band); + if (is_write == FALSE && is_mapped == FALSE) { + return (p); + } + if ((mapped_band + 1) != next_mapped_band) { + /* not contiguous */ + ret_end_block = p; + break; + } + mapped_band = next_mapped_band; + } + return (ret_end_block); +} + + +/* + * Function: block_bitmap_size + * Purpose: + * The number of bytes required in a block bitmap to represent a file of size + * file_size. + * + * The bytes required is the number of blocks in the file, + * divided by the number of bits per byte. + * Note: + * An 8GB file requires (assuming 512 byte block): + * 2^33 / 2^9 / 2^3 = 2^21 = 2MB + * of bitmap space. This is a non-trival amount of memory, + * particularly since most of the bits will be zero. + * A sparse bitmap would really help in this case. + */ +static __inline__ u_long +block_bitmap_size(off_t file_size, u_long block_size) +{ + off_t blocks = howmany(file_size, block_size); + return (howmany(blocks, NBBY)); +} + +/* + * Function: shadow_map_read + * + * Purpose: + * Calculate the block offset within the shadow to read, and the number + * blocks to read. The input values (block_offset, block_count) refer + * to the original file. + * + * The output values (*incr_block_offset, *incr_block_count) refer to the + * shadow file if the return value is TRUE. They refer to the original + * file if the return value is FALSE. + + * Blocks within a band may or may not have been written, in addition, + * Bands are not necessarily contiguous, therefore: + * *incr_block_count <= block_count + * The caller must be prepared to call this function interatively + * to complete the whole i/o. + * Returns: + * TRUE if the shadow file should be read, FALSE if the original file + * should be read. + */ +boolean_t +shadow_map_read(shadow_map_t * map, u_long block_offset, u_long block_count, + u_long * incr_block_offset, u_long * incr_block_count) +{ + boolean_t written = FALSE; + u_long n_blocks; + + if (block_offset >= map->file_size_blocks + || (block_offset + block_count) > map->file_size_blocks) { + printf("shadow_map_read: request (%ld, %ld) exceeds file size %ld\n", + block_offset, block_count, map->file_size_blocks); + *incr_block_count = 0; + } + n_blocks = bitmap_get(map->block_bitmap, block_offset, block_count, + &written); + if (written == FALSE) { + *incr_block_count = n_blocks; + *incr_block_offset = block_offset; + } + else { /* start has been written, and therefore mapped */ + band_number_t mapped_band; + u_long band_limit; + + mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)]; + *incr_block_offset = mapped_band * map->blocks_per_band + + (block_offset % map->blocks_per_band); + band_limit + = shadow_map_contiguous(map, block_offset, block_count, FALSE); + *incr_block_count = band_limit - block_offset; + if (*incr_block_count > n_blocks) { + *incr_block_count = n_blocks; + } + } + return (written); +} + +/* + * Function: shadow_map_write + * + * Purpose: + * Calculate the block offset within the shadow to write, and the number + * blocks to write. The input values (block_offset, block_count) refer + * to the original file. The output values + * (*incr_block_offset, *incr_block_count) refer to the shadow file. + * + * Bands are not necessarily contiguous, therefore: + * *incr_block_count <= block_count + * The caller must be prepared to call this function interatively + * to complete the whole i/o. + * Returns: + * TRUE if the shadow file was grown, FALSE otherwise. + */ +boolean_t +shadow_map_write(shadow_map_t * map, u_long block_offset, + u_long block_count, u_long * incr_block_offset, + u_long * incr_block_count) +{ + u_long band_limit; + band_number_t mapped_band; + boolean_t shadow_grew = FALSE; + + if (block_offset >= map->file_size_blocks + || (block_offset + block_count) > map->file_size_blocks) { + printf("shadow_map_write: request (%ld, %ld) exceeds file size %ld\n", + block_offset, block_count, map->file_size_blocks); + *incr_block_count = 0; + } + + band_limit = shadow_map_contiguous(map, block_offset, block_count, TRUE); + mapped_band = map->bands[shadow_map_block_to_band(map, block_offset)]; + *incr_block_offset = mapped_band * map->blocks_per_band + + (block_offset % map->blocks_per_band); + *incr_block_count = band_limit - block_offset; + + /* mark these blocks as written */ + bitmap_set(map->block_bitmap, block_offset, *incr_block_count); + + if (map->next_band > map->shadow_size_bands) { + map->shadow_size_bands = map->next_band; + shadow_grew = TRUE; + } + return (shadow_grew); +} + +/* + * Function: shadow_map_shadow_size + * + * Purpose: + * To return the size of the shadow file in blocks. + */ +u_long +shadow_map_shadow_size(shadow_map_t * map) +{ + return (map->shadow_size_bands * map->blocks_per_band); +} + +/* + * Function: shadow_map_create + * + * Purpose: + * Allocate the dynamic data for keeping track of the shadow dirty blocks + * and the band mapping table. + * Returns: + * NULL if an error occurred. + */ +shadow_map_t * +shadow_map_create(off_t file_size, off_t shadow_size, + u_long band_size, u_long block_size) +{ + void * block_bitmap = 0; + u_long bitmap_size; + band_number_t * bands = 0; + shadow_map_t * map; + u_long n_bands = 0; + + if (band_size == 0) { + band_size = BAND_SIZE_DEFAULT; + } + + n_bands = howmany(file_size, band_size); + if (n_bands > (BAND_MAX + 1)) { + printf("file is too big: %ld > %d\n", + n_bands, BAND_MAX); + goto failure; + } + + /* create a block bitmap, one bit per block */ + bitmap_size = block_bitmap_size(file_size, block_size); + block_bitmap = my_malloc(bitmap_size); + if (block_bitmap == NULL) { + printf("failed to allocate bitmap\n"); + goto failure; + } + bzero(block_bitmap, bitmap_size); + + /* get the band map */ + bands = (band_number_t *)my_malloc(n_bands * sizeof(band_number_t)); + if (bands == NULL) { + printf("failed to allocate bands\n"); + goto failure; + } + bzero(bands, n_bands * sizeof(band_number_t)); + + map = my_malloc(sizeof(*map)); + if (map == NULL) { + printf("failed to allocate map\n"); + goto failure; + } + map->blocks_per_band = band_size / block_size; + map->block_bitmap = block_bitmap; + map->bands = bands; + map->file_size_blocks = n_bands * map->blocks_per_band; + map->next_band = 0; + map->zeroth_band = -1; + map->shadow_size_bands = howmany(shadow_size, band_size); + map->block_size = block_size; + return (map); + + failure: + if (block_bitmap) + my_free(block_bitmap); + if (bands) + my_free(bands); + return (NULL); +} + +/* + * Function: shadow_map_free + * Purpose: + * Frees the data structure to deal with the shadow map. + */ +void +shadow_map_free(shadow_map_t * map) +{ + if (map->block_bitmap) + my_free(map->block_bitmap); + if (map->bands) + my_free(map->bands); + map->block_bitmap = 0; + map->bands = 0; + my_free(map); + return; +} + +#ifdef TEST_SHADOW +#define BAND_SIZE_BLOCKS (BAND_SIZE_DEFAULT / 512) + +enum { + ReadRequest, + WriteRequest, +}; + +typedef struct { + int type; + u_long offset; + u_long count; +} block_request_t; + +int +main() +{ + shadow_map_t * map; + int i; + block_request_t requests[] = { + { WriteRequest, BAND_SIZE_BLOCKS * 2, 1 }, + { ReadRequest, BAND_SIZE_BLOCKS / 2, BAND_SIZE_BLOCKS * 2 - 2 }, + { WriteRequest, BAND_SIZE_BLOCKS * 1, 5 * BAND_SIZE_BLOCKS + 3}, + { ReadRequest, 0, BAND_SIZE_BLOCKS * 10 }, + { WriteRequest, BAND_SIZE_BLOCKS * (BAND_MAX - 1), + BAND_SIZE_BLOCKS * 2}, + { 0, 0 }, + }; + + map = shadow_map_create(1024 * 1024 * 1024 * 8ULL, 0, 0, 512); + if (map == NULL) { + printf("shadow_map_create failed\n"); + exit(1); + } + for (i = 0; TRUE; i++) { + u_long offset; + u_long resid; + boolean_t shadow_grew; + boolean_t read_shadow; + + if (requests[i].count == 0) { + break; + } + offset = requests[i].offset; + resid = requests[i].count; + printf("\n%s REQUEST (%ld, %ld)\n", + requests[i].type == WriteRequest ? "WRITE" : "READ", + offset, resid); + switch (requests[i].type) { + case WriteRequest: + while (resid > 0) { + u_long this_offset; + u_long this_count; + + shadow_grew = shadow_map_write(map, offset, + resid, + &this_offset, + &this_count); + printf("\t(%ld, %ld) => (%ld, %ld)", + offset, resid, this_offset, this_count); + resid -= this_count; + offset += this_count; + if (shadow_grew) { + printf(" shadow grew to %ld", shadow_map_shadow_size(map)); + } + printf("\n"); + } + break; + case ReadRequest: + while (resid > 0) { + u_long this_offset; + u_long this_count; + + read_shadow = shadow_map_read(map, offset, + resid, + &this_offset, + &this_count); + printf("\t(%ld, %ld) => (%ld, %ld)%s\n", + offset, resid, this_offset, this_count, + read_shadow ? " from shadow" : ""); + if (this_count == 0) { + printf("this_count is 0, aborting\n"); + break; + } + resid -= this_count; + offset += this_count; + } + break; + default: + break; + } + } + if (map) { + shadow_map_free(map); + } + exit(0); + return (0); +} +#endif