]>
git.saurik.com Git - hfs.git/blob - fsck_hfs/cache.c
2 * Copyright (c) 2000-2012 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
30 #include <sys/types.h>
46 * Allocate an unused cache block.
48 void *CacheAllocBlock (Cache_t
*cache
);
53 * Release an active cache block.
56 CacheFreeBlock( Cache_t
*cache
, Tag_t
*tag
);
61 * Obtain a cache block. If one already exists, it is returned. Otherwise a
62 * new one is created and inserted into the cache.
64 int CacheLookup (Cache_t
*cache
, uint64_t off
, Tag_t
**tag
);
69 * Perform a direct read on the file.
71 int CacheRawRead (Cache_t
*cache
, uint64_t off
, uint32_t len
, void *buf
);
76 * Perform a direct write on the file.
78 int CacheRawWrite (Cache_t
*cache
, uint64_t off
, uint32_t len
, void *buf
);
83 * Flush, and optionally remove, all cache blocks that intersect
87 CacheFlushRange( Cache_t
*cache
, uint64_t start
, uint64_t len
, int remove
);
92 * Initializes the LRU data structures.
94 static int LRUInit (LRU_t
*lru
);
101 * NOTE: This is typically a no-op, since the cache manager will be clearing
104 static int LRUDestroy (LRU_t
*lru
);
109 * Registers data activity on the given node. If the node is already in the
110 * LRU, it is moved to the front. Otherwise, it is inserted at the front.
112 * NOTE: If the node is not in the LRU, we assume that its pointers are NULL.
114 static int LRUHit (LRU_t
*lru
, LRUNode_t
*node
, int age
);
119 * Chooses a buffer to release.
121 * TODO: Under extreme conditions, it should be possible to release the buffer
122 * of an actively referenced cache buffer, leaving the tag behind as a
123 * placeholder. This would be required for implementing 2Q-LRU
126 static int LRUEvict (LRU_t
*lru
, LRUNode_t
*node
);
129 * CalculateCacheSizes
131 * Determine the cache size values that should be used to initialize the cache.
132 * If the requested value does not validate according to the conditions described
133 * below, it is adjusted.
135 * If no input values are provided, use default values for cache size
136 * and cache block size.
138 * Cache size should be -
139 * a. greater than or equal to minimum cache size
140 * b. less than or equal to maximum cache size. The maximum cache size
141 * is limited by the maximum value that can be allocated using malloc
142 * or mmap (maximum value for size_t)
143 * c. multiple of cache block size
146 * *calcBlockSize: the size of the blocks in the cache
147 * *calcTotalBlocks: the number of blocks in the cache
149 void CalculateCacheSizes(uint64_t cacheSize
, uint32_t *calcBlockSize
, uint32_t *calcTotalBlocks
, char cache_debug
)
151 uint32_t blockSize
= DefaultCacheBlockSize
;
152 const size_t max_size_t
= ~0; /* Maximum value represented by size_t */
154 /* Simple case - no user cache size, use default values */
156 *calcBlockSize
= DefaultCacheBlockSize
;
157 *calcTotalBlocks
= DefaultCacheBlocks
;
161 /* User provided cache size - check with minimum and maximum values */
162 if (cacheSize
< MinCacheSize
) {
163 cacheSize
= MinCacheSize
;
165 if (cacheSize
> max_size_t
||
166 cacheSize
> MaxCacheSize
) {
168 printf ("\tCache size should be greater than %uM and less than %luM\n", MinCacheSize
/(1024*1024), max_size_t
/(1024*1024));
170 cacheSize
= MaxCacheSize
;
173 /* Cache size should be multiple of cache block size */
174 if (cacheSize
% blockSize
) {
176 printf ("\tCache size should be multiple of cache block size (currently %uK)\n", blockSize
/1024);
178 cacheSize
= (cacheSize
/ blockSize
) * blockSize
;
181 *calcBlockSize
= blockSize
;
182 *calcTotalBlocks
= cacheSize
/ blockSize
;
191 * Initializes the cache for use. If preTouch is non-zero, the cache memory will
192 * be iterated through, with one byte per page touched. (This is to ensure that
193 * the memory is actually created, and is used to avoid deadlocking due to swapping
194 * during a live verify of the boot volume.)
196 int CacheInit (Cache_t
*cache
, int fdRead
, int fdWrite
, uint32_t devBlockSize
,
197 uint32_t cacheBlockSize
, uint32_t cacheTotalBlocks
, uint32_t hashSize
, int preTouch
)
203 memset (cache
, 0x00, sizeof (Cache_t
));
205 cache
->FD_R
= fdRead
;
206 cache
->FD_W
= fdWrite
;
207 cache
->DevBlockSize
= devBlockSize
;
208 /* CacheFlush requires cleared cache->Hash */
209 cache
->Hash
= (Tag_t
**) calloc( 1, (sizeof (Tag_t
*) * hashSize
) );
210 cache
->HashSize
= hashSize
;
211 cache
->BlockSize
= cacheBlockSize
;
213 /* Allocate the cache memory */
214 /* Break out of the loop on success, or when the proposed cache is < MinCacheSize */
216 cache
->FreeHead
= mmap (NULL
,
217 cacheTotalBlocks
* cacheBlockSize
,
218 PROT_READ
| PROT_WRITE
,
219 MAP_ANON
| MAP_PRIVATE
,
222 if (cache
->FreeHead
== (void *)-1) {
223 if ((cacheTotalBlocks
* cacheBlockSize
) <= MinCacheSize
) {
225 printf("\tTried to allocate %dK, minimum is %dK\n",
226 (cacheTotalBlocks
* cacheBlockSize
) / 1024,
227 MinCacheSize
/ 1024);
231 printf("\tFailed to allocate %uK for cache; trying %uK\n",
232 (cacheTotalBlocks
* cacheBlockSize
) / 1024,
233 (cacheTotalBlocks
* cacheBlockSize
/ 2) / 1024);
234 CalculateCacheSizes((cacheTotalBlocks
* cacheBlockSize
) / 2, &cacheBlockSize
, &cacheTotalBlocks
, debug
);
238 printf ("\tUsing cacheBlockSize=%uK cacheTotalBlock=%u cacheSize=%uK.\n", cacheBlockSize
/1024, cacheTotalBlocks
, (cacheBlockSize
/1024) * cacheTotalBlocks
);
243 if (cache
->FreeHead
== (void*)-1) {
245 printf("%s(%d): FreeHead = -1\n", __FUNCTION__
, __LINE__
);
251 /* If necessary, touch a byte in each page */
253 size_t pageSize
= getpagesize();
254 unsigned char *ptr
= (unsigned char *)cache
->FreeHead
;
255 unsigned char *end
= ptr
+ (cacheTotalBlocks
* cacheBlockSize
);
262 /* Initialize the cache memory free list */
263 temp
= cache
->FreeHead
;
264 for (i
= 0; i
< cacheTotalBlocks
- 1; i
++) {
265 *temp
= ((char *)temp
+ cacheBlockSize
);
266 temp
= (void **)((char *)temp
+ cacheBlockSize
);
269 cache
->FreeSize
= cacheTotalBlocks
;
271 buf
= (Buf_t
*)malloc(sizeof(Buf_t
) * MAXBUFS
);
274 printf("%s(%d): malloc(%zu) failed\n", __FUNCTION__
, __LINE__
, sizeof(Buf_t
) * MAXBUFS
);
279 memset (&buf
[0], 0x00, sizeof (Buf_t
) * MAXBUFS
);
280 for (i
= 1 ; i
< MAXBUFS
; i
++) {
281 (&buf
[i
-1])->Next
= &buf
[i
];
283 cache
->FreeBufs
= &buf
[0];
286 printf( "%s - cacheTotalBlocks %d cacheBlockSize %d hashSize %d \n",
287 __FUNCTION__
, cacheTotalBlocks
, cacheBlockSize
, hashSize
);
288 printf( "%s - cache memory %d \n", __FUNCTION__
, (cacheTotalBlocks
* cacheBlockSize
) );
291 return (LRUInit (&cache
->LRU
));
298 * Shutdown the cache.
300 int CacheDestroy (Cache_t
*cache
)
305 /* Print cache report */
306 printf ("Cache Report:\n");
307 printf ("\tRead Requests: %d\n", cache
->ReqRead
);
308 printf ("\tWrite Requests: %d\n", cache
->ReqWrite
);
309 printf ("\tDisk Reads: %d\n", cache
->DiskRead
);
310 printf ("\tDisk Writes: %d\n", cache
->DiskWrite
);
311 printf ("\tSpans: %d\n", cache
->Span
);
313 /* Shutdown the LRU */
314 LRUDestroy (&cache
->LRU
);
316 /* I'm lazy, I'll come back to it :P */
323 * Reads a range of bytes from the cache, returning a pointer to a buffer
324 * containing the requested bytes.
326 * NOTE: The returned buffer may directly refer to a cache block, or an
327 * anonymous buffer. Do not make any assumptions about the nature of
328 * the returned buffer, except that it is contiguous.
330 int CacheRead (Cache_t
*cache
, uint64_t off
, uint32_t len
, Buf_t
**bufp
)
335 uint32_t coff
= (off
% cache
->BlockSize
);
336 uint64_t cblk
= (off
- coff
);
339 /* Check for conflicts with other bufs */
340 searchBuf
= cache
->ActiveBufs
;
341 while (searchBuf
!= NULL
) {
342 if ((searchBuf
->Offset
>= off
) && (searchBuf
->Offset
< off
+ len
)) {
344 printf ("ERROR: CacheRead: Deadlock\n");
349 searchBuf
= searchBuf
->Next
;
352 /* get a free buffer */
353 if ((buf
= cache
->FreeBufs
) == NULL
) {
355 printf ("ERROR: CacheRead: no more bufs!\n");
359 cache
->FreeBufs
= buf
->Next
;
362 /* Clear the buf structure */
370 /* If this is unaligned or spans multiple cache blocks */
371 if ((cblk
/ cache
->BlockSize
) != ((off
+ len
- 1) / cache
->BlockSize
)) {
372 buf
->Flags
|= BUF_SPAN
;
374 /* Fetch the first cache block */
375 error
= CacheLookup (cache
, cblk
, &tag
);
378 printf ("ERROR: CacheRead: CacheLookup error %d\n", error
);
383 /* If we live nicely inside a cache block */
384 if (!(buf
->Flags
& BUF_SPAN
)) {
385 /* Offset the buffer into the cache block */
386 buf
->Buffer
= tag
->Buffer
+ coff
;
388 /* Bump the cache block's reference count */
391 /* Kick the node into the right queue */
392 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, 0);
394 /* Otherwise, things get ugly */
396 uint32_t boff
; /* Offset into the buffer */
397 uint32_t blen
; /* Space to fill in the buffer */
400 /* Allocate a temp buffer */
401 buf
->Buffer
= (void *)malloc (len
);
402 if (buf
->Buffer
== NULL
) {
404 printf ("ERROR: CacheRead: No Memory\n");
409 /* Blit the first chunk into the buffer */
410 boff
= cache
->BlockSize
- coff
;
413 printf("INFO: memcpy(%p, %p + %u, %u)\n", buf
->Buffer
, tag
->Buffer
, coff
, boff
);
415 memcpy (buf
->Buffer
, tag
->Buffer
+ coff
, boff
);
417 /* Bump the cache block's reference count */
420 /* Kick the node into the right queue */
421 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, 0);
423 /* Next cache block */
424 cblk
+= cache
->BlockSize
;
426 /* Read data a cache block at a time */
428 /* Fetch the next cache block */
429 error
= CacheLookup (cache
, cblk
, &tag
);
431 /* Free the allocated buffer */
435 /* Release all the held tags */
436 cblk
-= cache
->BlockSize
;
438 if (CacheLookup (cache
, cblk
, &tag
) != EOK
) {
439 fprintf (stderr
, "CacheRead: Unrecoverable error\n");
444 /* Kick the node into the right queue */
445 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, 0);
451 /* Blit the cache block into the buffer */
452 temp
= ((blen
> cache
->BlockSize
) ? cache
->BlockSize
: blen
);
454 printf ("INFO: memcpy(%p + %u, %p, %u)\n", buf
->Buffer
, boff
, tag
->Buffer
, temp
);
456 memcpy (buf
->Buffer
+ boff
,
460 /* Update counters */
465 /* Advance to the next cache block */
466 cblk
+= cache
->BlockSize
;
468 /* Kick the node into the right queue */
469 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, 0);
472 /* Count the spanned access */
476 /* Attach to head of active buffers list */
477 if (cache
->ActiveBufs
!= NULL
) {
478 buf
->Next
= cache
->ActiveBufs
;
481 cache
->ActiveBufs
->Prev
= buf
;
484 cache
->ActiveBufs
= buf
;
487 /* Update counters */
494 * All of the uses of kLockWrite need to be audited for
495 * when the journal replay is writing.
500 * Writes a buffer through the cache.
502 int CacheWrite ( Cache_t
*cache
, Buf_t
*buf
, int age
, uint32_t writeOptions
)
505 uint32_t coff
= (buf
->Offset
% cache
->BlockSize
);
506 uint64_t cblk
= (buf
->Offset
- coff
);
509 /* Fetch the first cache block */
510 error
= CacheLookup (cache
, cblk
, &tag
);
511 if (error
!= EOK
) return (error
);
513 /* If the buffer was a direct reference */
514 if (!(buf
->Flags
& BUF_SPAN
)) {
515 /* Commit the dirty block */
516 if ( (writeOptions
& (kLazyWrite
| kLockWrite
)) != 0 )
518 /* Copy flags to tag */
519 tag
->Flags
|= (writeOptions
& (kLazyWrite
| kLockWrite
));
523 error
= CacheRawWrite (cache
,
527 if (error
!= EOK
) return (error
);
530 /* Release the reference */
531 if ((writeOptions
& kLockWrite
) == 0)
534 /* Kick the node into the right queue */
535 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, age
);
537 /* Otherwise, we do the ugly thing again */
539 uint32_t boff
; /* Offset into the buffer */
540 uint32_t blen
; /* Space to fill in the buffer */
543 /* Blit the first chunk back into the cache */
544 boff
= cache
->BlockSize
- coff
;
545 blen
= buf
->Length
- boff
;
546 memcpy (tag
->Buffer
+ coff
, buf
->Buffer
, boff
);
548 /* Commit the dirty block */
549 if ( (writeOptions
& (kLazyWrite
| kLockWrite
)) != 0 )
551 /* flag this for lazy write */
552 tag
->Flags
|= (writeOptions
& (kLazyWrite
| kLockWrite
));
556 error
= CacheRawWrite (cache
,
560 if (error
!= EOK
) return (error
);
563 /* Release the cache block reference */
564 if ((writeOptions
& kLockWrite
) == 0)
567 /* Kick the node into the right queue */
568 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, age
);
570 /* Next cache block */
571 cblk
+= cache
->BlockSize
;
573 /* Write data a cache block at a time */
575 /* Fetch the next cache block */
576 error
= CacheLookup (cache
, cblk
, &tag
);
577 /* We must go through with the write regardless */
579 /* Blit the next buffer chunk back into the cache */
580 temp
= ((blen
> cache
->BlockSize
) ? cache
->BlockSize
: blen
);
585 /* Commit the dirty block */
586 if ( (writeOptions
& (kLazyWrite
| kLockWrite
)) != 0 )
588 /* flag this for lazy write */
589 tag
->Flags
|= (writeOptions
& (kLazyWrite
| kLockWrite
));
593 error
= CacheRawWrite (cache
,
597 if (error
!= EOK
) return (error
);
600 /* Update counters */
603 if ((writeOptions
& kLockWrite
) == 0)
606 /* Kick the node into the right queue */
607 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, age
);
608 /* And go to the next cache block */
609 cblk
+= cache
->BlockSize
;
612 /* Release the anonymous buffer */
616 /* Detach the buffer */
617 if (buf
->Next
!= NULL
)
618 buf
->Next
->Prev
= buf
->Prev
;
619 if (buf
->Prev
!= NULL
)
620 buf
->Prev
->Next
= buf
->Next
;
621 if (cache
->ActiveBufs
== buf
)
622 cache
->ActiveBufs
= buf
->Next
;
624 /* Clear the buffer and put it back on free list */
625 memset (buf
, 0x00, sizeof (Buf_t
));
626 buf
->Next
= cache
->FreeBufs
;
627 cache
->FreeBufs
= buf
;
629 /* Update counters */
638 * Releases a clean buffer.
640 * NOTE: We don't verify whether it's dirty or not.
642 int CacheRelease (Cache_t
*cache
, Buf_t
*buf
, int age
)
645 uint32_t coff
= (buf
->Offset
% cache
->BlockSize
);
646 uint64_t cblk
= (buf
->Offset
- coff
);
649 /* Fetch the first cache block */
650 error
= CacheLookup (cache
, cblk
, &tag
);
653 printf ("ERROR: CacheRelease: CacheLookup error\n");
658 /* If the buffer was a direct reference */
659 if (!(buf
->Flags
& BUF_SPAN
)) {
660 /* Release the reference */
661 if ((tag
->Flags
& kLockWrite
) == 0) {
665 /* Kick the node into the right queue */
666 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, age
);
668 /* Otherwise, we do the ugly thing again */
670 uint32_t blen
; /* Space to fill in the buffer */
672 /* Blit the first chunk back into the cache */
673 blen
= buf
->Length
- cache
->BlockSize
+ coff
;
675 /* Release the cache block reference */
676 if ((tag
->Flags
& kLockWrite
) == 0) {
680 /* Kick the node into the right queue */
681 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, age
);
683 /* Next cache block */
684 cblk
+= cache
->BlockSize
;
686 /* Release cache blocks one at a time */
688 /* Fetch the next cache block */
689 error
= CacheLookup (cache
, cblk
, &tag
);
690 /* We must go through with the write regardless */
692 /* Update counters */
693 blen
-= ((blen
> cache
->BlockSize
) ? cache
->BlockSize
: blen
);
694 if ((tag
->Flags
& kLockWrite
) == 0)
697 /* Kick the node into the right queue */
698 LRUHit (&cache
->LRU
, (LRUNode_t
*)tag
, age
);
699 /* Advance to the next block */
700 cblk
+= cache
->BlockSize
;
703 /* Release the anonymous buffer */
707 /* Detach the buffer */
708 if (buf
->Next
!= NULL
)
709 buf
->Next
->Prev
= buf
->Prev
;
710 if (buf
->Prev
!= NULL
)
711 buf
->Prev
->Next
= buf
->Next
;
712 if (cache
->ActiveBufs
== buf
)
713 cache
->ActiveBufs
= buf
->Next
;
715 /* Clear the buffer and put it back on free list */
716 memset (buf
, 0x00, sizeof (Buf_t
));
717 buf
->Next
= cache
->FreeBufs
;
718 cache
->FreeBufs
= buf
;
726 * Disposes of a particular buffer.
728 int CacheRemove (Cache_t
*cache
, Tag_t
*tag
)
732 /* Make sure it's not busy */
733 if (tag
->Refs
) return (EBUSY
);
736 if (tag
->Next
!= NULL
)
737 tag
->Next
->Prev
= tag
->Prev
;
738 if (tag
->Prev
!= NULL
)
739 tag
->Prev
->Next
= tag
->Next
;
741 cache
->Hash
[tag
->Offset
% cache
->HashSize
] = tag
->Next
;
743 /* Make sure the head node doesn't have a back pointer */
744 if ((cache
->Hash
[tag
->Offset
% cache
->HashSize
] != NULL
) &&
745 (cache
->Hash
[tag
->Offset
% cache
->HashSize
]->Prev
!= NULL
)) {
747 printf ("ERROR: CacheRemove: Corrupt hash chain\n");
751 /* Release it's buffer (if it has one) */
752 if (tag
->Buffer
!= NULL
)
754 error
= CacheFreeBlock (cache
, tag
);
759 /* Zero the tag (for easy debugging) */
760 memset (tag
, 0x00, sizeof (Tag_t
));
771 * Only dispose of the buffer, leave the tag intact.
773 int CacheEvict (Cache_t
*cache
, Tag_t
*tag
)
777 /* Make sure it's not busy */
778 if (tag
->Refs
) return (EBUSY
);
780 /* Release the buffer */
781 if (tag
->Buffer
!= NULL
)
783 error
= CacheFreeBlock (cache
, tag
);
795 * Allocate an unused cache block.
797 void *CacheAllocBlock (Cache_t
*cache
)
801 if (cache
->FreeHead
== NULL
)
803 if (cache
->FreeSize
== 0)
806 temp
= cache
->FreeHead
;
807 cache
->FreeHead
= *((void **)cache
->FreeHead
);
816 * Release an active cache block.
819 CacheFreeBlock( Cache_t
*cache
, Tag_t
*tag
)
823 if ( (tag
->Flags
& kLazyWrite
) != 0 )
825 /* this cache block has been marked for lazy write - do it now */
826 error
= CacheRawWrite( cache
,
833 printf( "%s - CacheRawWrite failed with error %d \n", __FUNCTION__
, error
);
837 tag
->Flags
&= ~kLazyWrite
;
840 if ((tag
->Flags
& kLockWrite
) == 0)
842 *((void **)tag
->Buffer
) = cache
->FreeHead
;
843 cache
->FreeHead
= (void **)tag
->Buffer
;
853 * Write out any blocks that are marked for lazy write.
856 CacheFlush( Cache_t
*cache
)
862 for ( i
= 0; i
< cache
->HashSize
; i
++ )
864 myTagPtr
= cache
->Hash
[ i
];
866 while ( NULL
!= myTagPtr
)
868 if ( (myTagPtr
->Flags
& kLazyWrite
) != 0 )
870 /* this cache block has been marked for lazy write - do it now */
871 error
= CacheRawWrite( cache
,
878 printf( "%s - CacheRawWrite failed with error %d \n", __FUNCTION__
, error
);
882 myTagPtr
->Flags
&= ~kLazyWrite
;
884 myTagPtr
= myTagPtr
->Next
;
896 * Return true if the two given ranges intersect.
900 RangeIntersect(uint64_t start1
, uint64_t len1
, uint64_t start2
, uint64_t len2
)
902 uint64_t end1
= start1
+ len1
- 1;
903 uint64_t end2
= start2
+ len2
- 1;
905 if (end1
< start2
|| start1
> end2
)
915 * Flush, and optionally remove, all cache blocks that intersect
919 CacheFlushRange( Cache_t
*cache
, uint64_t start
, uint64_t len
, int remove
)
923 Tag_t
*currentTag
, *nextTag
;
925 for ( i
= 0; i
< cache
->HashSize
; i
++ )
927 currentTag
= cache
->Hash
[ i
];
929 while ( NULL
!= currentTag
)
931 /* Keep track of the next block, in case we remove the current block */
932 nextTag
= currentTag
->Next
;
934 if ( currentTag
->Flags
& kLazyWrite
&&
935 RangeIntersect(currentTag
->Offset
, cache
->BlockSize
, start
, len
))
937 error
= CacheRawWrite( cache
,
940 currentTag
->Buffer
);
944 printf( "%s - CacheRawWrite failed with error %d \n", __FUNCTION__
, error
);
948 currentTag
->Flags
&= ~kLazyWrite
;
950 if ( remove
&& ((currentTag
->Flags
& kLockWrite
) == 0))
951 CacheRemove( cache
, currentTag
);
954 currentTag
= nextTag
;
959 } /* CacheFlushRange */
961 /* Function: CacheCopyDiskBlocks
963 * Description: Perform direct disk block copy from from_offset to to_offset
966 * The function flushes the cache blocks intersecting with disk blocks
967 * belonging to from_offset. Invalidating the disk blocks belonging to
968 * to_offset from the cache would have been sufficient, but its block
969 * start and end might not lie on cache block size boundary. Therefore we
970 * flush the disk blocks belonging to to_offset on the disk .
972 * The function performs raw read and write on the disk of cache block size,
973 * with exception of last operation.
975 * Note that the data written to disk does not exist in cache after
976 * this function. This function however ensures that if the device
977 * offset being read/written on disk existed in cache, it is invalidated and
978 * written to disk before performing any read/write operation.
981 * 1. cache - pointer to cache.
982 * 2. from_offset - disk offset to copy from.
983 * 3. to_offset - disk offset to copy to.
984 * 4. len - length in bytes to be copied. Note that this length should be
985 * a multiple of disk block size, else read/write will return error.
988 * zero (EOK) on success.
989 * On failure, non-zero value.
990 * Known error values:
991 * ENOMEM - insufficient memory to allocate intermediate copy buffer.
992 * EINVAL - the length of data to read/write is not multiple of
993 * device block size, or
994 * the device offset is not multiple of device block size, or
995 * ENXIO - invalid disk offset
997 int CacheCopyDiskBlocks (Cache_t
*cache
, uint64_t from_offset
, uint64_t to_offset
, uint32_t len
)
1001 char *tmpBuffer
= NULL
;
1002 uint32_t ioReqCount
;
1003 uint32_t numberOfBuffersToWrite
;
1005 /* Return error if length of data to be written on disk is
1006 * less than the length of the buffer to be written, or
1007 * disk offsets are not multiple of device block size
1009 if ((len
% cache
->DevBlockSize
) ||
1010 (from_offset
% cache
->DevBlockSize
) ||
1011 (to_offset
% cache
->DevBlockSize
)) {
1016 /* Flush contents of from_offset on the disk */
1017 error
= CacheFlushRange(cache
, from_offset
, len
, 1);
1018 if (error
!= EOK
) goto out
;
1020 /* Flush contents of to_offset on the disk */
1021 error
= CacheFlushRange(cache
, to_offset
, len
, 1);
1022 if (error
!= EOK
) goto out
;
1024 /* Allocate temporary buffer for reading/writing, currently
1025 * set to block size of cache.
1027 tmpBuffer
= malloc(cache
->BlockSize
);
1030 printf("%s(%d): malloc(%zd) failed\n", __FUNCTION__
, __LINE__
, (size_t)cache
->BlockSize
);
1036 ioReqCount
= cache
->BlockSize
;
1037 numberOfBuffersToWrite
= (len
+ ioReqCount
- 1) / ioReqCount
;
1039 for (i
=0; i
<numberOfBuffersToWrite
; i
++) {
1040 if (i
== (numberOfBuffersToWrite
- 1)) {
1042 ioReqCount
= len
- (i
* cache
->BlockSize
);
1046 error
= CacheRawRead (cache
, from_offset
, ioReqCount
, tmpBuffer
);
1047 if (error
!= EOK
) goto out
;
1050 error
= CacheRawWrite (cache
, to_offset
, ioReqCount
, tmpBuffer
);
1051 if (error
!= EOK
) goto out
;
1054 printf ("%s: Copying %d bytes from %qd to %qd\n", __FUNCTION__
, ioReqCount
, from_offset
, to_offset
);
1057 /* Increment offsets with data read/written */
1058 from_offset
+= ioReqCount
;
1059 to_offset
+= ioReqCount
;
1069 /* Function: CacheWriteBufferToDisk
1071 * Description: Write data on disk starting at given offset for upto write_len.
1072 * The data from given buffer upto buf_len is written to the disk starting
1073 * at given offset. If the amount of data written on disk is greater than
1074 * the length of buffer, all the remaining data is written as zeros.
1076 * If no buffer is provided or if length of buffer is zero, the function
1077 * writes zeros on disk from offset upto write_len bytes.
1079 * The function requires the length of buffer is either equal to or less
1080 * than the data to be written on disk. It also requires that the length
1081 * of data to be written on disk is a multiple of device block size.
1083 * Note that the data written to disk does not exist in cache after
1084 * this function. This function however ensures that if the device
1085 * offset being written on disk existed in cache, it is invalidated and
1086 * written to disk before performing any read/write operation.
1089 * 1. cache - pointer to cache
1090 * 2. offset - offset on disk to write data of buffer
1091 * 3. buffer - pointer to data to be written on disk
1092 * 4. len - length of buffer to be written on disk.
1095 * zero (EOK) on success.
1096 * On failure, non-zero value.
1097 * Known error values:
1098 * ENOMEM - insufficient memory to allocate intermediate copy buffer.
1099 * EINVAL - the length of data to read/write is not multiple of
1100 * device block size, or
1101 * the device offset is not multiple of device block size, or
1102 * the length of data to be written on disk is less than
1103 * the length of buffer.
1104 * ENXIO - invalid disk offset
1106 int CacheWriteBufferToDisk (Cache_t
*cache
, uint64_t offset
, uint32_t write_len
, u_char
*buffer
, uint32_t buf_len
)
1109 u_char
*write_buffer
= NULL
;
1111 uint32_t buf_offset
;
1112 uint32_t bytes_remain
;
1113 uint8_t zero_fill
= false;
1115 /* Check if buffer is provided */
1116 if (buffer
== NULL
) {
1120 /* Return error if length of data to be written on disk is,
1121 * less than the length of the buffer to be written, or
1122 * is not a multiple of device block size, or offset to write
1123 * is not multiple of device block size
1125 if ((write_len
% cache
->DevBlockSize
) ||
1126 (offset
% cache
->DevBlockSize
) ||
1127 (write_len
< buf_len
)) {
1132 /* Flush cache contents of offset range to be written on the disk */
1133 error
= CacheFlushRange(cache
, offset
, write_len
, 1);
1138 /* Calculate correct size of buffer to be written each time */
1139 io_count
= (write_len
< cache
->BlockSize
) ? write_len
: cache
->BlockSize
;
1141 /* Allocate temporary buffer to write data to disk */
1142 write_buffer
= malloc (io_count
);
1143 if (!write_buffer
) {
1145 printf("%s(%d): malloc(%zd) failed\n", __FUNCTION__
, __LINE__
, (size_t)cache
->BlockSize
);
1151 /* Read offset in data buffer to be written to disk */
1155 /* The last buffer might be less than io_count bytes */
1156 if (write_len
< io_count
) {
1157 io_count
= write_len
;
1160 /* Check whether data buffer was written completely to disk */
1161 if (buf_offset
< buf_len
) {
1162 /* Calculate the bytes from data buffer still to be written */
1163 bytes_remain
= buf_len
- buf_offset
;
1165 if (bytes_remain
>= io_count
) {
1166 /* Bytes remaining is greater than bytes written in one
1167 * IO request. Limit bytes read from data buffer in this
1168 * pass to the bytes written in one IO request
1170 bytes_remain
= io_count
;
1172 /* Copy data from data buffer to write buffer */
1173 memcpy (write_buffer
, buffer
, bytes_remain
);
1175 /* Bytes remaining is less than bytes written in one
1176 * IO request. Zero fill the remaining write buffer.
1179 /* Copy data from data buffer to write buffer */
1180 memcpy (write_buffer
, buffer
, bytes_remain
);
1182 /* Zero fill remain buffer, if any */
1183 memset (write_buffer
+ bytes_remain
, 0, io_count
- bytes_remain
);
1186 buf_offset
+= bytes_remain
;
1187 buffer
+= bytes_remain
;
1189 /* Do not zero fill the buffer if we have already done it */
1190 if (zero_fill
== false) {
1191 /* Zero fill entire write buffer */
1192 memset (write_buffer
, 0, io_count
);
1198 error
= CacheRawWrite (cache
, offset
, io_count
, write_buffer
);
1199 if (error
!= EOK
) goto out
;
1202 write_len
-= io_count
;
1206 /* If we allocated a temporary buffer, deallocate it */
1207 if (write_buffer
!= NULL
) {
1208 free (write_buffer
);
1216 * Obtain a cache block. If one already exists, it is returned. Otherwise a
1217 * new one is created and inserted into the cache.
1219 int CacheLookup (Cache_t
*cache
, uint64_t off
, Tag_t
**tag
)
1222 uint32_t hash
= off
% cache
->HashSize
;
1227 /* Search the hash table */
1229 temp
= cache
->Hash
[hash
];
1230 while (temp
!= NULL
) {
1231 if (temp
->Offset
== off
) break;
1237 /* Perform MTF if necessary */
1238 if (cache
->Hash
[hash
] != temp
) {
1239 /* Disconnect the tag */
1240 if (temp
->Next
!= NULL
)
1241 temp
->Next
->Prev
= temp
->Prev
;
1242 temp
->Prev
->Next
= temp
->Next
;
1245 /* Otherwise, it's a miss */
1247 /* Allocate a new tag */
1248 temp
= (Tag_t
*)calloc (sizeof (Tag_t
), 1);/* We really only need to zero the
1249 LRU portion though */
1252 /* Kick the tag onto the LRU */
1253 //LRUHit (&cache->LRU, (LRUNode_t *)temp, 0);
1256 /* Insert at the head (if it's not already there) */
1257 if (cache
->Hash
[hash
] != temp
) {
1259 temp
->Next
= cache
->Hash
[hash
];
1260 if (temp
->Next
!= NULL
)
1261 temp
->Next
->Prev
= temp
;
1262 cache
->Hash
[hash
] = temp
;
1265 /* Make sure there's a buffer */
1266 if (temp
->Buffer
== NULL
) {
1267 /* Find a free buffer */
1268 temp
->Buffer
= CacheAllocBlock (cache
);
1269 if (temp
->Buffer
== NULL
) {
1270 /* Try to evict a buffer */
1271 error
= LRUEvict (&cache
->LRU
, (LRUNode_t
*)temp
);
1272 if (error
!= EOK
) return (error
);
1275 temp
->Buffer
= CacheAllocBlock (cache
);
1276 if (temp
->Buffer
== NULL
) {
1278 printf("%s(%d): CacheAllocBlock failed (FreeHead = %p, FreeSize = %u)\n", __FUNCTION__
, __LINE__
, cache
->FreeHead
, cache
->FreeSize
);
1284 /* Load the block from disk */
1285 error
= CacheRawRead (cache
, off
, cache
->BlockSize
, temp
->Buffer
);
1286 if (error
!= EOK
) return (error
);
1290 if (temp
&& temp
->Flags
& kLockWrite
) {
1291 fprintf(stderr
, "CacheLookup(%p, %llu, %p): Found cache-locked block\n", cache
, off
, tag
);
1302 * Perform a direct read on the file.
1304 int CacheRawRead (Cache_t
*cache
, uint64_t off
, uint32_t len
, void *buf
)
1309 /* Both offset and length must be multiples of the device block size */
1310 if (off
% cache
->DevBlockSize
) return (EINVAL
);
1311 if (len
% cache
->DevBlockSize
) return (EINVAL
);
1313 /* Seek to the position */
1315 result
= lseek (cache
->FD_R
, off
, SEEK_SET
);
1316 if (result
== (off_t
)-1 && errno
!= 0)
1318 if (result
!= off
) return (ENXIO
);
1319 /* Read into the buffer */
1321 printf("%s: offset %llu, len %u\n", __FUNCTION__
, off
, len
);
1323 nread
= read (cache
->FD_R
, buf
, len
);
1324 if (nread
== -1) return (errno
);
1325 if (nread
== 0) return (ENXIO
);
1327 /* Update counters */
1336 * Perform a direct write on the file.
1338 int CacheRawWrite (Cache_t
*cache
, uint64_t off
, uint32_t len
, void *buf
)
1343 /* Both offset and length must be multiples of the device block size */
1344 if (off
% cache
->DevBlockSize
) return (EINVAL
);
1345 if (len
% cache
->DevBlockSize
) return (EINVAL
);
1347 /* Seek to the position */
1349 result
= lseek (cache
->FD_W
, off
, SEEK_SET
);
1350 if (result
== (off_t
)-1 && errno
!= 0) return (errno
);
1351 if (result
!= off
) return (ENXIO
);
1353 /* Write into the buffer */
1354 nwritten
= write (cache
->FD_W
, buf
, len
);
1355 if (nwritten
== -1) return (errno
);
1356 if (nwritten
== 0) return (ENXIO
);
1358 /* Update counters */
1369 * Initializes the LRU data structures.
1371 static int LRUInit (LRU_t
*lru
)
1373 /* Make the dummy nodes point to themselves */
1374 lru
->Head
.Next
= &lru
->Head
;
1375 lru
->Head
.Prev
= &lru
->Head
;
1377 lru
->Busy
.Next
= &lru
->Busy
;
1378 lru
->Busy
.Prev
= &lru
->Busy
;
1388 * NOTE: This is typically a no-op, since the cache manager will be clearing
1391 static int LRUDestroy (LRU_t
*lru
)
1400 * Registers data activity on the given node. If the node is already in the
1401 * LRU, it is moved to the front. Otherwise, it is inserted at the front.
1403 * NOTE: If the node is not in the LRU, we assume that its pointers are NULL.
1405 static int LRUHit (LRU_t
*lru
, LRUNode_t
*node
, int age
)
1407 /* Handle existing nodes */
1408 if ((node
->Next
!= NULL
) && (node
->Prev
!= NULL
)) {
1409 /* Detach the node */
1410 node
->Next
->Prev
= node
->Prev
;
1411 node
->Prev
->Next
= node
->Next
;
1414 /* If it's busy (we can't evict it) */
1415 if (((Tag_t
*)node
)->Refs
) {
1416 /* Insert at the head of the Busy queue */
1417 node
->Next
= lru
->Busy
.Next
;
1418 node
->Prev
= &lru
->Busy
;
1421 /* Insert at the head of the LRU */
1422 node
->Next
= &lru
->Head
;
1423 node
->Prev
= lru
->Head
.Prev
;
1426 /* Insert at the head of the LRU */
1427 node
->Next
= lru
->Head
.Next
;
1428 node
->Prev
= &lru
->Head
;
1431 node
->Next
->Prev
= node
;
1432 node
->Prev
->Next
= node
;
1440 * Chooses a buffer to release.
1442 * TODO: Under extreme conditions, it shoud be possible to release the buffer
1443 * of an actively referenced cache buffer, leaving the tag behind as a
1444 * placeholder. This would be required for implementing 2Q-LRU
1447 * NOTE: Make sure we never evict the node we're trying to find a buffer for!
1449 static int LRUEvict (LRU_t
*lru
, LRUNode_t
*node
)
1456 temp
= lru
->Head
.Prev
;
1458 /* Stop if we're empty */
1459 if (temp
== &lru
->Head
) {
1461 printf("%s(%d): empty?\n", __FUNCTION__
, __LINE__
);
1466 /* Detach the tail */
1467 temp
->Next
->Prev
= temp
->Prev
;
1468 temp
->Prev
->Next
= temp
->Next
;
1470 /* If it's not busy, we have a victim */
1471 if (!((Tag_t
*)temp
)->Refs
) break;
1473 /* Insert at the head of the Busy queue */
1474 temp
->Next
= lru
->Busy
.Next
;
1475 temp
->Prev
= &lru
->Busy
;
1477 temp
->Next
->Prev
= temp
;
1478 temp
->Prev
->Next
= temp
;
1483 /* Remove the tag */
1484 CacheRemove ((Cache_t
*)lru
, (Tag_t
*)temp
);
1490 * Dump the cache contents.
1491 * If nobody else calls it, it gets optimized out. Annoying and yet
1495 dumpCache(Cache_t
*cache
)
1501 printf("\tDevBlockSize = %u\n", cache
->DevBlockSize
);
1502 printf("\tCache Block Size = %u\n", cache
->BlockSize
);
1503 printf("\tHash Size = %u\n", cache
->HashSize
);
1504 printf("\tHash Table:\n");
1505 for (i
= 0; i
< cache
->HashSize
; i
++) {
1508 for (tag
= cache
->Hash
[i
]; tag
; tag
= tag
->Next
) {
1510 printf("\t\tOffset %llu, refs %u, Flags %#x (%skLazyWrite, %skLockWrite)\n",
1511 tag
->Offset
, tag
->Refs
, tag
->Flags
,
1512 (tag
->Flags
& kLazyWrite
) ? "" : "no ",
1513 (tag
->Flags
& kLockWrite
) ? "" : "no ");
1516 printf("\tNumber of entries: %u\n", numEntries
);