diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index a755b394d..2c0cadd40 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -2,6 +2,7 @@ SUBDIRS = fm fs COMMON_H = \ $(top_srcdir)/include/sys/arc.h \ + $(top_srcdir)/include/sys/arc_impl.h \ $(top_srcdir)/include/sys/avl.h \ $(top_srcdir)/include/sys/avl_impl.h \ $(top_srcdir)/include/sys/blkptr.h \ diff --git a/include/sys/arc_impl.h b/include/sys/arc_impl.h new file mode 100644 index 000000000..f7e1295a5 --- /dev/null +++ b/include/sys/arc_impl.h @@ -0,0 +1,157 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_ARC_IMPL_H +#define _SYS_ARC_IMPL_H + +#include <sys/arc.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Note that buffers can be in one of 6 states: + * ARC_anon - anonymous (discussed below) + * ARC_mru - recently used, currently cached + * ARC_mru_ghost - recently used, no longer in cache + * ARC_mfu - frequently used, currently cached + * ARC_mfu_ghost - frequently used, no longer in cache + * ARC_l2c_only - exists in L2ARC but not other states + * When there are no active references to the buffer, they are + * linked onto a list in one of these arc states. These are + * the only buffers that can be evicted or deleted. Within each + * state there are multiple lists, one for meta-data and one for + * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, + * etc.) is tracked separately so that it can be managed more + * explicitly: favored over data, limited explicitly. + * + * Anonymous buffers are buffers that are not associated with + * a DVA. These are buffers that hold dirty block copies + * before they are written to stable storage. By definition, + * they are "ref'd" and are considered part of arc_mru + * that cannot be freed. Generally, they will acquire a DVA + * as they are written and migrate onto the arc_mru list. + * + * The ARC_l2c_only state is for buffers that are in the second + * level ARC but no longer in any of the ARC_m* lists. The second + * level ARC itself may also contain buffers that are in any of + * the ARC_m* states - meaning that a buffer can exist in two + * places. The reason for the ARC_l2c_only state is to keep the + * buffer header in the hash table, so that reads that hit the + * second level ARC benefit from these fast lookups. 
+ */ + +typedef struct arc_state { + list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */ + uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */ + uint64_t arcs_size; /* total amount of data in this state */ + kmutex_t arcs_mtx; + arc_state_type_t arcs_state; +} arc_state_t; + +typedef struct l2arc_buf_hdr l2arc_buf_hdr_t; + +typedef struct arc_callback arc_callback_t; + +struct arc_callback { + void *acb_private; + arc_done_func_t *acb_done; + arc_buf_t *acb_buf; + zio_t *acb_zio_dummy; + arc_callback_t *acb_next; +}; + +typedef struct arc_write_callback arc_write_callback_t; + +struct arc_write_callback { + void *awcb_private; + arc_done_func_t *awcb_ready; + arc_done_func_t *awcb_physdone; + arc_done_func_t *awcb_done; + arc_buf_t *awcb_buf; +}; + +struct arc_buf_hdr { + /* protected by hash lock */ + dva_t b_dva; + uint64_t b_birth; + uint64_t b_cksum0; + + kmutex_t b_freeze_lock; + zio_cksum_t *b_freeze_cksum; + + arc_buf_hdr_t *b_hash_next; + arc_buf_t *b_buf; + uint32_t b_flags; + uint32_t b_datacnt; + + arc_callback_t *b_acb; + kcondvar_t b_cv; + + /* immutable */ + arc_buf_contents_t b_type; + uint64_t b_size; + uint64_t b_spa; + + /* protected by arc state mutex */ + arc_state_t *b_state; + list_node_t b_arc_node; + + /* updated atomically */ + clock_t b_arc_access; + uint32_t b_mru_hits; + uint32_t b_mru_ghost_hits; + uint32_t b_mfu_hits; + uint32_t b_mfu_ghost_hits; + uint32_t b_l2_hits; + + /* self protecting */ + refcount_t b_refcnt; + + l2arc_buf_hdr_t *b_l2hdr; + list_node_t b_l2node; +}; + +typedef struct l2arc_dev { + vdev_t *l2ad_vdev; /* vdev */ + spa_t *l2ad_spa; /* spa */ + uint64_t l2ad_hand; /* next write location */ + uint64_t l2ad_start; /* first addr on device */ + uint64_t l2ad_end; /* last addr on device */ + uint64_t l2ad_evict; /* last addr eviction reached */ + boolean_t l2ad_first; /* first sweep through */ + boolean_t l2ad_writing; /* currently writing */ + list_t *l2ad_buflist; /* buffer list */ + 
list_node_t l2ad_node; /* device list node */ +} l2arc_dev_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ARC_IMPL_H */ diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 0742da0f6..148b208a8 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -145,6 +145,7 @@ #include #include #include +#include <sys/arc_impl.h> #ifndef _KERNEL /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */ @@ -218,46 +219,6 @@ unsigned long zfs_arc_max = 0; unsigned long zfs_arc_min = 0; unsigned long zfs_arc_meta_limit = 0; -/* - * Note that buffers can be in one of 6 states: - * ARC_anon - anonymous (discussed below) - * ARC_mru - recently used, currently cached - * ARC_mru_ghost - recentely used, no longer in cache - * ARC_mfu - frequently used, currently cached - * ARC_mfu_ghost - frequently used, no longer in cache - * ARC_l2c_only - exists in L2ARC but not other states - * When there are no active references to the buffer, they are - * are linked onto a list in one of these arc states. These are - * the only buffers that can be evicted or deleted. Within each - * state there are multiple lists, one for meta-data and one for - * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, - * etc.) is tracked separately so that it can be managed more - * explicitly: favored over data, limited explicitly. - * - * Anonymous buffers are buffers that are not associated with - * a DVA. These are buffers that hold dirty block copies - * before they are written to stable storage. By definition, - * they are "ref'd" and are considered part of arc_mru - * that cannot be freed. Generally, they will aquire a DVA - * as they are written and migrate onto the arc_mru list. - * - * The ARC_l2c_only state is for buffers that are in the second - * level ARC but no longer in any of the ARC_m* lists. The second - * level ARC itself may also contain buffers that are in any of - * the ARC_m* states - meaning that a buffer can exist in two - * places. 
The reason for the ARC_l2c_only state is to keep the - * buffer header in the hash table, so that reads that hit the - * second level ARC benefit from these fast lookups. - */ - -typedef struct arc_state { - list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */ - uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */ - uint64_t arcs_size; /* total amount of data in this state */ - kmutex_t arcs_mtx; - arc_state_type_t arcs_state; -} arc_state_t; - /* The 6 states: */ static arc_state_t ARC_anon; static arc_state_t ARC_mru; @@ -522,69 +483,6 @@ static arc_state_t *arc_l2c_only; #define L2ARC_IS_VALID_COMPRESS(_c_) \ ((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY) -typedef struct l2arc_buf_hdr l2arc_buf_hdr_t; - -typedef struct arc_callback arc_callback_t; - -struct arc_callback { - void *acb_private; - arc_done_func_t *acb_done; - arc_buf_t *acb_buf; - zio_t *acb_zio_dummy; - arc_callback_t *acb_next; -}; - -typedef struct arc_write_callback arc_write_callback_t; - -struct arc_write_callback { - void *awcb_private; - arc_done_func_t *awcb_ready; - arc_done_func_t *awcb_physdone; - arc_done_func_t *awcb_done; - arc_buf_t *awcb_buf; -}; - -struct arc_buf_hdr { - /* protected by hash lock */ - dva_t b_dva; - uint64_t b_birth; - uint64_t b_cksum0; - - kmutex_t b_freeze_lock; - zio_cksum_t *b_freeze_cksum; - - arc_buf_hdr_t *b_hash_next; - arc_buf_t *b_buf; - uint32_t b_flags; - uint32_t b_datacnt; - - arc_callback_t *b_acb; - kcondvar_t b_cv; - - /* immutable */ - arc_buf_contents_t b_type; - uint64_t b_size; - uint64_t b_spa; - - /* protected by arc state mutex */ - arc_state_t *b_state; - list_node_t b_arc_node; - - /* updated atomically */ - clock_t b_arc_access; - uint32_t b_mru_hits; - uint32_t b_mru_ghost_hits; - uint32_t b_mfu_hits; - uint32_t b_mfu_ghost_hits; - uint32_t b_l2_hits; - - /* self protecting */ - refcount_t b_refcnt; - - l2arc_buf_hdr_t *b_l2hdr; - list_node_t b_l2node; -}; - static list_t arc_prune_list; 
static kmutex_t arc_prune_mtx; static arc_buf_t *arc_eviction_list; @@ -707,19 +605,6 @@ int l2arc_norw = B_FALSE; /* no reads during writes */ /* * L2ARC Internals */ -typedef struct l2arc_dev { - vdev_t *l2ad_vdev; /* vdev */ - spa_t *l2ad_spa; /* spa */ - uint64_t l2ad_hand; /* next write location */ - uint64_t l2ad_start; /* first addr on device */ - uint64_t l2ad_end; /* last addr on device */ - uint64_t l2ad_evict; /* last addr eviction reached */ - boolean_t l2ad_first; /* first sweep through */ - boolean_t l2ad_writing; /* currently writing */ - list_t *l2ad_buflist; /* buffer list */ - list_node_t l2ad_node; /* device list node */ -} l2arc_dev_t; - static list_t L2ARC_dev_list; /* device list */ static list_t *l2arc_dev_list; /* device list pointer */ static kmutex_t l2arc_dev_mtx; /* device list mutex */