mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-27 02:14:28 +03:00
Merge branch 'lock-contention-on-arcs_mtx-final'
Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf Closes #3115 Closes #3481
This commit is contained in:
commit
06358ea16e
@ -191,12 +191,10 @@ def get_arc_summary(Kstat):
|
|||||||
### ARC Misc. ###
|
### ARC Misc. ###
|
||||||
deleted = Kstat["kstat.zfs.misc.arcstats.deleted"]
|
deleted = Kstat["kstat.zfs.misc.arcstats.deleted"]
|
||||||
mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"]
|
mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"]
|
||||||
recycle_miss = Kstat["kstat.zfs.misc.arcstats.recycle_miss"]
|
|
||||||
|
|
||||||
### ARC Misc. ###
|
### ARC Misc. ###
|
||||||
output["arc_misc"] = {}
|
output["arc_misc"] = {}
|
||||||
output["arc_misc"]["deleted"] = fHits(deleted)
|
output["arc_misc"]["deleted"] = fHits(deleted)
|
||||||
output["arc_misc"]['recycle_miss'] = fHits(recycle_miss)
|
|
||||||
output["arc_misc"]['mutex_miss'] = fHits(mutex_miss)
|
output["arc_misc"]['mutex_miss'] = fHits(mutex_miss)
|
||||||
output["arc_misc"]['evict_skips'] = fHits(mutex_miss)
|
output["arc_misc"]['evict_skips'] = fHits(mutex_miss)
|
||||||
|
|
||||||
@ -302,8 +300,6 @@ def _arc_summary(Kstat):
|
|||||||
### ARC Misc. ###
|
### ARC Misc. ###
|
||||||
sys.stdout.write("ARC Misc:\n")
|
sys.stdout.write("ARC Misc:\n")
|
||||||
sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted'])
|
sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted'])
|
||||||
sys.stdout.write("\tRecycle Misses:\t\t\t\t%s\n" %
|
|
||||||
arc['arc_misc']['recycle_miss'])
|
|
||||||
sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" %
|
sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" %
|
||||||
arc['arc_misc']['mutex_miss'])
|
arc['arc_misc']['mutex_miss'])
|
||||||
sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" %
|
sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" %
|
||||||
|
@ -82,7 +82,6 @@ cols = {
|
|||||||
"mrug": [4, 1000, "MRU Ghost List hits per second"],
|
"mrug": [4, 1000, "MRU Ghost List hits per second"],
|
||||||
"eskip": [5, 1000, "evict_skip per second"],
|
"eskip": [5, 1000, "evict_skip per second"],
|
||||||
"mtxmis": [6, 1000, "mutex_miss per second"],
|
"mtxmis": [6, 1000, "mutex_miss per second"],
|
||||||
"rmis": [4, 1000, "recycle_miss per second"],
|
|
||||||
"dread": [5, 1000, "Demand accesses per second"],
|
"dread": [5, 1000, "Demand accesses per second"],
|
||||||
"pread": [5, 1000, "Prefetch accesses per second"],
|
"pread": [5, 1000, "Prefetch accesses per second"],
|
||||||
"l2hits": [6, 1000, "L2ARC hits per second"],
|
"l2hits": [6, 1000, "L2ARC hits per second"],
|
||||||
@ -406,7 +405,6 @@ def calculate():
|
|||||||
v["mrug"] = d["mru_ghost_hits"] / sint
|
v["mrug"] = d["mru_ghost_hits"] / sint
|
||||||
v["mfug"] = d["mfu_ghost_hits"] / sint
|
v["mfug"] = d["mfu_ghost_hits"] / sint
|
||||||
v["eskip"] = d["evict_skip"] / sint
|
v["eskip"] = d["evict_skip"] / sint
|
||||||
v["rmis"] = d["recycle_miss"] / sint
|
|
||||||
v["mtxmis"] = d["mutex_miss"] / sint
|
v["mtxmis"] = d["mutex_miss"] / sint
|
||||||
|
|
||||||
if l2exist:
|
if l2exist:
|
||||||
|
@ -1250,7 +1250,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
|
|||||||
print_indirect(bp, zb, dnp);
|
print_indirect(bp, zb, dnp);
|
||||||
|
|
||||||
if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
|
if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
|
||||||
uint32_t flags = ARC_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
int i;
|
int i;
|
||||||
blkptr_t *cbp;
|
blkptr_t *cbp;
|
||||||
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||||
|
@ -4042,7 +4042,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
|||||||
* assign an arcbuf to a dbuf.
|
* assign an arcbuf to a dbuf.
|
||||||
*/
|
*/
|
||||||
for (j = 0; j < s; j++) {
|
for (j = 0; j < s; j++) {
|
||||||
if (i != 5) {
|
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||||
bigbuf_arcbufs[j] =
|
bigbuf_arcbufs[j] =
|
||||||
dmu_request_arcbuf(bonus_db, chunksize);
|
dmu_request_arcbuf(bonus_db, chunksize);
|
||||||
} else {
|
} else {
|
||||||
@ -4066,7 +4066,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
|||||||
umem_free(packbuf, packsize);
|
umem_free(packbuf, packsize);
|
||||||
umem_free(bigbuf, bigsize);
|
umem_free(bigbuf, bigsize);
|
||||||
for (j = 0; j < s; j++) {
|
for (j = 0; j < s; j++) {
|
||||||
if (i != 5) {
|
if (i != 5 ||
|
||||||
|
chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||||
dmu_return_arcbuf(bigbuf_arcbufs[j]);
|
dmu_return_arcbuf(bigbuf_arcbufs[j]);
|
||||||
} else {
|
} else {
|
||||||
dmu_return_arcbuf(
|
dmu_return_arcbuf(
|
||||||
@ -4111,7 +4112,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
|||||||
}
|
}
|
||||||
for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
|
for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
|
||||||
dmu_buf_t *dbt;
|
dmu_buf_t *dbt;
|
||||||
if (i != 5) {
|
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||||
bcopy((caddr_t)bigbuf + (off - bigoff),
|
bcopy((caddr_t)bigbuf + (off - bigoff),
|
||||||
bigbuf_arcbufs[j]->b_data, chunksize);
|
bigbuf_arcbufs[j]->b_data, chunksize);
|
||||||
} else {
|
} else {
|
||||||
@ -4128,7 +4129,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
|||||||
VERIFY(dmu_buf_hold(os, bigobj, off,
|
VERIFY(dmu_buf_hold(os, bigobj, off,
|
||||||
FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
|
FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
|
||||||
}
|
}
|
||||||
if (i != 5) {
|
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||||
dmu_assign_arcbuf(bonus_db, off,
|
dmu_assign_arcbuf(bonus_db, off,
|
||||||
bigbuf_arcbufs[j], tx);
|
bigbuf_arcbufs[j], tx);
|
||||||
} else {
|
} else {
|
||||||
|
@ -33,6 +33,7 @@ COMMON_H = \
|
|||||||
$(top_srcdir)/include/sys/efi_partition.h \
|
$(top_srcdir)/include/sys/efi_partition.h \
|
||||||
$(top_srcdir)/include/sys/metaslab.h \
|
$(top_srcdir)/include/sys/metaslab.h \
|
||||||
$(top_srcdir)/include/sys/metaslab_impl.h \
|
$(top_srcdir)/include/sys/metaslab_impl.h \
|
||||||
|
$(top_srcdir)/include/sys/multilist.h \
|
||||||
$(top_srcdir)/include/sys/nvpair.h \
|
$(top_srcdir)/include/sys/nvpair.h \
|
||||||
$(top_srcdir)/include/sys/nvpair_impl.h \
|
$(top_srcdir)/include/sys/nvpair_impl.h \
|
||||||
$(top_srcdir)/include/sys/range_tree.h \
|
$(top_srcdir)/include/sys/range_tree.h \
|
||||||
@ -53,6 +54,7 @@ COMMON_H = \
|
|||||||
$(top_srcdir)/include/sys/trace_dbuf.h \
|
$(top_srcdir)/include/sys/trace_dbuf.h \
|
||||||
$(top_srcdir)/include/sys/trace_dmu.h \
|
$(top_srcdir)/include/sys/trace_dmu.h \
|
||||||
$(top_srcdir)/include/sys/trace_dnode.h \
|
$(top_srcdir)/include/sys/trace_dnode.h \
|
||||||
|
$(top_srcdir)/include/sys/trace_multilist.h \
|
||||||
$(top_srcdir)/include/sys/trace_txg.h \
|
$(top_srcdir)/include/sys/trace_txg.h \
|
||||||
$(top_srcdir)/include/sys/trace_zil.h \
|
$(top_srcdir)/include/sys/trace_zil.h \
|
||||||
$(top_srcdir)/include/sys/trace_zrlock.h \
|
$(top_srcdir)/include/sys/trace_zrlock.h \
|
||||||
|
@ -38,6 +38,12 @@ extern "C" {
|
|||||||
#include <sys/spa.h>
|
#include <sys/spa.h>
|
||||||
#include <sys/refcount.h>
|
#include <sys/refcount.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used by arc_flush() to inform arc_evict_state() that it should evict
|
||||||
|
* all available buffers from the arc state being passed in.
|
||||||
|
*/
|
||||||
|
#define ARC_EVICT_ALL -1ULL
|
||||||
|
|
||||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||||
typedef struct arc_buf arc_buf_t;
|
typedef struct arc_buf arc_buf_t;
|
||||||
typedef struct arc_prune arc_prune_t;
|
typedef struct arc_prune arc_prune_t;
|
||||||
@ -53,10 +59,65 @@ arc_done_func_t arc_getbuf_func;
|
|||||||
struct arc_prune {
|
struct arc_prune {
|
||||||
arc_prune_func_t *p_pfunc;
|
arc_prune_func_t *p_pfunc;
|
||||||
void *p_private;
|
void *p_private;
|
||||||
|
uint64_t p_adjust;
|
||||||
list_node_t p_node;
|
list_node_t p_node;
|
||||||
refcount_t p_refcnt;
|
refcount_t p_refcnt;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef enum arc_strategy {
|
||||||
|
ARC_STRATEGY_META_ONLY = 0, /* Evict only meta data buffers */
|
||||||
|
ARC_STRATEGY_META_BALANCED = 1, /* Evict data buffers if needed */
|
||||||
|
} arc_strategy_t;
|
||||||
|
|
||||||
|
typedef enum arc_flags
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Public flags that can be passed into the ARC by external consumers.
|
||||||
|
*/
|
||||||
|
ARC_FLAG_NONE = 1 << 0, /* No flags set */
|
||||||
|
ARC_FLAG_WAIT = 1 << 1, /* perform sync I/O */
|
||||||
|
ARC_FLAG_NOWAIT = 1 << 2, /* perform async I/O */
|
||||||
|
ARC_FLAG_PREFETCH = 1 << 3, /* I/O is a prefetch */
|
||||||
|
ARC_FLAG_CACHED = 1 << 4, /* I/O was in cache */
|
||||||
|
ARC_FLAG_L2CACHE = 1 << 5, /* cache in L2ARC */
|
||||||
|
ARC_FLAG_L2COMPRESS = 1 << 6, /* compress in L2ARC */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Private ARC flags. These flags are private ARC only flags that
|
||||||
|
* will show up in b_flags in the arc_buf_hdr_t. These flags should
|
||||||
|
* only be set by ARC code.
|
||||||
|
*/
|
||||||
|
ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */
|
||||||
|
ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */
|
||||||
|
ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */
|
||||||
|
ARC_FLAG_FREED_IN_READ = 1 << 10, /* freed during read */
|
||||||
|
ARC_FLAG_BUF_AVAILABLE = 1 << 11, /* block not in use */
|
||||||
|
ARC_FLAG_INDIRECT = 1 << 12, /* indirect block */
|
||||||
|
ARC_FLAG_L2_WRITING = 1 << 13, /* write in progress */
|
||||||
|
ARC_FLAG_L2_EVICTED = 1 << 14, /* evicted during I/O */
|
||||||
|
ARC_FLAG_L2_WRITE_HEAD = 1 << 15, /* head of write list */
|
||||||
|
/* indicates that the buffer contains metadata (otherwise, data) */
|
||||||
|
ARC_FLAG_BUFC_METADATA = 1 << 16,
|
||||||
|
|
||||||
|
/* Flags specifying whether optional hdr struct fields are defined */
|
||||||
|
ARC_FLAG_HAS_L1HDR = 1 << 17,
|
||||||
|
ARC_FLAG_HAS_L2HDR = 1 << 18,
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The arc buffer's compression mode is stored in the top 7 bits of the
|
||||||
|
* flags field, so these dummy flags are included so that MDB can
|
||||||
|
* interpret the enum properly.
|
||||||
|
*/
|
||||||
|
ARC_FLAG_COMPRESS_0 = 1 << 24,
|
||||||
|
ARC_FLAG_COMPRESS_1 = 1 << 25,
|
||||||
|
ARC_FLAG_COMPRESS_2 = 1 << 26,
|
||||||
|
ARC_FLAG_COMPRESS_3 = 1 << 27,
|
||||||
|
ARC_FLAG_COMPRESS_4 = 1 << 28,
|
||||||
|
ARC_FLAG_COMPRESS_5 = 1 << 29,
|
||||||
|
ARC_FLAG_COMPRESS_6 = 1 << 30
|
||||||
|
|
||||||
|
} arc_flags_t;
|
||||||
|
|
||||||
struct arc_buf {
|
struct arc_buf {
|
||||||
arc_buf_hdr_t *b_hdr;
|
arc_buf_hdr_t *b_hdr;
|
||||||
arc_buf_t *b_next;
|
arc_buf_t *b_next;
|
||||||
@ -71,15 +132,6 @@ typedef enum arc_buf_contents {
|
|||||||
ARC_BUFC_METADATA, /* buffer contains metadata */
|
ARC_BUFC_METADATA, /* buffer contains metadata */
|
||||||
ARC_BUFC_NUMTYPES
|
ARC_BUFC_NUMTYPES
|
||||||
} arc_buf_contents_t;
|
} arc_buf_contents_t;
|
||||||
/*
|
|
||||||
* These are the flags we pass into calls to the arc
|
|
||||||
*/
|
|
||||||
#define ARC_WAIT (1 << 1) /* perform I/O synchronously */
|
|
||||||
#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */
|
|
||||||
#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */
|
|
||||||
#define ARC_CACHED (1 << 4) /* I/O was already in cache */
|
|
||||||
#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */
|
|
||||||
#define ARC_L2COMPRESS (1 << 6) /* compress in L2ARC */
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following breakdowns of arc_size exist for kstat only.
|
* The following breakdowns of arc_size exist for kstat only.
|
||||||
@ -106,7 +158,6 @@ typedef enum arc_state_type {
|
|||||||
typedef struct arc_buf_info {
|
typedef struct arc_buf_info {
|
||||||
arc_state_type_t abi_state_type;
|
arc_state_type_t abi_state_type;
|
||||||
arc_buf_contents_t abi_state_contents;
|
arc_buf_contents_t abi_state_contents;
|
||||||
uint64_t abi_state_index;
|
|
||||||
uint32_t abi_flags;
|
uint32_t abi_flags;
|
||||||
uint32_t abi_datacnt;
|
uint32_t abi_datacnt;
|
||||||
uint64_t abi_size;
|
uint64_t abi_size;
|
||||||
@ -146,7 +197,7 @@ int arc_referenced(arc_buf_t *buf);
|
|||||||
|
|
||||||
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||||
arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
|
arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
|
||||||
uint32_t *arc_flags, const zbookmark_phys_t *zb);
|
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
|
||||||
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
||||||
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
|
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
|
||||||
const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
|
const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
|
||||||
@ -160,7 +211,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
|
|||||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||||
boolean_t arc_clear_callback(arc_buf_t *buf);
|
boolean_t arc_clear_callback(arc_buf_t *buf);
|
||||||
|
|
||||||
void arc_flush(spa_t *spa);
|
void arc_flush(spa_t *spa, boolean_t retry);
|
||||||
void arc_tempreserve_clear(uint64_t reserve);
|
void arc_tempreserve_clear(uint64_t reserve);
|
||||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||||
|
|
||||||
|
@ -67,15 +67,25 @@ extern "C" {
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
typedef struct arc_state {
|
typedef struct arc_state {
|
||||||
list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
|
/*
|
||||||
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
|
* list of evictable buffers
|
||||||
uint64_t arcs_size; /* total amount of data in this state */
|
*/
|
||||||
kmutex_t arcs_mtx;
|
multilist_t arcs_list[ARC_BUFC_NUMTYPES];
|
||||||
|
/*
|
||||||
|
* total amount of evictable data in this state
|
||||||
|
*/
|
||||||
|
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];
|
||||||
|
/*
|
||||||
|
* total amount of data in this state; this includes: evictable,
|
||||||
|
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
|
||||||
|
*/
|
||||||
|
uint64_t arcs_size;
|
||||||
|
/*
|
||||||
|
* supports the "dbufs" kstat
|
||||||
|
*/
|
||||||
arc_state_type_t arcs_state;
|
arc_state_type_t arcs_state;
|
||||||
} arc_state_t;
|
} arc_state_t;
|
||||||
|
|
||||||
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
|
|
||||||
|
|
||||||
typedef struct arc_callback arc_callback_t;
|
typedef struct arc_callback arc_callback_t;
|
||||||
|
|
||||||
struct arc_callback {
|
struct arc_callback {
|
||||||
@ -96,31 +106,49 @@ struct arc_write_callback {
|
|||||||
arc_buf_t *awcb_buf;
|
arc_buf_t *awcb_buf;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct arc_buf_hdr {
|
/*
|
||||||
/* protected by hash lock */
|
* ARC buffers are separated into multiple structs as a memory saving measure:
|
||||||
dva_t b_dva;
|
* - Common fields struct, always defined, and embedded within it:
|
||||||
uint64_t b_birth;
|
* - L2-only fields, always allocated but undefined when not in L2ARC
|
||||||
uint64_t b_cksum0;
|
* - L1-only fields, only allocated when in L1ARC
|
||||||
|
*
|
||||||
|
*           Buffer in L1                      Buffer only in L2
|
||||||
|
*    +------------------------+          +------------------------+
|
||||||
|
*    | arc_buf_hdr_t          |          | arc_buf_hdr_t          |
|
||||||
|
*    |                        |          |                        |
|
||||||
|
*    |                        |          |                        |
|
||||||
|
*    |                        |          |                        |
|
||||||
|
*    +------------------------+          +------------------------+
|
||||||
|
*    | l2arc_buf_hdr_t        |          | l2arc_buf_hdr_t        |
|
||||||
|
*    | (undefined if L1-only) |          |                        |
|
||||||
|
*    +------------------------+          +------------------------+
|
||||||
|
*    | l1arc_buf_hdr_t        |
|
||||||
|
*    |                        |
|
||||||
|
*    |                        |
|
||||||
|
*    |                        |
|
||||||
|
*    |                        |
|
||||||
|
*    +------------------------+
|
||||||
|
*
|
||||||
|
* Because it's possible for the L2ARC to become extremely large, we can wind
|
||||||
|
* up eating a lot of memory in L2ARC buffer headers, so the size of a header
|
||||||
|
* is minimized by only allocating the fields necessary for an L1-cached buffer
|
||||||
|
* when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
|
||||||
|
* l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
|
||||||
|
* words in pointers. arc_hdr_realloc() is used to switch a header between
|
||||||
|
* these two allocation states.
|
||||||
|
*/
|
||||||
|
typedef struct l1arc_buf_hdr {
|
||||||
kmutex_t b_freeze_lock;
|
kmutex_t b_freeze_lock;
|
||||||
zio_cksum_t *b_freeze_cksum;
|
|
||||||
|
|
||||||
arc_buf_hdr_t *b_hash_next;
|
|
||||||
arc_buf_t *b_buf;
|
arc_buf_t *b_buf;
|
||||||
uint32_t b_flags;
|
|
||||||
uint32_t b_datacnt;
|
uint32_t b_datacnt;
|
||||||
|
/* for waiting on writes to complete */
|
||||||
arc_callback_t *b_acb;
|
|
||||||
kcondvar_t b_cv;
|
kcondvar_t b_cv;
|
||||||
|
|
||||||
/* immutable */
|
|
||||||
arc_buf_contents_t b_type;
|
|
||||||
uint64_t b_size;
|
|
||||||
uint64_t b_spa;
|
|
||||||
|
|
||||||
/* protected by arc state mutex */
|
/* protected by arc state mutex */
|
||||||
arc_state_t *b_state;
|
arc_state_t *b_state;
|
||||||
list_node_t b_arc_node;
|
multilist_node_t b_arc_node;
|
||||||
|
|
||||||
/* updated atomically */
|
/* updated atomically */
|
||||||
clock_t b_arc_access;
|
clock_t b_arc_access;
|
||||||
@ -133,9 +161,10 @@ struct arc_buf_hdr {
|
|||||||
/* self protecting */
|
/* self protecting */
|
||||||
refcount_t b_refcnt;
|
refcount_t b_refcnt;
|
||||||
|
|
||||||
l2arc_buf_hdr_t *b_l2hdr;
|
arc_callback_t *b_acb;
|
||||||
list_node_t b_l2node;
|
/* temporary buffer holder for in-flight compressed data */
|
||||||
};
|
void *b_tmp_cdata;
|
||||||
|
} l1arc_buf_hdr_t;
|
||||||
|
|
||||||
typedef struct l2arc_dev {
|
typedef struct l2arc_dev {
|
||||||
vdev_t *l2ad_vdev; /* vdev */
|
vdev_t *l2ad_vdev; /* vdev */
|
||||||
@ -146,15 +175,51 @@ typedef struct l2arc_dev {
|
|||||||
uint64_t l2ad_evict; /* last addr eviction reached */
|
uint64_t l2ad_evict; /* last addr eviction reached */
|
||||||
boolean_t l2ad_first; /* first sweep through */
|
boolean_t l2ad_first; /* first sweep through */
|
||||||
boolean_t l2ad_writing; /* currently writing */
|
boolean_t l2ad_writing; /* currently writing */
|
||||||
list_t *l2ad_buflist; /* buffer list */
|
kmutex_t l2ad_mtx; /* lock for buffer list */
|
||||||
|
list_t l2ad_buflist; /* buffer list */
|
||||||
list_node_t l2ad_node; /* device list node */
|
list_node_t l2ad_node; /* device list node */
|
||||||
} l2arc_dev_t;
|
} l2arc_dev_t;
|
||||||
|
|
||||||
|
typedef struct l2arc_buf_hdr {
|
||||||
|
/* protected by arc_buf_hdr mutex */
|
||||||
|
l2arc_dev_t *b_dev; /* L2ARC device */
|
||||||
|
uint64_t b_daddr; /* disk address, offset byte */
|
||||||
|
/* real alloc'd buffer size depending on b_compress applied */
|
||||||
|
uint32_t b_hits;
|
||||||
|
int32_t b_asize;
|
||||||
|
|
||||||
|
list_node_t b_l2node;
|
||||||
|
} l2arc_buf_hdr_t;
|
||||||
|
|
||||||
typedef struct l2arc_write_callback {
|
typedef struct l2arc_write_callback {
|
||||||
l2arc_dev_t *l2wcb_dev; /* device info */
|
l2arc_dev_t *l2wcb_dev; /* device info */
|
||||||
arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
|
arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
|
||||||
} l2arc_write_callback_t;
|
} l2arc_write_callback_t;
|
||||||
|
|
||||||
|
struct arc_buf_hdr {
|
||||||
|
/* protected by hash lock */
|
||||||
|
dva_t b_dva;
|
||||||
|
uint64_t b_birth;
|
||||||
|
/*
|
||||||
|
* Even though this checksum is only set/verified when a buffer is in
|
||||||
|
* the L1 cache, it needs to be in the set of common fields because it
|
||||||
|
* must be preserved from the time before a buffer is written out to
|
||||||
|
* L2ARC until after it is read back in.
|
||||||
|
*/
|
||||||
|
zio_cksum_t *b_freeze_cksum;
|
||||||
|
|
||||||
|
arc_buf_hdr_t *b_hash_next;
|
||||||
|
arc_flags_t b_flags;
|
||||||
|
|
||||||
|
/* immutable */
|
||||||
|
int32_t b_size;
|
||||||
|
uint64_t b_spa;
|
||||||
|
|
||||||
|
/* L2ARC fields. Undefined when not in L2ARC. */
|
||||||
|
l2arc_buf_hdr_t b_l2hdr;
|
||||||
|
/* L1ARC fields. Undefined when in l2arc_only state */
|
||||||
|
l1arc_buf_hdr_t b_l1hdr;
|
||||||
|
};
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
105
include/sys/multilist.h
Normal file
105
include/sys/multilist.h
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* This file and its contents are supplied under the terms of the
|
||||||
|
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||||
|
* You may only use this file in accordance with the terms of version
|
||||||
|
* 1.0 of the CDDL.
|
||||||
|
*
|
||||||
|
* A full copy of the text of the CDDL should have accompanied this
|
||||||
|
* source. A copy of the CDDL is also available via the Internet at
|
||||||
|
* http://www.illumos.org/license/CDDL.
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SYS_MULTILIST_H
|
||||||
|
#define _SYS_MULTILIST_H
|
||||||
|
|
||||||
|
#include <sys/zfs_context.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef list_node_t multilist_node_t;
|
||||||
|
typedef struct multilist multilist_t;
|
||||||
|
typedef struct multilist_sublist multilist_sublist_t;
|
||||||
|
typedef unsigned int multilist_sublist_index_func_t(multilist_t *, void *);
|
||||||
|
|
||||||
|
struct multilist_sublist {
|
||||||
|
/*
|
||||||
|
* The mutex used internally to implement thread safe insertions
|
||||||
|
* and removals to this individual sublist. It can also be locked
|
||||||
|
* by a consumer using multilist_sublist_{lock,unlock}, which is
|
||||||
|
* useful if a consumer needs to traverse the list in a thread
|
||||||
|
* safe manner.
|
||||||
|
*/
|
||||||
|
kmutex_t mls_lock;
|
||||||
|
/*
|
||||||
|
* The actual list object containing all objects in this sublist.
|
||||||
|
*/
|
||||||
|
list_t mls_list;
|
||||||
|
/*
|
||||||
|
* Pad to cache line, in an effort to try and prevent cache line
|
||||||
|
* contention.
|
||||||
|
*/
|
||||||
|
} ____cacheline_aligned;
|
||||||
|
|
||||||
|
struct multilist {
|
||||||
|
/*
|
||||||
|
* This is used to get to the multilist_node_t structure given
|
||||||
|
* the void *object contained on the list.
|
||||||
|
*/
|
||||||
|
size_t ml_offset;
|
||||||
|
/*
|
||||||
|
* The number of sublists used internally by this multilist.
|
||||||
|
*/
|
||||||
|
uint64_t ml_num_sublists;
|
||||||
|
/*
|
||||||
|
* Pointer to the array of actual sublists.
|
||||||
|
*/
|
||||||
|
multilist_sublist_t *ml_sublists;
|
||||||
|
/*
|
||||||
|
* Pointer to function which determines the sublist to use
|
||||||
|
* when inserting and removing objects from this multilist.
|
||||||
|
* Please see the comment above multilist_create for details.
|
||||||
|
*/
|
||||||
|
multilist_sublist_index_func_t *ml_index_func;
|
||||||
|
};
|
||||||
|
|
||||||
|
void multilist_destroy(multilist_t *);
|
||||||
|
void multilist_create(multilist_t *, size_t, size_t, unsigned int,
|
||||||
|
multilist_sublist_index_func_t *);
|
||||||
|
|
||||||
|
void multilist_insert(multilist_t *, void *);
|
||||||
|
void multilist_remove(multilist_t *, void *);
|
||||||
|
int multilist_is_empty(multilist_t *);
|
||||||
|
|
||||||
|
unsigned int multilist_get_num_sublists(multilist_t *);
|
||||||
|
unsigned int multilist_get_random_index(multilist_t *);
|
||||||
|
|
||||||
|
multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
|
||||||
|
void multilist_sublist_unlock(multilist_sublist_t *);
|
||||||
|
|
||||||
|
void multilist_sublist_insert_head(multilist_sublist_t *, void *);
|
||||||
|
void multilist_sublist_insert_tail(multilist_sublist_t *, void *);
|
||||||
|
void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj);
|
||||||
|
void multilist_sublist_remove(multilist_sublist_t *, void *);
|
||||||
|
|
||||||
|
void *multilist_sublist_head(multilist_sublist_t *);
|
||||||
|
void *multilist_sublist_tail(multilist_sublist_t *);
|
||||||
|
void *multilist_sublist_next(multilist_sublist_t *, void *);
|
||||||
|
void *multilist_sublist_prev(multilist_sublist_t *, void *);
|
||||||
|
|
||||||
|
void multilist_link_init(multilist_node_t *);
|
||||||
|
int multilist_link_active(multilist_node_t *);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _SYS_MULTILIST_H */
|
@ -45,7 +45,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
|||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__array(uint64_t, hdr_dva_word, 2)
|
__array(uint64_t, hdr_dva_word, 2)
|
||||||
__field(uint64_t, hdr_birth)
|
__field(uint64_t, hdr_birth)
|
||||||
__field(uint64_t, hdr_cksum0)
|
|
||||||
__field(uint32_t, hdr_flags)
|
__field(uint32_t, hdr_flags)
|
||||||
__field(uint32_t, hdr_datacnt)
|
__field(uint32_t, hdr_datacnt)
|
||||||
__field(arc_buf_contents_t, hdr_type)
|
__field(arc_buf_contents_t, hdr_type)
|
||||||
@ -64,27 +63,25 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
|||||||
__entry->hdr_dva_word[0] = ab->b_dva.dva_word[0];
|
__entry->hdr_dva_word[0] = ab->b_dva.dva_word[0];
|
||||||
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
|
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
|
||||||
__entry->hdr_birth = ab->b_birth;
|
__entry->hdr_birth = ab->b_birth;
|
||||||
__entry->hdr_cksum0 = ab->b_cksum0;
|
|
||||||
__entry->hdr_flags = ab->b_flags;
|
__entry->hdr_flags = ab->b_flags;
|
||||||
__entry->hdr_datacnt = ab->b_datacnt;
|
__entry->hdr_datacnt = ab->b_l1hdr.b_datacnt;
|
||||||
__entry->hdr_type = ab->b_type;
|
|
||||||
__entry->hdr_size = ab->b_size;
|
__entry->hdr_size = ab->b_size;
|
||||||
__entry->hdr_spa = ab->b_spa;
|
__entry->hdr_spa = ab->b_spa;
|
||||||
__entry->hdr_state_type = ab->b_state->arcs_state;
|
__entry->hdr_state_type = ab->b_l1hdr.b_state->arcs_state;
|
||||||
__entry->hdr_access = ab->b_arc_access;
|
__entry->hdr_access = ab->b_l1hdr.b_arc_access;
|
||||||
__entry->hdr_mru_hits = ab->b_mru_hits;
|
__entry->hdr_mru_hits = ab->b_l1hdr.b_mru_hits;
|
||||||
__entry->hdr_mru_ghost_hits = ab->b_mru_ghost_hits;
|
__entry->hdr_mru_ghost_hits = ab->b_l1hdr.b_mru_ghost_hits;
|
||||||
__entry->hdr_mfu_hits = ab->b_mfu_hits;
|
__entry->hdr_mfu_hits = ab->b_l1hdr.b_mfu_hits;
|
||||||
__entry->hdr_mfu_ghost_hits = ab->b_mfu_ghost_hits;
|
__entry->hdr_mfu_ghost_hits = ab->b_l1hdr.b_mfu_ghost_hits;
|
||||||
__entry->hdr_l2_hits = ab->b_l2_hits;
|
__entry->hdr_l2_hits = ab->b_l1hdr.b_l2_hits;
|
||||||
__entry->hdr_refcount = ab->b_refcnt.rc_count;
|
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
|
||||||
),
|
),
|
||||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
|
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
|
||||||
"flags 0x%x datacnt %u type %u size %llu spa %llu "
|
"flags 0x%x datacnt %u type %u size %llu spa %llu "
|
||||||
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
|
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
|
||||||
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
|
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
|
||||||
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
||||||
__entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
|
__entry->hdr_birth, __entry->hdr_flags,
|
||||||
__entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
|
__entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
|
||||||
__entry->hdr_spa, __entry->hdr_state_type,
|
__entry->hdr_spa, __entry->hdr_state_type,
|
||||||
__entry->hdr_access, __entry->hdr_mru_hits,
|
__entry->hdr_access, __entry->hdr_mru_hits,
|
||||||
@ -261,7 +258,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__array(uint64_t, hdr_dva_word, 2)
|
__array(uint64_t, hdr_dva_word, 2)
|
||||||
__field(uint64_t, hdr_birth)
|
__field(uint64_t, hdr_birth)
|
||||||
__field(uint64_t, hdr_cksum0)
|
|
||||||
__field(uint32_t, hdr_flags)
|
__field(uint32_t, hdr_flags)
|
||||||
__field(uint32_t, hdr_datacnt)
|
__field(uint32_t, hdr_datacnt)
|
||||||
__field(arc_buf_contents_t, hdr_type)
|
__field(arc_buf_contents_t, hdr_type)
|
||||||
@ -292,20 +288,18 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||||||
__entry->hdr_dva_word[0] = hdr->b_dva.dva_word[0];
|
__entry->hdr_dva_word[0] = hdr->b_dva.dva_word[0];
|
||||||
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
|
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
|
||||||
__entry->hdr_birth = hdr->b_birth;
|
__entry->hdr_birth = hdr->b_birth;
|
||||||
__entry->hdr_cksum0 = hdr->b_cksum0;
|
|
||||||
__entry->hdr_flags = hdr->b_flags;
|
__entry->hdr_flags = hdr->b_flags;
|
||||||
__entry->hdr_datacnt = hdr->b_datacnt;
|
__entry->hdr_datacnt = hdr->b_l1hdr.b_datacnt;
|
||||||
__entry->hdr_type = hdr->b_type;
|
|
||||||
__entry->hdr_size = hdr->b_size;
|
__entry->hdr_size = hdr->b_size;
|
||||||
__entry->hdr_spa = hdr->b_spa;
|
__entry->hdr_spa = hdr->b_spa;
|
||||||
__entry->hdr_state_type = hdr->b_state->arcs_state;
|
__entry->hdr_state_type = hdr->b_l1hdr.b_state->arcs_state;
|
||||||
__entry->hdr_access = hdr->b_arc_access;
|
__entry->hdr_access = hdr->b_l1hdr.b_arc_access;
|
||||||
__entry->hdr_mru_hits = hdr->b_mru_hits;
|
__entry->hdr_mru_hits = hdr->b_l1hdr.b_mru_hits;
|
||||||
__entry->hdr_mru_ghost_hits = hdr->b_mru_ghost_hits;
|
__entry->hdr_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
|
||||||
__entry->hdr_mfu_hits = hdr->b_mfu_hits;
|
__entry->hdr_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
|
||||||
__entry->hdr_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
|
__entry->hdr_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
|
||||||
__entry->hdr_l2_hits = hdr->b_l2_hits;
|
__entry->hdr_l2_hits = hdr->b_l1hdr.b_l2_hits;
|
||||||
__entry->hdr_refcount = hdr->b_refcnt.rc_count;
|
__entry->hdr_refcount = hdr->b_l1hdr.b_refcnt.rc_count;
|
||||||
|
|
||||||
__entry->bp_dva0[0] = bp->blk_dva[0].dva_word[0];
|
__entry->bp_dva0[0] = bp->blk_dva[0].dva_word[0];
|
||||||
__entry->bp_dva0[1] = bp->blk_dva[0].dva_word[1];
|
__entry->bp_dva0[1] = bp->blk_dva[0].dva_word[1];
|
||||||
@ -325,8 +319,8 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||||||
__entry->zb_level = zb->zb_level;
|
__entry->zb_level = zb->zb_level;
|
||||||
__entry->zb_blkid = zb->zb_blkid;
|
__entry->zb_blkid = zb->zb_blkid;
|
||||||
),
|
),
|
||||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
|
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
|
||||||
"flags 0x%x datacnt %u type %u size %llu spa %llu state_type %u "
|
"flags 0x%x datacnt %u size %llu spa %llu state_type %u "
|
||||||
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
|
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
|
||||||
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
|
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
|
||||||
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
|
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
|
||||||
@ -334,8 +328,8 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
|||||||
"lsize %llu } zb { objset %llu object %llu level %lli "
|
"lsize %llu } zb { objset %llu object %llu level %lli "
|
||||||
"blkid %llu }",
|
"blkid %llu }",
|
||||||
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
||||||
__entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
|
__entry->hdr_birth, __entry->hdr_flags,
|
||||||
__entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
|
__entry->hdr_datacnt, __entry->hdr_size,
|
||||||
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
|
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
|
||||||
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
|
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
|
||||||
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
|
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
|
||||||
|
76
include/sys/trace_multilist.h
Normal file
76
include/sys/trace_multilist.h
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
|
||||||
|
|
||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM zfs
|
||||||
|
|
||||||
|
#if !defined(_TRACE_MULTILIST_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define _TRACE_MULTILIST_H
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Generic support for three argument tracepoints of the form:
|
||||||
|
*
|
||||||
|
* DTRACE_PROBE3(...,
|
||||||
|
* multilist_t *, ...,
|
||||||
|
* unsigned int, ...,
|
||||||
|
* void *, ...);
|
||||||
|
*/
|
||||||
|
|
||||||
|
DECLARE_EVENT_CLASS(zfs_multilist_insert_remove_class,
|
||||||
|
TP_PROTO(multilist_t *ml, unsigned sublist_idx, void *obj),
|
||||||
|
TP_ARGS(ml, sublist_idx, obj),
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(size_t, ml_offset)
|
||||||
|
__field(uint64_t, ml_num_sublists)
|
||||||
|
|
||||||
|
__field(unsigned int, sublist_idx)
|
||||||
|
),
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->ml_offset = ml->ml_offset;
|
||||||
|
__entry->ml_num_sublists = ml->ml_num_sublists;
|
||||||
|
|
||||||
|
__entry->sublist_idx = sublist_idx;
|
||||||
|
),
|
||||||
|
TP_printk("ml { offset %ld numsublists %llu sublistidx %u } ",
|
||||||
|
__entry->ml_offset, __entry->ml_num_sublists, __entry->sublist_idx)
|
||||||
|
);
|
||||||
|
|
||||||
|
#define DEFINE_MULTILIST_INSERT_REMOVE_EVENT(name) \
|
||||||
|
DEFINE_EVENT(zfs_multilist_insert_remove_class, name, \
|
||||||
|
TP_PROTO(multilist_t *ml, unsigned int sublist_idx, void *obj), \
|
||||||
|
TP_ARGS(ml, sublist_idx, obj))
|
||||||
|
DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__insert);
|
||||||
|
DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__remove);
|
||||||
|
|
||||||
|
#endif /* _TRACE_MULTILIST_H */
|
||||||
|
|
||||||
|
#undef TRACE_INCLUDE_PATH
|
||||||
|
#undef TRACE_INCLUDE_FILE
|
||||||
|
#define TRACE_INCLUDE_PATH sys
|
||||||
|
#define TRACE_INCLUDE_FILE trace_multilist
|
||||||
|
#include <trace/define_trace.h>
|
||||||
|
|
||||||
|
#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */
|
@ -468,6 +468,7 @@ extern void taskq_init_ent(taskq_ent_t *);
|
|||||||
extern void taskq_destroy(taskq_t *);
|
extern void taskq_destroy(taskq_t *);
|
||||||
extern void taskq_wait(taskq_t *);
|
extern void taskq_wait(taskq_t *);
|
||||||
extern void taskq_wait_id(taskq_t *, taskqid_t);
|
extern void taskq_wait_id(taskq_t *, taskqid_t);
|
||||||
|
extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
|
||||||
extern int taskq_member(taskq_t *, kthread_t *);
|
extern int taskq_member(taskq_t *, kthread_t *);
|
||||||
extern int taskq_cancel_id(taskq_t *, taskqid_t);
|
extern int taskq_cancel_id(taskq_t *, taskqid_t);
|
||||||
extern void system_taskq_init(void);
|
extern void system_taskq_init(void);
|
||||||
@ -609,6 +610,7 @@ extern void delay(clock_t ticks);
|
|||||||
} while (0);
|
} while (0);
|
||||||
|
|
||||||
#define max_ncpus 64
|
#define max_ncpus 64
|
||||||
|
#define num_online_cpus() (sysconf(_SC_NPROCESSORS_ONLN))
|
||||||
|
|
||||||
#define minclsyspri 60
|
#define minclsyspri 60
|
||||||
#define maxclsyspri 99
|
#define maxclsyspri 99
|
||||||
|
@ -55,6 +55,7 @@ libzpool_la_SOURCES = \
|
|||||||
$(top_srcdir)/module/zfs/lzjb.c \
|
$(top_srcdir)/module/zfs/lzjb.c \
|
||||||
$(top_srcdir)/module/zfs/lz4.c \
|
$(top_srcdir)/module/zfs/lz4.c \
|
||||||
$(top_srcdir)/module/zfs/metaslab.c \
|
$(top_srcdir)/module/zfs/metaslab.c \
|
||||||
|
$(top_srcdir)/module/zfs/multilist.c \
|
||||||
$(top_srcdir)/module/zfs/range_tree.c \
|
$(top_srcdir)/module/zfs/range_tree.c \
|
||||||
$(top_srcdir)/module/zfs/refcount.c \
|
$(top_srcdir)/module/zfs/refcount.c \
|
||||||
$(top_srcdir)/module/zfs/rrwlock.c \
|
$(top_srcdir)/module/zfs/rrwlock.c \
|
||||||
|
@ -220,6 +220,12 @@ taskq_wait_id(taskq_t *tq, taskqid_t id)
|
|||||||
taskq_wait(tq);
|
taskq_wait(tq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
|
||||||
|
{
|
||||||
|
taskq_wait(tq);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
taskq_thread(void *arg)
|
taskq_thread(void *arg)
|
||||||
{
|
{
|
||||||
|
@ -347,6 +347,19 @@ increased to reduce the memory footprint.
|
|||||||
Default value: \fB8192\fR.
|
Default value: \fB8192\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_arc_evict_batch_limit\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
Number of ARC headers to evict per sub-list before proceeding to another sub-list.
|
||||||
|
This batch-style operation prevents entire sub-lists from being evicted at once
|
||||||
|
but comes at a cost of additional unlocking and locking.
|
||||||
|
.sp
|
||||||
|
Default value: \fB10\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
@ -395,6 +408,19 @@ for meta data.
|
|||||||
Default value: \fB0\fR.
|
Default value: \fB0\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_arc_meta_min\fR (ulong)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
The minimum allowed size in bytes that meta data buffers may consume in
|
||||||
|
the ARC. This value defaults to 0 which disables a floor on the amount
|
||||||
|
of the ARC devoted to meta data.
|
||||||
|
.sp
|
||||||
|
Default value: \fB0\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
@ -447,6 +473,40 @@ Min life of prefetch block
|
|||||||
Default value: \fB100\fR.
|
Default value: \fB100\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_arc_num_sublists_per_state\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
To allow more fine-grained locking, each ARC state contains a series
|
||||||
|
of lists for both data and meta data objects. Locking is performed at
|
||||||
|
the level of these "sub-lists". This parameter controls the number of
|
||||||
|
sub-lists per ARC state.
|
||||||
|
.sp
|
||||||
|
Default value: 1 or the number of online CPUs, whichever is greater
|
||||||
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_arc_overflow_shift\fR (int)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
The ARC size is considered to be overflowing if it exceeds the current
|
||||||
|
ARC target size (arc_c) by a threshold determined by this parameter.
|
||||||
|
The threshold is calculated as a fraction of arc_c using the formula
|
||||||
|
"arc_c >> \fBzfs_arc_overflow_shift\fR".
|
||||||
|
|
||||||
|
The default value of 8 causes the ARC to be considered to be overflowing
|
||||||
|
if it exceeds the target size by 1/256th (0.3%) of the target size.
|
||||||
|
|
||||||
|
When the ARC is overflowing, new buffer allocations are stalled until
|
||||||
|
the reclaim thread catches up and the overflow condition no longer exists.
|
||||||
|
.sp
|
||||||
|
Default value: \fB8\fR.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
@ -37,6 +37,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/gzip.o
|
|||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/lzjb.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/lzjb.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/lz4.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/lz4.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/metaslab.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/metaslab.o
|
||||||
|
$(MODULE)-objs += @top_srcdir@/module/zfs/multilist.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/range_tree.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/range_tree.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/refcount.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/refcount.o
|
||||||
$(MODULE)-objs += @top_srcdir@/module/zfs/rrwlock.o
|
$(MODULE)-objs += @top_srcdir@/module/zfs/rrwlock.o
|
||||||
|
3567
module/zfs/arc.c
3567
module/zfs/arc.c
File diff suppressed because it is too large
Load Diff
@ -653,7 +653,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||||||
{
|
{
|
||||||
dnode_t *dn;
|
dnode_t *dn;
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
uint32_t aflags = ARC_NOWAIT;
|
uint32_t aflags = ARC_FLAG_NOWAIT;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
DB_DNODE_ENTER(db);
|
DB_DNODE_ENTER(db);
|
||||||
@ -707,9 +707,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
if (DBUF_IS_L2CACHEABLE(db))
|
if (DBUF_IS_L2CACHEABLE(db))
|
||||||
aflags |= ARC_L2CACHE;
|
aflags |= ARC_FLAG_L2CACHE;
|
||||||
if (DBUF_IS_L2COMPRESSIBLE(db))
|
if (DBUF_IS_L2COMPRESSIBLE(db))
|
||||||
aflags |= ARC_L2COMPRESS;
|
aflags |= ARC_FLAG_L2COMPRESS;
|
||||||
|
|
||||||
SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ?
|
SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ?
|
||||||
db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
|
db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
|
||||||
@ -721,7 +721,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
|||||||
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
|
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
|
||||||
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
|
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
|
||||||
&aflags, &zb);
|
&aflags, &zb);
|
||||||
if (aflags & ARC_CACHED)
|
if (aflags & ARC_FLAG_CACHED)
|
||||||
*flags |= DB_RF_CACHED;
|
*flags |= DB_RF_CACHED;
|
||||||
|
|
||||||
return (SET_ERROR(err));
|
return (SET_ERROR(err));
|
||||||
@ -2028,7 +2028,8 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
|
|||||||
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) {
|
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) {
|
||||||
if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
||||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||||
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
arc_flags_t aflags =
|
||||||
|
ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
|
|
||||||
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
||||||
|
@ -48,12 +48,12 @@ dbuf_stats_hash_table_headers(char *buf, size_t size)
|
|||||||
(void) snprintf(buf, size,
|
(void) snprintf(buf, size,
|
||||||
"%-88s | %-124s | %s\n"
|
"%-88s | %-124s | %s\n"
|
||||||
"%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
|
"%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
|
||||||
"%-5s %-5s %-6s %-8s %-6s %-8s %-12s "
|
"%-5s %-5s %-8s %-6s %-8s %-12s "
|
||||||
"%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
|
"%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
|
||||||
"%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
|
"%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
|
||||||
"dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
|
"dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
|
||||||
"blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
|
"blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
|
||||||
"atype", "index", "flags", "count", "asize", "access",
|
"atype", "flags", "count", "asize", "access",
|
||||||
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
|
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
|
||||||
"l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
|
"l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
|
||||||
"bsize", "lvls", "dholds", "blocks", "dsize");
|
"bsize", "lvls", "dholds", "blocks", "dsize");
|
||||||
@ -77,7 +77,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
|
|||||||
|
|
||||||
nwritten = snprintf(buf, size,
|
nwritten = snprintf(buf, size,
|
||||||
"%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
|
"%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
|
||||||
"%-5d %-5d %-6lld 0x%-6x %-6lu %-8llu %-12llu "
|
"%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
|
||||||
"%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
|
"%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
|
||||||
"%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
|
"%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
|
||||||
/* dmu_buf_impl_t */
|
/* dmu_buf_impl_t */
|
||||||
@ -94,7 +94,6 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
|
|||||||
/* arc_buf_info_t */
|
/* arc_buf_info_t */
|
||||||
abi.abi_state_type,
|
abi.abi_state_type,
|
||||||
abi.abi_state_contents,
|
abi.abi_state_contents,
|
||||||
(longlong_t)abi.abi_state_index,
|
|
||||||
abi.abi_flags,
|
abi.abi_flags,
|
||||||
(ulong_t)abi.abi_datacnt,
|
(ulong_t)abi.abi_datacnt,
|
||||||
(u_longlong_t)abi.abi_size,
|
(u_longlong_t)abi.abi_size,
|
||||||
|
@ -129,7 +129,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
} else if (zb->zb_level == 0) {
|
} else if (zb->zb_level == 0) {
|
||||||
dnode_phys_t *blk;
|
dnode_phys_t *blk;
|
||||||
arc_buf_t *abuf;
|
arc_buf_t *abuf;
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
int blksz = BP_GET_LSIZE(bp);
|
int blksz = BP_GET_LSIZE(bp);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -306,15 +306,15 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
|||||||
os->os_spa = spa;
|
os->os_spa = spa;
|
||||||
os->os_rootbp = bp;
|
os->os_rootbp = bp;
|
||||||
if (!BP_IS_HOLE(os->os_rootbp)) {
|
if (!BP_IS_HOLE(os->os_rootbp)) {
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
||||||
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
||||||
|
|
||||||
if (DMU_OS_IS_L2CACHEABLE(os))
|
if (DMU_OS_IS_L2CACHEABLE(os))
|
||||||
aflags |= ARC_L2CACHE;
|
aflags |= ARC_FLAG_L2CACHE;
|
||||||
if (DMU_OS_IS_L2COMPRESSIBLE(os))
|
if (DMU_OS_IS_L2COMPRESSIBLE(os))
|
||||||
aflags |= ARC_L2COMPRESS;
|
aflags |= ARC_FLAG_L2COMPRESS;
|
||||||
|
|
||||||
dprintf_bp(os->os_rootbp, "reading %s", "");
|
dprintf_bp(os->os_rootbp, "reading %s", "");
|
||||||
err = arc_read(NULL, spa, os->os_rootbp,
|
err = arc_read(NULL, spa, os->os_rootbp,
|
||||||
|
@ -486,7 +486,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
dnode_phys_t *blk;
|
dnode_phys_t *blk;
|
||||||
int i;
|
int i;
|
||||||
int blksz = BP_GET_LSIZE(bp);
|
int blksz = BP_GET_LSIZE(bp);
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
arc_buf_t *abuf;
|
arc_buf_t *abuf;
|
||||||
|
|
||||||
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
||||||
@ -504,7 +504,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
}
|
}
|
||||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||||
} else if (type == DMU_OT_SA) {
|
} else if (type == DMU_OT_SA) {
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
arc_buf_t *abuf;
|
arc_buf_t *abuf;
|
||||||
int blksz = BP_GET_LSIZE(bp);
|
int blksz = BP_GET_LSIZE(bp);
|
||||||
|
|
||||||
@ -521,8 +521,8 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
err = dump_write_embedded(dsp, zb->zb_object,
|
err = dump_write_embedded(dsp, zb->zb_object,
|
||||||
zb->zb_blkid * blksz, blksz, bp);
|
zb->zb_blkid * blksz, blksz, bp);
|
||||||
} else { /* it's a level-0 block of a regular object */
|
} else { /* it's a level-0 block of a regular object */
|
||||||
uint32_t aflags = ARC_WAIT;
|
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
arc_buf_t *abuf;
|
arc_buf_t *abuf;
|
||||||
int blksz = BP_GET_LSIZE(bp);
|
int blksz = BP_GET_LSIZE(bp);
|
||||||
|
|
||||||
|
@ -177,7 +177,7 @@ static void
|
|||||||
traverse_prefetch_metadata(traverse_data_t *td,
|
traverse_prefetch_metadata(traverse_data_t *td,
|
||||||
const blkptr_t *bp, const zbookmark_phys_t *zb)
|
const blkptr_t *bp, const zbookmark_phys_t *zb)
|
||||||
{
|
{
|
||||||
uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
|
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||||
|
|
||||||
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
|
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
|
||||||
return;
|
return;
|
||||||
@ -273,7 +273,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (BP_GET_LEVEL(bp) > 0) {
|
if (BP_GET_LEVEL(bp) > 0) {
|
||||||
uint32_t flags = ARC_WAIT;
|
uint32_t flags = ARC_FLAG_WAIT;
|
||||||
int32_t i;
|
int32_t i;
|
||||||
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||||
zbookmark_phys_t *czb;
|
zbookmark_phys_t *czb;
|
||||||
@ -307,7 +307,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||||||
kmem_free(czb, sizeof (zbookmark_phys_t));
|
kmem_free(czb, sizeof (zbookmark_phys_t));
|
||||||
|
|
||||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||||
uint32_t flags = ARC_WAIT;
|
uint32_t flags = ARC_FLAG_WAIT;
|
||||||
int32_t i;
|
int32_t i;
|
||||||
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||||
dnode_phys_t *cdnp;
|
dnode_phys_t *cdnp;
|
||||||
@ -331,7 +331,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||||
uint32_t flags = ARC_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
objset_phys_t *osp;
|
objset_phys_t *osp;
|
||||||
dnode_phys_t *mdnp, *gdnp, *udnp;
|
dnode_phys_t *mdnp, *gdnp, *udnp;
|
||||||
|
|
||||||
@ -448,7 +448,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||||
{
|
{
|
||||||
prefetch_data_t *pfd = arg;
|
prefetch_data_t *pfd = arg;
|
||||||
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||||
|
|
||||||
ASSERT(pfd->pd_bytes_fetched >= 0);
|
ASSERT(pfd->pd_bytes_fetched >= 0);
|
||||||
if (pfd->pd_cancel)
|
if (pfd->pd_cancel)
|
||||||
@ -545,7 +545,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
|||||||
|
|
||||||
/* See comment on ZIL traversal in dsl_scan_visitds. */
|
/* See comment on ZIL traversal in dsl_scan_visitds. */
|
||||||
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
|
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
|
||||||
uint32_t flags = ARC_WAIT;
|
uint32_t flags = ARC_FLAG_WAIT;
|
||||||
objset_phys_t *osp;
|
objset_phys_t *osp;
|
||||||
arc_buf_t *buf;
|
arc_buf_t *buf;
|
||||||
|
|
||||||
|
@ -317,7 +317,14 @@ dsl_pool_close(dsl_pool_t *dp)
|
|||||||
txg_list_destroy(&dp->dp_sync_tasks);
|
txg_list_destroy(&dp->dp_sync_tasks);
|
||||||
txg_list_destroy(&dp->dp_dirty_dirs);
|
txg_list_destroy(&dp->dp_dirty_dirs);
|
||||||
|
|
||||||
arc_flush(dp->dp_spa);
|
/*
|
||||||
|
* We can't set retry to TRUE since we're explicitly specifying
|
||||||
|
* a spa to flush. This is good enough; any missed buffers for
|
||||||
|
* this spa won't cause trouble, and they'll eventually fall
|
||||||
|
* out of the ARC just like any other unused buffer.
|
||||||
|
*/
|
||||||
|
arc_flush(dp->dp_spa, FALSE);
|
||||||
|
|
||||||
txg_fini(dp);
|
txg_fini(dp);
|
||||||
dsl_scan_fini(dp);
|
dsl_scan_fini(dp);
|
||||||
dmu_buf_user_evict_wait();
|
dmu_buf_user_evict_wait();
|
||||||
|
@ -590,7 +590,7 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
|
|||||||
uint64_t objset, uint64_t object, uint64_t blkid)
|
uint64_t objset, uint64_t object, uint64_t blkid)
|
||||||
{
|
{
|
||||||
zbookmark_phys_t czb;
|
zbookmark_phys_t czb;
|
||||||
uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
|
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||||
|
|
||||||
if (zfs_no_scrub_prefetch)
|
if (zfs_no_scrub_prefetch)
|
||||||
return;
|
return;
|
||||||
@ -655,7 +655,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|||||||
int err;
|
int err;
|
||||||
|
|
||||||
if (BP_GET_LEVEL(bp) > 0) {
|
if (BP_GET_LEVEL(bp) > 0) {
|
||||||
uint32_t flags = ARC_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
int i;
|
int i;
|
||||||
blkptr_t *cbp;
|
blkptr_t *cbp;
|
||||||
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||||
@ -682,7 +682,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|||||||
}
|
}
|
||||||
(void) arc_buf_remove_ref(buf, &buf);
|
(void) arc_buf_remove_ref(buf, &buf);
|
||||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||||
uint32_t flags = ARC_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
dnode_phys_t *cdnp;
|
dnode_phys_t *cdnp;
|
||||||
int i, j;
|
int i, j;
|
||||||
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||||
@ -708,7 +708,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
|||||||
|
|
||||||
(void) arc_buf_remove_ref(buf, &buf);
|
(void) arc_buf_remove_ref(buf, &buf);
|
||||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||||
uint32_t flags = ARC_WAIT;
|
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||||
objset_phys_t *osp;
|
objset_phys_t *osp;
|
||||||
arc_buf_t *buf;
|
arc_buf_t *buf;
|
||||||
|
|
||||||
|
@ -556,7 +556,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
taskq_wait(mg->mg_taskq);
|
taskq_wait_outstanding(mg->mg_taskq, 0);
|
||||||
metaslab_group_alloc_update(mg);
|
metaslab_group_alloc_update(mg);
|
||||||
|
|
||||||
mgprev = mg->mg_prev;
|
mgprev = mg->mg_prev;
|
||||||
@ -1596,7 +1596,7 @@ metaslab_group_preload(metaslab_group_t *mg)
|
|||||||
int m = 0;
|
int m = 0;
|
||||||
|
|
||||||
if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
|
if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
|
||||||
taskq_wait(mg->mg_taskq);
|
taskq_wait_outstanding(mg->mg_taskq, 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
375
module/zfs/multilist.c
Normal file
375
module/zfs/multilist.c
Normal file
@ -0,0 +1,375 @@
|
|||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* This file and its contents are supplied under the terms of the
|
||||||
|
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||||
|
* You may only use this file in accordance with the terms of version
|
||||||
|
* 1.0 of the CDDL.
|
||||||
|
*
|
||||||
|
* A full copy of the text of the CDDL should have accompanied this
|
||||||
|
* source. A copy of the CDDL is also available via the Internet at
|
||||||
|
* http://www.illumos.org/license/CDDL.
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/zfs_context.h>
|
||||||
|
#include <sys/multilist.h>
|
||||||
|
#include <sys/trace_multilist.h>
|
||||||
|
|
||||||
|
/* needed for spa_get_random() */
|
||||||
|
#include <sys/spa.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given the object contained on the list, return a pointer to the
|
||||||
|
* multilist_node_t structure that the object contains.
|
||||||
|
*/
|
||||||
|
#ifdef DEBUG
|
||||||
|
static multilist_node_t *
|
||||||
|
multilist_d2l(multilist_t *ml, void *obj)
|
||||||
|
{
|
||||||
|
return ((multilist_node_t *)((char *)obj + ml->ml_offset));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Initialize a new multilist using the parameters specified.
|
||||||
|
*
|
||||||
|
* - 'size' denotes the size of the structure containing the
|
||||||
|
* multilist_node_t.
|
||||||
|
* - 'offset' denotes the byte offset of the multilist_node_t within
|
||||||
|
* the structure that contains it.
|
||||||
|
* - 'num' specifies the number of internal sublists to create.
|
||||||
|
* - 'index_func' is used to determine which sublist to insert into
|
||||||
|
* when the multilist_insert() function is called; as well as which
|
||||||
|
* sublist to remove from when multilist_remove() is called. The
|
||||||
|
* requirements this function must meet, are the following:
|
||||||
|
*
|
||||||
|
* - It must always return the same value when called on the same
|
||||||
|
* object (to ensure the object is removed from the list it was
|
||||||
|
* inserted into).
|
||||||
|
*
|
||||||
|
* - It must return a value in the range [0, number of sublists).
|
||||||
|
* The multilist_get_num_sublists() function may be used to
|
||||||
|
* determine the number of sublists in the multilist.
|
||||||
|
*
|
||||||
|
* Also, in order to reduce internal contention between the sublists
|
||||||
|
* during insertion and removal, this function should choose evenly
|
||||||
|
* between all available sublists when inserting. This isn't a hard
|
||||||
|
* requirement, but a general rule of thumb in order to garner the
|
||||||
|
* best multi-threaded performance out of the data structure.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
|
||||||
|
multilist_sublist_index_func_t *index_func)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ASSERT3P(ml, !=, NULL);
|
||||||
|
ASSERT3U(size, >, 0);
|
||||||
|
ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
|
||||||
|
ASSERT3U(num, >, 0);
|
||||||
|
ASSERT3P(index_func, !=, NULL);
|
||||||
|
|
||||||
|
ml->ml_offset = offset;
|
||||||
|
ml->ml_num_sublists = num;
|
||||||
|
ml->ml_index_func = index_func;
|
||||||
|
|
||||||
|
ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
|
||||||
|
ml->ml_num_sublists, KM_SLEEP);
|
||||||
|
|
||||||
|
ASSERT3P(ml->ml_sublists, !=, NULL);
|
||||||
|
|
||||||
|
for (i = 0; i < ml->ml_num_sublists; i++) {
|
||||||
|
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||||
|
mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
list_create(&mls->mls_list, size, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Destroy the given multilist object, and free up any memory it holds.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multilist_destroy(multilist_t *ml)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ASSERT(multilist_is_empty(ml));
|
||||||
|
|
||||||
|
for (i = 0; i < ml->ml_num_sublists; i++) {
|
||||||
|
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||||
|
|
||||||
|
ASSERT(list_is_empty(&mls->mls_list));
|
||||||
|
|
||||||
|
list_destroy(&mls->mls_list);
|
||||||
|
mutex_destroy(&mls->mls_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT3P(ml->ml_sublists, !=, NULL);
|
||||||
|
kmem_free(ml->ml_sublists,
|
||||||
|
sizeof (multilist_sublist_t) * ml->ml_num_sublists);
|
||||||
|
|
||||||
|
ml->ml_num_sublists = 0;
|
||||||
|
ml->ml_offset = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert the given object into the multilist.
|
||||||
|
*
|
||||||
|
* This function will insert the object specified into the sublist
|
||||||
|
* determined using the function given at multilist creation time.
|
||||||
|
*
|
||||||
|
* The sublist locks are automatically acquired if not already held, to
|
||||||
|
* ensure consistency when inserting and removing from multiple threads.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multilist_insert(multilist_t *ml, void *obj)
|
||||||
|
{
|
||||||
|
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
|
||||||
|
multilist_sublist_t *mls;
|
||||||
|
boolean_t need_lock;
|
||||||
|
|
||||||
|
DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
|
||||||
|
unsigned int, sublist_idx, void *, obj);
|
||||||
|
|
||||||
|
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||||
|
|
||||||
|
mls = &ml->ml_sublists[sublist_idx];
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note: Callers may already hold the sublist lock by calling
|
||||||
|
* multilist_sublist_lock(). Here we rely on MUTEX_HELD()
|
||||||
|
* returning TRUE if and only if the current thread holds the
|
||||||
|
* lock. While it's a little ugly to make the lock recursive in
|
||||||
|
* this way, it works and allows the calling code to be much
|
||||||
|
* simpler -- otherwise it would have to pass around a flag
|
||||||
|
* indicating that it already has the lock.
|
||||||
|
*/
|
||||||
|
need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||||
|
|
||||||
|
if (need_lock)
|
||||||
|
mutex_enter(&mls->mls_lock);
|
||||||
|
|
||||||
|
ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
|
||||||
|
|
||||||
|
multilist_sublist_insert_head(mls, obj);
|
||||||
|
|
||||||
|
if (need_lock)
|
||||||
|
mutex_exit(&mls->mls_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Remove the given object from the multilist.
|
||||||
|
*
|
||||||
|
* This function will remove the object specified from the sublist
|
||||||
|
* determined using the function given at multilist creation time.
|
||||||
|
*
|
||||||
|
* The necessary sublist locks are automatically acquired, to ensure
|
||||||
|
* consistency when inserting and removing from multiple threads.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multilist_remove(multilist_t *ml, void *obj)
|
||||||
|
{
|
||||||
|
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
|
||||||
|
multilist_sublist_t *mls;
|
||||||
|
boolean_t need_lock;
|
||||||
|
|
||||||
|
DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
|
||||||
|
unsigned int, sublist_idx, void *, obj);
|
||||||
|
|
||||||
|
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||||
|
|
||||||
|
mls = &ml->ml_sublists[sublist_idx];
|
||||||
|
/* See comment in multilist_insert(). */
|
||||||
|
need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||||
|
|
||||||
|
if (need_lock)
|
||||||
|
mutex_enter(&mls->mls_lock);
|
||||||
|
|
||||||
|
ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
|
||||||
|
|
||||||
|
multilist_sublist_remove(mls, obj);
|
||||||
|
|
||||||
|
if (need_lock)
|
||||||
|
mutex_exit(&mls->mls_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check to see if this multilist object is empty.
|
||||||
|
*
|
||||||
|
* This will return TRUE if it finds all of the sublists of this
|
||||||
|
* multilist to be empty, and FALSE otherwise. Each sublist lock will be
|
||||||
|
* automatically acquired as necessary.
|
||||||
|
*
|
||||||
|
* If concurrent insertions and removals are occurring, the semantics
|
||||||
|
* of this function become a little fuzzy. Instead of locking all
|
||||||
|
* sublists for the entire call time of the function, each sublist is
|
||||||
|
* only locked as it is individually checked for emptiness. Thus, it's
|
||||||
|
* possible for this function to return TRUE with non-empty sublists at
|
||||||
|
* the time the function returns. This would be due to another thread
|
||||||
|
* inserting into a given sublist, after that specific sublist was check
|
||||||
|
* and deemed empty, but before all sublists have been checked.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
multilist_is_empty(multilist_t *ml)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < ml->ml_num_sublists; i++) {
|
||||||
|
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||||
|
/* See comment in multilist_insert(). */
|
||||||
|
boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||||
|
|
||||||
|
if (need_lock)
|
||||||
|
mutex_enter(&mls->mls_lock);
|
||||||
|
|
||||||
|
if (!list_is_empty(&mls->mls_list)) {
|
||||||
|
if (need_lock)
|
||||||
|
mutex_exit(&mls->mls_lock);
|
||||||
|
|
||||||
|
return (FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_lock)
|
||||||
|
mutex_exit(&mls->mls_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the number of sublists composing this multilist */
|
||||||
|
unsigned int
|
||||||
|
multilist_get_num_sublists(multilist_t *ml)
|
||||||
|
{
|
||||||
|
return (ml->ml_num_sublists);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return a randomly selected, valid sublist index for this multilist */
|
||||||
|
unsigned int
|
||||||
|
multilist_get_random_index(multilist_t *ml)
|
||||||
|
{
|
||||||
|
return (spa_get_random(ml->ml_num_sublists));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Lock and return the sublist specified at the given index */
|
||||||
|
multilist_sublist_t *
|
||||||
|
multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
|
||||||
|
{
|
||||||
|
multilist_sublist_t *mls;
|
||||||
|
|
||||||
|
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||||
|
mls = &ml->ml_sublists[sublist_idx];
|
||||||
|
mutex_enter(&mls->mls_lock);
|
||||||
|
|
||||||
|
return (mls);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
multilist_sublist_unlock(multilist_sublist_t *mls)
|
||||||
|
{
|
||||||
|
mutex_exit(&mls->mls_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We're allowing any object to be inserted into this specific sublist,
|
||||||
|
* but this can lead to trouble if multilist_remove() is called to
|
||||||
|
* remove this object. Specifically, if calling ml_index_func on this
|
||||||
|
* object returns an index for sublist different than what is passed as
|
||||||
|
* a parameter here, any call to multilist_remove() with this newly
|
||||||
|
* inserted object is undefined! (the call to multilist_remove() will
|
||||||
|
* remove the object from a list that it isn't contained in)
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
list_insert_head(&mls->mls_list, obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* please see comment above multilist_sublist_insert_head */
|
||||||
|
void
|
||||||
|
multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
list_insert_tail(&mls->mls_list, obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Move the object one element forward in the list.
|
||||||
|
*
|
||||||
|
* This function will move the given object forward in the list (towards
|
||||||
|
* the head) by one object. So, in essence, it will swap its position in
|
||||||
|
* the list with its "prev" pointer. If the given object is already at the
|
||||||
|
* head of the list, it cannot be moved forward any more than it already
|
||||||
|
* is, so no action is taken.
|
||||||
|
*
|
||||||
|
* NOTE: This function **must not** remove any object from the list other
|
||||||
|
* than the object given as the parameter. This is relied upon in
|
||||||
|
* arc_evict_state_impl().
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
|
||||||
|
{
|
||||||
|
void *prev = list_prev(&mls->mls_list, obj);
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
ASSERT(!list_is_empty(&mls->mls_list));
|
||||||
|
|
||||||
|
/* 'obj' must be at the head of the list, nothing to do */
|
||||||
|
if (prev == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
list_remove(&mls->mls_list, obj);
|
||||||
|
list_insert_before(&mls->mls_list, prev, obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
list_remove(&mls->mls_list, obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
multilist_sublist_head(multilist_sublist_t *mls)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
return (list_head(&mls->mls_list));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
multilist_sublist_tail(multilist_sublist_t *mls)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
return (list_tail(&mls->mls_list));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
multilist_sublist_next(multilist_sublist_t *mls, void *obj)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
return (list_next(&mls->mls_list, obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
void *
|
||||||
|
multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||||
|
return (list_prev(&mls->mls_list, obj));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
multilist_link_init(multilist_node_t *link)
|
||||||
|
{
|
||||||
|
list_link_init(link);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
multilist_link_active(multilist_node_t *link)
|
||||||
|
{
|
||||||
|
return (list_link_active(link));
|
||||||
|
}
|
@ -200,7 +200,7 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
|
|||||||
if (zfs_read_history == 0 && ssh->size == 0)
|
if (zfs_read_history == 0 && ssh->size == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (zfs_read_history_hits == 0 && (aflags & ARC_CACHED))
|
if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
|
srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
* (and only one) C file, so this dummy file exists for that purpose.
|
* (and only one) C file, so this dummy file exists for that purpose.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include <sys/multilist.h>
|
||||||
#include <sys/arc_impl.h>
|
#include <sys/arc_impl.h>
|
||||||
#include <sys/vdev_impl.h>
|
#include <sys/vdev_impl.h>
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
@ -31,6 +32,7 @@
|
|||||||
#include <sys/dsl_dataset.h>
|
#include <sys/dsl_dataset.h>
|
||||||
#include <sys/dmu_tx.h>
|
#include <sys/dmu_tx.h>
|
||||||
#include <sys/dnode.h>
|
#include <sys/dnode.h>
|
||||||
|
#include <sys/multilist.h>
|
||||||
#include <sys/zfs_znode.h>
|
#include <sys/zfs_znode.h>
|
||||||
#include <sys/zil_impl.h>
|
#include <sys/zil_impl.h>
|
||||||
#include <sys/zrlock.h>
|
#include <sys/zrlock.h>
|
||||||
@ -42,6 +44,7 @@
|
|||||||
#include <sys/trace_dbuf.h>
|
#include <sys/trace_dbuf.h>
|
||||||
#include <sys/trace_dmu.h>
|
#include <sys/trace_dmu.h>
|
||||||
#include <sys/trace_dnode.h>
|
#include <sys/trace_dnode.h>
|
||||||
|
#include <sys/trace_multilist.h>
|
||||||
#include <sys/trace_txg.h>
|
#include <sys/trace_txg.h>
|
||||||
#include <sys/trace_zil.h>
|
#include <sys/trace_zil.h>
|
||||||
#include <sys/trace_zrlock.h>
|
#include <sys/trace_zrlock.h>
|
||||||
|
@ -471,7 +471,7 @@ txg_wait_callbacks(dsl_pool_t *dp)
|
|||||||
tx_state_t *tx = &dp->dp_tx;
|
tx_state_t *tx = &dp->dp_tx;
|
||||||
|
|
||||||
if (tx->tx_commit_cb_taskq != NULL)
|
if (tx->tx_commit_cb_taskq != NULL)
|
||||||
taskq_wait(tx->tx_commit_cb_taskq);
|
taskq_wait_outstanding(tx->tx_commit_cb_taskq, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -1152,8 +1152,8 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
|
|||||||
*/
|
*/
|
||||||
int round = 0;
|
int round = 0;
|
||||||
while (zsb->z_nr_znodes > 0) {
|
while (zsb->z_nr_znodes > 0) {
|
||||||
taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(
|
taskq_wait_outstanding(dsl_pool_iput_taskq(
|
||||||
zsb->z_os)));
|
dmu_objset_pool(zsb->z_os)), 0);
|
||||||
if (++round > 1 && !unmounting)
|
if (++round > 1 && !unmounting)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1740,7 +1740,7 @@ zfs_init(void)
|
|||||||
void
|
void
|
||||||
zfs_fini(void)
|
zfs_fini(void)
|
||||||
{
|
{
|
||||||
taskq_wait(system_taskq);
|
taskq_wait_outstanding(system_taskq, 0);
|
||||||
unregister_filesystem(&zpl_fs_type);
|
unregister_filesystem(&zpl_fs_type);
|
||||||
zfs_znode_fini();
|
zfs_znode_fini();
|
||||||
zfsctl_fini();
|
zfsctl_fini();
|
||||||
|
@ -204,7 +204,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
|
|||||||
char **end)
|
char **end)
|
||||||
{
|
{
|
||||||
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
|
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
arc_buf_t *abuf = NULL;
|
arc_buf_t *abuf = NULL;
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
int error;
|
int error;
|
||||||
@ -280,7 +280,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
|
|||||||
{
|
{
|
||||||
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
|
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
|
||||||
const blkptr_t *bp = &lr->lr_blkptr;
|
const blkptr_t *bp = &lr->lr_blkptr;
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
arc_buf_t *abuf = NULL;
|
arc_buf_t *abuf = NULL;
|
||||||
zbookmark_phys_t zb;
|
zbookmark_phys_t zb;
|
||||||
int error;
|
int error;
|
||||||
|
@ -2241,7 +2241,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
|
|||||||
|
|
||||||
if (ddp->ddp_phys_birth != 0) {
|
if (ddp->ddp_phys_birth != 0) {
|
||||||
arc_buf_t *abuf = NULL;
|
arc_buf_t *abuf = NULL;
|
||||||
uint32_t aflags = ARC_WAIT;
|
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||||
blkptr_t blk = *zio->io_bp;
|
blkptr_t blk = *zio->io_bp;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
@ -439,7 +439,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
|
|||||||
* fault injection isn't a performance critical path.
|
* fault injection isn't a performance critical path.
|
||||||
*/
|
*/
|
||||||
if (flags & ZINJECT_FLUSH_ARC)
|
if (flags & ZINJECT_FLUSH_ARC)
|
||||||
arc_flush(NULL);
|
/*
|
||||||
|
* We must use FALSE to ensure arc_flush returns, since
|
||||||
|
* we're not preventing concurrent ARC insertions.
|
||||||
|
*/
|
||||||
|
arc_flush(NULL, FALSE);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user