mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-26 01:44:31 +03:00
Merge branch 'lock-contention-on-arcs_mtx-final'
Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: Brian Behlendorf Closes #3115 Closes #3481
This commit is contained in:
commit
06358ea16e
@ -191,12 +191,10 @@ def get_arc_summary(Kstat):
|
||||
### ARC Misc. ###
|
||||
deleted = Kstat["kstat.zfs.misc.arcstats.deleted"]
|
||||
mutex_miss = Kstat["kstat.zfs.misc.arcstats.mutex_miss"]
|
||||
recycle_miss = Kstat["kstat.zfs.misc.arcstats.recycle_miss"]
|
||||
|
||||
### ARC Misc. ###
|
||||
output["arc_misc"] = {}
|
||||
output["arc_misc"]["deleted"] = fHits(deleted)
|
||||
output["arc_misc"]['recycle_miss'] = fHits(recycle_miss)
|
||||
output["arc_misc"]['mutex_miss'] = fHits(mutex_miss)
|
||||
output["arc_misc"]['evict_skips'] = fHits(mutex_miss)
|
||||
|
||||
@ -302,8 +300,6 @@ def _arc_summary(Kstat):
|
||||
### ARC Misc. ###
|
||||
sys.stdout.write("ARC Misc:\n")
|
||||
sys.stdout.write("\tDeleted:\t\t\t\t%s\n" % arc['arc_misc']['deleted'])
|
||||
sys.stdout.write("\tRecycle Misses:\t\t\t\t%s\n" %
|
||||
arc['arc_misc']['recycle_miss'])
|
||||
sys.stdout.write("\tMutex Misses:\t\t\t\t%s\n" %
|
||||
arc['arc_misc']['mutex_miss'])
|
||||
sys.stdout.write("\tEvict Skips:\t\t\t\t%s\n" %
|
||||
|
@ -82,7 +82,6 @@ cols = {
|
||||
"mrug": [4, 1000, "MRU Ghost List hits per second"],
|
||||
"eskip": [5, 1000, "evict_skip per second"],
|
||||
"mtxmis": [6, 1000, "mutex_miss per second"],
|
||||
"rmis": [4, 1000, "recycle_miss per second"],
|
||||
"dread": [5, 1000, "Demand accesses per second"],
|
||||
"pread": [5, 1000, "Prefetch accesses per second"],
|
||||
"l2hits": [6, 1000, "L2ARC hits per second"],
|
||||
@ -406,7 +405,6 @@ def calculate():
|
||||
v["mrug"] = d["mru_ghost_hits"] / sint
|
||||
v["mfug"] = d["mfu_ghost_hits"] / sint
|
||||
v["eskip"] = d["evict_skip"] / sint
|
||||
v["rmis"] = d["recycle_miss"] / sint
|
||||
v["mtxmis"] = d["mutex_miss"] / sint
|
||||
|
||||
if l2exist:
|
||||
|
@ -1250,7 +1250,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
|
||||
print_indirect(bp, zb, dnp);
|
||||
|
||||
if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
int i;
|
||||
blkptr_t *cbp;
|
||||
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||
|
@ -4042,7 +4042,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
||||
* assign an arcbuf to a dbuf.
|
||||
*/
|
||||
for (j = 0; j < s; j++) {
|
||||
if (i != 5) {
|
||||
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||
bigbuf_arcbufs[j] =
|
||||
dmu_request_arcbuf(bonus_db, chunksize);
|
||||
} else {
|
||||
@ -4066,7 +4066,8 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
||||
umem_free(packbuf, packsize);
|
||||
umem_free(bigbuf, bigsize);
|
||||
for (j = 0; j < s; j++) {
|
||||
if (i != 5) {
|
||||
if (i != 5 ||
|
||||
chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||
dmu_return_arcbuf(bigbuf_arcbufs[j]);
|
||||
} else {
|
||||
dmu_return_arcbuf(
|
||||
@ -4111,7 +4112,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
||||
}
|
||||
for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
|
||||
dmu_buf_t *dbt;
|
||||
if (i != 5) {
|
||||
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||
bcopy((caddr_t)bigbuf + (off - bigoff),
|
||||
bigbuf_arcbufs[j]->b_data, chunksize);
|
||||
} else {
|
||||
@ -4128,7 +4129,7 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
|
||||
VERIFY(dmu_buf_hold(os, bigobj, off,
|
||||
FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
|
||||
}
|
||||
if (i != 5) {
|
||||
if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
|
||||
dmu_assign_arcbuf(bonus_db, off,
|
||||
bigbuf_arcbufs[j], tx);
|
||||
} else {
|
||||
|
@ -33,6 +33,7 @@ COMMON_H = \
|
||||
$(top_srcdir)/include/sys/efi_partition.h \
|
||||
$(top_srcdir)/include/sys/metaslab.h \
|
||||
$(top_srcdir)/include/sys/metaslab_impl.h \
|
||||
$(top_srcdir)/include/sys/multilist.h \
|
||||
$(top_srcdir)/include/sys/nvpair.h \
|
||||
$(top_srcdir)/include/sys/nvpair_impl.h \
|
||||
$(top_srcdir)/include/sys/range_tree.h \
|
||||
@ -53,6 +54,7 @@ COMMON_H = \
|
||||
$(top_srcdir)/include/sys/trace_dbuf.h \
|
||||
$(top_srcdir)/include/sys/trace_dmu.h \
|
||||
$(top_srcdir)/include/sys/trace_dnode.h \
|
||||
$(top_srcdir)/include/sys/trace_multilist.h \
|
||||
$(top_srcdir)/include/sys/trace_txg.h \
|
||||
$(top_srcdir)/include/sys/trace_zil.h \
|
||||
$(top_srcdir)/include/sys/trace_zrlock.h \
|
||||
|
@ -38,6 +38,12 @@ extern "C" {
|
||||
#include <sys/spa.h>
|
||||
#include <sys/refcount.h>
|
||||
|
||||
/*
|
||||
* Used by arc_flush() to inform arc_evict_state() that it should evict
|
||||
* all available buffers from the arc state being passed in.
|
||||
*/
|
||||
#define ARC_EVICT_ALL -1ULL
|
||||
|
||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||
typedef struct arc_buf arc_buf_t;
|
||||
typedef struct arc_prune arc_prune_t;
|
||||
@ -53,10 +59,65 @@ arc_done_func_t arc_getbuf_func;
|
||||
struct arc_prune {
|
||||
arc_prune_func_t *p_pfunc;
|
||||
void *p_private;
|
||||
uint64_t p_adjust;
|
||||
list_node_t p_node;
|
||||
refcount_t p_refcnt;
|
||||
};
|
||||
|
||||
typedef enum arc_strategy {
|
||||
ARC_STRATEGY_META_ONLY = 0, /* Evict only meta data buffers */
|
||||
ARC_STRATEGY_META_BALANCED = 1, /* Evict data buffers if needed */
|
||||
} arc_strategy_t;
|
||||
|
||||
typedef enum arc_flags
|
||||
{
|
||||
/*
|
||||
* Public flags that can be passed into the ARC by external consumers.
|
||||
*/
|
||||
ARC_FLAG_NONE = 1 << 0, /* No flags set */
|
||||
ARC_FLAG_WAIT = 1 << 1, /* perform sync I/O */
|
||||
ARC_FLAG_NOWAIT = 1 << 2, /* perform async I/O */
|
||||
ARC_FLAG_PREFETCH = 1 << 3, /* I/O is a prefetch */
|
||||
ARC_FLAG_CACHED = 1 << 4, /* I/O was in cache */
|
||||
ARC_FLAG_L2CACHE = 1 << 5, /* cache in L2ARC */
|
||||
ARC_FLAG_L2COMPRESS = 1 << 6, /* compress in L2ARC */
|
||||
|
||||
/*
|
||||
* Private ARC flags. These flags are private ARC only flags that
|
||||
* will show up in b_flags in the arc_buf_hdr_t. These flags should
|
||||
* only be set by ARC code.
|
||||
*/
|
||||
ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */
|
||||
ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */
|
||||
ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */
|
||||
ARC_FLAG_FREED_IN_READ = 1 << 10, /* freed during read */
|
||||
ARC_FLAG_BUF_AVAILABLE = 1 << 11, /* block not in use */
|
||||
ARC_FLAG_INDIRECT = 1 << 12, /* indirect block */
|
||||
ARC_FLAG_L2_WRITING = 1 << 13, /* write in progress */
|
||||
ARC_FLAG_L2_EVICTED = 1 << 14, /* evicted during I/O */
|
||||
ARC_FLAG_L2_WRITE_HEAD = 1 << 15, /* head of write list */
|
||||
/* indicates that the buffer contains metadata (otherwise, data) */
|
||||
ARC_FLAG_BUFC_METADATA = 1 << 16,
|
||||
|
||||
/* Flags specifying whether optional hdr struct fields are defined */
|
||||
ARC_FLAG_HAS_L1HDR = 1 << 17,
|
||||
ARC_FLAG_HAS_L2HDR = 1 << 18,
|
||||
|
||||
/*
|
||||
* The arc buffer's compression mode is stored in the top 7 bits of the
|
||||
* flags field, so these dummy flags are included so that MDB can
|
||||
* interpret the enum properly.
|
||||
*/
|
||||
ARC_FLAG_COMPRESS_0 = 1 << 24,
|
||||
ARC_FLAG_COMPRESS_1 = 1 << 25,
|
||||
ARC_FLAG_COMPRESS_2 = 1 << 26,
|
||||
ARC_FLAG_COMPRESS_3 = 1 << 27,
|
||||
ARC_FLAG_COMPRESS_4 = 1 << 28,
|
||||
ARC_FLAG_COMPRESS_5 = 1 << 29,
|
||||
ARC_FLAG_COMPRESS_6 = 1 << 30
|
||||
|
||||
} arc_flags_t;
|
||||
|
||||
struct arc_buf {
|
||||
arc_buf_hdr_t *b_hdr;
|
||||
arc_buf_t *b_next;
|
||||
@ -71,15 +132,6 @@ typedef enum arc_buf_contents {
|
||||
ARC_BUFC_METADATA, /* buffer contains metadata */
|
||||
ARC_BUFC_NUMTYPES
|
||||
} arc_buf_contents_t;
|
||||
/*
|
||||
* These are the flags we pass into calls to the arc
|
||||
*/
|
||||
#define ARC_WAIT (1 << 1) /* perform I/O synchronously */
|
||||
#define ARC_NOWAIT (1 << 2) /* perform I/O asynchronously */
|
||||
#define ARC_PREFETCH (1 << 3) /* I/O is a prefetch */
|
||||
#define ARC_CACHED (1 << 4) /* I/O was already in cache */
|
||||
#define ARC_L2CACHE (1 << 5) /* cache in L2ARC */
|
||||
#define ARC_L2COMPRESS (1 << 6) /* compress in L2ARC */
|
||||
|
||||
/*
|
||||
* The following breakdowns of arc_size exist for kstat only.
|
||||
@ -106,7 +158,6 @@ typedef enum arc_state_type {
|
||||
typedef struct arc_buf_info {
|
||||
arc_state_type_t abi_state_type;
|
||||
arc_buf_contents_t abi_state_contents;
|
||||
uint64_t abi_state_index;
|
||||
uint32_t abi_flags;
|
||||
uint32_t abi_datacnt;
|
||||
uint64_t abi_size;
|
||||
@ -146,7 +197,7 @@ int arc_referenced(arc_buf_t *buf);
|
||||
|
||||
int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||
arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
|
||||
uint32_t *arc_flags, const zbookmark_phys_t *zb);
|
||||
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
||||
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
|
||||
const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
|
||||
@ -160,7 +211,7 @@ void arc_freed(spa_t *spa, const blkptr_t *bp);
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
|
||||
boolean_t arc_clear_callback(arc_buf_t *buf);
|
||||
|
||||
void arc_flush(spa_t *spa);
|
||||
void arc_flush(spa_t *spa, boolean_t retry);
|
||||
void arc_tempreserve_clear(uint64_t reserve);
|
||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||
|
||||
|
@ -67,15 +67,25 @@ extern "C" {
|
||||
*/
|
||||
|
||||
typedef struct arc_state {
|
||||
list_t arcs_list[ARC_BUFC_NUMTYPES]; /* list of evictable buffers */
|
||||
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
|
||||
uint64_t arcs_size; /* total amount of data in this state */
|
||||
kmutex_t arcs_mtx;
|
||||
/*
|
||||
* list of evictable buffers
|
||||
*/
|
||||
multilist_t arcs_list[ARC_BUFC_NUMTYPES];
|
||||
/*
|
||||
* total amount of evictable data in this state
|
||||
*/
|
||||
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];
|
||||
/*
|
||||
* total amount of data in this state; this includes: evictable,
|
||||
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
|
||||
*/
|
||||
uint64_t arcs_size;
|
||||
/*
|
||||
* supports the "dbufs" kstat
|
||||
*/
|
||||
arc_state_type_t arcs_state;
|
||||
} arc_state_t;
|
||||
|
||||
typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
|
||||
|
||||
typedef struct arc_callback arc_callback_t;
|
||||
|
||||
struct arc_callback {
|
||||
@ -96,31 +106,49 @@ struct arc_write_callback {
|
||||
arc_buf_t *awcb_buf;
|
||||
};
|
||||
|
||||
struct arc_buf_hdr {
|
||||
/* protected by hash lock */
|
||||
dva_t b_dva;
|
||||
uint64_t b_birth;
|
||||
uint64_t b_cksum0;
|
||||
|
||||
/*
|
||||
* ARC buffers are separated into multiple structs as a memory saving measure:
|
||||
* - Common fields struct, always defined, and embedded within it:
|
||||
* - L2-only fields, always allocated but undefined when not in L2ARC
|
||||
* - L1-only fields, only allocated when in L1ARC
|
||||
*
|
||||
* Buffer in L1 Buffer only in L2
|
||||
* +------------------------+ +------------------------+
|
||||
* | arc_buf_hdr_t | | arc_buf_hdr_t |
|
||||
* | | | |
|
||||
* | | | |
|
||||
* | | | |
|
||||
* +------------------------+ +------------------------+
|
||||
* | l2arc_buf_hdr_t | | l2arc_buf_hdr_t |
|
||||
* | (undefined if L1-only) | | |
|
||||
* +------------------------+ +------------------------+
|
||||
* | l1arc_buf_hdr_t |
|
||||
* | |
|
||||
* | |
|
||||
* | |
|
||||
* | |
|
||||
* +------------------------+
|
||||
*
|
||||
* Because it's possible for the L2ARC to become extremely large, we can wind
|
||||
* up eating a lot of memory in L2ARC buffer headers, so the size of a header
|
||||
* is minimized by only allocating the fields necessary for an L1-cached buffer
|
||||
* when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
|
||||
* l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
|
||||
* words in pointers. arc_hdr_realloc() is used to switch a header between
|
||||
* these two allocation states.
|
||||
*/
|
||||
typedef struct l1arc_buf_hdr {
|
||||
kmutex_t b_freeze_lock;
|
||||
zio_cksum_t *b_freeze_cksum;
|
||||
|
||||
arc_buf_hdr_t *b_hash_next;
|
||||
arc_buf_t *b_buf;
|
||||
uint32_t b_flags;
|
||||
uint32_t b_datacnt;
|
||||
|
||||
arc_callback_t *b_acb;
|
||||
/* for waiting on writes to complete */
|
||||
kcondvar_t b_cv;
|
||||
|
||||
/* immutable */
|
||||
arc_buf_contents_t b_type;
|
||||
uint64_t b_size;
|
||||
uint64_t b_spa;
|
||||
|
||||
/* protected by arc state mutex */
|
||||
arc_state_t *b_state;
|
||||
list_node_t b_arc_node;
|
||||
multilist_node_t b_arc_node;
|
||||
|
||||
/* updated atomically */
|
||||
clock_t b_arc_access;
|
||||
@ -133,9 +161,10 @@ struct arc_buf_hdr {
|
||||
/* self protecting */
|
||||
refcount_t b_refcnt;
|
||||
|
||||
l2arc_buf_hdr_t *b_l2hdr;
|
||||
list_node_t b_l2node;
|
||||
};
|
||||
arc_callback_t *b_acb;
|
||||
/* temporary buffer holder for in-flight compressed data */
|
||||
void *b_tmp_cdata;
|
||||
} l1arc_buf_hdr_t;
|
||||
|
||||
typedef struct l2arc_dev {
|
||||
vdev_t *l2ad_vdev; /* vdev */
|
||||
@ -146,15 +175,51 @@ typedef struct l2arc_dev {
|
||||
uint64_t l2ad_evict; /* last addr eviction reached */
|
||||
boolean_t l2ad_first; /* first sweep through */
|
||||
boolean_t l2ad_writing; /* currently writing */
|
||||
list_t *l2ad_buflist; /* buffer list */
|
||||
kmutex_t l2ad_mtx; /* lock for buffer list */
|
||||
list_t l2ad_buflist; /* buffer list */
|
||||
list_node_t l2ad_node; /* device list node */
|
||||
} l2arc_dev_t;
|
||||
|
||||
typedef struct l2arc_buf_hdr {
|
||||
/* protected by arc_buf_hdr mutex */
|
||||
l2arc_dev_t *b_dev; /* L2ARC device */
|
||||
uint64_t b_daddr; /* disk address, offset byte */
|
||||
/* real alloc'd buffer size depending on b_compress applied */
|
||||
uint32_t b_hits;
|
||||
int32_t b_asize;
|
||||
|
||||
list_node_t b_l2node;
|
||||
} l2arc_buf_hdr_t;
|
||||
|
||||
typedef struct l2arc_write_callback {
|
||||
l2arc_dev_t *l2wcb_dev; /* device info */
|
||||
arc_buf_hdr_t *l2wcb_head; /* head of write buflist */
|
||||
} l2arc_write_callback_t;
|
||||
|
||||
struct arc_buf_hdr {
|
||||
/* protected by hash lock */
|
||||
dva_t b_dva;
|
||||
uint64_t b_birth;
|
||||
/*
|
||||
* Even though this checksum is only set/verified when a buffer is in
|
||||
* the L1 cache, it needs to be in the set of common fields because it
|
||||
* must be preserved from the time before a buffer is written out to
|
||||
* L2ARC until after it is read back in.
|
||||
*/
|
||||
zio_cksum_t *b_freeze_cksum;
|
||||
|
||||
arc_buf_hdr_t *b_hash_next;
|
||||
arc_flags_t b_flags;
|
||||
|
||||
/* immutable */
|
||||
int32_t b_size;
|
||||
uint64_t b_spa;
|
||||
|
||||
/* L2ARC fields. Undefined when not in L2ARC. */
|
||||
l2arc_buf_hdr_t b_l2hdr;
|
||||
/* L1ARC fields. Undefined when in l2arc_only state */
|
||||
l1arc_buf_hdr_t b_l1hdr;
|
||||
};
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
105
include/sys/multilist.h
Normal file
105
include/sys/multilist.h
Normal file
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_MULTILIST_H
|
||||
#define _SYS_MULTILIST_H
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef list_node_t multilist_node_t;
|
||||
typedef struct multilist multilist_t;
|
||||
typedef struct multilist_sublist multilist_sublist_t;
|
||||
typedef unsigned int multilist_sublist_index_func_t(multilist_t *, void *);
|
||||
|
||||
struct multilist_sublist {
|
||||
/*
|
||||
* The mutex used internally to implement thread safe insertions
|
||||
* and removals to this individual sublist. It can also be locked
|
||||
* by a consumer using multilist_sublist_{lock,unlock}, which is
|
||||
* useful if a consumer needs to traverse the list in a thread
|
||||
* safe manner.
|
||||
*/
|
||||
kmutex_t mls_lock;
|
||||
/*
|
||||
* The actual list object containing all objects in this sublist.
|
||||
*/
|
||||
list_t mls_list;
|
||||
/*
|
||||
* Pad to cache line, in an effort to try and prevent cache line
|
||||
* contention.
|
||||
*/
|
||||
} ____cacheline_aligned;
|
||||
|
||||
struct multilist {
|
||||
/*
|
||||
* This is used to get to the multilist_node_t structure given
|
||||
* the void *object contained on the list.
|
||||
*/
|
||||
size_t ml_offset;
|
||||
/*
|
||||
* The number of sublists used internally by this multilist.
|
||||
*/
|
||||
uint64_t ml_num_sublists;
|
||||
/*
|
||||
* The array of pointers to the actual sublists.
|
||||
*/
|
||||
multilist_sublist_t *ml_sublists;
|
||||
/*
|
||||
* Pointer to function which determines the sublist to use
|
||||
* when inserting and removing objects from this multilist.
|
||||
* Please see the comment above multilist_create for details.
|
||||
*/
|
||||
multilist_sublist_index_func_t *ml_index_func;
|
||||
};
|
||||
|
||||
void multilist_destroy(multilist_t *);
|
||||
void multilist_create(multilist_t *, size_t, size_t, unsigned int,
|
||||
multilist_sublist_index_func_t *);
|
||||
|
||||
void multilist_insert(multilist_t *, void *);
|
||||
void multilist_remove(multilist_t *, void *);
|
||||
int multilist_is_empty(multilist_t *);
|
||||
|
||||
unsigned int multilist_get_num_sublists(multilist_t *);
|
||||
unsigned int multilist_get_random_index(multilist_t *);
|
||||
|
||||
multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
|
||||
void multilist_sublist_unlock(multilist_sublist_t *);
|
||||
|
||||
void multilist_sublist_insert_head(multilist_sublist_t *, void *);
|
||||
void multilist_sublist_insert_tail(multilist_sublist_t *, void *);
|
||||
void multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj);
|
||||
void multilist_sublist_remove(multilist_sublist_t *, void *);
|
||||
|
||||
void *multilist_sublist_head(multilist_sublist_t *);
|
||||
void *multilist_sublist_tail(multilist_sublist_t *);
|
||||
void *multilist_sublist_next(multilist_sublist_t *, void *);
|
||||
void *multilist_sublist_prev(multilist_sublist_t *, void *);
|
||||
|
||||
void multilist_link_init(multilist_node_t *);
|
||||
int multilist_link_active(multilist_node_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_MULTILIST_H */
|
@ -45,7 +45,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
||||
TP_STRUCT__entry(
|
||||
__array(uint64_t, hdr_dva_word, 2)
|
||||
__field(uint64_t, hdr_birth)
|
||||
__field(uint64_t, hdr_cksum0)
|
||||
__field(uint32_t, hdr_flags)
|
||||
__field(uint32_t, hdr_datacnt)
|
||||
__field(arc_buf_contents_t, hdr_type)
|
||||
@ -64,27 +63,25 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
|
||||
__entry->hdr_dva_word[0] = ab->b_dva.dva_word[0];
|
||||
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
|
||||
__entry->hdr_birth = ab->b_birth;
|
||||
__entry->hdr_cksum0 = ab->b_cksum0;
|
||||
__entry->hdr_flags = ab->b_flags;
|
||||
__entry->hdr_datacnt = ab->b_datacnt;
|
||||
__entry->hdr_type = ab->b_type;
|
||||
__entry->hdr_datacnt = ab->b_l1hdr.b_datacnt;
|
||||
__entry->hdr_size = ab->b_size;
|
||||
__entry->hdr_spa = ab->b_spa;
|
||||
__entry->hdr_state_type = ab->b_state->arcs_state;
|
||||
__entry->hdr_access = ab->b_arc_access;
|
||||
__entry->hdr_mru_hits = ab->b_mru_hits;
|
||||
__entry->hdr_mru_ghost_hits = ab->b_mru_ghost_hits;
|
||||
__entry->hdr_mfu_hits = ab->b_mfu_hits;
|
||||
__entry->hdr_mfu_ghost_hits = ab->b_mfu_ghost_hits;
|
||||
__entry->hdr_l2_hits = ab->b_l2_hits;
|
||||
__entry->hdr_refcount = ab->b_refcnt.rc_count;
|
||||
__entry->hdr_state_type = ab->b_l1hdr.b_state->arcs_state;
|
||||
__entry->hdr_access = ab->b_l1hdr.b_arc_access;
|
||||
__entry->hdr_mru_hits = ab->b_l1hdr.b_mru_hits;
|
||||
__entry->hdr_mru_ghost_hits = ab->b_l1hdr.b_mru_ghost_hits;
|
||||
__entry->hdr_mfu_hits = ab->b_l1hdr.b_mfu_hits;
|
||||
__entry->hdr_mfu_ghost_hits = ab->b_l1hdr.b_mfu_ghost_hits;
|
||||
__entry->hdr_l2_hits = ab->b_l1hdr.b_l2_hits;
|
||||
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
|
||||
),
|
||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
|
||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
|
||||
"flags 0x%x datacnt %u type %u size %llu spa %llu "
|
||||
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
|
||||
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
|
||||
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
||||
__entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
|
||||
__entry->hdr_birth, __entry->hdr_flags,
|
||||
__entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
|
||||
__entry->hdr_spa, __entry->hdr_state_type,
|
||||
__entry->hdr_access, __entry->hdr_mru_hits,
|
||||
@ -261,7 +258,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
||||
TP_STRUCT__entry(
|
||||
__array(uint64_t, hdr_dva_word, 2)
|
||||
__field(uint64_t, hdr_birth)
|
||||
__field(uint64_t, hdr_cksum0)
|
||||
__field(uint32_t, hdr_flags)
|
||||
__field(uint32_t, hdr_datacnt)
|
||||
__field(arc_buf_contents_t, hdr_type)
|
||||
@ -292,20 +288,18 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
||||
__entry->hdr_dva_word[0] = hdr->b_dva.dva_word[0];
|
||||
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
|
||||
__entry->hdr_birth = hdr->b_birth;
|
||||
__entry->hdr_cksum0 = hdr->b_cksum0;
|
||||
__entry->hdr_flags = hdr->b_flags;
|
||||
__entry->hdr_datacnt = hdr->b_datacnt;
|
||||
__entry->hdr_type = hdr->b_type;
|
||||
__entry->hdr_datacnt = hdr->b_l1hdr.b_datacnt;
|
||||
__entry->hdr_size = hdr->b_size;
|
||||
__entry->hdr_spa = hdr->b_spa;
|
||||
__entry->hdr_state_type = hdr->b_state->arcs_state;
|
||||
__entry->hdr_access = hdr->b_arc_access;
|
||||
__entry->hdr_mru_hits = hdr->b_mru_hits;
|
||||
__entry->hdr_mru_ghost_hits = hdr->b_mru_ghost_hits;
|
||||
__entry->hdr_mfu_hits = hdr->b_mfu_hits;
|
||||
__entry->hdr_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
|
||||
__entry->hdr_l2_hits = hdr->b_l2_hits;
|
||||
__entry->hdr_refcount = hdr->b_refcnt.rc_count;
|
||||
__entry->hdr_state_type = hdr->b_l1hdr.b_state->arcs_state;
|
||||
__entry->hdr_access = hdr->b_l1hdr.b_arc_access;
|
||||
__entry->hdr_mru_hits = hdr->b_l1hdr.b_mru_hits;
|
||||
__entry->hdr_mru_ghost_hits = hdr->b_l1hdr.b_mru_ghost_hits;
|
||||
__entry->hdr_mfu_hits = hdr->b_l1hdr.b_mfu_hits;
|
||||
__entry->hdr_mfu_ghost_hits = hdr->b_l1hdr.b_mfu_ghost_hits;
|
||||
__entry->hdr_l2_hits = hdr->b_l1hdr.b_l2_hits;
|
||||
__entry->hdr_refcount = hdr->b_l1hdr.b_refcnt.rc_count;
|
||||
|
||||
__entry->bp_dva0[0] = bp->blk_dva[0].dva_word[0];
|
||||
__entry->bp_dva0[1] = bp->blk_dva[0].dva_word[1];
|
||||
@ -325,8 +319,8 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
||||
__entry->zb_level = zb->zb_level;
|
||||
__entry->zb_blkid = zb->zb_blkid;
|
||||
),
|
||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu cksum0 0x%llx "
|
||||
"flags 0x%x datacnt %u type %u size %llu spa %llu state_type %u "
|
||||
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
|
||||
"flags 0x%x datacnt %u size %llu spa %llu state_type %u "
|
||||
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
|
||||
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
|
||||
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
|
||||
@ -334,8 +328,8 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
|
||||
"lsize %llu } zb { objset %llu object %llu level %lli "
|
||||
"blkid %llu }",
|
||||
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
|
||||
__entry->hdr_birth, __entry->hdr_cksum0, __entry->hdr_flags,
|
||||
__entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
|
||||
__entry->hdr_birth, __entry->hdr_flags,
|
||||
__entry->hdr_datacnt, __entry->hdr_size,
|
||||
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
|
||||
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
|
||||
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
|
||||
|
76
include/sys/trace_multilist.h
Normal file
76
include/sys/trace_multilist.h
Normal file
@ -0,0 +1,76 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM zfs
|
||||
|
||||
#if !defined(_TRACE_MULTILIST_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_MULTILIST_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
/*
|
||||
* Generic support for three argument tracepoints of the form:
|
||||
*
|
||||
* DTRACE_PROBE3(...,
|
||||
* multilist_t *, ...,
|
||||
* unsigned int, ...,
|
||||
* void *, ...);
|
||||
*/
|
||||
|
||||
DECLARE_EVENT_CLASS(zfs_multilist_insert_remove_class,
|
||||
TP_PROTO(multilist_t *ml, unsigned sublist_idx, void *obj),
|
||||
TP_ARGS(ml, sublist_idx, obj),
|
||||
TP_STRUCT__entry(
|
||||
__field(size_t, ml_offset)
|
||||
__field(uint64_t, ml_num_sublists)
|
||||
|
||||
__field(unsigned int, sublist_idx)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->ml_offset = ml->ml_offset;
|
||||
__entry->ml_num_sublists = ml->ml_num_sublists;
|
||||
|
||||
__entry->sublist_idx = sublist_idx;
|
||||
),
|
||||
TP_printk("ml { offset %ld numsublists %llu sublistidx %u } ",
|
||||
__entry->ml_offset, __entry->ml_num_sublists, __entry->sublist_idx)
|
||||
);
|
||||
|
||||
#define DEFINE_MULTILIST_INSERT_REMOVE_EVENT(name) \
|
||||
DEFINE_EVENT(zfs_multilist_insert_remove_class, name, \
|
||||
TP_PROTO(multilist_t *ml, unsigned int sublist_idx, void *obj), \
|
||||
TP_ARGS(ml, sublist_idx, obj))
|
||||
DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__insert);
|
||||
DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__remove);
|
||||
|
||||
#endif /* _TRACE_MULTILIST_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_PATH sys
|
||||
#define TRACE_INCLUDE_FILE trace_multilist
|
||||
#include <trace/define_trace.h>
|
||||
|
||||
#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */
|
@ -468,6 +468,7 @@ extern void taskq_init_ent(taskq_ent_t *);
|
||||
extern void taskq_destroy(taskq_t *);
|
||||
extern void taskq_wait(taskq_t *);
|
||||
extern void taskq_wait_id(taskq_t *, taskqid_t);
|
||||
extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
|
||||
extern int taskq_member(taskq_t *, kthread_t *);
|
||||
extern int taskq_cancel_id(taskq_t *, taskqid_t);
|
||||
extern void system_taskq_init(void);
|
||||
@ -609,6 +610,7 @@ extern void delay(clock_t ticks);
|
||||
} while (0);
|
||||
|
||||
#define max_ncpus 64
|
||||
#define num_online_cpus() (sysconf(_SC_NPROCESSORS_ONLN))
|
||||
|
||||
#define minclsyspri 60
|
||||
#define maxclsyspri 99
|
||||
|
@ -55,6 +55,7 @@ libzpool_la_SOURCES = \
|
||||
$(top_srcdir)/module/zfs/lzjb.c \
|
||||
$(top_srcdir)/module/zfs/lz4.c \
|
||||
$(top_srcdir)/module/zfs/metaslab.c \
|
||||
$(top_srcdir)/module/zfs/multilist.c \
|
||||
$(top_srcdir)/module/zfs/range_tree.c \
|
||||
$(top_srcdir)/module/zfs/refcount.c \
|
||||
$(top_srcdir)/module/zfs/rrwlock.c \
|
||||
|
@ -220,6 +220,12 @@ taskq_wait_id(taskq_t *tq, taskqid_t id)
|
||||
taskq_wait(tq);
|
||||
}
|
||||
|
||||
void
|
||||
taskq_wait_outstanding(taskq_t *tq, taskqid_t id)
|
||||
{
|
||||
taskq_wait(tq);
|
||||
}
|
||||
|
||||
static void
|
||||
taskq_thread(void *arg)
|
||||
{
|
||||
|
@ -347,6 +347,19 @@ increased to reduce the memory footprint.
|
||||
Default value: \fB8192\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_arc_evict_batch_limit\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
Number of ARC headers to evict per sub-list before proceeding to another sub-list.
|
||||
This batch-style operation prevents entire sub-lists from being evicted at once
|
||||
but comes at a cost of additional unlocking and locking.
|
||||
.sp
|
||||
Default value: \fB10\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
@ -395,6 +408,19 @@ for meta data.
|
||||
Default value: \fB0\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_arc_meta_min\fR (ulong)
|
||||
.ad
|
||||
.RS 12n
|
||||
The minimum allowed size in bytes that meta data buffers may consume in
|
||||
the ARC. This value defaults to 0 which disables a floor on the amount
|
||||
of the ARC devoted to meta data.
|
||||
.sp
|
||||
Default value: \fB0\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
@ -447,6 +473,40 @@ Min life of prefetch block
|
||||
Default value: \fB100\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_arc_num_sublists_per_state\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
To allow more fine-grained locking, each ARC state contains a series
|
||||
of lists for both data and meta data objects. Locking is performed at
|
||||
the level of these "sub-lists". This parameter controls the number of
|
||||
sub-lists per ARC state.
|
||||
.sp
|
||||
Default value: 1 or the number of online CPUs, whichever is greater
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs_arc_overflow_shift\fR (int)
|
||||
.ad
|
||||
.RS 12n
|
||||
The ARC size is considered to be overflowing if it exceeds the current
|
||||
ARC target size (arc_c) by a threshold determined by this parameter.
|
||||
The threshold is calculated as a fraction of arc_c using the formula
|
||||
"arc_c >> \fBzfs_arc_overflow_shift\fR".
|
||||
|
||||
The default value of 8 causes the ARC to be considered to be overflowing
|
||||
if it exceeds the target size by 1/256th (0.3%) of the target size.
|
||||
|
||||
When the ARC is overflowing, new buffer allocations are stalled until
|
||||
the reclaim thread catches up and the overflow condition no longer exists.
|
||||
.sp
|
||||
Default value: \fB8\fR.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -37,6 +37,7 @@ $(MODULE)-objs += @top_srcdir@/module/zfs/gzip.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/lzjb.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/lz4.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/metaslab.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/multilist.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/range_tree.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/refcount.o
|
||||
$(MODULE)-objs += @top_srcdir@/module/zfs/rrwlock.o
|
||||
|
3567
module/zfs/arc.c
3567
module/zfs/arc.c
File diff suppressed because it is too large
Load Diff
@ -653,7 +653,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
||||
{
|
||||
dnode_t *dn;
|
||||
zbookmark_phys_t zb;
|
||||
uint32_t aflags = ARC_NOWAIT;
|
||||
uint32_t aflags = ARC_FLAG_NOWAIT;
|
||||
int err;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
@ -707,9 +707,9 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
||||
mutex_exit(&db->db_mtx);
|
||||
|
||||
if (DBUF_IS_L2CACHEABLE(db))
|
||||
aflags |= ARC_L2CACHE;
|
||||
aflags |= ARC_FLAG_L2CACHE;
|
||||
if (DBUF_IS_L2COMPRESSIBLE(db))
|
||||
aflags |= ARC_L2COMPRESS;
|
||||
aflags |= ARC_FLAG_L2COMPRESS;
|
||||
|
||||
SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ?
|
||||
db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
|
||||
@ -721,7 +721,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
|
||||
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
|
||||
(*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
|
||||
&aflags, &zb);
|
||||
if (aflags & ARC_CACHED)
|
||||
if (aflags & ARC_FLAG_CACHED)
|
||||
*flags |= DB_RF_CACHED;
|
||||
|
||||
return (SET_ERROR(err));
|
||||
@ -2028,7 +2028,8 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
|
||||
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) {
|
||||
if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
||||
arc_flags_t aflags =
|
||||
ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||
zbookmark_phys_t zb;
|
||||
|
||||
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
||||
|
@ -48,12 +48,12 @@ dbuf_stats_hash_table_headers(char *buf, size_t size)
|
||||
(void) snprintf(buf, size,
|
||||
"%-88s | %-124s | %s\n"
|
||||
"%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
|
||||
"%-5s %-5s %-6s %-8s %-6s %-8s %-12s "
|
||||
"%-5s %-5s %-8s %-6s %-8s %-12s "
|
||||
"%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
|
||||
"%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
|
||||
"dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
|
||||
"blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
|
||||
"atype", "index", "flags", "count", "asize", "access",
|
||||
"atype", "flags", "count", "asize", "access",
|
||||
"mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
|
||||
"l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
|
||||
"bsize", "lvls", "dholds", "blocks", "dsize");
|
||||
@ -77,7 +77,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
|
||||
|
||||
nwritten = snprintf(buf, size,
|
||||
"%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
|
||||
"%-5d %-5d %-6lld 0x%-6x %-6lu %-8llu %-12llu "
|
||||
"%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
|
||||
"%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
|
||||
"%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
|
||||
/* dmu_buf_impl_t */
|
||||
@ -94,7 +94,6 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
|
||||
/* arc_buf_info_t */
|
||||
abi.abi_state_type,
|
||||
abi.abi_state_contents,
|
||||
(longlong_t)abi.abi_state_index,
|
||||
abi.abi_flags,
|
||||
(ulong_t)abi.abi_datacnt,
|
||||
(u_longlong_t)abi.abi_size,
|
||||
|
@ -129,7 +129,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
} else if (zb->zb_level == 0) {
|
||||
dnode_phys_t *blk;
|
||||
arc_buf_t *abuf;
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
int blksz = BP_GET_LSIZE(bp);
|
||||
int i;
|
||||
|
||||
|
@ -306,15 +306,15 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
os->os_spa = spa;
|
||||
os->os_rootbp = bp;
|
||||
if (!BP_IS_HOLE(os->os_rootbp)) {
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
zbookmark_phys_t zb;
|
||||
SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
|
||||
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
|
||||
|
||||
if (DMU_OS_IS_L2CACHEABLE(os))
|
||||
aflags |= ARC_L2CACHE;
|
||||
aflags |= ARC_FLAG_L2CACHE;
|
||||
if (DMU_OS_IS_L2COMPRESSIBLE(os))
|
||||
aflags |= ARC_L2COMPRESS;
|
||||
aflags |= ARC_FLAG_L2COMPRESS;
|
||||
|
||||
dprintf_bp(os->os_rootbp, "reading %s", "");
|
||||
err = arc_read(NULL, spa, os->os_rootbp,
|
||||
|
@ -486,7 +486,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
dnode_phys_t *blk;
|
||||
int i;
|
||||
int blksz = BP_GET_LSIZE(bp);
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
arc_buf_t *abuf;
|
||||
|
||||
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
||||
@ -504,7 +504,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
}
|
||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||
} else if (type == DMU_OT_SA) {
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
arc_buf_t *abuf;
|
||||
int blksz = BP_GET_LSIZE(bp);
|
||||
|
||||
@ -521,8 +521,8 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
err = dump_write_embedded(dsp, zb->zb_object,
|
||||
zb->zb_blkid * blksz, blksz, bp);
|
||||
} else { /* it's a level-0 block of a regular object */
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
uint64_t offset;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
arc_buf_t *abuf;
|
||||
int blksz = BP_GET_LSIZE(bp);
|
||||
|
||||
|
@ -177,7 +177,7 @@ static void
|
||||
traverse_prefetch_metadata(traverse_data_t *td,
|
||||
const blkptr_t *bp, const zbookmark_phys_t *zb)
|
||||
{
|
||||
uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
|
||||
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||
|
||||
if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
|
||||
return;
|
||||
@ -273,7 +273,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
}
|
||||
|
||||
if (BP_GET_LEVEL(bp) > 0) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
uint32_t flags = ARC_FLAG_WAIT;
|
||||
int32_t i;
|
||||
int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||
zbookmark_phys_t *czb;
|
||||
@ -307,7 +307,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
kmem_free(czb, sizeof (zbookmark_phys_t));
|
||||
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
uint32_t flags = ARC_FLAG_WAIT;
|
||||
int32_t i;
|
||||
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||
dnode_phys_t *cdnp;
|
||||
@ -331,7 +331,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
break;
|
||||
}
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
objset_phys_t *osp;
|
||||
dnode_phys_t *mdnp, *gdnp, *udnp;
|
||||
|
||||
@ -448,7 +448,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
prefetch_data_t *pfd = arg;
|
||||
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
||||
arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||
|
||||
ASSERT(pfd->pd_bytes_fetched >= 0);
|
||||
if (pfd->pd_cancel)
|
||||
@ -545,7 +545,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
||||
|
||||
/* See comment on ZIL traversal in dsl_scan_visitds. */
|
||||
if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
uint32_t flags = ARC_FLAG_WAIT;
|
||||
objset_phys_t *osp;
|
||||
arc_buf_t *buf;
|
||||
|
||||
|
@ -317,7 +317,14 @@ dsl_pool_close(dsl_pool_t *dp)
|
||||
txg_list_destroy(&dp->dp_sync_tasks);
|
||||
txg_list_destroy(&dp->dp_dirty_dirs);
|
||||
|
||||
arc_flush(dp->dp_spa);
|
||||
/*
|
||||
* We can't set retry to TRUE since we're explicitly specifying
|
||||
* a spa to flush. This is good enough; any missed buffers for
|
||||
* this spa won't cause trouble, and they'll eventually fall
|
||||
* out of the ARC just like any other unused buffer.
|
||||
*/
|
||||
arc_flush(dp->dp_spa, FALSE);
|
||||
|
||||
txg_fini(dp);
|
||||
dsl_scan_fini(dp);
|
||||
dmu_buf_user_evict_wait();
|
||||
|
@ -590,7 +590,7 @@ dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
|
||||
uint64_t objset, uint64_t object, uint64_t blkid)
|
||||
{
|
||||
zbookmark_phys_t czb;
|
||||
uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;
|
||||
arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
|
||||
|
||||
if (zfs_no_scrub_prefetch)
|
||||
return;
|
||||
@ -655,7 +655,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
||||
int err;
|
||||
|
||||
if (BP_GET_LEVEL(bp) > 0) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
int i;
|
||||
blkptr_t *cbp;
|
||||
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
|
||||
@ -682,7 +682,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
||||
}
|
||||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
dnode_phys_t *cdnp;
|
||||
int i, j;
|
||||
int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
|
||||
@ -708,7 +708,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
|
||||
|
||||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
|
||||
uint32_t flags = ARC_WAIT;
|
||||
arc_flags_t flags = ARC_FLAG_WAIT;
|
||||
objset_phys_t *osp;
|
||||
arc_buf_t *buf;
|
||||
|
||||
|
@ -556,7 +556,7 @@ metaslab_group_passivate(metaslab_group_t *mg)
|
||||
return;
|
||||
}
|
||||
|
||||
taskq_wait(mg->mg_taskq);
|
||||
taskq_wait_outstanding(mg->mg_taskq, 0);
|
||||
metaslab_group_alloc_update(mg);
|
||||
|
||||
mgprev = mg->mg_prev;
|
||||
@ -1596,7 +1596,7 @@ metaslab_group_preload(metaslab_group_t *mg)
|
||||
int m = 0;
|
||||
|
||||
if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
|
||||
taskq_wait(mg->mg_taskq);
|
||||
taskq_wait_outstanding(mg->mg_taskq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
|
375
module/zfs/multilist.c
Normal file
375
module/zfs/multilist.c
Normal file
@ -0,0 +1,375 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/multilist.h>
|
||||
#include <sys/trace_multilist.h>
|
||||
|
||||
/* needed for spa_get_random() */
|
||||
#include <sys/spa.h>
|
||||
|
||||
/*
|
||||
* Given the object contained on the list, return a pointer to the
|
||||
* multilist_node_t structure embedded within that object.
|
||||
*/
|
||||
#ifdef DEBUG
/*
 * Debug-only helper: step 'ml_offset' bytes past the start of 'obj' and
 * treat that address as the object's multilist_node_t.
 */
static multilist_node_t *
multilist_d2l(multilist_t *ml, void *obj)
{
	char *base = obj;

	return ((multilist_node_t *)(base + ml->ml_offset));
}
#endif
|
||||
|
||||
/*
|
||||
* Initialize a new multilist using the parameters specified.
|
||||
*
|
||||
* - 'size' denotes the size of the structure containing the
|
||||
* multilist_node_t.
|
||||
* - 'offset' denotes the byte offset of the multilist_node_t within
|
||||
* the structure that contains it.
|
||||
* - 'num' specifies the number of internal sublists to create.
|
||||
* - 'index_func' is used to determine which sublist to insert into
|
||||
* when the multilist_insert() function is called; as well as which
|
||||
* sublist to remove from when multilist_remove() is called. The
|
||||
* requirements this function must meet, are the following:
|
||||
*
|
||||
* - It must always return the same value when called on the same
|
||||
* object (to ensure the object is removed from the list it was
|
||||
* inserted into).
|
||||
*
|
||||
* - It must return a value in the range [0, number of sublists).
|
||||
* The multilist_get_num_sublists() function may be used to
|
||||
* determine the number of sublists in the multilist.
|
||||
*
|
||||
* Also, in order to reduce internal contention between the sublists
|
||||
* during insertion and removal, this function should choose evenly
|
||||
* between all available sublists when inserting. This isn't a hard
|
||||
* requirement, but a general rule of thumb in order to garner the
|
||||
* best multi-threaded performance out of the data structure.
|
||||
*/
|
||||
void
|
||||
multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
|
||||
multilist_sublist_index_func_t *index_func)
|
||||
{
|
||||
int i;
|
||||
|
||||
ASSERT3P(ml, !=, NULL);
|
||||
ASSERT3U(size, >, 0);
|
||||
ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
|
||||
ASSERT3U(num, >, 0);
|
||||
ASSERT3P(index_func, !=, NULL);
|
||||
|
||||
ml->ml_offset = offset;
|
||||
ml->ml_num_sublists = num;
|
||||
ml->ml_index_func = index_func;
|
||||
|
||||
ml->ml_sublists = kmem_zalloc(sizeof (multilist_sublist_t) *
|
||||
ml->ml_num_sublists, KM_SLEEP);
|
||||
|
||||
ASSERT3P(ml->ml_sublists, !=, NULL);
|
||||
|
||||
for (i = 0; i < ml->ml_num_sublists; i++) {
|
||||
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||
mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
list_create(&mls->mls_list, size, offset);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy the given multilist object, and free up any memory it holds.
|
||||
*/
|
||||
void
|
||||
multilist_destroy(multilist_t *ml)
|
||||
{
|
||||
int i;
|
||||
|
||||
ASSERT(multilist_is_empty(ml));
|
||||
|
||||
for (i = 0; i < ml->ml_num_sublists; i++) {
|
||||
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||
|
||||
ASSERT(list_is_empty(&mls->mls_list));
|
||||
|
||||
list_destroy(&mls->mls_list);
|
||||
mutex_destroy(&mls->mls_lock);
|
||||
}
|
||||
|
||||
ASSERT3P(ml->ml_sublists, !=, NULL);
|
||||
kmem_free(ml->ml_sublists,
|
||||
sizeof (multilist_sublist_t) * ml->ml_num_sublists);
|
||||
|
||||
ml->ml_num_sublists = 0;
|
||||
ml->ml_offset = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert the given object into the multilist.
|
||||
*
|
||||
* This function will insert the object specified into the sublist
|
||||
* determined using the function given at multilist creation time.
|
||||
*
|
||||
* The sublist locks are automatically acquired if not already held, to
|
||||
* ensure consistency when inserting and removing from multiple threads.
|
||||
*/
|
||||
void
|
||||
multilist_insert(multilist_t *ml, void *obj)
|
||||
{
|
||||
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
|
||||
multilist_sublist_t *mls;
|
||||
boolean_t need_lock;
|
||||
|
||||
DTRACE_PROBE3(multilist__insert, multilist_t *, ml,
|
||||
unsigned int, sublist_idx, void *, obj);
|
||||
|
||||
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||
|
||||
mls = &ml->ml_sublists[sublist_idx];
|
||||
|
||||
/*
|
||||
* Note: Callers may already hold the sublist lock by calling
|
||||
* multilist_sublist_lock(). Here we rely on MUTEX_HELD()
|
||||
* returning TRUE if and only if the current thread holds the
|
||||
* lock. While it's a little ugly to make the lock recursive in
|
||||
* this way, it works and allows the calling code to be much
|
||||
* simpler -- otherwise it would have to pass around a flag
|
||||
* indicating that it already has the lock.
|
||||
*/
|
||||
need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||
|
||||
if (need_lock)
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
ASSERT(!multilist_link_active(multilist_d2l(ml, obj)));
|
||||
|
||||
multilist_sublist_insert_head(mls, obj);
|
||||
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the given object from the multilist.
|
||||
*
|
||||
* This function will remove the object specified from the sublist
|
||||
* determined using the function given at multilist creation time.
|
||||
*
|
||||
* The necessary sublist locks are automatically acquired, to ensure
|
||||
* consistency when inserting and removing from multiple threads.
|
||||
*/
|
||||
void
|
||||
multilist_remove(multilist_t *ml, void *obj)
|
||||
{
|
||||
unsigned int sublist_idx = ml->ml_index_func(ml, obj);
|
||||
multilist_sublist_t *mls;
|
||||
boolean_t need_lock;
|
||||
|
||||
DTRACE_PROBE3(multilist__remove, multilist_t *, ml,
|
||||
unsigned int, sublist_idx, void *, obj);
|
||||
|
||||
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||
|
||||
mls = &ml->ml_sublists[sublist_idx];
|
||||
/* See comment in multilist_insert(). */
|
||||
need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||
|
||||
if (need_lock)
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
ASSERT(multilist_link_active(multilist_d2l(ml, obj)));
|
||||
|
||||
multilist_sublist_remove(mls, obj);
|
||||
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if this multilist object is empty.
|
||||
*
|
||||
* This will return TRUE if it finds all of the sublists of this
|
||||
* multilist to be empty, and FALSE otherwise. Each sublist lock will be
|
||||
* automatically acquired as necessary.
|
||||
*
|
||||
* If concurrent insertions and removals are occurring, the semantics
|
||||
* of this function become a little fuzzy. Instead of locking all
|
||||
* sublists for the entire call time of the function, each sublist is
|
||||
* only locked as it is individually checked for emptiness. Thus, it's
|
||||
* possible for this function to return TRUE with non-empty sublists at
|
||||
* the time the function returns. This would be due to another thread
|
||||
* inserting into a given sublist, after that specific sublist was checked
|
||||
* and deemed empty, but before all sublists have been checked.
|
||||
*/
|
||||
int
|
||||
multilist_is_empty(multilist_t *ml)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ml->ml_num_sublists; i++) {
|
||||
multilist_sublist_t *mls = &ml->ml_sublists[i];
|
||||
/* See comment in multilist_insert(). */
|
||||
boolean_t need_lock = !MUTEX_HELD(&mls->mls_lock);
|
||||
|
||||
if (need_lock)
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
if (!list_is_empty(&mls->mls_list)) {
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
|
||||
return (FALSE);
|
||||
}
|
||||
|
||||
if (need_lock)
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
return (TRUE);
|
||||
}
|
||||
|
||||
/* Return the number of sublists composing this multilist */
|
||||
unsigned int
|
||||
multilist_get_num_sublists(multilist_t *ml)
|
||||
{
|
||||
return (ml->ml_num_sublists);
|
||||
}
|
||||
|
||||
/* Return a randomly selected, valid sublist index for this multilist */
|
||||
unsigned int
|
||||
multilist_get_random_index(multilist_t *ml)
|
||||
{
|
||||
return (spa_get_random(ml->ml_num_sublists));
|
||||
}
|
||||
|
||||
/* Lock and return the sublist specified at the given index */
|
||||
multilist_sublist_t *
|
||||
multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
|
||||
{
|
||||
multilist_sublist_t *mls;
|
||||
|
||||
ASSERT3U(sublist_idx, <, ml->ml_num_sublists);
|
||||
mls = &ml->ml_sublists[sublist_idx];
|
||||
mutex_enter(&mls->mls_lock);
|
||||
|
||||
return (mls);
|
||||
}
|
||||
|
||||
void
|
||||
multilist_sublist_unlock(multilist_sublist_t *mls)
|
||||
{
|
||||
mutex_exit(&mls->mls_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* We're allowing any object to be inserted into this specific sublist,
|
||||
* but this can lead to trouble if multilist_remove() is called to
|
||||
* remove this object. Specifically, if calling ml_index_func on this
|
||||
* object returns an index for sublist different than what is passed as
|
||||
* a parameter here, any call to multilist_remove() with this newly
|
||||
* inserted object is undefined! (the call to multilist_remove() will
|
||||
* remove the object from a list that it isn't contained in)
|
||||
*/
|
||||
void
|
||||
multilist_sublist_insert_head(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
list_insert_head(&mls->mls_list, obj);
|
||||
}
|
||||
|
||||
/* please see comment above multilist_sublist_insert_head */
|
||||
void
|
||||
multilist_sublist_insert_tail(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
list_insert_tail(&mls->mls_list, obj);
|
||||
}
|
||||
|
||||
/*
|
||||
* Move the object one element forward in the list.
|
||||
*
|
||||
* This function will move the given object forward in the list (towards
|
||||
* the head) by one object. So, in essence, it will swap its position in
|
||||
* the list with its "prev" pointer. If the given object is already at the
|
||||
* head of the list, it cannot be moved forward any more than it already
|
||||
* is, so no action is taken.
|
||||
*
|
||||
* NOTE: This function **must not** remove any object from the list other
|
||||
* than the object given as the parameter. This is relied upon in
|
||||
* arc_evict_state_impl().
|
||||
*/
|
||||
void
|
||||
multilist_sublist_move_forward(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
void *prev = list_prev(&mls->mls_list, obj);
|
||||
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
ASSERT(!list_is_empty(&mls->mls_list));
|
||||
|
||||
/* 'obj' must be at the head of the list, nothing to do */
|
||||
if (prev == NULL)
|
||||
return;
|
||||
|
||||
list_remove(&mls->mls_list, obj);
|
||||
list_insert_before(&mls->mls_list, prev, obj);
|
||||
}
|
||||
|
||||
void
|
||||
multilist_sublist_remove(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
list_remove(&mls->mls_list, obj);
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_head(multilist_sublist_t *mls)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_head(&mls->mls_list));
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_tail(multilist_sublist_t *mls)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_tail(&mls->mls_list));
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_next(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_next(&mls->mls_list, obj));
|
||||
}
|
||||
|
||||
void *
|
||||
multilist_sublist_prev(multilist_sublist_t *mls, void *obj)
|
||||
{
|
||||
ASSERT(MUTEX_HELD(&mls->mls_lock));
|
||||
return (list_prev(&mls->mls_list, obj));
|
||||
}
|
||||
|
||||
void
|
||||
multilist_link_init(multilist_node_t *link)
|
||||
{
|
||||
list_link_init(link);
|
||||
}
|
||||
|
||||
int
|
||||
multilist_link_active(multilist_node_t *link)
|
||||
{
|
||||
return (list_link_active(link));
|
||||
}
|
@ -200,7 +200,7 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
|
||||
if (zfs_read_history == 0 && ssh->size == 0)
|
||||
return;
|
||||
|
||||
if (zfs_read_history_hits == 0 && (aflags & ARC_CACHED))
|
||||
if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
|
||||
return;
|
||||
|
||||
srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
|
||||
|
@ -23,6 +23,7 @@
|
||||
* (and only one) C file, so this dummy file exists for that purpose.
|
||||
*/
|
||||
|
||||
#include <sys/multilist.h>
|
||||
#include <sys/arc_impl.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/zio.h>
|
||||
@ -31,6 +32,7 @@
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/multilist.h>
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/zil_impl.h>
|
||||
#include <sys/zrlock.h>
|
||||
@ -42,6 +44,7 @@
|
||||
#include <sys/trace_dbuf.h>
|
||||
#include <sys/trace_dmu.h>
|
||||
#include <sys/trace_dnode.h>
|
||||
#include <sys/trace_multilist.h>
|
||||
#include <sys/trace_txg.h>
|
||||
#include <sys/trace_zil.h>
|
||||
#include <sys/trace_zrlock.h>
|
||||
|
@ -471,7 +471,7 @@ txg_wait_callbacks(dsl_pool_t *dp)
|
||||
tx_state_t *tx = &dp->dp_tx;
|
||||
|
||||
if (tx->tx_commit_cb_taskq != NULL)
|
||||
taskq_wait(tx->tx_commit_cb_taskq);
|
||||
taskq_wait_outstanding(tx->tx_commit_cb_taskq, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1152,8 +1152,8 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
|
||||
*/
|
||||
int round = 0;
|
||||
while (zsb->z_nr_znodes > 0) {
|
||||
taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(
|
||||
zsb->z_os)));
|
||||
taskq_wait_outstanding(dsl_pool_iput_taskq(
|
||||
dmu_objset_pool(zsb->z_os)), 0);
|
||||
if (++round > 1 && !unmounting)
|
||||
break;
|
||||
}
|
||||
@ -1740,7 +1740,7 @@ zfs_init(void)
|
||||
void
|
||||
zfs_fini(void)
|
||||
{
|
||||
taskq_wait(system_taskq);
|
||||
taskq_wait_outstanding(system_taskq, 0);
|
||||
unregister_filesystem(&zpl_fs_type);
|
||||
zfs_znode_fini();
|
||||
zfsctl_fini();
|
||||
|
@ -204,7 +204,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
|
||||
char **end)
|
||||
{
|
||||
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
arc_buf_t *abuf = NULL;
|
||||
zbookmark_phys_t zb;
|
||||
int error;
|
||||
@ -280,7 +280,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
|
||||
{
|
||||
enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
|
||||
const blkptr_t *bp = &lr->lr_blkptr;
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
arc_buf_t *abuf = NULL;
|
||||
zbookmark_phys_t zb;
|
||||
int error;
|
||||
|
@ -2241,7 +2241,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
|
||||
|
||||
if (ddp->ddp_phys_birth != 0) {
|
||||
arc_buf_t *abuf = NULL;
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_flags_t aflags = ARC_FLAG_WAIT;
|
||||
blkptr_t blk = *zio->io_bp;
|
||||
int error;
|
||||
|
||||
|
@ -439,7 +439,11 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
|
||||
* fault injection isn't a performance critical path.
|
||||
*/
|
||||
if (flags & ZINJECT_FLUSH_ARC)
|
||||
arc_flush(NULL);
|
||||
/*
|
||||
* We must use FALSE to ensure arc_flush returns, since
|
||||
* we're not preventing concurrent ARC insertions.
|
||||
*/
|
||||
arc_flush(NULL, FALSE);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user