mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-03 23:09:35 +03:00
45d1abc74d
Refactor dmu_object_alloc_dnsize() and dnode_hold_impl() to simplify the
code, fix errors introduced by commit dbeb879
(PR #6117) interacting
badly with large dnodes, and improve performance.
* When allocating a new dnode in dmu_object_alloc_dnsize(), update the
percpu object ID for the core's metadnode chunk immediately. This
eliminates most lock contention when taking the hold and creating the
dnode.
* Correct detection of the chunk boundary to work properly with large
dnodes.
* Separate the dnode_hold_impl() code for the FREE case from the code for
the ALLOCATED case to make it easier to read.
* Fully populate the dnode handle array immediately after reading a
block of the metadnode from disk. Subsequently the dnode handle array
provides enough information to determine which dnode slots are in use
and which are free.
* Add several kstats to allow the behavior of the code to be examined.
* Verify dnode packing in large_dnode_008_pos.ksh. Since the test performs
only creates, it should leave very few holes in the metadnode.
* Add test large_dnode_009_pos.ksh, which performs concurrent creates
and deletes, to complement existing test which does only creates.
With the above fixes, there is very little contention in a test of about
200,000 racing dnode allocations produced by tests 'large_dnode_008_pos'
and 'large_dnode_009_pos'.
name type data
dnode_hold_dbuf_hold 4 0
dnode_hold_dbuf_read 4 0
dnode_hold_alloc_hits 4 3804690
dnode_hold_alloc_misses 4 216
dnode_hold_alloc_interior 4 3
dnode_hold_alloc_lock_retry 4 0
dnode_hold_alloc_lock_misses 4 0
dnode_hold_alloc_type_none 4 0
dnode_hold_free_hits 4 203105
dnode_hold_free_misses 4 4
dnode_hold_free_lock_misses 4 0
dnode_hold_free_lock_retry 4 0
dnode_hold_free_overflow 4 0
dnode_hold_free_refcount 4 57
dnode_hold_free_txg 4 0
dnode_allocate 4 203154
dnode_reallocate 4 0
dnode_buf_evict 4 23918
dnode_alloc_next_chunk 4 4887
dnode_alloc_race 4 0
dnode_alloc_next_block 4 18
The performance is slightly improved for concurrent creates with
16+ threads, and unchanged for low thread counts.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
231 lines
6.0 KiB
C
231 lines
6.0 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
#include <sys/zfs_context.h>
|
|
#include <sys/dbuf.h>
|
|
#include <sys/dmu_objset.h>
|
|
|
|
/*
 * Forwarded to arc_buf_info() as its state-index argument whenever the
 * ARC details of a dbuf are gathered.  Zero, the default, leaves the arc
 * header state index calculation disabled.
 */
int zfs_dbuf_state_index = 0;
|
|
|
|
/*
|
|
* ==========================================================================
|
|
* Dbuf Hash Read Routines
|
|
* ==========================================================================
|
|
*/
|
|
typedef struct dbuf_stats_t {
|
|
kmutex_t lock;
|
|
kstat_t *kstat;
|
|
dbuf_hash_table_t *hash;
|
|
int idx;
|
|
} dbuf_stats_t;
|
|
|
|
/*
 * The one file-scope statistics instance; it is set up by
 * dbuf_stats_hash_table_init() and torn down by
 * dbuf_stats_hash_table_destroy().
 */
static dbuf_stats_t dbuf_stats_hash_table;
|
|
|
|
/*
 * Write the two header lines of the "dbufs" kstat into buf.
 *
 * The first line labels the three column groups (dbuf, arcbuf, dnode) and
 * the second line names each individual column.
 *
 *	buf	destination scratch buffer
 *	size	capacity of buf in bytes
 *
 * Returns 0 on success.  Returns ENOMEM when the header does not fit in
 * buf (or formatting fails), so that the header can be regenerated with a
 * larger scratch buffer instead of being emitted silently truncated.
 */
static int
dbuf_stats_hash_table_headers(char *buf, size_t size)
{
	int nwritten;

	nwritten = snprintf(buf, size,
	    "%-88s | %-124s | %s\n"
	    "%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
	    "%-5s %-5s %-8s %-6s %-8s %-12s "
	    "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
	    "%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
	    "dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
	    "blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
	    "atype", "flags", "count", "asize", "access",
	    "mru", "gmru", "mfu", "gmfu", "l2", "l2_dattr", "l2_asize",
	    "l2_comp", "aholds", "dtype", "btype", "data_bs", "meta_bs",
	    "bsize", "lvls", "dholds", "blocks", "dsize");

	/* Negative means a formatting error; >= size means truncation. */
	if (nwritten < 0 || (size_t)nwritten >= size)
		return (ENOMEM);

	return (0);
}
|
|
|
|
int
|
|
__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
|
|
{
|
|
arc_buf_info_t abi = { 0 };
|
|
dmu_object_info_t doi = { 0 };
|
|
dnode_t *dn = DB_DNODE(db);
|
|
size_t nwritten;
|
|
|
|
if (db->db_buf)
|
|
arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);
|
|
|
|
__dmu_object_info_from_dnode(dn, &doi);
|
|
|
|
nwritten = snprintf(buf, size,
|
|
"%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
|
|
"%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
|
|
"%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
|
|
"%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
|
|
/* dmu_buf_impl_t */
|
|
spa_name(dn->dn_objset->os_spa),
|
|
(u_longlong_t)dmu_objset_id(db->db_objset),
|
|
(longlong_t)db->db.db_object,
|
|
(longlong_t)db->db_level,
|
|
(longlong_t)db->db_blkid,
|
|
(u_longlong_t)db->db.db_offset,
|
|
(u_longlong_t)db->db.db_size,
|
|
!!dbuf_is_metadata(db),
|
|
db->db_state,
|
|
(ulong_t)refcount_count(&db->db_holds),
|
|
/* arc_buf_info_t */
|
|
abi.abi_state_type,
|
|
abi.abi_state_contents,
|
|
abi.abi_flags,
|
|
(ulong_t)abi.abi_bufcnt,
|
|
(u_longlong_t)abi.abi_size,
|
|
(u_longlong_t)abi.abi_access,
|
|
(ulong_t)abi.abi_mru_hits,
|
|
(ulong_t)abi.abi_mru_ghost_hits,
|
|
(ulong_t)abi.abi_mfu_hits,
|
|
(ulong_t)abi.abi_mfu_ghost_hits,
|
|
(ulong_t)abi.abi_l2arc_hits,
|
|
(u_longlong_t)abi.abi_l2arc_dattr,
|
|
(u_longlong_t)abi.abi_l2arc_asize,
|
|
abi.abi_l2arc_compress,
|
|
(ulong_t)abi.abi_holds,
|
|
/* dmu_object_info_t */
|
|
doi.doi_type,
|
|
doi.doi_bonus_type,
|
|
(ulong_t)doi.doi_data_block_size,
|
|
(ulong_t)doi.doi_metadata_block_size,
|
|
(u_longlong_t)doi.doi_bonus_size,
|
|
(ulong_t)doi.doi_indirection,
|
|
(ulong_t)refcount_count(&dn->dn_holds),
|
|
(u_longlong_t)doi.doi_fill_count,
|
|
(u_longlong_t)doi.doi_max_offset);
|
|
|
|
if (nwritten >= size)
|
|
return (size);
|
|
|
|
return (nwritten + 1);
|
|
}
|
|
|
|
static int
|
|
dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
|
|
{
|
|
dbuf_stats_t *dsh = (dbuf_stats_t *)data;
|
|
dbuf_hash_table_t *h = dsh->hash;
|
|
dmu_buf_impl_t *db;
|
|
int length, error = 0;
|
|
|
|
ASSERT3S(dsh->idx, >=, 0);
|
|
ASSERT3S(dsh->idx, <=, h->hash_table_mask);
|
|
memset(buf, 0, size);
|
|
|
|
mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
|
|
for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
|
|
/*
|
|
* Returning ENOMEM will cause the data and header functions
|
|
* to be called with a larger scratch buffers.
|
|
*/
|
|
if (size < 512) {
|
|
error = ENOMEM;
|
|
break;
|
|
}
|
|
|
|
mutex_enter(&db->db_mtx);
|
|
|
|
if (db->db_state != DB_EVICTING) {
|
|
length = __dbuf_stats_hash_table_data(buf, size, db);
|
|
buf += length;
|
|
size -= length;
|
|
}
|
|
|
|
mutex_exit(&db->db_mtx);
|
|
}
|
|
mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
|
|
|
|
return (error);
|
|
}
|
|
|
|
static void *
|
|
dbuf_stats_hash_table_addr(kstat_t *ksp, loff_t n)
|
|
{
|
|
dbuf_stats_t *dsh = ksp->ks_private;
|
|
|
|
ASSERT(MUTEX_HELD(&dsh->lock));
|
|
|
|
if (n <= dsh->hash->hash_table_mask) {
|
|
dsh->idx = n;
|
|
return (dsh);
|
|
}
|
|
|
|
return (NULL);
|
|
}
|
|
|
|
static void
|
|
dbuf_stats_hash_table_init(dbuf_hash_table_t *hash)
|
|
{
|
|
dbuf_stats_t *dsh = &dbuf_stats_hash_table;
|
|
kstat_t *ksp;
|
|
|
|
mutex_init(&dsh->lock, NULL, MUTEX_DEFAULT, NULL);
|
|
dsh->hash = hash;
|
|
|
|
ksp = kstat_create("zfs", 0, "dbufs", "misc",
|
|
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
|
dsh->kstat = ksp;
|
|
|
|
if (ksp) {
|
|
ksp->ks_lock = &dsh->lock;
|
|
ksp->ks_ndata = UINT32_MAX;
|
|
ksp->ks_private = dsh;
|
|
kstat_set_raw_ops(ksp, dbuf_stats_hash_table_headers,
|
|
dbuf_stats_hash_table_data, dbuf_stats_hash_table_addr);
|
|
kstat_install(ksp);
|
|
}
|
|
}
|
|
|
|
static void
|
|
dbuf_stats_hash_table_destroy(void)
|
|
{
|
|
dbuf_stats_t *dsh = &dbuf_stats_hash_table;
|
|
kstat_t *ksp;
|
|
|
|
ksp = dsh->kstat;
|
|
if (ksp)
|
|
kstat_delete(ksp);
|
|
|
|
mutex_destroy(&dsh->lock);
|
|
}
|
|
|
|
void
|
|
dbuf_stats_init(dbuf_hash_table_t *hash)
|
|
{
|
|
dbuf_stats_hash_table_init(hash);
|
|
}
|
|
|
|
/*
 * Public entry point: tear down what dbuf_stats_init() set up.
 */
void
dbuf_stats_destroy(void)
{
	dbuf_stats_hash_table_destroy();
}
|
|
|
|
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Register zfs_dbuf_state_index as an int module parameter, mode 0644. */
module_param(zfs_dbuf_state_index, int, 0644);
MODULE_PARM_DESC(zfs_dbuf_state_index, "Calculate arc header index");
#endif /* _KERNEL && HAVE_SPL */
|