mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 18:40:43 +03:00
ddt: add support for prefetching tables into the ARC
This change adds a new `zpool prefetch -t ddt $pool` command which causes a pool's DDT to be loaded into the ARC. The primary goal is to remove the need to "warm" a pool's cache before deduplication stops slowing write performance. It may also provide a way to reload portions of a DDT if they have been flushed due to inactivity. Sponsored-by: iXsystems, Inc. Sponsored-by: Catalogics, Inc. Sponsored-by: Klara, Inc. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Will Andrews <will.andrews@klarasystems.com> Signed-off-by: Fred Weigel <fred.weigel@klarasystems.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Signed-off-by: Don Brady <don.brady@klarasystems.com> Co-authored-by: Will Andrews <will.andrews@klarasystems.com> Co-authored-by: Don Brady <don.brady@klarasystems.com> Closes #15890
This commit is contained in:
+52
-1
@@ -26,7 +26,7 @@
|
||||
* Copyright (c) 2017, Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2019, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
|
||||
* Copyright (c) 2020, George Amanakis. All rights reserved.
|
||||
* Copyright (c) 2019, Klara Inc.
|
||||
* Copyright (c) 2019, 2023, Klara Inc.
|
||||
* Copyright (c) 2019, Allan Jude
|
||||
* Copyright (c) 2020, The FreeBSD Foundation [1]
|
||||
*
|
||||
@@ -5471,6 +5471,57 @@ arc_read_done(zio_t *zio)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup the block at the specified DVA (in bp), and return the manner in
|
||||
* which the block is cached. A zero return indicates not cached.
|
||||
*/
|
||||
int
|
||||
arc_cached(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
arc_buf_hdr_t *hdr = NULL;
|
||||
kmutex_t *hash_lock = NULL;
|
||||
uint64_t guid = spa_load_guid(spa);
|
||||
int flags = 0;
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (ARC_CACHED_EMBEDDED);
|
||||
|
||||
hdr = buf_hash_find(guid, bp, &hash_lock);
|
||||
if (hdr == NULL)
|
||||
return (0);
|
||||
|
||||
if (HDR_HAS_L1HDR(hdr)) {
|
||||
arc_state_t *state = hdr->b_l1hdr.b_state;
|
||||
/*
|
||||
* We switch to ensure that any future arc_state_type_t
|
||||
* changes are handled. This is just a shift to promote
|
||||
* more compile-time checking.
|
||||
*/
|
||||
switch (state->arcs_state) {
|
||||
case ARC_STATE_ANON:
|
||||
break;
|
||||
case ARC_STATE_MRU:
|
||||
flags |= ARC_CACHED_IN_MRU | ARC_CACHED_IN_L1;
|
||||
break;
|
||||
case ARC_STATE_MFU:
|
||||
flags |= ARC_CACHED_IN_MFU | ARC_CACHED_IN_L1;
|
||||
break;
|
||||
case ARC_STATE_UNCACHED:
|
||||
/* The header is still in L1, probably not for long */
|
||||
flags |= ARC_CACHED_IN_L1;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (HDR_HAS_L2HDR(hdr))
|
||||
flags |= ARC_CACHED_IN_L2;
|
||||
|
||||
mutex_exit(hash_lock);
|
||||
|
||||
return (flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* "Read" the block at the specified DVA (in bp) via the
|
||||
* cache. If the block is found in the cache, invoke the provided
|
||||
|
||||
+33
-1
@@ -23,7 +23,7 @@
|
||||
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2022 by Pawel Jakub Dawidek
|
||||
* Copyright (c) 2023, Klara Inc.
|
||||
* Copyright (c) 2019, 2023, Klara Inc.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@@ -340,6 +340,16 @@ ddt_object_prefetch(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
ddt->ddt_object[type][class], ddk);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_object_prefetch_all(ddt_t *ddt, ddt_type_t type, ddt_class_t class)
|
||||
{
|
||||
if (!ddt_object_exists(ddt, type, class))
|
||||
return;
|
||||
|
||||
ddt_ops[type]->ddt_op_prefetch_all(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class]);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_object_update(ddt_t *ddt, ddt_type_t type, ddt_class_t class,
|
||||
ddt_entry_t *dde, dmu_tx_t *tx)
|
||||
@@ -652,6 +662,28 @@ ddt_over_quota(spa_t *spa)
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
ddt_prefetch_all(spa_t *spa)
|
||||
{
|
||||
/*
|
||||
* Load all DDT entries for each type/class combination. This is
|
||||
* indended to perform a prefetch on all such blocks. For the same
|
||||
* reason that ddt_prefetch isn't locked, this is also not locked.
|
||||
*/
|
||||
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (!ddt)
|
||||
continue;
|
||||
|
||||
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (ddt_class_t class = 0; class < DDT_CLASSES;
|
||||
class++) {
|
||||
ddt_object_prefetch_all(ddt, type, class);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ddt_entry_t *
|
||||
ddt_lookup(ddt_t *ddt, const blkptr_t *bp, boolean_t add)
|
||||
{
|
||||
|
||||
@@ -248,3 +248,32 @@ ddt_get_pool_dedup_ratio(spa_t *spa)
|
||||
|
||||
return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
|
||||
}
|
||||
|
||||
int
|
||||
ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize)
|
||||
{
|
||||
uint64_t l1sz, l1tot, l2sz, l2tot;
|
||||
int err = 0;
|
||||
|
||||
l1tot = l2tot = 0;
|
||||
*psize = 0;
|
||||
for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
|
||||
ddt_t *ddt = spa->spa_ddt[c];
|
||||
if (ddt == NULL)
|
||||
continue;
|
||||
for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
|
||||
for (ddt_class_t class = 0; class < DDT_CLASSES;
|
||||
class++) {
|
||||
err = dmu_object_cached_size(ddt->ddt_os,
|
||||
ddt->ddt_object[type][class], &l1sz, &l2sz);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
l1tot += l1sz;
|
||||
l2tot += l2sz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*psize = l1tot + l2tot;
|
||||
return (err);
|
||||
}
|
||||
|
||||
@@ -147,6 +147,12 @@ ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
|
||||
(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
|
||||
}
|
||||
|
||||
static void
|
||||
ddt_zap_prefetch_all(objset_t *os, uint64_t object)
|
||||
{
|
||||
(void) zap_prefetch_object(os, object);
|
||||
}
|
||||
|
||||
static int
|
||||
ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
|
||||
const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx)
|
||||
@@ -231,6 +237,7 @@ const ddt_ops_t ddt_zap_ops = {
|
||||
ddt_zap_lookup,
|
||||
ddt_zap_contains,
|
||||
ddt_zap_prefetch,
|
||||
ddt_zap_prefetch_all,
|
||||
ddt_zap_update,
|
||||
ddt_zap_remove,
|
||||
ddt_zap_walk,
|
||||
|
||||
+210
-2
@@ -26,7 +26,7 @@
|
||||
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2019, Klara Inc.
|
||||
* Copyright (c) 2019, 2023, Klara Inc.
|
||||
* Copyright (c) 2019, Allan Jude
|
||||
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
|
||||
* Copyright (c) 2021, 2022 by Pawel Jakub Dawidek
|
||||
@@ -701,7 +701,7 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, const void *tag)
|
||||
* Issue prefetch I/Os for the given blocks. If level is greater than 0, the
|
||||
* indirect blocks prefetched will be those that point to the blocks containing
|
||||
* the data starting at offset, and continuing to offset + len. If the range
|
||||
* it too long, prefetch the first dmu_prefetch_max bytes as requested, while
|
||||
* is too long, prefetch the first dmu_prefetch_max bytes as requested, while
|
||||
* for the rest only a higher level, also fitting within dmu_prefetch_max. It
|
||||
* should primarily help random reads, since for long sequential reads there is
|
||||
* a speculative prefetcher.
|
||||
@@ -777,6 +777,106 @@ dmu_prefetch_by_dnode(dnode_t *dn, int64_t level, uint64_t offset,
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
kmutex_t dpa_lock;
|
||||
kcondvar_t dpa_cv;
|
||||
uint64_t dpa_pending_io;
|
||||
} dmu_prefetch_arg_t;
|
||||
|
||||
static void
|
||||
dmu_prefetch_done(void *arg, uint64_t level, uint64_t blkid, boolean_t issued)
|
||||
{
|
||||
(void) level; (void) blkid; (void)issued;
|
||||
dmu_prefetch_arg_t *dpa = arg;
|
||||
|
||||
ASSERT0(level);
|
||||
|
||||
mutex_enter(&dpa->dpa_lock);
|
||||
ASSERT3U(dpa->dpa_pending_io, >, 0);
|
||||
if (--dpa->dpa_pending_io == 0)
|
||||
cv_broadcast(&dpa->dpa_cv);
|
||||
mutex_exit(&dpa->dpa_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
dmu_prefetch_wait_by_dnode(dnode_t *dn, uint64_t offset, uint64_t len)
|
||||
{
|
||||
dmu_prefetch_arg_t dpa;
|
||||
|
||||
mutex_init(&dpa.dpa_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
cv_init(&dpa.dpa_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
|
||||
uint64_t start = dbuf_whichblock(dn, 0, offset);
|
||||
uint64_t end = dbuf_whichblock(dn, 0, offset + len - 1) + 1;
|
||||
dpa.dpa_pending_io = end - start;
|
||||
|
||||
for (uint64_t blk = start; blk < end; blk++) {
|
||||
(void) dbuf_prefetch_impl(dn, 0, blk, ZIO_PRIORITY_ASYNC_READ,
|
||||
0, dmu_prefetch_done, &dpa);
|
||||
}
|
||||
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
/* wait for prefetch L0 reads to finish */
|
||||
mutex_enter(&dpa.dpa_lock);
|
||||
while (dpa.dpa_pending_io > 0) {
|
||||
cv_wait(&dpa.dpa_cv, &dpa.dpa_lock);
|
||||
|
||||
}
|
||||
mutex_exit(&dpa.dpa_lock);
|
||||
|
||||
mutex_destroy(&dpa.dpa_lock);
|
||||
cv_destroy(&dpa.dpa_cv);
|
||||
}
|
||||
|
||||
/*
|
||||
* Issue prefetch I/Os for the given L0 block range and wait for the I/O
|
||||
* to complete. This does not enforce dmu_prefetch_max and will prefetch
|
||||
* the entire range. The blocks are read from disk into the ARC but no
|
||||
* decompression occurs (i.e., the dbuf cache is not required).
|
||||
*/
|
||||
int
|
||||
dmu_prefetch_wait(objset_t *os, uint64_t object, uint64_t offset, uint64_t size)
|
||||
{
|
||||
dnode_t *dn;
|
||||
int err = 0;
|
||||
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
/*
|
||||
* Chunk the requests (16 indirects worth) so that we can be interrupted
|
||||
*/
|
||||
uint64_t chunksize;
|
||||
if (dn->dn_indblkshift) {
|
||||
uint64_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);
|
||||
chunksize = (nbps * 16) << dn->dn_datablkshift;
|
||||
} else {
|
||||
chunksize = dn->dn_datablksz;
|
||||
}
|
||||
|
||||
while (size > 0) {
|
||||
uint64_t mylen = MIN(size, chunksize);
|
||||
|
||||
dmu_prefetch_wait_by_dnode(dn, offset, mylen);
|
||||
|
||||
offset += mylen;
|
||||
size -= mylen;
|
||||
|
||||
if (issig()) {
|
||||
err = SET_ERROR(EINTR);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Issue prefetch I/Os for the given object's dnode.
|
||||
*/
|
||||
@@ -1451,6 +1551,114 @@ dmu_write_uio(objset_t *os, uint64_t object, zfs_uio_t *uio, uint64_t size,
|
||||
}
|
||||
#endif /* _KERNEL */
|
||||
|
||||
static void
|
||||
dmu_cached_bps(spa_t *spa, blkptr_t *bps, uint_t nbps,
|
||||
uint64_t *l1sz, uint64_t *l2sz)
|
||||
{
|
||||
int cached_flags;
|
||||
|
||||
if (bps == NULL)
|
||||
return;
|
||||
|
||||
for (size_t blk_off = 0; blk_off < nbps; blk_off++) {
|
||||
blkptr_t *bp = &bps[blk_off];
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
continue;
|
||||
|
||||
cached_flags = arc_cached(spa, bp);
|
||||
if (cached_flags == 0)
|
||||
continue;
|
||||
|
||||
if ((cached_flags & (ARC_CACHED_IN_L1 | ARC_CACHED_IN_L2)) ==
|
||||
ARC_CACHED_IN_L2)
|
||||
*l2sz += BP_GET_LSIZE(bp);
|
||||
else
|
||||
*l1sz += BP_GET_LSIZE(bp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Estimate DMU object cached size.
|
||||
*/
|
||||
int
|
||||
dmu_object_cached_size(objset_t *os, uint64_t object,
|
||||
uint64_t *l1sz, uint64_t *l2sz)
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_object_info_t doi;
|
||||
int err = 0;
|
||||
|
||||
*l1sz = *l2sz = 0;
|
||||
|
||||
if (dnode_hold(os, object, FTAG, &dn) != 0)
|
||||
return (0);
|
||||
|
||||
if (dn->dn_nlevels < 2) {
|
||||
dnode_rele(dn, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
dmu_object_info_from_dnode(dn, &doi);
|
||||
|
||||
for (uint64_t off = 0; off < doi.doi_max_offset;
|
||||
off += dmu_prefetch_max) {
|
||||
/* dbuf_read doesn't prefetch L1 blocks. */
|
||||
dmu_prefetch_by_dnode(dn, 1, off,
|
||||
dmu_prefetch_max, ZIO_PRIORITY_SYNC_READ);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold all valid L1 blocks, asking ARC the status of each BP
|
||||
* contained in each such L1 block.
|
||||
*/
|
||||
uint_t nbps = bp_span_in_blocks(dn->dn_indblkshift, 1);
|
||||
uint64_t l1blks = 1 + (dn->dn_maxblkid / nbps);
|
||||
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
for (uint64_t blk = 0; blk < l1blks; blk++) {
|
||||
dmu_buf_impl_t *db = NULL;
|
||||
|
||||
if (issig()) {
|
||||
/*
|
||||
* On interrupt, get out, and bubble up EINTR
|
||||
*/
|
||||
err = EINTR;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we get an i/o error here, the L1 can't be read,
|
||||
* and nothing under it could be cached, so we just
|
||||
* continue. Ignoring the error from dbuf_hold_impl
|
||||
* or from dbuf_read is then a reasonable choice.
|
||||
*/
|
||||
err = dbuf_hold_impl(dn, 1, blk, B_TRUE, B_FALSE, FTAG, &db);
|
||||
if (err != 0) {
|
||||
/*
|
||||
* ignore error and continue
|
||||
*/
|
||||
err = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
err = dbuf_read(db, NULL, DB_RF_CANFAIL);
|
||||
if (err == 0) {
|
||||
dmu_cached_bps(dmu_objset_spa(os), db->db.db_data,
|
||||
nbps, l1sz, l2sz);
|
||||
}
|
||||
/*
|
||||
* error may be ignored, and we continue
|
||||
*/
|
||||
err = 0;
|
||||
dbuf_rele(db, FTAG);
|
||||
}
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a loaned anonymous arc buffer.
|
||||
*/
|
||||
|
||||
+55
-4
@@ -34,7 +34,7 @@
|
||||
* Copyright (c) 2017, Intel Corporation.
|
||||
* Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
|
||||
* Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
|
||||
* Copyright (c) 2024, Klara Inc.
|
||||
* Copyright (c) 2023, 2024, Klara Inc.
|
||||
*/
|
||||
|
||||
/*
|
||||
@@ -337,6 +337,55 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, const char *strval,
|
||||
nvlist_free(propval);
|
||||
}
|
||||
|
||||
static int
|
||||
spa_prop_add(spa_t *spa, const char *propname, nvlist_t *outnvl)
|
||||
{
|
||||
zpool_prop_t prop = zpool_name_to_prop(propname);
|
||||
zprop_source_t src = ZPROP_SRC_NONE;
|
||||
uint64_t intval;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* NB: Not all properties lookups via this API require
|
||||
* the spa props lock, so they must explicitly grab it here.
|
||||
*/
|
||||
switch (prop) {
|
||||
case ZPOOL_PROP_DEDUPCACHED:
|
||||
err = ddt_get_pool_dedup_cached(spa, &intval);
|
||||
if (err != 0)
|
||||
return (SET_ERROR(err));
|
||||
break;
|
||||
default:
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
spa_prop_add_list(outnvl, prop, NULL, intval, src);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
spa_prop_get_nvlist(spa_t *spa, char **props, unsigned int n_props,
|
||||
nvlist_t **outnvl)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (props == NULL)
|
||||
return (0);
|
||||
|
||||
if (*outnvl == NULL) {
|
||||
err = nvlist_alloc(outnvl, NV_UNIQUE_NAME, KM_SLEEP);
|
||||
if (err)
|
||||
return (err);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < n_props && err == 0; i++) {
|
||||
err = spa_prop_add(spa, props[i], *outnvl);
|
||||
}
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a user property (source=src, propname=propval) to an nvlist.
|
||||
*/
|
||||
@@ -503,9 +552,11 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
|
||||
dsl_pool_t *dp;
|
||||
int err;
|
||||
|
||||
err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
|
||||
if (err)
|
||||
return (err);
|
||||
if (*nvp == NULL) {
|
||||
err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP);
|
||||
if (err)
|
||||
return (err);
|
||||
}
|
||||
|
||||
dp = spa_get_dsl(spa);
|
||||
dsl_pool_config_enter(dp, FTAG);
|
||||
|
||||
@@ -1072,6 +1072,21 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_prefetch_object(objset_t *os, uint64_t zapobj)
|
||||
{
|
||||
int error;
|
||||
dmu_object_info_t doi;
|
||||
|
||||
error = dmu_object_info(os, zapobj, &doi);
|
||||
if (error == 0 && DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
|
||||
error = SET_ERROR(EINVAL);
|
||||
if (error == 0)
|
||||
dmu_prefetch_wait(os, zapobj, 0, doi.doi_max_offset);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf)
|
||||
@@ -1784,6 +1799,7 @@ EXPORT_SYMBOL(zap_lookup_uint64);
|
||||
EXPORT_SYMBOL(zap_contains);
|
||||
EXPORT_SYMBOL(zap_prefetch);
|
||||
EXPORT_SYMBOL(zap_prefetch_uint64);
|
||||
EXPORT_SYMBOL(zap_prefetch_object);
|
||||
EXPORT_SYMBOL(zap_add);
|
||||
EXPORT_SYMBOL(zap_add_by_dnode);
|
||||
EXPORT_SYMBOL(zap_add_uint64);
|
||||
|
||||
+89
-17
@@ -38,7 +38,7 @@
|
||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
|
||||
* Copyright (c) 2019, 2021, 2024, Klara Inc.
|
||||
* Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
|
||||
* Copyright (c) 2019, Allan Jude
|
||||
* Copyright 2024 Oxide Computer Company
|
||||
*/
|
||||
@@ -3009,34 +3009,51 @@ zfs_ioc_pool_set_props(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_pool_get_props(zfs_cmd_t *zc)
|
||||
{
|
||||
spa_t *spa;
|
||||
int error;
|
||||
nvlist_t *nvp = NULL;
|
||||
/*
|
||||
* innvl: {
|
||||
* "get_props_names": [ "prop1", "prop2", ..., "propN" ]
|
||||
* }
|
||||
*/
|
||||
|
||||
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
|
||||
static const zfs_ioc_key_t zfs_keys_get_props[] = {
|
||||
{ ZPOOL_GET_PROPS_NAMES, DATA_TYPE_STRING_ARRAY, ZK_OPTIONAL },
|
||||
};
|
||||
|
||||
static int
|
||||
zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
nvlist_t *nvp = outnvl;
|
||||
spa_t *spa;
|
||||
char **props = NULL;
|
||||
unsigned int n_props = 0;
|
||||
int error;
|
||||
|
||||
if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
|
||||
&props, &n_props) != 0) {
|
||||
props = NULL;
|
||||
}
|
||||
|
||||
if ((error = spa_open(pool, &spa, FTAG)) != 0) {
|
||||
/*
|
||||
* If the pool is faulted, there may be properties we can still
|
||||
* get (such as altroot and cachefile), so attempt to get them
|
||||
* anyway.
|
||||
*/
|
||||
mutex_enter(&spa_namespace_lock);
|
||||
if ((spa = spa_lookup(zc->zc_name)) != NULL)
|
||||
if ((spa = spa_lookup(pool)) != NULL) {
|
||||
error = spa_prop_get(spa, &nvp);
|
||||
if (error == 0 && props != NULL)
|
||||
error = spa_prop_get_nvlist(spa, props, n_props,
|
||||
&nvp);
|
||||
}
|
||||
mutex_exit(&spa_namespace_lock);
|
||||
} else {
|
||||
error = spa_prop_get(spa, &nvp);
|
||||
if (error == 0 && props != NULL)
|
||||
error = spa_prop_get_nvlist(spa, props, n_props, &nvp);
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
|
||||
if (error == 0 && zc->zc_nvlist_dst != 0)
|
||||
error = put_nvlist(zc, nvp);
|
||||
else
|
||||
error = SET_ERROR(EFAULT);
|
||||
|
||||
nvlist_free(nvp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@@ -4031,6 +4048,52 @@ zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
|
||||
return (spa_checkpoint_discard(poolname));
|
||||
}
|
||||
|
||||
/*
|
||||
* Loads specific types of data for the given pool
|
||||
*
|
||||
* innvl: {
|
||||
* "prefetch_type" -> int32_t
|
||||
* }
|
||||
*
|
||||
* outnvl: empty
|
||||
*/
|
||||
static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = {
|
||||
{ZPOOL_PREFETCH_TYPE, DATA_TYPE_INT32, 0},
|
||||
};
|
||||
|
||||
static int
|
||||
zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
{
|
||||
(void) outnvl;
|
||||
|
||||
int error;
|
||||
spa_t *spa;
|
||||
int32_t type;
|
||||
|
||||
/*
|
||||
* Currently, only ZPOOL_PREFETCH_DDT is supported
|
||||
*/
|
||||
if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0 ||
|
||||
type != ZPOOL_PREFETCH_DDT) {
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
error = spa_open(poolname, &spa, FTAG);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
hrtime_t start_time = gethrtime();
|
||||
|
||||
ddt_prefetch_all(spa);
|
||||
|
||||
zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms", spa->spa_name,
|
||||
(u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
|
||||
|
||||
spa_close(spa, FTAG);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of dataset to destroy
|
||||
@@ -7283,6 +7346,12 @@ zfs_ioctl_init(void)
|
||||
zfs_keys_pool_discard_checkpoint,
|
||||
ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
|
||||
|
||||
zfs_ioctl_register("zpool_prefetch",
|
||||
ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch,
|
||||
zfs_secpolicy_config, POOL_NAME,
|
||||
POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
|
||||
zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch));
|
||||
|
||||
zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
|
||||
zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
|
||||
@@ -7328,6 +7397,11 @@ zfs_ioctl_init(void)
|
||||
POOL_CHECK_NONE, B_TRUE, B_TRUE,
|
||||
zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
|
||||
|
||||
zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
|
||||
zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME,
|
||||
POOL_CHECK_NONE, B_FALSE, B_FALSE,
|
||||
zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
|
||||
|
||||
/* IOCTLS that use the legacy function signature */
|
||||
|
||||
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
|
||||
@@ -7383,8 +7457,6 @@ zfs_ioctl_init(void)
|
||||
|
||||
zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
|
||||
zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
|
||||
zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
|
||||
|
||||
zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
|
||||
zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
|
||||
|
||||
Reference in New Issue
Block a user