Add options to zfs redundant_metadata property

Currently, extra copies of metadata are created in addition to the
redundancy provided by the pool (mirror/raidz/draid). As a result, twice
as much space is used per inode, which decreases the total number of
inodes that can be created in the filesystem. Setting redundant_metadata
to none creates no additional copies of metadata, which reduces the space
consumed by the additional metadata copies and increases the total number
of inodes that can be created in the filesystem. Additionally, this can
improve file create performance due to the reduced amount of metadata
which needs to be written.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Dipak Ghosh <dipak.ghosh@hpe.com>
Signed-off-by: Akash B <akash-b@hpe.com>
Closes #13680
This commit is contained in:
Akash B
2022-10-20 05:37:51 +05:30
committed by GitHub
parent 2be0a124af
commit 5405be0365
9 changed files with 151 additions and 15 deletions
+4 -1
View File
@@ -25,6 +25,7 @@
* Copyright 2016, Joyent, Inc.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -369,6 +370,8 @@ zfs_prop_init(void)
/*
 * Name-to-index table for the redundant_metadata property: each entry
 * pairs a user-visible value name with its ZFS_REDUNDANT_METADATA_*
 * constant. The list ends with a { NULL } entry.
 */
static const zprop_index_t redundant_metadata_table[] = {
{ "all", ZFS_REDUNDANT_METADATA_ALL },
{ "most", ZFS_REDUNDANT_METADATA_MOST },
{ "some", ZFS_REDUNDANT_METADATA_SOME },
{ "none", ZFS_REDUNDANT_METADATA_NONE },
{ NULL }
};
@@ -388,7 +391,7 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
ZFS_REDUNDANT_METADATA_ALL,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"all | most", "REDUND_MD",
"all | most | some | none", "REDUND_MD",
redundant_metadata_table, sfeatures);
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+16 -5
View File
@@ -28,6 +28,7 @@
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/dmu.h>
@@ -1992,12 +1993,22 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
ZCHECKSUM_FLAG_EMBEDDED))
checksum = ZIO_CHECKSUM_FLETCHER_4;
if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
(os->os_redundant_metadata ==
ZFS_REDUNDANT_METADATA_MOST &&
(level >= zfs_redundant_metadata_most_ditto_level ||
DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
switch (os->os_redundant_metadata) {
case ZFS_REDUNDANT_METADATA_ALL:
copies++;
break;
case ZFS_REDUNDANT_METADATA_MOST:
if (level >= zfs_redundant_metadata_most_ditto_level ||
DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))
copies++;
break;
case ZFS_REDUNDANT_METADATA_SOME:
if (DMU_OT_IS_CRITICAL(type))
copies++;
break;
case ZFS_REDUNDANT_METADATA_NONE:
break;
}
} else if (wp & WP_NOFILL) {
ASSERT(level == 0);
+4 -1
View File
@@ -32,6 +32,7 @@
* Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
* Copyright (c) 2019, Klara Inc.
* Copyright (c) 2019, Allan Jude
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -287,7 +288,9 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
* Inheritance and range checking should have been done by now.
*/
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
newval == ZFS_REDUNDANT_METADATA_MOST);
newval == ZFS_REDUNDANT_METADATA_MOST ||
newval == ZFS_REDUNDANT_METADATA_SOME ||
newval == ZFS_REDUNDANT_METADATA_NONE);
os->os_redundant_metadata = newval;
}
+92
View File
@@ -23,6 +23,7 @@
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright 2019 Joyent, Inc.
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/
#include <sys/zfs_context.h>
@@ -41,6 +42,7 @@
#define ZPROP_INHERIT_SUFFIX "$inherit"
#define ZPROP_RECVD_SUFFIX "$recvd"
#define ZPROP_IUV_SUFFIX "$iuv"
static int
dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
@@ -69,6 +71,16 @@ dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
return (0);
}
/*
 * Report whether 'value' is a recognized setting of the index property
 * 'prop'.
 *
 * Returns -1 when 'prop' is not of type PROP_TYPE_INDEX. Otherwise returns
 * 1 when zfs_prop_index_to_string() returns 0 for the (prop, value) pair,
 * and 0 when it returns non-zero.
 */
static int
dsl_prop_known_index(zfs_prop_t prop, uint64_t value)
{
	const char *name = NULL;

	/* Only index-type properties have a value table to consult. */
	if (zfs_prop_get_type(prop) != PROP_TYPE_INDEX)
		return (-1);

	return (zfs_prop_index_to_string(prop, value, &name) == 0);
}
int
dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot)
@@ -81,6 +93,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
boolean_t inheriting = B_FALSE;
char *inheritstr;
char *recvdstr;
char *iuvstr;
ASSERT(dsl_pool_config_held(dd->dd_pool));
@@ -91,6 +104,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
/*
* Note: dd may become NULL, therefore we shouldn't dereference it
@@ -105,6 +119,18 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
inheriting = B_TRUE;
}
/* Check for an iuv value. */
err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
iuvstr, intsz, numints, buf);
if (dsl_prop_known_index(zfs_name_to_prop(propname),
*(uint64_t *)buf) != 1)
err = ENOENT;
if (err != ENOENT) {
if (setpoint != NULL && err == 0)
dsl_dir_name(dd, setpoint);
break;
}
/* Check for a local value. */
err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj,
propname, intsz, numints, buf);
@@ -155,6 +181,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
kmem_strfree(inheritstr);
kmem_strfree(recvdstr);
kmem_strfree(iuvstr);
return (err);
}
@@ -647,6 +674,45 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_rele(dd, FTAG);
}
/*
 * For newer values in zfs index type properties, we add a new key
 * propname$iuv (iuv = Ignore Unknown Values) to the properties zap object
 * to store the new property value, and store the default value in the
 * existing prop key. That way the propname$iuv key is ignored by older zfs
 * versions, which use the default property value from the existing prop
 * key instead.
 *
 * If 'value' is not one of the newer values handled in the switch below,
 * the propname$iuv key is removed and the existing prop key is left
 * untouched.
 *
 * mos, zapobj		objset and properties zap object to update
 * propname		property name; the $iuv key name is derived from it
 * intsz, numints	integer size and count, passed through to zap_update()
 * value		new property value; read as a uint64_t only for the
 *			properties listed in the switch
 * tx			transaction passed through to the zap calls
 */
static void
dsl_prop_set_iuv(objset_t *mos, uint64_t zapobj, const char *propname,
    int intsz, int numints, const void *value, dmu_tx_t *tx)
{
	char *iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
	boolean_t iuv = B_FALSE;
	zfs_prop_t prop = zfs_name_to_prop(propname);

	switch (prop) {
	case ZFS_PROP_REDUNDANT_METADATA:
		/* 'value' is const void *; keep the qualifier when reading. */
		if (*(const uint64_t *)value == ZFS_REDUNDANT_METADATA_SOME ||
		    *(const uint64_t *)value == ZFS_REDUNDANT_METADATA_NONE)
			iuv = B_TRUE;
		break;
	default:
		break;
	}

	if (iuv) {
		/* Record the real value under propname$iuv ... */
		VERIFY0(zap_update(mos, zapobj, iuvstr, intsz, numints,
		    value, tx));
		/* ... and overwrite the plain key with the default value. */
		uint64_t val = zfs_prop_default_numeric(prop);
		VERIFY0(zap_update(mos, zapobj, propname, intsz, numints,
		    &val, tx));
	} else {
		/*
		 * Drop any propname$iuv entry left from an earlier setting.
		 * The result is deliberately ignored; elsewhere in this file
		 * the same removal tolerates ENOENT.
		 */
		(void) zap_remove(mos, zapobj, iuvstr, tx);
	}
	kmem_strfree(iuvstr);
}
void
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
@@ -659,6 +725,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
char *iuvstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
@@ -692,6 +759,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
iuvstr = kmem_asprintf("%s%s", propname, ZPROP_IUV_SUFFIX);
switch ((int)source) {
case ZPROP_SRC_NONE:
@@ -709,11 +777,14 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
/*
* remove propname$inherit
* set propname -> value
* set propname$iuv -> new property value
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
VERIFY0(zap_update(mos, zapobj, propname,
intsz, numints, value, tx));
(void) dsl_prop_set_iuv(mos, zapobj, propname, intsz,
numints, value, tx);
break;
case ZPROP_SRC_INHERITED:
/*
@@ -723,6 +794,8 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
*/
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
err = zap_remove(mos, zapobj, iuvstr, tx);
ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
@@ -763,6 +836,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
kmem_strfree(inheritstr);
kmem_strfree(recvdstr);
kmem_strfree(iuvstr);
/*
* If we are left with an empty snap zap we can destroy it.
@@ -1012,6 +1086,14 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
propname = za.za_name;
source = setpoint;
/* Skip if iuv entries are present. */
valstr = kmem_asprintf("%s%s", propname,
ZPROP_IUV_SUFFIX);
err = zap_contains(mos, propobj, valstr);
kmem_strfree(valstr);
if (err == 0)
continue;
} else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) {
/* Skip explicitly inherited entries. */
continue;
@@ -1044,6 +1126,16 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
source = ((flags & DSL_PROP_GET_INHERITING) ?
setpoint : ZPROP_SOURCE_VAL_RECVD);
} else if (strcmp(suffix, ZPROP_IUV_SUFFIX) == 0) {
(void) strlcpy(buf, za.za_name,
MIN(sizeof (buf), suffix - za.za_name + 1));
propname = buf;
source = setpoint;
prop = zfs_name_to_prop(propname);
if (dsl_prop_known_index(prop,
za.za_first_integer) != 1)
continue;
} else {
/*
* For backward compatibility, skip suffixes we don't