mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Illumos 3835 zfs need not store 2 copies of all metadata
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

Description from Matt Ahrens's bug report at Delphix:

Add a new zfs property, "redundant_metadata", which can have the values "all" or "most". The default will be "all", which is the current behavior. Setting it to "most" will cause us to store only 1 copy of level-1 indirect blocks of user data files.

Additional notes:

The new man page section for this property states: "The exact behavior of which metadata blocks are stored redundantly may change in future releases." and: "When set to most, ZFS stores an extra copy of most types of metadata. This can improve performance of random writes, because less metadata must be written."

The current implementation is as described above in Matt's blog. It is controlled by a new global integer, "zfs_redundant_metadata_most_ditto_level", currently initialized to 2. When "redundant_metadata" is set to "most", only indirect blocks of the specified level and higher will have additional ditto blocks created.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2542
This commit is contained in:
committed by
Brian Behlendorf
parent
603cb25ca5
commit
faf0f58c69
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
|
||||
@@ -210,7 +210,18 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/*
 * Index table for the "redundant_metadata" property. It pairs the
 * user-visible strings "all" and "most" with the corresponding
 * ZFS_REDUNDANT_METADATA_* values and is handed to zprop_register_index()
 * when the property is registered. The trailing { NULL } entry mirrors the
 * preceding table; presumably it is the list terminator -- confirm against
 * zprop_register_index().
 */
static zprop_index_t redundant_metadata_table[] = {
|
||||
{ "all", ZFS_REDUNDANT_METADATA_ALL },
|
||||
{ "most", ZFS_REDUNDANT_METADATA_MOST },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
/* inherit index properties */
|
||||
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
|
||||
ZFS_REDUNDANT_METADATA_ALL,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"all | most", "REDUND_MD",
|
||||
redundant_metadata_table);
|
||||
zprop_register_index(ZFS_PROP_SYNC, "sync", ZFS_SYNC_STANDARD,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"standard | always | disabled", "SYNC",
|
||||
|
||||
+15
-2
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
|
||||
@@ -1688,6 +1688,12 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
|
||||
int zfs_mdcomp_disable = 0;
|
||||
|
||||
/*
|
||||
* When the "redundant_metadata" property is set to "most", only indirect
|
||||
* blocks of this level and higher will have an additional ditto block.
|
||||
*/
|
||||
int zfs_redundant_metadata_most_ditto_level = 2;
|
||||
|
||||
void
|
||||
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
{
|
||||
@@ -1727,6 +1733,13 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
if (zio_checksum_table[checksum].ci_correctable < 1 ||
|
||||
zio_checksum_table[checksum].ci_eck)
|
||||
checksum = ZIO_CHECKSUM_FLETCHER_4;
|
||||
|
||||
if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
|
||||
(os->os_redundant_metadata ==
|
||||
ZFS_REDUNDANT_METADATA_MOST &&
|
||||
(level >= zfs_redundant_metadata_most_ditto_level ||
|
||||
DMU_OT_IS_METADATA(type) || (wp & WP_SPILL))))
|
||||
copies++;
|
||||
} else if (wp & WP_NOFILL) {
|
||||
ASSERT(level == 0);
|
||||
|
||||
@@ -1774,7 +1787,7 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
zp->zp_compress = compress;
|
||||
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
|
||||
zp->zp_level = level;
|
||||
zp->zp_copies = MIN(copies + ismd, spa_max_replication(os->os_spa));
|
||||
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
|
||||
zp->zp_dedup = dedup;
|
||||
zp->zp_dedup_verify = dedup && dedup_verify;
|
||||
zp->zp_nopwrite = nopwrite;
|
||||
|
||||
+29
-6
@@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
*/
|
||||
|
||||
@@ -114,13 +114,13 @@ dmu_objset_id(objset_t *os)
|
||||
return (ds ? ds->ds_object : 0);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_sync_type_t
|
||||
dmu_objset_syncprop(objset_t *os)
|
||||
{
|
||||
return (os->os_sync);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_logbias_op_t
|
||||
dmu_objset_logbias(objset_t *os)
|
||||
{
|
||||
return (os->os_logbias);
|
||||
@@ -228,6 +228,20 @@ sync_changed_cb(void *arg, uint64_t newval)
|
||||
zil_set_sync(os->os_zil, newval);
|
||||
}
|
||||
|
||||
/*
 * Property-change callback for "redundant_metadata": stores the new setting
 * in the objset's os_redundant_metadata field. It is registered with
 * dsl_prop_register() in dmu_objset_open_impl() and removed with
 * dsl_prop_unregister() in dmu_objset_evict().
 *
 * arg    - the objset_t to update (passed as void * by the callback
 *          interface)
 * newval - the new property value; asserted to be either
 *          ZFS_REDUNDANT_METADATA_ALL or ZFS_REDUNDANT_METADATA_MOST
 */
static void
|
||||
redundant_metadata_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
objset_t *os = arg;
|
||||
|
||||
/*
|
||||
* Inheritance and range checking should have been done by now.
|
||||
*/
|
||||
ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
|
||||
newval == ZFS_REDUNDANT_METADATA_MOST);
|
||||
|
||||
os->os_redundant_metadata = newval;
|
||||
}
|
||||
|
||||
static void
|
||||
logbias_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
@@ -363,6 +377,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
zfs_prop_to_name(ZFS_PROP_SYNC),
|
||||
sync_changed_cb, os);
|
||||
}
|
||||
if (err == 0) {
|
||||
err = dsl_prop_register(ds,
|
||||
zfs_prop_to_name(
|
||||
ZFS_PROP_REDUNDANT_METADATA),
|
||||
redundant_metadata_changed_cb, os);
|
||||
}
|
||||
}
|
||||
if (err != 0) {
|
||||
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
|
||||
@@ -376,9 +396,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
os->os_compress = ZIO_COMPRESS_LZJB;
|
||||
os->os_copies = spa_max_replication(spa);
|
||||
os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
|
||||
os->os_dedup_verify = 0;
|
||||
os->os_logbias = 0;
|
||||
os->os_sync = 0;
|
||||
os->os_dedup_verify = B_FALSE;
|
||||
os->os_logbias = ZFS_LOGBIAS_LATENCY;
|
||||
os->os_sync = ZFS_SYNC_STANDARD;
|
||||
os->os_primary_cache = ZFS_CACHE_ALL;
|
||||
os->os_secondary_cache = ZFS_CACHE_ALL;
|
||||
}
|
||||
@@ -623,6 +643,9 @@ dmu_objset_evict(objset_t *os)
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_SYNC),
|
||||
sync_changed_cb, os));
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
|
||||
redundant_metadata_changed_cb, os));
|
||||
}
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
|
||||
|
||||
Reference in New Issue
Block a user