Implementation of block cloning for ZFS

Block Cloning allows to manually clone a file (or a subset of its
blocks) into another (or the same) file by just creating additional
references to the data blocks without copying the data itself.
Those references are kept in the Block Reference Tables (BRTs).

The whole design of block cloning is documented in module/zfs/brt.c.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Christian Schwarz <christian.schwarz@nutanix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Rich Ercolani <rincebrain@gmail.com>
Signed-off-by: Pawel Jakub Dawidek <pawel@dawidek.net>
Closes #13392
This commit is contained in:
Pawel Jakub Dawidek
2023-03-10 20:59:53 +01:00
committed by GitHub
parent da19d919a8
commit 67a1b03791
51 changed files with 3480 additions and 120 deletions
+39
View File
@@ -52,6 +52,7 @@
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/brt.h>
#include <sys/ddt.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_removal.h>
@@ -341,6 +342,12 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
ddt_get_pool_dedup_ratio(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONEUSED, NULL,
brt_get_used(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONESAVED, NULL,
brt_get_saved(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_BCLONERATIO, NULL,
brt_get_ratio(spa), src);
spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
rvd->vdev_state, src);
@@ -1707,6 +1714,7 @@ spa_unload(spa_t *spa)
}
ddt_unload(spa);
brt_unload(spa);
spa_unload_log_sm_metadata(spa);
/*
@@ -4414,6 +4422,21 @@ spa_ld_load_dedup_tables(spa_t *spa)
return (0);
}
static int
spa_ld_load_brt(spa_t *spa)
{
int error = 0;
vdev_t *rvd = spa->spa_root_vdev;
error = brt_load(spa);
if (error != 0) {
spa_load_failed(spa, "brt_load failed [error=%d]", error);
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
}
return (0);
}
static int
spa_ld_verify_logs(spa_t *spa, spa_import_type_t type, const char **ereport)
{
@@ -4895,6 +4918,10 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, const char **ereport)
if (error != 0)
return (error);
error = spa_ld_load_brt(spa);
if (error != 0)
return (error);
/*
* Verify the logs now to make sure we don't have any unexpected errors
* when we claim log blocks later.
@@ -5963,6 +5990,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
* Create DDTs (dedup tables).
*/
ddt_create(spa);
/*
* Create BRT table and BRT table object.
*/
brt_create(spa);
spa_update_dspace(spa);
@@ -9138,6 +9169,7 @@ spa_sync_iterate_to_convergence(spa_t *spa, dmu_tx_t *tx)
&spa->spa_deferred_bpobj, tx);
}
brt_sync(spa, txg);
ddt_sync(spa, txg);
dsl_scan_sync(dp, tx);
svr_sync(spa, tx);
@@ -9262,6 +9294,13 @@ spa_sync(spa_t *spa, uint64_t txg)
spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
ZIO_FLAG_CANFAIL);
/*
* Now that there can be no more cloning in this transaction group,
* but we are still before issuing frees, we can process pending BRT
* updates.
*/
brt_pending_apply(spa, txg);
/*
* Lock out configuration changes.
*/