Stack overflow when destroying deeply nested clones

Destroy operations on deeply nested chains of clones can overflow
the stack:

        Depth    Size   Location    (221 entries)
        -----    ----   --------
  0)    15664      48   mutex_lock+0x5/0x30
  1)    15616       8   mutex_lock+0x5/0x30
...
 26)    13576      72   dsl_dataset_remove_clones_key.isra.4+0x124/0x1e0 [zfs]
 27)    13504      72   dsl_dataset_remove_clones_key.isra.4+0x18a/0x1e0 [zfs]
 28)    13432      72   dsl_dataset_remove_clones_key.isra.4+0x18a/0x1e0 [zfs]
...
185)     2128      72   dsl_dataset_remove_clones_key.isra.4+0x18a/0x1e0 [zfs]
186)     2056      72   dsl_dataset_remove_clones_key.isra.4+0x18a/0x1e0 [zfs]
187)     1984      72   dsl_dataset_remove_clones_key.isra.4+0x18a/0x1e0 [zfs]
188)     1912     136   dsl_destroy_snapshot_sync_impl+0x4e0/0x1090 [zfs]
189)     1776      16   dsl_destroy_snapshot_check+0x0/0x90 [zfs]
...
218)      304     128   kthread+0xdf/0x100
219)      176      48   ret_from_fork+0x22/0x40
220)      128     128   kthread+0x0/0x100

Fix this issue by converting dsl_dataset_remove_clones_key() from
recursive to iterative.

Reviewed-by: Paul Zuchowski <pzuchowski@datto.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #7279 
Closes #7810
This commit is contained in:
LOLi
2018-08-22 20:03:31 +02:00
committed by Brian Behlendorf
parent e8a8208eef
commit c434d8806c
4 changed files with 124 additions and 30 deletions
+52 -28
View File
@@ -181,46 +181,70 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}
/*
 * Entry in the work list that dsl_dataset_remove_clones_key() builds while
 * walking clones iteratively.
 */
struct removeclonesnode {
list_node_t link;	/* list linkage; its offset is passed to list_create() */
dsl_dataset_t *ds;	/* dataset whose dd_clones ZAP is scanned for this entry */
};
/*
 * Remove the deadlist key `mintxg` from the deadlist (and, when present, the
 * remap deadlist) of every clone reachable from `ds` whose origin txg is
 * greater than `mintxg`.
 *
 * The clone hierarchy is walked with an explicit work list instead of
 * recursion so that deeply nested chains of clones cannot overflow the
 * stack.  Each qualifying clone is appended to the tail of the list and is
 * scanned for its own clones when the outer loop reaches it.
 *
 * ds     - dataset whose clones are examined; the caller's hold on it is
 *          left untouched.
 * mintxg - key to remove from the clones' deadlists.
 * tx     - transaction passed through to dsl_deadlist_remove_key().
 */
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	list_t clones;
	struct removeclonesnode *rcn;

	list_create(&clones, sizeof (struct removeclonesnode),
	    offsetof(struct removeclonesnode, link));

	/* Seed the work list with the caller's dataset. */
	rcn = kmem_zalloc(sizeof (struct removeclonesnode), KM_SLEEP);
	rcn->ds = ds;
	list_insert_head(&clones, rcn);

	for (; rcn != NULL; rcn = list_next(&clones, rcn)) {
		zap_cursor_t zc;
		zap_attribute_t za;

		/*
		 * If it is the old version, dd_clones doesn't exist so we can't
		 * find the clones, but dsl_deadlist_remove_key() is a no-op so
		 * it doesn't matter.
		 */
		if (dsl_dir_phys(rcn->ds->ds_dir)->dd_clones == 0)
			continue;

		for (zap_cursor_init(&zc, mos,
		    dsl_dir_phys(rcn->ds->ds_dir)->dd_clones);
		    zap_cursor_retrieve(&zc, &za) == 0;
		    zap_cursor_advance(&zc)) {
			dsl_dataset_t *clone;

			VERIFY0(dsl_dataset_hold_obj(rcn->ds->ds_dir->dd_pool,
			    za.za_first_integer, FTAG, &clone));

			if (clone->ds_dir->dd_origin_txg > mintxg) {
				struct removeclonesnode *child;

				dsl_deadlist_remove_key(&clone->ds_deadlist,
				    mintxg, tx);
				if (dsl_dataset_remap_deadlist_exists(clone)) {
					dsl_deadlist_remove_key(
					    &clone->ds_remap_deadlist, mintxg,
					    tx);
				}

				/*
				 * Queue the clone so its own clones are
				 * visited later.  A separate pointer is used
				 * on purpose: overwriting rcn here would move
				 * the outer loop's cursor to the list tail
				 * and end the walk before any queued clone
				 * is scanned.  The hold on the clone is kept
				 * until the list is drained below.
				 */
				child = kmem_zalloc(
				    sizeof (struct removeclonesnode), KM_SLEEP);
				child->ds = clone;
				list_insert_tail(&clones, child);
			} else {
				/* Not queued, so drop the hold right away. */
				dsl_dataset_rele(clone, FTAG);
			}
		}
		zap_cursor_fini(&zc);
	}

	/*
	 * The head entry wraps the caller's dataset, which was not held
	 * here: free the node but do not release the dataset.
	 */
	rcn = list_remove_head(&clones);
	kmem_free(rcn, sizeof (struct removeclonesnode));

	/* Every remaining entry carries a hold taken in the loop above. */
	while ((rcn = list_remove_head(&clones)) != NULL) {
		dsl_dataset_rele(rcn->ds, FTAG);
		kmem_free(rcn, sizeof (struct removeclonesnode));
	}
	list_destroy(&clones);
}
static void