mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
port async unlinked drain from illumos-nexenta
This patch is an async implementation of the existing sync zfs_unlinked_drain() function. This function is called at mount time and is responsible for freeing znodes that were unlinked but had not yet been freed, for example because of a crash or a forced unmount. Until now, mounting of the dataset was held up until the unlinked set had been fully drained, which is not necessary. Since the unlinked set can be processed asynchronously, this results in a better user experience when mounting a dataset with entries in the unlinked set. Reviewed by: Jorgen Lundman <lundman@lundman.net> Reviewed by: Tom Caputi <tcaputi@datto.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Matt Ahrens <mahrens@delphix.com> Reviewed by: Paul Dagnelie <pcd@delphix.com> Signed-off-by: Alek Pinchuk <apinchuk@datto.com> Closes #8142
This commit is contained in:
parent
425d3237ee
commit
dcec0a12c8
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018 by Delphix. All rights reserved.
|
* Copyright (c) 2018 by Delphix. All rights reserved.
|
||||||
|
* Copyright (c) 2018 Datto Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _SYS_DATASET_KSTATS_H
|
#ifndef _SYS_DATASET_KSTATS_H
|
||||||
@ -35,6 +36,8 @@ typedef struct dataset_aggsum_stats_t {
|
|||||||
aggsum_t das_nwritten;
|
aggsum_t das_nwritten;
|
||||||
aggsum_t das_reads;
|
aggsum_t das_reads;
|
||||||
aggsum_t das_nread;
|
aggsum_t das_nread;
|
||||||
|
aggsum_t das_nunlinks;
|
||||||
|
aggsum_t das_nunlinked;
|
||||||
} dataset_aggsum_stats_t;
|
} dataset_aggsum_stats_t;
|
||||||
|
|
||||||
typedef struct dataset_kstat_values {
|
typedef struct dataset_kstat_values {
|
||||||
@ -43,6 +46,16 @@ typedef struct dataset_kstat_values {
|
|||||||
kstat_named_t dkv_nwritten;
|
kstat_named_t dkv_nwritten;
|
||||||
kstat_named_t dkv_reads;
|
kstat_named_t dkv_reads;
|
||||||
kstat_named_t dkv_nread;
|
kstat_named_t dkv_nread;
|
||||||
|
/*
|
||||||
|
* nunlinks is initialized to the unlinked set size on mount and
|
||||||
|
* is incremented whenever a new entry is added to the unlinked set
|
||||||
|
*/
|
||||||
|
kstat_named_t dkv_nunlinks;
|
||||||
|
/*
|
||||||
|
* nunlinked is initialized to zero on mount and is incremented when an
|
||||||
|
* entry is removed from the unlinked set
|
||||||
|
*/
|
||||||
|
kstat_named_t dkv_nunlinked;
|
||||||
} dataset_kstat_values_t;
|
} dataset_kstat_values_t;
|
||||||
|
|
||||||
typedef struct dataset_kstats {
|
typedef struct dataset_kstats {
|
||||||
@ -56,4 +69,7 @@ void dataset_kstats_destroy(dataset_kstats_t *);
|
|||||||
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
|
void dataset_kstats_update_write_kstats(dataset_kstats_t *, int64_t);
|
||||||
void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
|
void dataset_kstats_update_read_kstats(dataset_kstats_t *, int64_t);
|
||||||
|
|
||||||
|
void dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *, int64_t);
|
||||||
|
void dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *, int64_t);
|
||||||
|
|
||||||
#endif /* _SYS_DATASET_KSTATS_H */
|
#endif /* _SYS_DATASET_KSTATS_H */
|
||||||
|
@ -96,6 +96,7 @@ typedef struct dsl_pool {
|
|||||||
struct dsl_dataset *dp_origin_snap;
|
struct dsl_dataset *dp_origin_snap;
|
||||||
uint64_t dp_root_dir_obj;
|
uint64_t dp_root_dir_obj;
|
||||||
struct taskq *dp_iput_taskq;
|
struct taskq *dp_iput_taskq;
|
||||||
|
struct taskq *dp_unlinked_drain_taskq;
|
||||||
|
|
||||||
/* No lock needed - sync context only */
|
/* No lock needed - sync context only */
|
||||||
blkptr_t dp_meta_rootbp;
|
blkptr_t dp_meta_rootbp;
|
||||||
@ -176,6 +177,7 @@ boolean_t dsl_pool_config_held(dsl_pool_t *dp);
|
|||||||
boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
|
boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp);
|
||||||
|
|
||||||
taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
|
taskq_t *dsl_pool_iput_taskq(dsl_pool_t *dp);
|
||||||
|
taskq_t *dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp);
|
||||||
|
|
||||||
int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
|
int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
|
||||||
const char *tag, uint64_t now, dmu_tx_t *tx);
|
const char *tag, uint64_t now, dmu_tx_t *tx);
|
||||||
|
@ -64,6 +64,7 @@ extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
|
|||||||
extern boolean_t zfs_dirempty(znode_t *);
|
extern boolean_t zfs_dirempty(znode_t *);
|
||||||
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
|
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
|
||||||
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
|
extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
|
||||||
|
extern void zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs);
|
||||||
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
|
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
|
||||||
extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
|
extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
|
||||||
extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
|
extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
|
||||||
|
@ -117,6 +117,8 @@ struct zfsvfs {
|
|||||||
boolean_t z_replay; /* set during ZIL replay */
|
boolean_t z_replay; /* set during ZIL replay */
|
||||||
boolean_t z_use_sa; /* version allow system attributes */
|
boolean_t z_use_sa; /* version allow system attributes */
|
||||||
boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */
|
boolean_t z_xattr_sa; /* allow xattrs to be stores as SA */
|
||||||
|
boolean_t z_draining; /* is true when drain is active */
|
||||||
|
boolean_t z_drain_cancel; /* signal the unlinked drain to stop */
|
||||||
uint64_t z_version; /* ZPL version */
|
uint64_t z_version; /* ZPL version */
|
||||||
uint64_t z_shares_dir; /* hidden shares dir */
|
uint64_t z_shares_dir; /* hidden shares dir */
|
||||||
dataset_kstats_t z_kstat; /* fs kstats */
|
dataset_kstats_t z_kstat; /* fs kstats */
|
||||||
@ -132,6 +134,7 @@ struct zfsvfs {
|
|||||||
uint64_t z_hold_size; /* znode hold array size */
|
uint64_t z_hold_size; /* znode hold array size */
|
||||||
avl_tree_t *z_hold_trees; /* znode hold trees */
|
avl_tree_t *z_hold_trees; /* znode hold trees */
|
||||||
kmutex_t *z_hold_locks; /* znode hold locks */
|
kmutex_t *z_hold_locks; /* znode hold locks */
|
||||||
|
taskqid_t z_drain_task; /* task id for the unlink drain task */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ZSB_XATTR 0x0001 /* Enable user xattrs */
|
#define ZSB_XATTR 0x0001 /* Enable user xattrs */
|
||||||
|
@ -1149,6 +1149,21 @@ Rate limit delay zevents (which report slow I/Os) to this many per second.
|
|||||||
Default value: 20
|
Default value: 20
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
.sp
|
||||||
|
.ne 2
|
||||||
|
.na
|
||||||
|
\fBzfs_unlink_suspend_progress\fR (uint)
|
||||||
|
.ad
|
||||||
|
.RS 12n
|
||||||
|
When enabled, files will not be asynchronously removed from the list of pending
|
||||||
|
unlinks and the space they consume will be leaked. Once this option has been
|
||||||
|
disabled and the dataset is remounted, the pending unlinks will be processed
|
||||||
|
and the freed space returned to the pool.
|
||||||
|
This option is used by the test suite to facilitate testing.
|
||||||
|
.sp
|
||||||
|
Uses \fB0\fR (default) to allow progress and \fB1\fR to pause progress.
|
||||||
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018 by Delphix. All rights reserved.
|
* Copyright (c) 2018 by Delphix. All rights reserved.
|
||||||
|
* Copyright (c) 2018 Datto Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <sys/dataset_kstats.h>
|
#include <sys/dataset_kstats.h>
|
||||||
@ -34,6 +35,8 @@ static dataset_kstat_values_t empty_dataset_kstats = {
|
|||||||
{ "nwritten", KSTAT_DATA_UINT64 },
|
{ "nwritten", KSTAT_DATA_UINT64 },
|
||||||
{ "reads", KSTAT_DATA_UINT64 },
|
{ "reads", KSTAT_DATA_UINT64 },
|
||||||
{ "nread", KSTAT_DATA_UINT64 },
|
{ "nread", KSTAT_DATA_UINT64 },
|
||||||
|
{ "nunlinks", KSTAT_DATA_UINT64 },
|
||||||
|
{ "nunlinked", KSTAT_DATA_UINT64 },
|
||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -54,6 +57,10 @@ dataset_kstats_update(kstat_t *ksp, int rw)
|
|||||||
aggsum_value(&dk->dk_aggsums.das_reads);
|
aggsum_value(&dk->dk_aggsums.das_reads);
|
||||||
dkv->dkv_nread.value.ui64 =
|
dkv->dkv_nread.value.ui64 =
|
||||||
aggsum_value(&dk->dk_aggsums.das_nread);
|
aggsum_value(&dk->dk_aggsums.das_nread);
|
||||||
|
dkv->dkv_nunlinks.value.ui64 =
|
||||||
|
aggsum_value(&dk->dk_aggsums.das_nunlinks);
|
||||||
|
dkv->dkv_nunlinked.value.ui64 =
|
||||||
|
aggsum_value(&dk->dk_aggsums.das_nunlinked);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
@ -136,6 +143,8 @@ dataset_kstats_create(dataset_kstats_t *dk, objset_t *objset)
|
|||||||
aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
|
aggsum_init(&dk->dk_aggsums.das_nwritten, 0);
|
||||||
aggsum_init(&dk->dk_aggsums.das_reads, 0);
|
aggsum_init(&dk->dk_aggsums.das_reads, 0);
|
||||||
aggsum_init(&dk->dk_aggsums.das_nread, 0);
|
aggsum_init(&dk->dk_aggsums.das_nread, 0);
|
||||||
|
aggsum_init(&dk->dk_aggsums.das_nunlinks, 0);
|
||||||
|
aggsum_init(&dk->dk_aggsums.das_nunlinked, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -156,6 +165,8 @@ dataset_kstats_destroy(dataset_kstats_t *dk)
|
|||||||
aggsum_fini(&dk->dk_aggsums.das_nwritten);
|
aggsum_fini(&dk->dk_aggsums.das_nwritten);
|
||||||
aggsum_fini(&dk->dk_aggsums.das_reads);
|
aggsum_fini(&dk->dk_aggsums.das_reads);
|
||||||
aggsum_fini(&dk->dk_aggsums.das_nread);
|
aggsum_fini(&dk->dk_aggsums.das_nread);
|
||||||
|
aggsum_fini(&dk->dk_aggsums.das_nunlinks);
|
||||||
|
aggsum_fini(&dk->dk_aggsums.das_nunlinked);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -183,3 +194,21 @@ dataset_kstats_update_read_kstats(dataset_kstats_t *dk,
|
|||||||
aggsum_add(&dk->dk_aggsums.das_reads, 1);
|
aggsum_add(&dk->dk_aggsums.das_reads, 1);
|
||||||
aggsum_add(&dk->dk_aggsums.das_nread, nread);
|
aggsum_add(&dk->dk_aggsums.das_nread, nread);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dataset_kstats_update_nunlinks_kstat(dataset_kstats_t *dk, int64_t delta)
|
||||||
|
{
|
||||||
|
if (dk->dk_kstats == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
aggsum_add(&dk->dk_aggsums.das_nunlinks, delta);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dataset_kstats_update_nunlinked_kstat(dataset_kstats_t *dk, int64_t delta)
|
||||||
|
{
|
||||||
|
if (dk->dk_kstats == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
aggsum_add(&dk->dk_aggsums.das_nunlinked, delta);
|
||||||
|
}
|
||||||
|
@ -223,6 +223,9 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
|||||||
|
|
||||||
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
|
dp->dp_iput_taskq = taskq_create("z_iput", max_ncpus, defclsyspri,
|
||||||
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
max_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||||
|
dp->dp_unlinked_drain_taskq = taskq_create("z_unlinked_drain",
|
||||||
|
max_ncpus, defclsyspri, max_ncpus, INT_MAX,
|
||||||
|
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||||
|
|
||||||
return (dp);
|
return (dp);
|
||||||
}
|
}
|
||||||
@ -413,6 +416,7 @@ dsl_pool_close(dsl_pool_t *dp)
|
|||||||
rrw_destroy(&dp->dp_config_rwlock);
|
rrw_destroy(&dp->dp_config_rwlock);
|
||||||
mutex_destroy(&dp->dp_lock);
|
mutex_destroy(&dp->dp_lock);
|
||||||
cv_destroy(&dp->dp_spaceavail_cv);
|
cv_destroy(&dp->dp_spaceavail_cv);
|
||||||
|
taskq_destroy(dp->dp_unlinked_drain_taskq);
|
||||||
taskq_destroy(dp->dp_iput_taskq);
|
taskq_destroy(dp->dp_iput_taskq);
|
||||||
if (dp->dp_blkstats != NULL) {
|
if (dp->dp_blkstats != NULL) {
|
||||||
mutex_destroy(&dp->dp_blkstats->zab_lock);
|
mutex_destroy(&dp->dp_blkstats->zab_lock);
|
||||||
@ -1097,6 +1101,12 @@ dsl_pool_iput_taskq(dsl_pool_t *dp)
|
|||||||
return (dp->dp_iput_taskq);
|
return (dp->dp_iput_taskq);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
taskq_t *
|
||||||
|
dsl_pool_unlinked_drain_taskq(dsl_pool_t *dp)
|
||||||
|
{
|
||||||
|
return (dp->dp_unlinked_drain_taskq);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Walk through the pool-wide zap object of temporary snapshot user holds
|
* Walk through the pool-wide zap object of temporary snapshot user holds
|
||||||
* and release them.
|
* and release them.
|
||||||
|
@ -458,26 +458,31 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
|
|||||||
|
|
||||||
VERIFY3U(0, ==,
|
VERIFY3U(0, ==,
|
||||||
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
|
zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
|
||||||
|
|
||||||
|
dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clean up any znodes that had no links when we either crashed or
|
* Clean up any znodes that had no links when we either crashed or
|
||||||
* (force) umounted the file system.
|
* (force) umounted the file system.
|
||||||
*/
|
*/
|
||||||
void
|
static void
|
||||||
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
|
zfs_unlinked_drain_task(void *arg)
|
||||||
{
|
{
|
||||||
|
zfsvfs_t *zfsvfs = arg;
|
||||||
zap_cursor_t zc;
|
zap_cursor_t zc;
|
||||||
zap_attribute_t zap;
|
zap_attribute_t zap;
|
||||||
dmu_object_info_t doi;
|
dmu_object_info_t doi;
|
||||||
znode_t *zp;
|
znode_t *zp;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Iterate over the contents of the unlinked set.
|
* Iterate over the contents of the unlinked set.
|
||||||
*/
|
*/
|
||||||
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
|
for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
|
||||||
zap_cursor_retrieve(&zc, &zap) == 0;
|
zap_cursor_retrieve(&zc, &zap) == 0 && !zfsvfs->z_drain_cancel;
|
||||||
zap_cursor_advance(&zc)) {
|
zap_cursor_advance(&zc)) {
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -507,9 +512,61 @@ zfs_unlinked_drain(zfsvfs_t *zfsvfs)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
zp->z_unlinked = B_TRUE;
|
zp->z_unlinked = B_TRUE;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* iput() is Linux's equivalent to illumos' VN_RELE(). It will
|
||||||
|
* decrement the inode's ref count and may cause the inode to be
|
||||||
|
* synchronously freed. We interrupt freeing of this inode, by
|
||||||
|
* checking the return value of dmu_objset_zfs_unmounting() in
|
||||||
|
* dmu_free_long_range(), when an unmount is requested.
|
||||||
|
*/
|
||||||
iput(ZTOI(zp));
|
iput(ZTOI(zp));
|
||||||
|
ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
|
||||||
}
|
}
|
||||||
zap_cursor_fini(&zc);
|
zap_cursor_fini(&zc);
|
||||||
|
|
||||||
|
zfsvfs->z_draining = B_FALSE;
|
||||||
|
zfsvfs->z_drain_task = TASKQID_INVALID;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sets z_draining then tries to dispatch async unlinked drain.
|
||||||
|
* If that fails executes synchronous unlinked drain.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
zfs_unlinked_drain(zfsvfs_t *zfsvfs)
|
||||||
|
{
|
||||||
|
ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
|
||||||
|
ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
|
||||||
|
|
||||||
|
zfsvfs->z_draining = B_TRUE;
|
||||||
|
zfsvfs->z_drain_cancel = B_FALSE;
|
||||||
|
|
||||||
|
zfsvfs->z_drain_task = taskq_dispatch(
|
||||||
|
dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
|
||||||
|
zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
|
||||||
|
if (zfsvfs->z_drain_task == TASKQID_INVALID) {
|
||||||
|
zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
|
||||||
|
zfs_unlinked_drain_task(zfsvfs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Wait for the unlinked drain taskq task to stop. This will interrupt the
|
||||||
|
* unlinked set processing if it is in progress.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
|
||||||
|
{
|
||||||
|
ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
|
||||||
|
|
||||||
|
if (zfsvfs->z_draining) {
|
||||||
|
zfsvfs->z_drain_cancel = B_TRUE;
|
||||||
|
taskq_cancel_id(dsl_pool_unlinked_drain_taskq(
|
||||||
|
dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task);
|
||||||
|
zfsvfs->z_drain_task = TASKQID_INVALID;
|
||||||
|
zfsvfs->z_draining = B_FALSE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -684,6 +741,8 @@ zfs_rmnode(znode_t *zp)
|
|||||||
VERIFY3U(0, ==,
|
VERIFY3U(0, ==,
|
||||||
zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
|
zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
|
||||||
|
|
||||||
|
dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
|
||||||
|
|
||||||
zfs_znode_delete(zp, tx);
|
zfs_znode_delete(zp, tx);
|
||||||
|
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
@ -1178,6 +1178,10 @@ zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
|
|||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zfsvfs->z_drain_task = TASKQID_INVALID;
|
||||||
|
zfsvfs->z_draining = B_FALSE;
|
||||||
|
zfsvfs->z_drain_cancel = B_TRUE;
|
||||||
|
|
||||||
*zfvp = zfsvfs;
|
*zfvp = zfsvfs;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
@ -1200,14 +1204,27 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
|||||||
* operations out since we closed the ZIL.
|
* operations out since we closed the ZIL.
|
||||||
*/
|
*/
|
||||||
if (mounting) {
|
if (mounting) {
|
||||||
|
ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
|
||||||
|
dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* During replay we remove the read only flag to
|
* During replay we remove the read only flag to
|
||||||
* allow replays to succeed.
|
* allow replays to succeed.
|
||||||
*/
|
*/
|
||||||
if (readonly != 0)
|
if (readonly != 0) {
|
||||||
readonly_changed_cb(zfsvfs, B_FALSE);
|
readonly_changed_cb(zfsvfs, B_FALSE);
|
||||||
else
|
} else {
|
||||||
|
zap_stats_t zs;
|
||||||
|
if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
|
||||||
|
&zs) == 0) {
|
||||||
|
dataset_kstats_update_nunlinks_kstat(
|
||||||
|
&zfsvfs->z_kstat, zs.zs_num_entries);
|
||||||
|
}
|
||||||
|
dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
|
||||||
|
"num_entries in unlinked set: %llu",
|
||||||
|
zs.zs_num_entries);
|
||||||
zfs_unlinked_drain(zfsvfs);
|
zfs_unlinked_drain(zfsvfs);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Parse and replay the intent log.
|
* Parse and replay the intent log.
|
||||||
@ -1250,9 +1267,6 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
|
|||||||
/* restore readonly bit */
|
/* restore readonly bit */
|
||||||
if (readonly != 0)
|
if (readonly != 0)
|
||||||
readonly_changed_cb(zfsvfs, B_TRUE);
|
readonly_changed_cb(zfsvfs, B_TRUE);
|
||||||
|
|
||||||
ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
|
|
||||||
dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1633,6 +1647,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
|
|||||||
{
|
{
|
||||||
znode_t *zp;
|
znode_t *zp;
|
||||||
|
|
||||||
|
zfs_unlinked_drain_stop_wait(zfsvfs);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If someone has not already unmounted this file system,
|
* If someone has not already unmounted this file system,
|
||||||
* drain the iput_taskq to ensure all active references to the
|
* drain the iput_taskq to ensure all active references to the
|
||||||
@ -1884,6 +1900,7 @@ zfs_preumount(struct super_block *sb)
|
|||||||
|
|
||||||
/* zfsvfs is NULL when zfs_domount fails during mount */
|
/* zfsvfs is NULL when zfs_domount fails during mount */
|
||||||
if (zfsvfs) {
|
if (zfsvfs) {
|
||||||
|
zfs_unlinked_drain_stop_wait(zfsvfs);
|
||||||
zfsctl_destroy(sb->s_fs_info);
|
zfsctl_destroy(sb->s_fs_info);
|
||||||
/*
|
/*
|
||||||
* Wait for iput_async before entering evict_inodes in
|
* Wait for iput_async before entering evict_inodes in
|
||||||
@ -2159,6 +2176,15 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
|
|||||||
}
|
}
|
||||||
mutex_exit(&zfsvfs->z_znodes_lock);
|
mutex_exit(&zfsvfs->z_znodes_lock);
|
||||||
|
|
||||||
|
if (!zfs_is_readonly(zfsvfs) && !zfsvfs->z_unmounted) {
|
||||||
|
/*
|
||||||
|
* zfs_suspend_fs() could have interrupted freeing
|
||||||
|
* of dnodes. We need to restart this freeing so
|
||||||
|
* that we don't "leak" the space.
|
||||||
|
*/
|
||||||
|
zfs_unlinked_drain(zfsvfs);
|
||||||
|
}
|
||||||
|
|
||||||
bail:
|
bail:
|
||||||
/* release the VFS ops */
|
/* release the VFS ops */
|
||||||
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
rw_exit(&zfsvfs->z_teardown_inactive_lock);
|
||||||
|
@ -91,6 +91,12 @@ static kmem_cache_t *znode_cache = NULL;
|
|||||||
static kmem_cache_t *znode_hold_cache = NULL;
|
static kmem_cache_t *znode_hold_cache = NULL;
|
||||||
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
|
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is used by the test suite so that it can delay znodes from being
|
||||||
|
* freed in order to inspect the unlinked set.
|
||||||
|
*/
|
||||||
|
int zfs_unlink_suspend_progress = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
|
* This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
|
||||||
* z_rangelock. It will modify the offset and length of the lock to reflect
|
* z_rangelock. It will modify the offset and length of the lock to reflect
|
||||||
@ -1339,7 +1345,7 @@ zfs_zinactive(znode_t *zp)
|
|||||||
*/
|
*/
|
||||||
if (zp->z_unlinked) {
|
if (zp->z_unlinked) {
|
||||||
ASSERT(!zfsvfs->z_issnap);
|
ASSERT(!zfsvfs->z_issnap);
|
||||||
if (!zfs_is_readonly(zfsvfs)) {
|
if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
|
||||||
mutex_exit(&zp->z_lock);
|
mutex_exit(&zp->z_lock);
|
||||||
zfs_znode_hold_exit(zfsvfs, zh);
|
zfs_znode_hold_exit(zfsvfs, zh);
|
||||||
zfs_rmnode(zp);
|
zfs_rmnode(zp);
|
||||||
@ -2214,4 +2220,7 @@ EXPORT_SYMBOL(zfs_obj_to_path);
|
|||||||
/* CSTYLED */
|
/* CSTYLED */
|
||||||
module_param(zfs_object_mutex_size, uint, 0644);
|
module_param(zfs_object_mutex_size, uint, 0644);
|
||||||
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
|
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
|
||||||
|
module_param(zfs_unlink_suspend_progress, int, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
|
||||||
|
"(debug - leaks space into the unlinked set)");
|
||||||
#endif
|
#endif
|
||||||
|
@ -644,7 +644,7 @@ tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
|
|||||||
tags = ['functional', 'mmp']
|
tags = ['functional', 'mmp']
|
||||||
|
|
||||||
[tests/functional/mount]
|
[tests/functional/mount]
|
||||||
tests = ['umount_001', 'umountall_001']
|
tests = ['umount_001', 'umount_unlinked_drain', 'umountall_001']
|
||||||
tags = ['functional', 'mount']
|
tags = ['functional', 'mount']
|
||||||
|
|
||||||
[tests/functional/mv_files]
|
[tests/functional/mv_files]
|
||||||
|
@ -3,4 +3,5 @@ dist_pkgdata_SCRIPTS = \
|
|||||||
setup.ksh \
|
setup.ksh \
|
||||||
cleanup.ksh \
|
cleanup.ksh \
|
||||||
umount_001.ksh \
|
umount_001.ksh \
|
||||||
|
umount_unlinked_drain.ksh \
|
||||||
umountall_001.ksh
|
umountall_001.ksh
|
||||||
|
119
tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
Executable file
119
tests/zfs-tests/tests/functional/mount/umount_unlinked_drain.ksh
Executable file
@ -0,0 +1,119 @@
|
|||||||
|
#!/bin/ksh -p
|
||||||
|
|
||||||
|
#
|
||||||
|
# This file and its contents are supplied under the terms of the
|
||||||
|
# Common Development and Distribution License ("CDDL"), version 1.0.
|
||||||
|
# You may only use this file in accordance with the terms of version
|
||||||
|
# 1.0 of the CDDL.
|
||||||
|
#
|
||||||
|
# A full copy of the text of the CDDL should have accompanied this
|
||||||
|
# source. A copy of the CDDL is also available via the Internet at
|
||||||
|
# http://www.illumos.org/license/CDDL.
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright 2018 Datto Inc.
|
||||||
|
#
|
||||||
|
|
||||||
|
. $STF_SUITE/include/libtest.shlib
|
||||||
|
|
||||||
|
#
|
||||||
|
# DESCRIPTION:
|
||||||
|
# Test async unlinked drain to ensure mounting is not held up when there are
|
||||||
|
# entries in the unlinked set. We also try to test that the list is able to be
|
||||||
|
# filled up and drained at the same time.
|
||||||
|
#
|
||||||
|
# STRATEGY:
|
||||||
|
# 1. Use zfs_unlink_suspend_progress tunable to disable freeing to build up
|
||||||
|
# the unlinked set
|
||||||
|
# 2. Make sure mount happens even when there are entries in the unlinked set
|
||||||
|
# 3. Drain and build up the unlinked list at the same time to test for races
|
||||||
|
#
|
||||||
|
|
||||||
|
function cleanup
|
||||||
|
{
|
||||||
|
log_must set_tunable32 zfs_unlink_suspend_progress $default_unlink_sp
|
||||||
|
for fs in $(seq 1 3); do
|
||||||
|
mounted $TESTDIR.$fs || zfs mount $TESTPOOL/$TESTFS.$fs
|
||||||
|
rm -f $TESTDIR.$fs/file-*
|
||||||
|
zfs set xattr=on $TESTPOOL/$TESTFS.$fs
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
function unlinked_size_is
|
||||||
|
{
|
||||||
|
MAX_ITERS=5 # iteration to do before we consider reported number stable
|
||||||
|
iters=0
|
||||||
|
last_usize=0
|
||||||
|
while [[ $iters -le $MAX_ITERS ]]; do
|
||||||
|
kstat_file=$(grep -nrwl /proc/spl/kstat/zfs/$2/objset-0x* -e $3)
|
||||||
|
nunlinks=`cat $kstat_file | grep nunlinks | awk '{print $3}'`
|
||||||
|
nunlinked=`cat $kstat_file | grep nunlinked | awk '{print $3}'`
|
||||||
|
usize=$(($nunlinks - $nunlinked))
|
||||||
|
if [[ $iters == $MAX_ITERS && $usize == $1 ]]; then
|
||||||
|
return 0
|
||||||
|
fi
|
||||||
|
if [[ $usize == $last_usize ]]; then
|
||||||
|
(( iters++ ))
|
||||||
|
else
|
||||||
|
iters=0
|
||||||
|
fi
|
||||||
|
last_usize=$usize
|
||||||
|
done
|
||||||
|
|
||||||
|
log_note "Unexpected unlinked set size: $last_usize, expected $1"
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
UNLINK_SP_PARAM=/sys/module/zfs/parameters/zfs_unlink_suspend_progress
|
||||||
|
default_unlink_sp=$(get_tunable zfs_unlink_suspend_progress)
|
||||||
|
|
||||||
|
log_onexit cleanup
|
||||||
|
|
||||||
|
log_assert "Unlinked list drain does not hold up mounting of fs"
|
||||||
|
|
||||||
|
for fs in 1 2 3; do
|
||||||
|
set -A xattrs on sa off
|
||||||
|
for xa in ${xattrs[@]}; do
|
||||||
|
# setup fs and ensure all deleted files got into unlinked set
|
||||||
|
log_must mounted $TESTDIR.$fs
|
||||||
|
|
||||||
|
log_must zfs set xattr=$xa $TESTPOOL/$TESTFS.$fs
|
||||||
|
|
||||||
|
if [[ $xa == off ]]; then
|
||||||
|
for fn in $(seq 1 175); do
|
||||||
|
log_must mkfile 128k $TESTDIR.$fs/file-$fn
|
||||||
|
done
|
||||||
|
else
|
||||||
|
log_must xattrtest -f 175 -x 3 -r -k -p $TESTDIR.$fs
|
||||||
|
fi
|
||||||
|
|
||||||
|
log_must set_tunable32 zfs_unlink_suspend_progress 1
|
||||||
|
log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
|
||||||
|
|
||||||
|
# build up unlinked set
|
||||||
|
for fn in $(seq 1 100); do
|
||||||
|
log_must eval "rm $TESTDIR.$fs/file-$fn &"
|
||||||
|
done
|
||||||
|
log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
|
||||||
|
|
||||||
|
# test that we can mount fs without emptying the unlinked list
|
||||||
|
log_must zfs umount $TESTPOOL/$TESTFS.$fs
|
||||||
|
log_must unmounted $TESTDIR.$fs
|
||||||
|
log_must zfs mount $TESTPOOL/$TESTFS.$fs
|
||||||
|
log_must mounted $TESTDIR.$fs
|
||||||
|
log_must unlinked_size_is 100 $TESTPOOL $TESTPOOL/$TESTFS.$fs
|
||||||
|
|
||||||
|
# confirm we can drain and add to unlinked set at the same time
|
||||||
|
log_must set_tunable32 zfs_unlink_suspend_progress 0
|
||||||
|
log_must zfs umount $TESTPOOL/$TESTFS.$fs
|
||||||
|
log_must zfs mount $TESTPOOL/$TESTFS.$fs
|
||||||
|
for fn in $(seq 101 175); do
|
||||||
|
log_must eval "rm $TESTDIR.$fs/file-$fn &"
|
||||||
|
done
|
||||||
|
log_must unlinked_size_is 0 $TESTPOOL $TESTPOOL/$TESTFS.$fs
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
log_pass "Confirmed unlinked list drain does not hold up mounting of fs"
|
Loading…
Reference in New Issue
Block a user