mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-26 18:04:22 +03:00
Speed up zvol import and export speed
Speed up import and export speed by:
* Add system delay taskq
* Parallel prefetch zvol dnodes during zvol_create_minors
* Parallel zvol_free during zvol_remove_minors
* Reduce list linear search using ida and hash

Reviewed-by: Boris Protopopov <boris.protopopov@actifio.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
Closes #5433
This commit is contained in:
commit
f95e647891
@ -498,6 +498,7 @@ typedef struct taskq {
|
|||||||
#define TASKQID_INVALID ((taskqid_t)0)
|
#define TASKQID_INVALID ((taskqid_t)0)
|
||||||
|
|
||||||
extern taskq_t *system_taskq;
|
extern taskq_t *system_taskq;
|
||||||
|
extern taskq_t *system_delay_taskq;
|
||||||
|
|
||||||
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
|
||||||
#define taskq_create_proc(a, b, c, d, e, p, f) \
|
#define taskq_create_proc(a, b, c, d, e, p, f) \
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
|
|
||||||
int taskq_now;
|
int taskq_now;
|
||||||
taskq_t *system_taskq;
|
taskq_t *system_taskq;
|
||||||
|
taskq_t *system_delay_taskq;
|
||||||
|
|
||||||
#define TASKQ_ACTIVE 0x00010000
|
#define TASKQ_ACTIVE 0x00010000
|
||||||
|
|
||||||
@ -353,6 +354,8 @@ system_taskq_init(void)
|
|||||||
{
|
{
|
||||||
system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
|
system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512,
|
||||||
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
|
TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
|
||||||
|
system_delay_taskq = taskq_create("delay_taskq", 4, maxclsyspri, 4,
|
||||||
|
512, TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -360,4 +363,6 @@ system_taskq_fini(void)
|
|||||||
{
|
{
|
||||||
taskq_destroy(system_taskq);
|
taskq_destroy(system_taskq);
|
||||||
system_taskq = NULL; /* defensive */
|
system_taskq = NULL; /* defensive */
|
||||||
|
taskq_destroy(system_delay_taskq);
|
||||||
|
system_delay_taskq = NULL;
|
||||||
}
|
}
|
||||||
|
@ -1208,7 +1208,7 @@ spa_deactivate(spa_t *spa)
|
|||||||
list_destroy(&spa->spa_evicting_os_list);
|
list_destroy(&spa->spa_evicting_os_list);
|
||||||
list_destroy(&spa->spa_state_dirty_list);
|
list_destroy(&spa->spa_state_dirty_list);
|
||||||
|
|
||||||
taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
|
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
|
||||||
|
|
||||||
for (t = 0; t < ZIO_TYPES; t++) {
|
for (t = 0; t < ZIO_TYPES; t++) {
|
||||||
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
|
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
|
||||||
@ -6515,8 +6515,8 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|||||||
tx = dmu_tx_create_assigned(dp, txg);
|
tx = dmu_tx_create_assigned(dp, txg);
|
||||||
|
|
||||||
spa->spa_sync_starttime = gethrtime();
|
spa->spa_sync_starttime = gethrtime();
|
||||||
taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
|
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
|
||||||
spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
|
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
|
||||||
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
|
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
|
||||||
NSEC_TO_TICK(spa->spa_deadman_synctime));
|
NSEC_TO_TICK(spa->spa_deadman_synctime));
|
||||||
|
|
||||||
@ -6704,7 +6704,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|||||||
}
|
}
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
|
||||||
taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
|
taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
|
||||||
spa->spa_deadman_tqid = 0;
|
spa->spa_deadman_tqid = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -530,7 +530,7 @@ spa_deadman(void *arg)
|
|||||||
if (zfs_deadman_enabled)
|
if (zfs_deadman_enabled)
|
||||||
vdev_deadman(spa->spa_root_vdev);
|
vdev_deadman(spa->spa_root_vdev);
|
||||||
|
|
||||||
spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
|
spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
|
||||||
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
|
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
|
||||||
NSEC_TO_TICK(spa->spa_deadman_synctime));
|
NSEC_TO_TICK(spa->spa_deadman_synctime));
|
||||||
}
|
}
|
||||||
|
@ -111,11 +111,6 @@ static krwlock_t zfs_snapshot_lock;
|
|||||||
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
|
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
|
||||||
int zfs_admin_snapshot = 1;
|
int zfs_admin_snapshot = 1;
|
||||||
|
|
||||||
/*
|
|
||||||
* Dedicated task queue for unmounting snapshots.
|
|
||||||
*/
|
|
||||||
static taskq_t *zfs_expire_taskq;
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char *se_name; /* full snapshot name */
|
char *se_name; /* full snapshot name */
|
||||||
char *se_path; /* full mount path */
|
char *se_path; /* full mount path */
|
||||||
@ -365,7 +360,7 @@ zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
|
|||||||
{
|
{
|
||||||
ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
|
ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
|
||||||
|
|
||||||
if (taskq_cancel_id(zfs_expire_taskq, se->se_taskqid) == 0) {
|
if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
|
||||||
se->se_taskqid = TASKQID_INVALID;
|
se->se_taskqid = TASKQID_INVALID;
|
||||||
zfsctl_snapshot_rele(se);
|
zfsctl_snapshot_rele(se);
|
||||||
}
|
}
|
||||||
@ -383,7 +378,7 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
zfsctl_snapshot_hold(se);
|
zfsctl_snapshot_hold(se);
|
||||||
se->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
|
se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
|
||||||
snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
|
snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1257,9 +1252,6 @@ zfsctl_init(void)
|
|||||||
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
|
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
|
||||||
se_node_objsetid));
|
se_node_objsetid));
|
||||||
rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
|
rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
|
||||||
|
|
||||||
zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
|
|
||||||
1, 8, TASKQ_PREPOPULATE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1269,8 +1261,6 @@ zfsctl_init(void)
|
|||||||
void
|
void
|
||||||
zfsctl_fini(void)
|
zfsctl_fini(void)
|
||||||
{
|
{
|
||||||
taskq_destroy(zfs_expire_taskq);
|
|
||||||
|
|
||||||
avl_destroy(&zfs_snapshots_by_name);
|
avl_destroy(&zfs_snapshots_by_name);
|
||||||
avl_destroy(&zfs_snapshots_by_objsetid);
|
avl_destroy(&zfs_snapshots_by_objsetid);
|
||||||
rw_destroy(&zfs_snapshot_lock);
|
rw_destroy(&zfs_snapshot_lock);
|
||||||
|
@ -1922,6 +1922,7 @@ zfs_fini(void)
|
|||||||
/*
|
/*
|
||||||
* we don't use outstanding because zpl_posix_acl_free might add more.
|
* we don't use outstanding because zpl_posix_acl_free might add more.
|
||||||
*/
|
*/
|
||||||
|
taskq_wait(system_delay_taskq);
|
||||||
taskq_wait(system_taskq);
|
taskq_wait(system_taskq);
|
||||||
unregister_filesystem(&zpl_fs_type);
|
unregister_filesystem(&zpl_fs_type);
|
||||||
zfs_znode_fini();
|
zfs_znode_fini();
|
||||||
|
@ -1511,8 +1511,8 @@ zpl_posix_acl_free(void *arg)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (refire)
|
if (refire)
|
||||||
taskq_dispatch_delay(system_taskq, zpl_posix_acl_free, NULL,
|
taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
|
||||||
TQ_SLEEP, new_time);
|
NULL, TQ_SLEEP, new_time);
|
||||||
|
|
||||||
while (freelist) {
|
while (freelist) {
|
||||||
a = freelist;
|
a = freelist;
|
||||||
@ -1537,7 +1537,7 @@ zpl_posix_acl_release_impl(struct posix_acl *acl)
|
|||||||
*prev = a;
|
*prev = a;
|
||||||
/* if it was empty before, schedule the free task */
|
/* if it was empty before, schedule the free task */
|
||||||
if (prev == &acl_rel_head)
|
if (prev == &acl_rel_head)
|
||||||
taskq_dispatch_delay(system_taskq, zpl_posix_acl_free, NULL,
|
taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
|
||||||
TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
|
NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -63,6 +63,11 @@ static kmutex_t zvol_state_lock;
|
|||||||
static list_t zvol_state_list;
|
static list_t zvol_state_list;
|
||||||
void *zvol_tag = "zvol_tag";
|
void *zvol_tag = "zvol_tag";
|
||||||
|
|
||||||
|
#define ZVOL_HT_SIZE 1024
|
||||||
|
static struct hlist_head *zvol_htable;
|
||||||
|
#define ZVOL_HT_HEAD(hash) (&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
|
||||||
|
static DEFINE_IDA(zvol_ida);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The in-core state of each volume.
|
* The in-core state of each volume.
|
||||||
*/
|
*/
|
||||||
@ -81,6 +86,8 @@ typedef struct zvol_state {
|
|||||||
struct gendisk *zv_disk; /* generic disk */
|
struct gendisk *zv_disk; /* generic disk */
|
||||||
struct request_queue *zv_queue; /* request queue */
|
struct request_queue *zv_queue; /* request queue */
|
||||||
list_node_t zv_next; /* next zvol_state_t linkage */
|
list_node_t zv_next; /* next zvol_state_t linkage */
|
||||||
|
uint64_t zv_hash; /* name hash */
|
||||||
|
struct hlist_node zv_hlink; /* hash link */
|
||||||
} zvol_state_t;
|
} zvol_state_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@ -102,30 +109,17 @@ typedef struct {
|
|||||||
|
|
||||||
#define ZVOL_RDONLY 0x1
|
#define ZVOL_RDONLY 0x1
|
||||||
|
|
||||||
/*
|
static uint64_t
|
||||||
* Find the next available range of ZVOL_MINORS minor numbers. The
|
zvol_name_hash(const char *name)
|
||||||
* zvol_state_list is kept in ascending minor order so we simply need
|
|
||||||
* to scan the list for the first gap in the sequence. This allows us
|
|
||||||
* to recycle minor number as devices are created and removed.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
zvol_find_minor(unsigned *minor)
|
|
||||||
{
|
{
|
||||||
zvol_state_t *zv;
|
int i;
|
||||||
|
uint64_t crc = -1ULL;
|
||||||
*minor = 0;
|
uint8_t *p = (uint8_t *)name;
|
||||||
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
|
||||||
for (zv = list_head(&zvol_state_list); zv != NULL;
|
for (i = 0; i < MAXNAMELEN - 1 && *p; i++, p++) {
|
||||||
zv = list_next(&zvol_state_list, zv), *minor += ZVOL_MINORS) {
|
crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (*p)) & 0xFF];
|
||||||
if (MINOR(zv->zv_dev) != MINOR(*minor))
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
return (crc);
|
||||||
/* All minors are in use */
|
|
||||||
if (*minor >= (1 << MINORBITS))
|
|
||||||
return (SET_ERROR(ENXIO));
|
|
||||||
|
|
||||||
return (0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -146,22 +140,32 @@ zvol_find_by_dev(dev_t dev)
|
|||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find a zvol_state_t given the name and hash generated by zvol_name_hash.
|
||||||
|
*/
|
||||||
|
static zvol_state_t *
|
||||||
|
zvol_find_by_name_hash(const char *name, uint64_t hash)
|
||||||
|
{
|
||||||
|
zvol_state_t *zv;
|
||||||
|
struct hlist_node *p;
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
||||||
|
hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
|
||||||
|
zv = hlist_entry(p, zvol_state_t, zv_hlink);
|
||||||
|
if (zv->zv_hash == hash &&
|
||||||
|
strncmp(zv->zv_name, name, MAXNAMELEN) == 0)
|
||||||
|
return (zv);
|
||||||
|
}
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find a zvol_state_t given the name provided at zvol_alloc() time.
|
* Find a zvol_state_t given the name provided at zvol_alloc() time.
|
||||||
*/
|
*/
|
||||||
static zvol_state_t *
|
static zvol_state_t *
|
||||||
zvol_find_by_name(const char *name)
|
zvol_find_by_name(const char *name)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv;
|
return (zvol_find_by_name_hash(name, zvol_name_hash(name)));
|
||||||
|
|
||||||
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
|
||||||
for (zv = list_head(&zvol_state_list); zv != NULL;
|
|
||||||
zv = list_next(&zvol_state_list, zv)) {
|
|
||||||
if (strncmp(zv->zv_name, name, MAXNAMELEN) == 0)
|
|
||||||
return (zv);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -921,32 +925,26 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The zvol_state_t's are inserted in increasing MINOR(dev_t) order.
|
* The zvol_state_t's are inserted into zvol_state_list and zvol_htable.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
zvol_insert(zvol_state_t *zv_insert)
|
zvol_insert(zvol_state_t *zv)
|
||||||
{
|
{
|
||||||
zvol_state_t *zv = NULL;
|
|
||||||
|
|
||||||
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
||||||
ASSERT3U(MINOR(zv_insert->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
|
ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
|
||||||
for (zv = list_head(&zvol_state_list); zv != NULL;
|
list_insert_head(&zvol_state_list, zv);
|
||||||
zv = list_next(&zvol_state_list, zv)) {
|
hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
|
||||||
if (MINOR(zv->zv_dev) > MINOR(zv_insert->zv_dev))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
list_insert_before(&zvol_state_list, zv, zv_insert);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Simply remove the zvol from the list of zvols.
|
* Simply remove the zvol from the list of zvols.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
zvol_remove(zvol_state_t *zv_remove)
|
zvol_remove(zvol_state_t *zv)
|
||||||
{
|
{
|
||||||
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
||||||
list_remove(&zvol_state_list, zv_remove);
|
list_remove(&zvol_state_list, zv);
|
||||||
|
hlist_del(&zv->zv_hlink);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -1038,7 +1036,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Obtain a copy of private_data under the lock to make sure
|
* Obtain a copy of private_data under the lock to make sure
|
||||||
* that either the result of zvol_freeg() setting
|
* that either the result of zvol_free() setting
|
||||||
* bdev->bd_disk->private_data to NULL is observed, or zvol_free()
|
* bdev->bd_disk->private_data to NULL is observed, or zvol_free()
|
||||||
* is not called on this zv because of the positive zv_open_count.
|
* is not called on this zv because of the positive zv_open_count.
|
||||||
*/
|
*/
|
||||||
@ -1318,12 +1316,13 @@ out_kmem:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cleanup then free a zvol_state_t which was created by zvol_alloc().
|
* Used for taskq; if used outside zvol_state_lock, you need to clear
|
||||||
|
* zv_disk->private_data inside lock first.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
zvol_free(zvol_state_t *zv)
|
zvol_free_impl(void *arg)
|
||||||
{
|
{
|
||||||
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
zvol_state_t *zv = arg;
|
||||||
ASSERT(zv->zv_open_count == 0);
|
ASSERT(zv->zv_open_count == 0);
|
||||||
|
|
||||||
zfs_rlock_destroy(&zv->zv_range_lock);
|
zfs_rlock_destroy(&zv->zv_range_lock);
|
||||||
@ -1334,9 +1333,20 @@ zvol_free(zvol_state_t *zv)
|
|||||||
blk_cleanup_queue(zv->zv_queue);
|
blk_cleanup_queue(zv->zv_queue);
|
||||||
put_disk(zv->zv_disk);
|
put_disk(zv->zv_disk);
|
||||||
|
|
||||||
|
ida_simple_remove(&zvol_ida, MINOR(zv->zv_dev) >> ZVOL_MINOR_BITS);
|
||||||
kmem_free(zv, sizeof (zvol_state_t));
|
kmem_free(zv, sizeof (zvol_state_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Cleanup then free a zvol_state_t which was created by zvol_alloc().
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
zvol_free(zvol_state_t *zv)
|
||||||
|
{
|
||||||
|
ASSERT(MUTEX_HELD(&zvol_state_lock));
|
||||||
|
zvol_free_impl(zv);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a block device minor node and setup the linkage between it
|
* Create a block device minor node and setup the linkage between it
|
||||||
* and the specified volume. Once this function returns the block
|
* and the specified volume. Once this function returns the block
|
||||||
@ -1352,10 +1362,17 @@ zvol_create_minor_impl(const char *name)
|
|||||||
uint64_t len;
|
uint64_t len;
|
||||||
unsigned minor = 0;
|
unsigned minor = 0;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
int idx;
|
||||||
|
uint64_t hash = zvol_name_hash(name);
|
||||||
|
|
||||||
|
idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP));
|
||||||
|
if (idx < 0)
|
||||||
|
return (SET_ERROR(-idx));
|
||||||
|
minor = idx << ZVOL_MINOR_BITS;
|
||||||
|
|
||||||
mutex_enter(&zvol_state_lock);
|
mutex_enter(&zvol_state_lock);
|
||||||
|
|
||||||
zv = zvol_find_by_name(name);
|
zv = zvol_find_by_name_hash(name, hash);
|
||||||
if (zv) {
|
if (zv) {
|
||||||
error = SET_ERROR(EEXIST);
|
error = SET_ERROR(EEXIST);
|
||||||
goto out;
|
goto out;
|
||||||
@ -1375,15 +1392,12 @@ zvol_create_minor_impl(const char *name)
|
|||||||
if (error)
|
if (error)
|
||||||
goto out_dmu_objset_disown;
|
goto out_dmu_objset_disown;
|
||||||
|
|
||||||
error = zvol_find_minor(&minor);
|
|
||||||
if (error)
|
|
||||||
goto out_dmu_objset_disown;
|
|
||||||
|
|
||||||
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
|
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
|
||||||
if (zv == NULL) {
|
if (zv == NULL) {
|
||||||
error = SET_ERROR(EAGAIN);
|
error = SET_ERROR(EAGAIN);
|
||||||
goto out_dmu_objset_disown;
|
goto out_dmu_objset_disown;
|
||||||
}
|
}
|
||||||
|
zv->zv_hash = hash;
|
||||||
|
|
||||||
if (dmu_objset_is_snapshot(os))
|
if (dmu_objset_is_snapshot(os))
|
||||||
zv->zv_flags |= ZVOL_RDONLY;
|
zv->zv_flags |= ZVOL_RDONLY;
|
||||||
@ -1449,6 +1463,7 @@ out:
|
|||||||
add_disk(zv->zv_disk);
|
add_disk(zv->zv_disk);
|
||||||
} else {
|
} else {
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
ida_simple_remove(&zvol_ida, idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (SET_ERROR(error));
|
return (SET_ERROR(error));
|
||||||
@ -1478,6 +1493,32 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
|
|||||||
set_disk_ro(zv->zv_disk, readonly);
|
set_disk_ro(zv->zv_disk, readonly);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
typedef struct minors_job {
|
||||||
|
list_t *list;
|
||||||
|
list_node_t link;
|
||||||
|
/* input */
|
||||||
|
char *name;
|
||||||
|
/* output */
|
||||||
|
int error;
|
||||||
|
} minors_job_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prefetch zvol dnodes for the minors_job
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
zvol_prefetch_minors_impl(void *arg)
|
||||||
|
{
|
||||||
|
minors_job_t *job = arg;
|
||||||
|
char *dsname = job->name;
|
||||||
|
objset_t *os = NULL;
|
||||||
|
|
||||||
|
job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, zvol_tag,
|
||||||
|
&os);
|
||||||
|
if (job->error == 0) {
|
||||||
|
dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
|
||||||
|
dmu_objset_disown(os, zvol_tag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mask errors to continue dmu_objset_find() traversal
|
* Mask errors to continue dmu_objset_find() traversal
|
||||||
@ -1485,7 +1526,9 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
|
|||||||
static int
|
static int
|
||||||
zvol_create_snap_minor_cb(const char *dsname, void *arg)
|
zvol_create_snap_minor_cb(const char *dsname, void *arg)
|
||||||
{
|
{
|
||||||
const char *name = (const char *)arg;
|
minors_job_t *j = arg;
|
||||||
|
list_t *minors_list = j->list;
|
||||||
|
const char *name = j->name;
|
||||||
|
|
||||||
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
|
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
|
||||||
|
|
||||||
@ -1498,7 +1541,19 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
|
|||||||
dprintf("zvol_create_snap_minor_cb(): "
|
dprintf("zvol_create_snap_minor_cb(): "
|
||||||
"%s is not a shapshot name\n", dsname);
|
"%s is not a shapshot name\n", dsname);
|
||||||
} else {
|
} else {
|
||||||
(void) zvol_create_minor_impl(dsname);
|
minors_job_t *job;
|
||||||
|
char *n = strdup(dsname);
|
||||||
|
if (n == NULL)
|
||||||
|
return (0);
|
||||||
|
|
||||||
|
job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
|
||||||
|
job->name = n;
|
||||||
|
job->list = minors_list;
|
||||||
|
job->error = 0;
|
||||||
|
list_insert_tail(minors_list, job);
|
||||||
|
/* don't care if dispatch fails, because job->error is 0 */
|
||||||
|
taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
|
||||||
|
TQ_SLEEP);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
@ -1512,6 +1567,7 @@ zvol_create_minors_cb(const char *dsname, void *arg)
|
|||||||
{
|
{
|
||||||
uint64_t snapdev;
|
uint64_t snapdev;
|
||||||
int error;
|
int error;
|
||||||
|
list_t *minors_list = arg;
|
||||||
|
|
||||||
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
|
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
|
||||||
|
|
||||||
@ -1527,19 +1583,28 @@ zvol_create_minors_cb(const char *dsname, void *arg)
|
|||||||
* snapshots and create device minor nodes for those.
|
* snapshots and create device minor nodes for those.
|
||||||
*/
|
*/
|
||||||
if (strchr(dsname, '@') == 0) {
|
if (strchr(dsname, '@') == 0) {
|
||||||
/* create minor for the 'dsname' explicitly */
|
minors_job_t *job;
|
||||||
error = zvol_create_minor_impl(dsname);
|
char *n = strdup(dsname);
|
||||||
if ((error == 0 || error == EEXIST) &&
|
if (n == NULL)
|
||||||
(snapdev == ZFS_SNAPDEV_VISIBLE)) {
|
return (0);
|
||||||
fstrans_cookie_t cookie = spl_fstrans_mark();
|
|
||||||
|
job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
|
||||||
|
job->name = n;
|
||||||
|
job->list = minors_list;
|
||||||
|
job->error = 0;
|
||||||
|
list_insert_tail(minors_list, job);
|
||||||
|
/* don't care if dispatch fails, because job->error is 0 */
|
||||||
|
taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
|
||||||
|
TQ_SLEEP);
|
||||||
|
|
||||||
|
if (snapdev == ZFS_SNAPDEV_VISIBLE) {
|
||||||
/*
|
/*
|
||||||
* traverse snapshots only, do not traverse children,
|
* traverse snapshots only, do not traverse children,
|
||||||
* and skip the 'dsname'
|
* and skip the 'dsname'
|
||||||
*/
|
*/
|
||||||
error = dmu_objset_find((char *)dsname,
|
error = dmu_objset_find((char *)dsname,
|
||||||
zvol_create_snap_minor_cb, (void *)dsname,
|
zvol_create_snap_minor_cb, (void *)job,
|
||||||
DS_FIND_SNAPSHOTS);
|
DS_FIND_SNAPSHOTS);
|
||||||
spl_fstrans_unmark(cookie);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
|
dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
|
||||||
@ -1572,10 +1637,24 @@ zvol_create_minors_impl(const char *name)
|
|||||||
int error = 0;
|
int error = 0;
|
||||||
fstrans_cookie_t cookie;
|
fstrans_cookie_t cookie;
|
||||||
char *atp, *parent;
|
char *atp, *parent;
|
||||||
|
list_t minors_list;
|
||||||
|
minors_job_t *job;
|
||||||
|
|
||||||
if (zvol_inhibit_dev)
|
if (zvol_inhibit_dev)
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the list for prefetch jobs. Whenever we find a match
|
||||||
|
* during dmu_objset_find, we insert a minors_job to the list and do
|
||||||
|
* taskq_dispatch to prefetch zvol dnodes in parallel. Note we don't need
|
||||||
|
* any lock because all list operations are done on the current thread.
|
||||||
|
*
|
||||||
|
* We will use this list to do zvol_create_minor_impl after prefetch
|
||||||
|
* so we don't have to traverse using dmu_objset_find again.
|
||||||
|
*/
|
||||||
|
list_create(&minors_list, sizeof (minors_job_t),
|
||||||
|
offsetof(minors_job_t, link));
|
||||||
|
|
||||||
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
|
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
|
||||||
(void) strlcpy(parent, name, MAXPATHLEN);
|
(void) strlcpy(parent, name, MAXPATHLEN);
|
||||||
|
|
||||||
@ -1591,11 +1670,26 @@ zvol_create_minors_impl(const char *name)
|
|||||||
} else {
|
} else {
|
||||||
cookie = spl_fstrans_mark();
|
cookie = spl_fstrans_mark();
|
||||||
error = dmu_objset_find(parent, zvol_create_minors_cb,
|
error = dmu_objset_find(parent, zvol_create_minors_cb,
|
||||||
NULL, DS_FIND_CHILDREN);
|
&minors_list, DS_FIND_CHILDREN);
|
||||||
spl_fstrans_unmark(cookie);
|
spl_fstrans_unmark(cookie);
|
||||||
}
|
}
|
||||||
|
|
||||||
kmem_free(parent, MAXPATHLEN);
|
kmem_free(parent, MAXPATHLEN);
|
||||||
|
taskq_wait_outstanding(system_taskq, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prefetch is completed, we can do zvol_create_minor_impl
|
||||||
|
* sequentially.
|
||||||
|
*/
|
||||||
|
while ((job = list_head(&minors_list)) != NULL) {
|
||||||
|
list_remove(&minors_list, job);
|
||||||
|
if (!job->error)
|
||||||
|
zvol_create_minor_impl(job->name);
|
||||||
|
strfree(job->name);
|
||||||
|
kmem_free(job, sizeof (minors_job_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
list_destroy(&minors_list);
|
||||||
|
|
||||||
return (SET_ERROR(error));
|
return (SET_ERROR(error));
|
||||||
}
|
}
|
||||||
@ -1608,6 +1702,7 @@ zvol_remove_minors_impl(const char *name)
|
|||||||
{
|
{
|
||||||
zvol_state_t *zv, *zv_next;
|
zvol_state_t *zv, *zv_next;
|
||||||
int namelen = ((name) ? strlen(name) : 0);
|
int namelen = ((name) ? strlen(name) : 0);
|
||||||
|
taskqid_t t, tid = TASKQID_INVALID;
|
||||||
|
|
||||||
if (zvol_inhibit_dev)
|
if (zvol_inhibit_dev)
|
||||||
return;
|
return;
|
||||||
@ -1627,11 +1722,22 @@ zvol_remove_minors_impl(const char *name)
|
|||||||
continue;
|
continue;
|
||||||
|
|
||||||
zvol_remove(zv);
|
zvol_remove(zv);
|
||||||
zvol_free(zv);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/* clear this so zvol_open won't open it */
|
||||||
|
zv->zv_disk->private_data = NULL;
|
||||||
|
|
||||||
|
/* try parallel zvol_free; if dispatch fails, do it in place */
|
||||||
|
t = taskq_dispatch(system_taskq, zvol_free_impl, zv,
|
||||||
|
TQ_SLEEP);
|
||||||
|
if (t == TASKQID_INVALID)
|
||||||
|
zvol_free(zv);
|
||||||
|
else
|
||||||
|
tid = t;
|
||||||
|
}
|
||||||
|
}
|
||||||
mutex_exit(&zvol_state_lock);
|
mutex_exit(&zvol_state_lock);
|
||||||
|
if (tid != TASKQID_INVALID)
|
||||||
|
taskq_wait_outstanding(system_taskq, tid);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remove minor for this specific snapshot only */
|
/* Remove minor for this specific snapshot only */
|
||||||
@ -1933,16 +2039,25 @@ zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
|
|||||||
int
|
int
|
||||||
zvol_init(void)
|
zvol_init(void)
|
||||||
{
|
{
|
||||||
int error;
|
int i, error;
|
||||||
|
|
||||||
list_create(&zvol_state_list, sizeof (zvol_state_t),
|
list_create(&zvol_state_list, sizeof (zvol_state_t),
|
||||||
offsetof(zvol_state_t, zv_next));
|
offsetof(zvol_state_t, zv_next));
|
||||||
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
|
||||||
|
zvol_htable = kmem_alloc(ZVOL_HT_SIZE * sizeof (struct hlist_head),
|
||||||
|
KM_SLEEP);
|
||||||
|
if (!zvol_htable) {
|
||||||
|
error = ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
for (i = 0; i < ZVOL_HT_SIZE; i++)
|
||||||
|
INIT_HLIST_HEAD(&zvol_htable[i]);
|
||||||
|
|
||||||
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
||||||
if (error) {
|
if (error) {
|
||||||
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
||||||
goto out;
|
goto out_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
|
blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
|
||||||
@ -1950,6 +2065,8 @@ zvol_init(void)
|
|||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
|
out_free:
|
||||||
|
kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
|
||||||
out:
|
out:
|
||||||
mutex_destroy(&zvol_state_lock);
|
mutex_destroy(&zvol_state_lock);
|
||||||
list_destroy(&zvol_state_list);
|
list_destroy(&zvol_state_list);
|
||||||
@ -1964,6 +2081,7 @@ zvol_fini(void)
|
|||||||
|
|
||||||
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
|
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
|
||||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||||
|
kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
|
||||||
|
|
||||||
list_destroy(&zvol_state_list);
|
list_destroy(&zvol_state_list);
|
||||||
mutex_destroy(&zvol_state_lock);
|
mutex_destroy(&zvol_state_lock);
|
||||||
|
Loading…
Reference in New Issue
Block a user