mirror_zfs/include/sys/zvol_impl.h
Rob Norris 96f9d271ea zvol: remove the OS-side minor before freeing the zvol
When destroying a zvol, it is not "unpublished" from the system (that
is, /dev/zd* node removed) until zvol_os_free(). Under Linux, at the
time del_gendisk() and put_disk() are called, the device node may still
be have an active hold, from a userspace program or something inside the
kernel (a partition probe). As it is currently, this can lead to calls
to zvol_open() or zvol_release() while the zvol_state_t is partially or
fully freed. zvol_open() has some protection against this by checking
that private_data is NULL, but zvol_release does not.

This implements a better ordering for all of this by adding a new
OS-side method, zvol_os_remove_minor(), which is responsible for fully
decoupling the "private" (OS-side) objects from the zvol_state_t. For
Linux, that means calling put_disk(), nulling private_data, and freeing
zv_zso.

This takes the place of zvol_os_clear_private(), which was a nod in that
direction but did not do enough, and did not do it early enough.

Equivalent changes are made on the FreeBSD side to follow the API
change.

Sponsored-by: Klara, Inc.
Sponsored-by: Railway Corporation
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Fedor Uporov <fuporov.vstack@gmail.com>
Signed-off-by: Rob Norris <rob.norris@klarasystems.com>
Closes #17625
2025-08-19 10:06:21 -07:00

143 lines
4.8 KiB
C

// SPDX-License-Identifier: CDDL-1.0
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2024, 2025, Klara, Inc.
*/
#ifndef _SYS_ZVOL_IMPL_H
#define _SYS_ZVOL_IMPL_H
#include <sys/zfs_context.h>
#define ZVOL_RDONLY (1<<0) /* zvol is readonly (writes rejected) */
#define ZVOL_WRITTEN_TO (1<<1) /* zvol has been written to (needs flush) */
#define ZVOL_EXCL (1<<2) /* zvol has O_EXCL client right now */
#define ZVOL_REMOVING (1<<3) /* zvol waiting to remove minor */
/*
* The in-core state of each volume.
*/
typedef struct zvol_state {
char zv_name[MAXNAMELEN]; /* name */
uint64_t zv_volsize; /* advertised space */
uint64_t zv_volblocksize; /* volume block size */
objset_t *zv_objset; /* objset handle */
uint32_t zv_flags; /* ZVOL_* flags */
uint32_t zv_open_count; /* open counts */
uint32_t zv_changed; /* disk changed */
uint32_t zv_volmode; /* volmode */
zilog_t *zv_zilog; /* ZIL handle */
zfs_rangelock_t zv_rangelock; /* for range locking */
dnode_t *zv_dn; /* dnode hold */
dataset_kstats_t zv_kstat; /* zvol kstats */
list_node_t zv_next; /* next zvol_state_t linkage */
uint64_t zv_hash; /* name hash */
struct hlist_node zv_hlink; /* hash link */
kmutex_t zv_state_lock; /* protects zvol_state_t */
atomic_t zv_suspend_ref; /* refcount for suspend */
krwlock_t zv_suspend_lock; /* suspend lock */
kcondvar_t zv_removing_cv; /* ready to remove minor */
struct zvol_state_os *zv_zso; /* private platform state */
boolean_t zv_threading; /* volthreading property */
} zvol_state_t;
/*
* zvol taskqs
*/
typedef struct zv_taskq {
uint_t tqs_cnt;
taskq_t **tqs_taskq;
} zv_taskq_t;
typedef struct zv_request_stack {
zvol_state_t *zv;
struct bio *bio;
#ifdef __linux__
struct request *rq;
#endif
} zv_request_t;
typedef struct zv_request_task {
zv_request_t zvr;
taskq_ent_t ent;
} zv_request_task_t;
/*
* Switch taskq at multiple of 512 MB offset. This can be set to a lower value
* to utilize more threads for small files but may affect prefetch hits.
*/
#define ZVOL_TASKQ_OFFSET_SHIFT 29
extern krwlock_t zvol_state_lock;
#define ZVOL_HT_SIZE 1024
extern struct hlist_head *zvol_htable;
#define ZVOL_HT_HEAD(hash) (&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
extern zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE];
extern unsigned int zvol_inhibit_dev;
extern unsigned int zvol_prefetch_bytes;
extern unsigned int zvol_volmode;
extern unsigned int zvol_threads;
extern unsigned int zvol_num_taskqs;
extern unsigned int zvol_request_sync;
extern zv_taskq_t zvol_taskqs;
/*
* platform independent functions exported to platform code
*/
zvol_state_t *zvol_find_by_name_hash(const char *name,
uint64_t hash, int mode);
int zvol_first_open(zvol_state_t *zv, boolean_t readonly);
uint64_t zvol_name_hash(const char *name);
void zvol_last_close(zvol_state_t *zv);
void zvol_insert(zvol_state_t *zv);
void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
uint64_t len);
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, boolean_t commit);
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio);
int zvol_init_impl(void);
void zvol_fini_impl(void);
void zvol_wait_close(zvol_state_t *zv);
int zvol_clone_range(zvol_state_handle_t *, uint64_t,
zvol_state_handle_t *, uint64_t, uint64_t);
void zvol_log_clone_range(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uint64_t off, uint64_t len, uint64_t blksz, const blkptr_t *bps,
size_t nbps);
zv_request_task_t *zv_request_task_create(zv_request_t zvr);
void zv_request_task_free(zv_request_task_t *task);
/*
* platform dependent functions exported to platform independent code
*/
void zvol_os_free(zvol_state_t *zv);
int zvol_os_rename_minor(zvol_state_t *zv, const char *newname);
int zvol_os_create_minor(const char *name);
int zvol_os_update_volsize(zvol_state_t *zv, uint64_t volsize);
boolean_t zvol_os_is_zvol(const char *path);
void zvol_os_remove_minor(zvol_state_t *zv);
void zvol_os_set_disk_ro(zvol_state_t *zv, int flags);
void zvol_os_set_capacity(zvol_state_t *zv, uint64_t capacity);
#endif