Files
mirror_zfs/module/zfs/vdev_file.c
T

374 lines
9.1 KiB
C
Raw Normal View History

2025-01-04 11:04:27 +11:00
// SPDX-License-Identifier: CDDL-1.0
2008-11-20 12:01:55 -08:00
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
2022-07-11 23:16:13 +02:00
* or https://opensource.org/licenses/CDDL-1.0.
2008-11-20 12:01:55 -08:00
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2020-09-18 14:13:47 -05:00
* Copyright (c) 2011, 2020 by Delphix. All rights reserved.
* Copyright (c) 2025, Klara, Inc.
2008-11-20 12:01:55 -08:00
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_file.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
#include <sys/abd.h>
#include <sys/stat.h>
2008-11-20 12:01:55 -08:00
/*
* Virtual device vector for files.
*/
2016-12-21 10:47:15 -08:00
/*
 * Task queue for file vdev I/O. It is created in vdev_file_init() and
 * destroyed in vdev_file_fini(); vdev_file_io_start() dispatches
 * read/write, flush and TRIM work to it.
 */
static taskq_t *vdev_file_taskq;
2020-09-18 14:13:47 -05:00
/*
 * Logical and physical ashift reported for file vdevs.
 *
 * Both default to SPA_MINBLOCKSHIFT (9), which allows all file vdevs to
 * use 512B (1 << 9) block sizes. Users may change one or both for
 * testing or performance reasons. Care should be taken, as these values
 * feed the vdev_ashift setting, which can only be set at vdev creation
 * time.
 *
 * vdev_file_open() copies these values into its logical_ashift and
 * physical_ashift out-parameters, and both are registered as module
 * parameters at the end of this file.
 */
static uint_t vdev_file_logical_ashift = SPA_MINBLOCKSHIFT;
static uint_t vdev_file_physical_ashift = SPA_MINBLOCKSHIFT;
2020-09-18 14:13:47 -05:00
/*
 * Create the task queue ("z_vdev_file") that file vdev I/O is dispatched
 * to. The thread-count argument is the larger of boot_ncpus and 16.
 * Failure to create the queue is fatal (VERIFY).
 */
void
vdev_file_init(void)
{
	const int nthreads = MAX(boot_ncpus, 16);

	vdev_file_taskq = taskq_create("z_vdev_file", nthreads, minclsyspri,
	    boot_ncpus, INT_MAX, TASKQ_DYNAMIC);

	VERIFY(vdev_file_taskq);
}
/*
 * Tear down the task queue created by vdev_file_init().
 */
void
vdev_file_fini(void)
{
	taskq_destroy(vdev_file_taskq);
}
/*
 * vdev_op_hold hook for file vdevs. Takes no reference of its own; it
 * only asserts that the vdev has a path.
 */
static void
vdev_file_hold(vdev_t *vd)
{
	ASSERT3P(vd->vdev_path, !=, NULL);
}
/*
 * vdev_op_rele hook for file vdevs. Releases nothing; it only asserts
 * that the vdev has a path.
 */
static void
vdev_file_rele(vdev_t *vd)
{
	ASSERT3P(vd->vdev_path, !=, NULL);
}
2019-11-21 09:32:57 -08:00
/*
 * Translate the pool's SPA open mode into open(2)-style flags.
 *
 * SPA_MODE_READ and SPA_MODE_WRITE together yield O_RDWR; either one
 * alone yields O_RDONLY or O_WRONLY respectively. If neither bit is
 * set, no access flag is added. O_LARGEFILE is always OR-ed into the
 * result.
 */
static mode_t
vdev_file_open_mode(spa_mode_t spa_mode)
{
	const int want_read = (spa_mode & SPA_MODE_READ) != 0;
	const int want_write = (spa_mode & SPA_MODE_WRITE) != 0;
	mode_t access = 0;

	if (want_read && want_write)
		access = O_RDWR;
	else if (want_read)
		access = O_RDONLY;
	else if (want_write)
		access = O_WRONLY;

	return (access | O_LARGEFILE);
}
2008-11-20 12:01:55 -08:00
/*
 * Open (or, during a reopen, re-stat) the file backing this vdev.
 *
 *	vd		vdev to open; vd->vdev_path must be an absolute path
 *	psize		out: size reported by zfs_file_getattr()
 *	max_psize	out: set to the same value as *psize
 *	logical_ashift	out: vdev_file_logical_ashift
 *	physical_ashift	out: vdev_file_physical_ashift
 *
 * Returns 0 on success. On failure returns EINVAL for a missing or
 * relative path, ENODEV (kernel builds only) if the path cannot be
 * stat'ed or is not a regular file, or the error returned by
 * zfs_file_open() / zfs_file_getattr(). Every failure path records the
 * reason in vd->vdev_stat.vs_aux.
 *
 * NOTE(review): failures after the allocation below leave vd->vdev_tsd
 * (and possibly the open file) attached to the vdev. Presumably the
 * caller invokes vdev_file_close() after a failed open to release them;
 * confirm against the generic vdev open path.
 */
static int
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *logical_ashift, uint64_t *physical_ashift)
{
	vdev_file_t *vf;
	zfs_file_t *fp;
	zfs_file_attr_t zfa;
	int error;

	/*
	 * Rotational optimizations only make sense on block devices.
	 */
	vd->vdev_nonrot = B_TRUE;

	/*
	 * Allow TRIM on file based vdevs. This may not always be supported,
	 * since it depends on your kernel version and underlying filesystem
	 * type but it is always safe to attempt.
	 */
	vd->vdev_has_trim = B_TRUE;

	/*
	 * Disable secure TRIM on file based vdevs. There is no way to
	 * request this behavior from the underlying filesystem.
	 */
	vd->vdev_has_securetrim = B_FALSE;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if (vd->vdev_tsd != NULL) {
		ASSERT(vd->vdev_reopening);
		vf = vd->vdev_tsd;
		goto skip_open;
	}

	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);

	/*
	 * We always open the files from the root of the global zone, even if
	 * we're in a local zone. If the user has gotten to this point, the
	 * administrator has already decided that the pool should be available
	 * to local zone users, so the underlying devices should be as well.
	 */
	ASSERT3P(vd->vdev_path, !=, NULL);
	ASSERT3S(vd->vdev_path[0], ==, '/');

	error = zfs_file_open(vd->vdev_path,
	    vdev_file_open_mode(spa_mode(vd->vdev_spa)), 0, &fp);
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	vf->vf_file = fp;

#ifdef _KERNEL
	/*
	 * Make sure it's a regular file.
	 */
	if (zfs_file_getattr(fp, &zfa)) {
		/*
		 * Record the failure reason here as well, so this path
		 * reports vs_aux like every other failure in this function.
		 */
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (SET_ERROR(ENODEV));
	}
	if (!S_ISREG(zfa.zfa_mode)) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (SET_ERROR(ENODEV));
	}
#endif

skip_open:
	/*
	 * Both a fresh open and a reopen end up here to (re)read the
	 * current size of the backing file.
	 */
	error = zfs_file_getattr(vf->vf_file, &zfa);
	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	*max_psize = *psize = zfa.zfa_size;
	*logical_ashift = vdev_file_logical_ashift;
	*physical_ashift = vdev_file_physical_ashift;

	return (0);
}
/*
 * Close the backing file and free the per-vdev file state.
 *
 * Does nothing while the vdev is being reopened (the existing state is
 * kept so vdev_file_open() can reuse it) or when there is no state
 * attached to the vdev.
 */
static void
vdev_file_close(vdev_t *vd)
{
	vdev_file_t *state = vd->vdev_tsd;

	if (state == NULL || vd->vdev_reopening)
		return;

	/* The open may have failed before a file handle was stored. */
	if (state->vf_file != NULL)
		(void) zfs_file_close(state->vf_file);

	vd->vdev_delayed_close = B_FALSE;

	kmem_free(state, sizeof (vdev_file_t));
	vd->vdev_tsd = NULL;
}
2013-05-02 16:36:32 -07:00
static void
vdev_file_io_strategy(void *arg)
2008-11-20 12:01:55 -08:00
{
2013-05-02 16:36:32 -07:00
zio_t *zio = (zio_t *)arg;
2008-11-20 12:01:55 -08:00
vdev_t *vd = zio->io_vd;
2013-05-02 16:36:32 -07:00
vdev_file_t *vf = vd->vdev_tsd;
void *buf;
ssize_t resid;
2019-11-21 09:32:57 -08:00
loff_t off;
ssize_t size;
int err;
off = zio->io_offset;
size = zio->io_size;
resid = 0;
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
2019-11-21 09:32:57 -08:00
if (zio->io_type == ZIO_TYPE_READ) {
buf = abd_borrow_buf(zio->io_abd, zio->io_size);
2019-11-21 09:32:57 -08:00
err = zfs_file_pread(vf->vf_file, buf, size, off, &resid);
abd_return_buf_copy(zio->io_abd, buf, size);
} else {
buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
err = zfs_file_pwrite(vf->vf_file, buf, size, off,
vd->vdev_ashift, &resid);
2019-11-21 09:32:57 -08:00
abd_return_buf(zio->io_abd, buf, size);
}
zio->io_error = err;
2013-05-02 16:36:32 -07:00
if (resid != 0 && zio->io_error == 0)
2013-03-08 10:41:28 -08:00
zio->io_error = SET_ERROR(ENOSPC);
zio_delay_interrupt(zio);
2013-05-02 16:36:32 -07:00
}
2014-07-13 14:35:19 -04:00
static void
vdev_file_io_fsync(void *arg)
{
zio_t *zio = (zio_t *)arg;
vdev_file_t *vf = zio->io_vd->vdev_tsd;
2019-11-21 09:32:57 -08:00
zio->io_error = zfs_file_fsync(vf->vf_file, O_SYNC | O_DSYNC);
2014-07-13 14:35:19 -04:00
zio_interrupt(zio);
}
2025-02-23 06:16:54 +11:00
static void
vdev_file_io_deallocate(void *arg)
{
zio_t *zio = (zio_t *)arg;
vdev_file_t *vf = zio->io_vd->vdev_tsd;
zio->io_error = zfs_file_deallocate(vf->vf_file,
zio->io_offset, zio->io_size);
zio_interrupt(zio);
}
static void
2013-05-02 16:36:32 -07:00
vdev_file_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
2008-11-20 12:01:55 -08:00
if (zio->io_type == ZIO_TYPE_FLUSH) {
2013-05-02 16:36:32 -07:00
/* XXPOLICY */
if (!vdev_readable(vd)) {
2013-03-08 10:41:28 -08:00
zio->io_error = SET_ERROR(ENXIO);
zio_interrupt(zio);
return;
2013-05-02 16:36:32 -07:00
}
2024-04-04 22:34:54 +11:00
if (zfs_nocacheflush) {
2025-02-23 06:16:54 +11:00
zio_interrupt(zio);
2024-04-04 22:34:54 +11:00
return;
2008-11-20 12:01:55 -08:00
}
2025-02-23 06:16:54 +11:00
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_fsync, zio, TQ_SLEEP), !=, TASKQID_INVALID);
2024-04-04 22:34:54 +11:00
2019-03-29 09:13:20 -07:00
return;
2025-02-23 06:16:54 +11:00
}
if (zio->io_type == ZIO_TYPE_TRIM) {
2019-03-29 09:13:20 -07:00
ASSERT3U(zio->io_size, !=, 0);
2025-02-23 06:16:54 +11:00
VERIFY3U(taskq_dispatch(vdev_file_taskq,
vdev_file_io_deallocate, zio, TQ_SLEEP), !=,
TASKQID_INVALID);
return;
2008-11-20 12:01:55 -08:00
}
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
zio->io_target_timestamp = zio_handle_io_delay(zio);
2016-12-21 10:47:15 -08:00
VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
2016-10-28 22:40:14 +00:00
TQ_SLEEP), !=, TASKQID_INVALID);
2008-11-20 12:01:55 -08:00
}
static void
2008-11-20 12:01:55 -08:00
vdev_file_io_done(zio_t *zio)
{
2021-12-12 16:06:44 +01:00
(void) zio;
2008-11-20 12:01:55 -08:00
}
vdev_ops_t vdev_file_ops = {
2020-11-13 13:51:51 -08:00
.vdev_op_init = NULL,
.vdev_op_fini = NULL,
2019-06-21 04:29:02 +03:00
.vdev_op_open = vdev_file_open,
.vdev_op_close = vdev_file_close,
.vdev_op_psize_to_asize = vdev_default_asize,
.vdev_op_asize_to_psize = vdev_default_psize,
2020-11-13 13:51:51 -08:00
.vdev_op_min_asize = vdev_default_min_asize,
.vdev_op_min_alloc = NULL,
2019-06-21 04:29:02 +03:00
.vdev_op_io_start = vdev_file_io_start,
.vdev_op_io_done = vdev_file_io_done,
.vdev_op_state_change = NULL,
.vdev_op_need_resilver = NULL,
.vdev_op_hold = vdev_file_hold,
.vdev_op_rele = vdev_file_rele,
.vdev_op_remap = NULL,
.vdev_op_xlate = vdev_default_xlate,
2020-11-13 13:51:51 -08:00
.vdev_op_rebuild_asize = NULL,
.vdev_op_metaslab_init = NULL,
.vdev_op_config_generate = NULL,
.vdev_op_nparity = NULL,
.vdev_op_ndisks = NULL,
2019-06-21 04:29:02 +03:00
.vdev_op_type = VDEV_TYPE_FILE, /* name of this vdev type */
.vdev_op_leaf = B_TRUE /* leaf vdev */
2008-11-20 12:01:55 -08:00
};
/*
* From userland we access disks just like files.
*/
#ifndef _KERNEL
vdev_ops_t vdev_disk_ops = {
2020-11-13 13:51:51 -08:00
.vdev_op_init = NULL,
.vdev_op_fini = NULL,
2019-06-21 04:29:02 +03:00
.vdev_op_open = vdev_file_open,
.vdev_op_close = vdev_file_close,
.vdev_op_psize_to_asize = vdev_default_asize,
2020-11-13 13:51:51 -08:00
.vdev_op_min_asize = vdev_default_min_asize,
.vdev_op_min_alloc = NULL,
2019-06-21 04:29:02 +03:00
.vdev_op_io_start = vdev_file_io_start,
.vdev_op_io_done = vdev_file_io_done,
.vdev_op_state_change = NULL,
.vdev_op_need_resilver = NULL,
.vdev_op_hold = vdev_file_hold,
.vdev_op_rele = vdev_file_rele,
.vdev_op_remap = NULL,
.vdev_op_xlate = vdev_default_xlate,
2020-11-13 13:51:51 -08:00
.vdev_op_rebuild_asize = NULL,
.vdev_op_metaslab_init = NULL,
.vdev_op_config_generate = NULL,
.vdev_op_nparity = NULL,
.vdev_op_ndisks = NULL,
2019-06-21 04:29:02 +03:00
.vdev_op_type = VDEV_TYPE_DISK, /* name of this vdev type */
.vdev_op_leaf = B_TRUE /* leaf vdev */
2008-11-20 12:01:55 -08:00
};
#endif
2020-09-18 14:13:47 -05:00
ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, logical_ashift, UINT, ZMOD_RW,
2020-09-18 14:13:47 -05:00
"Logical ashift for file-based devices");
ZFS_MODULE_PARAM(zfs_vdev_file, vdev_file_, physical_ashift, UINT, ZMOD_RW,
2020-09-18 14:13:47 -05:00
"Physical ashift for file-based devices");