mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-04-13 15:11:45 +03:00
421 lines
8.7 KiB
C
421 lines
8.7 KiB
C
|
|
// SPDX-License-Identifier: CDDL-1.0
|
||
|
|
/*
|
||
|
|
* CDDL HEADER START
|
||
|
|
*
|
||
|
|
* The contents of this file are subject to the terms of the
|
||
|
|
* Common Development and Distribution License (the "License").
|
||
|
|
* You may not use this file except in compliance with the License.
|
||
|
|
*
|
||
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||
|
|
* or https://opensource.org/licenses/CDDL-1.0.
|
||
|
|
* See the License for the specific language governing permissions
|
||
|
|
* and limitations under the License.
|
||
|
|
*
|
||
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
||
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||
|
|
*
|
||
|
|
* CDDL HEADER END
|
||
|
|
*/
|
||
|
|
/*
|
||
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||
|
|
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||
|
|
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
|
||
|
|
* Copyright (c) 2025, Klara, Inc.
|
||
|
|
*/
|
||
|
|
|
||
|
|
#include <sys/zfs_context.h>
|
||
|
|
#include <sys/zfs_file.h>
|
||
|
|
#include <libzpool.h>
|
||
|
|
#include <libzutil.h>
|
||
|
|
|
||
|
|
/* If set, all blocks read will be copied to the specified directory. */
|
||
|
|
char *vn_dumpdir = NULL;
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Open file
|
||
|
|
*
|
||
|
|
* path - fully qualified path to file
|
||
|
|
* flags - file attributes O_READ / O_WRITE / O_EXCL
|
||
|
|
* fpp - pointer to return file pointer
|
||
|
|
*
|
||
|
|
* Returns 0 on success underlying error on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
|
||
|
|
{
|
||
|
|
int fd;
|
||
|
|
int dump_fd;
|
||
|
|
int err;
|
||
|
|
int old_umask = 0;
|
||
|
|
zfs_file_t *fp;
|
||
|
|
struct stat64 st;
|
||
|
|
|
||
|
|
if (!(flags & O_CREAT) && stat64(path, &st) == -1)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
|
||
|
|
flags |= O_DIRECT;
|
||
|
|
|
||
|
|
if (flags & O_CREAT)
|
||
|
|
old_umask = umask(0);
|
||
|
|
|
||
|
|
fd = open64(path, flags, mode);
|
||
|
|
if (fd == -1)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
if (flags & O_CREAT)
|
||
|
|
(void) umask(old_umask);
|
||
|
|
|
||
|
|
if (vn_dumpdir != NULL) {
|
||
|
|
char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
|
||
|
|
const char *inpath = zfs_basename(path);
|
||
|
|
|
||
|
|
(void) snprintf(dumppath, MAXPATHLEN,
|
||
|
|
"%s/%s", vn_dumpdir, inpath);
|
||
|
|
dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
|
||
|
|
umem_free(dumppath, MAXPATHLEN);
|
||
|
|
if (dump_fd == -1) {
|
||
|
|
err = errno;
|
||
|
|
close(fd);
|
||
|
|
return (err);
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
dump_fd = -1;
|
||
|
|
}
|
||
|
|
|
||
|
|
(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
|
||
|
|
|
||
|
|
fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
|
||
|
|
fp->f_fd = fd;
|
||
|
|
fp->f_dump_fd = dump_fd;
|
||
|
|
*fpp = fp;
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
void
|
||
|
|
zfs_file_close(zfs_file_t *fp)
|
||
|
|
{
|
||
|
|
close(fp->f_fd);
|
||
|
|
if (fp->f_dump_fd != -1)
|
||
|
|
close(fp->f_dump_fd);
|
||
|
|
|
||
|
|
umem_free(fp, sizeof (zfs_file_t));
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Stateful write - use os internal file pointer to determine where to
|
||
|
|
* write and update on successful completion.
|
||
|
|
*
|
||
|
|
* fp - pointer to file (pipe, socket, etc) to write to
|
||
|
|
* buf - buffer to write
|
||
|
|
* count - # of bytes to write
|
||
|
|
* resid - pointer to count of unwritten bytes (if short write)
|
||
|
|
*
|
||
|
|
* Returns 0 on success errno on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
|
||
|
|
{
|
||
|
|
ssize_t rc;
|
||
|
|
|
||
|
|
rc = write(fp->f_fd, buf, count);
|
||
|
|
if (rc < 0)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
if (resid) {
|
||
|
|
*resid = count - rc;
|
||
|
|
} else if (rc != count) {
|
||
|
|
return (EIO);
|
||
|
|
}
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Stateless write - os internal file pointer is not updated.
|
||
|
|
*
|
||
|
|
* fp - pointer to file (pipe, socket, etc) to write to
|
||
|
|
* buf - buffer to write
|
||
|
|
* count - # of bytes to write
|
||
|
|
* off - file offset to write to (only valid for seekable types)
|
||
|
|
* resid - pointer to count of unwritten bytes
|
||
|
|
*
|
||
|
|
* Returns 0 on success errno on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_pwrite(zfs_file_t *fp, const void *buf,
|
||
|
|
size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)
|
||
|
|
{
|
||
|
|
ssize_t rc, split, done;
|
||
|
|
int sectors;
|
||
|
|
|
||
|
|
/*
|
||
|
|
* To simulate partial disk writes, we split writes into two
|
||
|
|
* system calls so that the process can be killed in between.
|
||
|
|
* This is used by ztest to simulate realistic failure modes.
|
||
|
|
*/
|
||
|
|
sectors = count >> ashift;
|
||
|
|
split = (sectors > 0 ? rand() % sectors : 0) << ashift;
|
||
|
|
rc = pwrite64(fp->f_fd, buf, split, pos);
|
||
|
|
if (rc != -1) {
|
||
|
|
done = rc;
|
||
|
|
rc = pwrite64(fp->f_fd, (char *)buf + split,
|
||
|
|
count - split, pos + split);
|
||
|
|
}
|
||
|
|
#ifdef __linux__
|
||
|
|
if (rc == -1 && errno == EINVAL) {
|
||
|
|
/*
|
||
|
|
* Under Linux, this most likely means an alignment issue
|
||
|
|
* (memory or disk) due to O_DIRECT, so we abort() in order
|
||
|
|
* to catch the offender.
|
||
|
|
*/
|
||
|
|
abort();
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
if (rc < 0)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
done += rc;
|
||
|
|
|
||
|
|
if (resid) {
|
||
|
|
*resid = count - done;
|
||
|
|
} else if (done != count) {
|
||
|
|
return (EIO);
|
||
|
|
}
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Stateful read - use os internal file pointer to determine where to
|
||
|
|
* read and update on successful completion.
|
||
|
|
*
|
||
|
|
* fp - pointer to file (pipe, socket, etc) to read from
|
||
|
|
* buf - buffer to write
|
||
|
|
* count - # of bytes to read
|
||
|
|
* resid - pointer to count of unread bytes (if short read)
|
||
|
|
*
|
||
|
|
* Returns 0 on success errno on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
|
||
|
|
{
|
||
|
|
int rc;
|
||
|
|
|
||
|
|
rc = read(fp->f_fd, buf, count);
|
||
|
|
if (rc < 0)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
if (resid) {
|
||
|
|
*resid = count - rc;
|
||
|
|
} else if (rc != count) {
|
||
|
|
return (EIO);
|
||
|
|
}
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Stateless read - os internal file pointer is not updated.
|
||
|
|
*
|
||
|
|
* fp - pointer to file (pipe, socket, etc) to read from
|
||
|
|
* buf - buffer to write
|
||
|
|
* count - # of bytes to write
|
||
|
|
* off - file offset to read from (only valid for seekable types)
|
||
|
|
* resid - pointer to count of unwritten bytes (if short write)
|
||
|
|
*
|
||
|
|
* Returns 0 on success errno on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
|
||
|
|
ssize_t *resid)
|
||
|
|
{
|
||
|
|
ssize_t rc;
|
||
|
|
|
||
|
|
rc = pread64(fp->f_fd, buf, count, off);
|
||
|
|
if (rc < 0) {
|
||
|
|
#ifdef __linux__
|
||
|
|
/*
|
||
|
|
* Under Linux, this most likely means an alignment issue
|
||
|
|
* (memory or disk) due to O_DIRECT, so we abort() in order to
|
||
|
|
* catch the offender.
|
||
|
|
*/
|
||
|
|
if (errno == EINVAL)
|
||
|
|
abort();
|
||
|
|
#endif
|
||
|
|
return (errno);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (fp->f_dump_fd != -1) {
|
||
|
|
int status;
|
||
|
|
|
||
|
|
status = pwrite64(fp->f_dump_fd, buf, rc, off);
|
||
|
|
ASSERT(status != -1);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (resid) {
|
||
|
|
*resid = count - rc;
|
||
|
|
} else if (rc != count) {
|
||
|
|
return (EIO);
|
||
|
|
}
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* lseek - set / get file pointer
|
||
|
|
*
|
||
|
|
* fp - pointer to file (pipe, socket, etc) to read from
|
||
|
|
* offp - value to seek to, returns current value plus passed offset
|
||
|
|
* whence - see man pages for standard lseek whence values
|
||
|
|
*
|
||
|
|
* Returns 0 on success errno on failure (ESPIPE for non seekable types)
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
|
||
|
|
{
|
||
|
|
loff_t rc;
|
||
|
|
|
||
|
|
rc = lseek(fp->f_fd, *offp, whence);
|
||
|
|
if (rc < 0)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
*offp = rc;
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Get file attributes
|
||
|
|
*
|
||
|
|
* filp - file pointer
|
||
|
|
* zfattr - pointer to file attr structure
|
||
|
|
*
|
||
|
|
* Currently only used for fetching size and file mode
|
||
|
|
*
|
||
|
|
* Returns 0 on success or error code of underlying getattr call on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
|
||
|
|
{
|
||
|
|
struct stat64 st;
|
||
|
|
|
||
|
|
if (fstat64_blk(fp->f_fd, &st) == -1)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
zfattr->zfa_size = st.st_size;
|
||
|
|
zfattr->zfa_mode = st.st_mode;
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Sync file to disk
|
||
|
|
*
|
||
|
|
* filp - file pointer
|
||
|
|
* flags - O_SYNC and or O_DSYNC
|
||
|
|
*
|
||
|
|
* Returns 0 on success or error code of underlying sync call on failure.
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_fsync(zfs_file_t *fp, int flags)
|
||
|
|
{
|
||
|
|
(void) flags;
|
||
|
|
|
||
|
|
if (fsync(fp->f_fd) < 0)
|
||
|
|
return (errno);
|
||
|
|
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* deallocate - zero and/or deallocate file storage
|
||
|
|
*
|
||
|
|
* fp - file pointer
|
||
|
|
* offset - offset to start zeroing or deallocating
|
||
|
|
* len - length to zero or deallocate
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len)
|
||
|
|
{
|
||
|
|
int rc;
|
||
|
|
#if defined(__linux__)
|
||
|
|
rc = fallocate(fp->f_fd,
|
||
|
|
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len);
|
||
|
|
#elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029)
|
||
|
|
struct spacectl_range rqsr = {
|
||
|
|
.r_offset = offset,
|
||
|
|
.r_len = len,
|
||
|
|
};
|
||
|
|
rc = fspacectl(fp->f_fd, SPACECTL_DEALLOC, &rqsr, 0, &rqsr);
|
||
|
|
#else
|
||
|
|
(void) fp, (void) offset, (void) len;
|
||
|
|
rc = EOPNOTSUPP;
|
||
|
|
#endif
|
||
|
|
if (rc)
|
||
|
|
return (SET_ERROR(rc));
|
||
|
|
return (0);
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Request current file pointer offset
|
||
|
|
*
|
||
|
|
* fp - pointer to file
|
||
|
|
*
|
||
|
|
* Returns current file offset.
|
||
|
|
*/
|
||
|
|
loff_t
|
||
|
|
zfs_file_off(zfs_file_t *fp)
|
||
|
|
{
|
||
|
|
return (lseek(fp->f_fd, SEEK_CUR, 0));
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* unlink file
|
||
|
|
*
|
||
|
|
* path - fully qualified file path
|
||
|
|
*
|
||
|
|
* Returns 0 on success.
|
||
|
|
*
|
||
|
|
* OPTIONAL
|
||
|
|
*/
|
||
|
|
int
|
||
|
|
zfs_file_unlink(const char *path)
|
||
|
|
{
|
||
|
|
return (remove(path));
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Get reference to file pointer
|
||
|
|
*
|
||
|
|
* fd - input file descriptor
|
||
|
|
*
|
||
|
|
* Returns pointer to file struct or NULL.
|
||
|
|
* Unsupported in user space.
|
||
|
|
*/
|
||
|
|
zfs_file_t *
|
||
|
|
zfs_file_get(int fd)
|
||
|
|
{
|
||
|
|
(void) fd;
|
||
|
|
abort();
|
||
|
|
return (NULL);
|
||
|
|
}
|
||
|
|
/*
|
||
|
|
* Drop reference to file pointer
|
||
|
|
*
|
||
|
|
* fp - pointer to file struct
|
||
|
|
*
|
||
|
|
* Unsupported in user space.
|
||
|
|
*/
|
||
|
|
void
|
||
|
|
zfs_file_put(zfs_file_t *fp)
|
||
|
|
{
|
||
|
|
abort();
|
||
|
|
(void) fp;
|
||
|
|
}
|