mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 19:04:45 +03:00
Compare commits
54 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 494aaaed89 | |||
| 522414da3b | |||
| a8c256046b | |||
| eb34de04d7 | |||
| d813aa8530 | |||
| 3b267e72de | |||
| 349fb77f11 | |||
| 2a953e0ac9 | |||
| e4985bf5a1 | |||
| e96675a7b1 | |||
| d702f86eaf | |||
| 41c4599cba | |||
| 56a2a0981e | |||
| 9b9b09f452 | |||
| 89fcb8c6f9 | |||
| 55dd24c4cc | |||
| 78287023ce | |||
| 479dca51c6 | |||
| 87e9e82865 | |||
| 0733fe2aa5 | |||
| fd836dfe24 | |||
| e92a680c70 | |||
| f1659cc782 | |||
| f863ac3d0f | |||
| f6d2e5c075 | |||
| f2fe4d51a8 | |||
| 76663fe372 | |||
| 44c8ff9b0c | |||
| f0ffcc3adc | |||
| e534ba5ce7 | |||
| 1c7048357d | |||
| 3ec4ea68d4 | |||
| bd7a02c251 | |||
| e82e68400a | |||
| 3f67e012e4 | |||
| 21875dd090 | |||
| fe9d409e90 | |||
| 7aef672b77 | |||
| f9a9aea126 | |||
| 8ba748d414 | |||
| e860cb0200 | |||
| 86c3ed40e1 | |||
| 6e41aca519 | |||
| 79f7de5752 | |||
| 0ef1964c79 | |||
| eaa62d9951 | |||
| 8ca95d78c5 | |||
| edebca5dfc | |||
| 1cc1bf4fa7 | |||
| 0bcd1151f0 | |||
| 78fd79eacd | |||
| 6d693e20a2 | |||
| b76724ae47 | |||
| 459c99ff23 |
@@ -83,6 +83,7 @@
|
|||||||
modules.order
|
modules.order
|
||||||
Makefile
|
Makefile
|
||||||
Makefile.in
|
Makefile.in
|
||||||
|
changelog
|
||||||
*.patch
|
*.patch
|
||||||
*.orig
|
*.orig
|
||||||
*.tmp
|
*.tmp
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
Meta: 1
|
Meta: 1
|
||||||
Name: zfs
|
Name: zfs
|
||||||
Branch: 1.0
|
Branch: 1.0
|
||||||
Version: 2.2.0
|
Version: 2.2.2
|
||||||
Release: 1
|
Release: 1
|
||||||
Release-Tags: relext
|
Release-Tags: relext
|
||||||
License: CDDL
|
License: CDDL
|
||||||
Author: OpenZFS
|
Author: OpenZFS
|
||||||
Linux-Maximum: 6.5
|
Linux-Maximum: 6.6
|
||||||
Linux-Minimum: 3.10
|
Linux-Minimum: 3.10
|
||||||
|
|||||||
@@ -32,4 +32,4 @@ For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
|
|||||||
|
|
||||||
# Supported Kernels
|
# Supported Kernels
|
||||||
* The `META` file contains the officially recognized supported Linux kernel versions.
|
* The `META` file contains the officially recognized supported Linux kernel versions.
|
||||||
* Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.
|
* Supported FreeBSD versions are any supported branches and releases starting from 12.4-RELEASE.
|
||||||
|
|||||||
+1
-1
@@ -711,7 +711,7 @@ def section_archits(kstats_dict):
|
|||||||
pd_total = int(arc_stats['prefetch_data_hits']) +\
|
pd_total = int(arc_stats['prefetch_data_hits']) +\
|
||||||
int(arc_stats['prefetch_data_iohits']) +\
|
int(arc_stats['prefetch_data_iohits']) +\
|
||||||
int(arc_stats['prefetch_data_misses'])
|
int(arc_stats['prefetch_data_misses'])
|
||||||
prt_2('ARC prefetch metadata accesses:', f_perc(pd_total, all_accesses),
|
prt_2('ARC prefetch data accesses:', f_perc(pd_total, all_accesses),
|
||||||
f_hits(pd_total))
|
f_hits(pd_total))
|
||||||
pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']),
|
pd_todo = (('Prefetch data hits:', arc_stats['prefetch_data_hits']),
|
||||||
('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']),
|
('Prefetch data I/O hits:', arc_stats['prefetch_data_iohits']),
|
||||||
|
|||||||
+112
-17
@@ -34,6 +34,7 @@
|
|||||||
* Copyright (c) 2021 Allan Jude
|
* Copyright (c) 2021 Allan Jude
|
||||||
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
|
* Copyright (c) 2021 Toomas Soome <tsoome@me.com>
|
||||||
* Copyright (c) 2023, Klara Inc.
|
* Copyright (c) 2023, Klara Inc.
|
||||||
|
* Copyright (c) 2023, Rob Norris <robn@despairlabs.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@@ -80,6 +81,7 @@
|
|||||||
#include <sys/dsl_scan.h>
|
#include <sys/dsl_scan.h>
|
||||||
#include <sys/btree.h>
|
#include <sys/btree.h>
|
||||||
#include <sys/brt.h>
|
#include <sys/brt.h>
|
||||||
|
#include <sys/brt_impl.h>
|
||||||
#include <zfs_comutil.h>
|
#include <zfs_comutil.h>
|
||||||
#include <sys/zstd/zstd.h>
|
#include <sys/zstd/zstd.h>
|
||||||
|
|
||||||
@@ -899,6 +901,8 @@ usage(void)
|
|||||||
"don't print label contents\n");
|
"don't print label contents\n");
|
||||||
(void) fprintf(stderr, " -t --txg=INTEGER "
|
(void) fprintf(stderr, " -t --txg=INTEGER "
|
||||||
"highest txg to use when searching for uberblocks\n");
|
"highest txg to use when searching for uberblocks\n");
|
||||||
|
(void) fprintf(stderr, " -T --brt-stats "
|
||||||
|
"BRT statistics\n");
|
||||||
(void) fprintf(stderr, " -u --uberblock "
|
(void) fprintf(stderr, " -u --uberblock "
|
||||||
"uberblock\n");
|
"uberblock\n");
|
||||||
(void) fprintf(stderr, " -U --cachefile=PATH "
|
(void) fprintf(stderr, " -U --cachefile=PATH "
|
||||||
@@ -999,6 +1003,15 @@ zdb_nicenum(uint64_t num, char *buf, size_t buflen)
|
|||||||
nicenum(num, buf, buflen);
|
nicenum(num, buf, buflen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
zdb_nicebytes(uint64_t bytes, char *buf, size_t buflen)
|
||||||
|
{
|
||||||
|
if (dump_opt['P'])
|
||||||
|
(void) snprintf(buf, buflen, "%llu", (longlong_t)bytes);
|
||||||
|
else
|
||||||
|
zfs_nicebytes(bytes, buf, buflen);
|
||||||
|
}
|
||||||
|
|
||||||
static const char histo_stars[] = "****************************************";
|
static const char histo_stars[] = "****************************************";
|
||||||
static const uint64_t histo_width = sizeof (histo_stars) - 1;
|
static const uint64_t histo_width = sizeof (histo_stars) - 1;
|
||||||
|
|
||||||
@@ -2081,6 +2094,76 @@ dump_all_ddts(spa_t *spa)
|
|||||||
dump_dedup_ratio(&dds_total);
|
dump_dedup_ratio(&dds_total);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
dump_brt(spa_t *spa)
|
||||||
|
{
|
||||||
|
if (!spa_feature_is_enabled(spa, SPA_FEATURE_BLOCK_CLONING)) {
|
||||||
|
printf("BRT: unsupported on this pool\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!spa_feature_is_active(spa, SPA_FEATURE_BLOCK_CLONING)) {
|
||||||
|
printf("BRT: empty\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
brt_t *brt = spa->spa_brt;
|
||||||
|
VERIFY(brt);
|
||||||
|
|
||||||
|
char count[32], used[32], saved[32];
|
||||||
|
zdb_nicebytes(brt_get_used(spa), used, sizeof (used));
|
||||||
|
zdb_nicebytes(brt_get_saved(spa), saved, sizeof (saved));
|
||||||
|
uint64_t ratio = brt_get_ratio(spa);
|
||||||
|
printf("BRT: used %s; saved %s; ratio %llu.%02llux\n", used, saved,
|
||||||
|
(u_longlong_t)(ratio / 100), (u_longlong_t)(ratio % 100));
|
||||||
|
|
||||||
|
if (dump_opt['T'] < 2)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
|
||||||
|
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
|
||||||
|
if (brtvd == NULL)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!brtvd->bv_initiated) {
|
||||||
|
printf("BRT: vdev %" PRIu64 ": empty\n", vdevid);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
zdb_nicenum(brtvd->bv_totalcount, count, sizeof (count));
|
||||||
|
zdb_nicebytes(brtvd->bv_usedspace, used, sizeof (used));
|
||||||
|
zdb_nicebytes(brtvd->bv_savedspace, saved, sizeof (saved));
|
||||||
|
printf("BRT: vdev %" PRIu64 ": refcnt %s; used %s; saved %s\n",
|
||||||
|
vdevid, count, used, saved);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dump_opt['T'] < 3)
|
||||||
|
return;
|
||||||
|
|
||||||
|
char dva[64];
|
||||||
|
printf("\n%-16s %-10s\n", "DVA", "REFCNT");
|
||||||
|
|
||||||
|
for (uint64_t vdevid = 0; vdevid < brt->brt_nvdevs; vdevid++) {
|
||||||
|
brt_vdev_t *brtvd = &brt->brt_vdevs[vdevid];
|
||||||
|
if (brtvd == NULL || !brtvd->bv_initiated)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
zap_cursor_t zc;
|
||||||
|
zap_attribute_t za;
|
||||||
|
for (zap_cursor_init(&zc, brt->brt_mos, brtvd->bv_mos_entries);
|
||||||
|
zap_cursor_retrieve(&zc, &za) == 0;
|
||||||
|
zap_cursor_advance(&zc)) {
|
||||||
|
uint64_t offset = *(uint64_t *)za.za_name;
|
||||||
|
uint64_t refcnt = za.za_first_integer;
|
||||||
|
|
||||||
|
snprintf(dva, sizeof (dva), "%" PRIu64 ":%llx", vdevid,
|
||||||
|
(u_longlong_t)offset);
|
||||||
|
printf("%-16s %-10llu\n", dva, (u_longlong_t)refcnt);
|
||||||
|
}
|
||||||
|
zap_cursor_fini(&zc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
|
dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
|
||||||
{
|
{
|
||||||
@@ -8093,6 +8176,9 @@ dump_zpool(spa_t *spa)
|
|||||||
if (dump_opt['D'])
|
if (dump_opt['D'])
|
||||||
dump_all_ddts(spa);
|
dump_all_ddts(spa);
|
||||||
|
|
||||||
|
if (dump_opt['T'])
|
||||||
|
dump_brt(spa);
|
||||||
|
|
||||||
if (dump_opt['d'] > 2 || dump_opt['m'])
|
if (dump_opt['d'] > 2 || dump_opt['m'])
|
||||||
dump_metaslabs(spa);
|
dump_metaslabs(spa);
|
||||||
if (dump_opt['M'])
|
if (dump_opt['M'])
|
||||||
@@ -8879,6 +8965,7 @@ main(int argc, char **argv)
|
|||||||
{"io-stats", no_argument, NULL, 's'},
|
{"io-stats", no_argument, NULL, 's'},
|
||||||
{"simulate-dedup", no_argument, NULL, 'S'},
|
{"simulate-dedup", no_argument, NULL, 'S'},
|
||||||
{"txg", required_argument, NULL, 't'},
|
{"txg", required_argument, NULL, 't'},
|
||||||
|
{"brt-stats", no_argument, NULL, 'T'},
|
||||||
{"uberblock", no_argument, NULL, 'u'},
|
{"uberblock", no_argument, NULL, 'u'},
|
||||||
{"cachefile", required_argument, NULL, 'U'},
|
{"cachefile", required_argument, NULL, 'U'},
|
||||||
{"verbose", no_argument, NULL, 'v'},
|
{"verbose", no_argument, NULL, 'v'},
|
||||||
@@ -8892,7 +8979,7 @@ main(int argc, char **argv)
|
|||||||
};
|
};
|
||||||
|
|
||||||
while ((c = getopt_long(argc, argv,
|
while ((c = getopt_long(argc, argv,
|
||||||
"AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:uU:vVx:XYyZ",
|
"AbBcCdDeEFGhiI:kK:lLmMNo:Op:PqrRsSt:TuU:vVx:XYyZ",
|
||||||
long_options, NULL)) != -1) {
|
long_options, NULL)) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'b':
|
case 'b':
|
||||||
@@ -8914,6 +9001,7 @@ main(int argc, char **argv)
|
|||||||
case 'R':
|
case 'R':
|
||||||
case 's':
|
case 's':
|
||||||
case 'S':
|
case 'S':
|
||||||
|
case 'T':
|
||||||
case 'u':
|
case 'u':
|
||||||
case 'y':
|
case 'y':
|
||||||
case 'Z':
|
case 'Z':
|
||||||
@@ -9076,22 +9164,6 @@ main(int argc, char **argv)
|
|||||||
if (dump_opt['l'])
|
if (dump_opt['l'])
|
||||||
return (dump_label(argv[0]));
|
return (dump_label(argv[0]));
|
||||||
|
|
||||||
if (dump_opt['O']) {
|
|
||||||
if (argc != 2)
|
|
||||||
usage();
|
|
||||||
dump_opt['v'] = verbose + 3;
|
|
||||||
return (dump_path(argv[0], argv[1], NULL));
|
|
||||||
}
|
|
||||||
if (dump_opt['r']) {
|
|
||||||
target_is_spa = B_FALSE;
|
|
||||||
if (argc != 3)
|
|
||||||
usage();
|
|
||||||
dump_opt['v'] = verbose;
|
|
||||||
error = dump_path(argv[0], argv[1], &object);
|
|
||||||
if (error != 0)
|
|
||||||
fatal("internal error: %s", strerror(error));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dump_opt['X'] || dump_opt['F'])
|
if (dump_opt['X'] || dump_opt['F'])
|
||||||
rewind = ZPOOL_DO_REWIND |
|
rewind = ZPOOL_DO_REWIND |
|
||||||
(dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
|
(dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
|
||||||
@@ -9192,6 +9264,29 @@ main(int argc, char **argv)
|
|||||||
searchdirs = NULL;
|
searchdirs = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to make sure to process -O option or call
|
||||||
|
* dump_path after the -e option has been processed,
|
||||||
|
* which imports the pool to the namespace if it's
|
||||||
|
* not in the cachefile.
|
||||||
|
*/
|
||||||
|
if (dump_opt['O']) {
|
||||||
|
if (argc != 2)
|
||||||
|
usage();
|
||||||
|
dump_opt['v'] = verbose + 3;
|
||||||
|
return (dump_path(argv[0], argv[1], NULL));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dump_opt['r']) {
|
||||||
|
target_is_spa = B_FALSE;
|
||||||
|
if (argc != 3)
|
||||||
|
usage();
|
||||||
|
dump_opt['v'] = verbose;
|
||||||
|
error = dump_path(argv[0], argv[1], &object);
|
||||||
|
if (error != 0)
|
||||||
|
fatal("internal error: %s", strerror(error));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* import_checkpointed_state makes the assumption that the
|
* import_checkpointed_state makes the assumption that the
|
||||||
* target pool that we pass it is already part of the spa
|
* target pool that we pass it is already part of the spa
|
||||||
|
|||||||
+77
-19
@@ -24,6 +24,7 @@
|
|||||||
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2016, 2017, Intel Corporation.
|
* Copyright (c) 2016, 2017, Intel Corporation.
|
||||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||||
|
* Copyright (c) 2023, Klara Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -146,6 +147,17 @@ zfs_unavail_pool(zpool_handle_t *zhp, void *data)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Write an array of strings to the zed log
|
||||||
|
*/
|
||||||
|
static void lines_to_zed_log_msg(char **lines, int lines_cnt)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < lines_cnt; i++) {
|
||||||
|
zed_log_msg(LOG_INFO, "%s", lines[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Two stage replace on Linux
|
* Two stage replace on Linux
|
||||||
* since we get disk notifications
|
* since we get disk notifications
|
||||||
@@ -193,14 +205,21 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
uint64_t is_spare = 0;
|
uint64_t is_spare = 0;
|
||||||
const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
|
const char *physpath = NULL, *new_devid = NULL, *enc_sysfs_path = NULL;
|
||||||
char rawpath[PATH_MAX], fullpath[PATH_MAX];
|
char rawpath[PATH_MAX], fullpath[PATH_MAX];
|
||||||
char devpath[PATH_MAX];
|
char pathbuf[PATH_MAX];
|
||||||
int ret;
|
int ret;
|
||||||
int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
|
int online_flag = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;
|
||||||
boolean_t is_sd = B_FALSE;
|
boolean_t is_sd = B_FALSE;
|
||||||
boolean_t is_mpath_wholedisk = B_FALSE;
|
boolean_t is_mpath_wholedisk = B_FALSE;
|
||||||
uint_t c;
|
uint_t c;
|
||||||
vdev_stat_t *vs;
|
vdev_stat_t *vs;
|
||||||
|
char **lines = NULL;
|
||||||
|
int lines_cnt = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the persistent path, typically under the '/dev/disk/by-id' or
|
||||||
|
* '/dev/disk/by-vdev' directories. Note that this path can change
|
||||||
|
* when a vdev is replaced with a new disk.
|
||||||
|
*/
|
||||||
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
|
if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@@ -357,15 +376,17 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
|
(void) snprintf(rawpath, sizeof (rawpath), "%s%s",
|
||||||
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
|
is_sd ? DEV_BYVDEV_PATH : DEV_BYPATH_PATH, physpath);
|
||||||
|
|
||||||
if (realpath(rawpath, devpath) == NULL && !is_mpath_wholedisk) {
|
if (realpath(rawpath, pathbuf) == NULL && !is_mpath_wholedisk) {
|
||||||
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
|
zed_log_msg(LOG_INFO, " realpath: %s failed (%s)",
|
||||||
rawpath, strerror(errno));
|
rawpath, strerror(errno));
|
||||||
|
|
||||||
(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
|
int err = zpool_vdev_online(zhp, fullpath,
|
||||||
&newstate);
|
ZFS_ONLINE_FORCEFAULT, &newstate);
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s)",
|
zed_log_msg(LOG_INFO, " zpool_vdev_online: %s FORCEFAULT (%s) "
|
||||||
fullpath, libzfs_error_description(g_zfshdl));
|
"err %d, new state %d",
|
||||||
|
fullpath, libzfs_error_description(g_zfshdl), err,
|
||||||
|
err ? (int)newstate : 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -383,6 +404,22 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
|
|
||||||
if (is_mpath_wholedisk) {
|
if (is_mpath_wholedisk) {
|
||||||
/* Don't label device mapper or multipath disks. */
|
/* Don't label device mapper or multipath disks. */
|
||||||
|
zed_log_msg(LOG_INFO,
|
||||||
|
" it's a multipath wholedisk, don't label");
|
||||||
|
if (zpool_prepare_disk(zhp, vdev, "autoreplace", &lines,
|
||||||
|
&lines_cnt) != 0) {
|
||||||
|
zed_log_msg(LOG_INFO,
|
||||||
|
" zpool_prepare_disk: could not "
|
||||||
|
"prepare '%s' (%s)", fullpath,
|
||||||
|
libzfs_error_description(g_zfshdl));
|
||||||
|
if (lines_cnt > 0) {
|
||||||
|
zed_log_msg(LOG_INFO,
|
||||||
|
" zfs_prepare_disk output:");
|
||||||
|
lines_to_zed_log_msg(lines, lines_cnt);
|
||||||
|
}
|
||||||
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
|
return;
|
||||||
|
}
|
||||||
} else if (!labeled) {
|
} else if (!labeled) {
|
||||||
/*
|
/*
|
||||||
* we're auto-replacing a raw disk, so label it first
|
* we're auto-replacing a raw disk, so label it first
|
||||||
@@ -399,16 +436,24 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
* to trigger a ZFS fault for the device (and any hot spare
|
* to trigger a ZFS fault for the device (and any hot spare
|
||||||
* replacement).
|
* replacement).
|
||||||
*/
|
*/
|
||||||
leafname = strrchr(devpath, '/') + 1;
|
leafname = strrchr(pathbuf, '/') + 1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this is a request to label a whole disk, then attempt to
|
* If this is a request to label a whole disk, then attempt to
|
||||||
* write out the label.
|
* write out the label.
|
||||||
*/
|
*/
|
||||||
if (zpool_label_disk(g_zfshdl, zhp, leafname) != 0) {
|
if (zpool_prepare_and_label_disk(g_zfshdl, zhp, leafname,
|
||||||
zed_log_msg(LOG_INFO, " zpool_label_disk: could not "
|
vdev, "autoreplace", &lines, &lines_cnt) != 0) {
|
||||||
|
zed_log_msg(LOG_WARNING,
|
||||||
|
" zpool_prepare_and_label_disk: could not "
|
||||||
"label '%s' (%s)", leafname,
|
"label '%s' (%s)", leafname,
|
||||||
libzfs_error_description(g_zfshdl));
|
libzfs_error_description(g_zfshdl));
|
||||||
|
if (lines_cnt > 0) {
|
||||||
|
zed_log_msg(LOG_INFO,
|
||||||
|
" zfs_prepare_disk output:");
|
||||||
|
lines_to_zed_log_msg(lines, lines_cnt);
|
||||||
|
}
|
||||||
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
|
|
||||||
(void) zpool_vdev_online(zhp, fullpath,
|
(void) zpool_vdev_online(zhp, fullpath,
|
||||||
ZFS_ONLINE_FORCEFAULT, &newstate);
|
ZFS_ONLINE_FORCEFAULT, &newstate);
|
||||||
@@ -431,7 +476,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
sizeof (device->pd_physpath));
|
sizeof (device->pd_physpath));
|
||||||
list_insert_tail(&g_device_list, device);
|
list_insert_tail(&g_device_list, device);
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_label_disk: async '%s' (%llu)",
|
zed_log_msg(LOG_NOTICE, " zpool_label_disk: async '%s' (%llu)",
|
||||||
leafname, (u_longlong_t)guid);
|
leafname, (u_longlong_t)guid);
|
||||||
|
|
||||||
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
|
return; /* resumes at EC_DEV_ADD.ESC_DISK for partition */
|
||||||
@@ -454,8 +499,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
/* unexpected partition slice encountered */
|
/* unexpected partition slice encountered */
|
||||||
zed_log_msg(LOG_INFO, "labeled disk %s unexpected here",
|
zed_log_msg(LOG_WARNING, "labeled disk %s was "
|
||||||
fullpath);
|
"unexpected here", fullpath);
|
||||||
(void) zpool_vdev_online(zhp, fullpath,
|
(void) zpool_vdev_online(zhp, fullpath,
|
||||||
ZFS_ONLINE_FORCEFAULT, &newstate);
|
ZFS_ONLINE_FORCEFAULT, &newstate);
|
||||||
return;
|
return;
|
||||||
@@ -464,10 +509,21 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
|
zed_log_msg(LOG_INFO, " zpool_label_disk: resume '%s' (%llu)",
|
||||||
physpath, (u_longlong_t)guid);
|
physpath, (u_longlong_t)guid);
|
||||||
|
|
||||||
(void) snprintf(devpath, sizeof (devpath), "%s%s",
|
/*
|
||||||
DEV_BYID_PATH, new_devid);
|
* Paths that begin with '/dev/disk/by-id/' will change and so
|
||||||
|
* they must be updated before calling zpool_vdev_attach().
|
||||||
|
*/
|
||||||
|
if (strncmp(path, DEV_BYID_PATH, strlen(DEV_BYID_PATH)) == 0) {
|
||||||
|
(void) snprintf(pathbuf, sizeof (pathbuf), "%s%s",
|
||||||
|
DEV_BYID_PATH, new_devid);
|
||||||
|
zed_log_msg(LOG_INFO, " zpool_label_disk: path '%s' "
|
||||||
|
"replaced by '%s'", path, pathbuf);
|
||||||
|
path = pathbuf;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Construct the root vdev to pass to zpool_vdev_attach(). While adding
|
* Construct the root vdev to pass to zpool_vdev_attach(). While adding
|
||||||
* the entire vdev structure is harmless, we construct a reduced set of
|
* the entire vdev structure is harmless, we construct a reduced set of
|
||||||
@@ -506,9 +562,11 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
* Wait for udev to verify the links exist, then auto-replace
|
* Wait for udev to verify the links exist, then auto-replace
|
||||||
* the leaf disk at same physical location.
|
* the leaf disk at same physical location.
|
||||||
*/
|
*/
|
||||||
if (zpool_label_disk_wait(path, 3000) != 0) {
|
if (zpool_label_disk_wait(path, DISK_LABEL_WAIT) != 0) {
|
||||||
zed_log_msg(LOG_WARNING, "zfs_mod: expected replacement "
|
zed_log_msg(LOG_WARNING, "zfs_mod: pool '%s', after labeling "
|
||||||
"disk %s is missing", path);
|
"replacement disk, the expected disk partition link '%s' "
|
||||||
|
"is missing after waiting %u ms",
|
||||||
|
zpool_get_name(zhp), path, DISK_LABEL_WAIT);
|
||||||
nvlist_free(nvroot);
|
nvlist_free(nvroot);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -523,7 +581,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
|
|||||||
B_TRUE, B_FALSE);
|
B_TRUE, B_FALSE);
|
||||||
}
|
}
|
||||||
|
|
||||||
zed_log_msg(LOG_INFO, " zpool_vdev_replace: %s with %s (%s)",
|
zed_log_msg(LOG_WARNING, " zpool_vdev_replace: %s with %s (%s)",
|
||||||
fullpath, path, (ret == 0) ? "no errors" :
|
fullpath, path, (ret == 0) ? "no errors" :
|
||||||
libzfs_error_description(g_zfshdl));
|
libzfs_error_description(g_zfshdl));
|
||||||
|
|
||||||
@@ -621,7 +679,7 @@ zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
|
|||||||
dp->dd_prop, path);
|
dp->dd_prop, path);
|
||||||
dp->dd_found = B_TRUE;
|
dp->dd_found = B_TRUE;
|
||||||
|
|
||||||
/* pass the new devid for use by replacing code */
|
/* pass the new devid for use by auto-replacing code */
|
||||||
if (dp->dd_new_devid != NULL) {
|
if (dp->dd_new_devid != NULL) {
|
||||||
(void) nvlist_add_string(nvl, "new_devid",
|
(void) nvlist_add_string(nvl, "new_devid",
|
||||||
dp->dd_new_devid);
|
dp->dd_new_devid);
|
||||||
|
|||||||
@@ -1,6 +1,9 @@
|
|||||||
# Features which are supported by GRUB2
|
# Features which are supported by GRUB2
|
||||||
|
allocation_classes
|
||||||
async_destroy
|
async_destroy
|
||||||
|
block_cloning
|
||||||
bookmarks
|
bookmarks
|
||||||
|
device_rebuild
|
||||||
embedded_data
|
embedded_data
|
||||||
empty_bpobj
|
empty_bpobj
|
||||||
enabled_txg
|
enabled_txg
|
||||||
@@ -9,6 +12,12 @@ filesystem_limits
|
|||||||
hole_birth
|
hole_birth
|
||||||
large_blocks
|
large_blocks
|
||||||
livelist
|
livelist
|
||||||
|
log_spacemap
|
||||||
lz4_compress
|
lz4_compress
|
||||||
|
project_quota
|
||||||
|
resilver_defer
|
||||||
spacemap_histogram
|
spacemap_histogram
|
||||||
|
spacemap_v2
|
||||||
|
userobj_accounting
|
||||||
|
zilsaxattr
|
||||||
zpool_checkpoint
|
zpool_checkpoint
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ edonr
|
|||||||
embedded_data
|
embedded_data
|
||||||
empty_bpobj
|
empty_bpobj
|
||||||
enabled_txg
|
enabled_txg
|
||||||
encryption
|
|
||||||
extensible_dataset
|
extensible_dataset
|
||||||
filesystem_limits
|
filesystem_limits
|
||||||
hole_birth
|
hole_birth
|
||||||
|
|||||||
+7
-26
@@ -443,37 +443,22 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd)
|
|||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
char *argv[2] = {cmd};
|
char *argv[2] = {cmd};
|
||||||
char *env[5] = {(char *)"PATH=/bin:/sbin:/usr/bin:/usr/sbin"};
|
char **env;
|
||||||
char **lines = NULL;
|
char **lines = NULL;
|
||||||
int lines_cnt = 0;
|
int lines_cnt = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
/* Setup our custom environment variables */
|
env = zpool_vdev_script_alloc_env(data->pool, data->path, data->upath,
|
||||||
rc = asprintf(&env[1], "VDEV_PATH=%s",
|
data->vdev_enc_sysfs_path, NULL, NULL);
|
||||||
data->path ? data->path : "");
|
if (env == NULL)
|
||||||
if (rc == -1) {
|
|
||||||
env[1] = NULL;
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
|
||||||
|
|
||||||
rc = asprintf(&env[2], "VDEV_UPATH=%s",
|
|
||||||
data->upath ? data->upath : "");
|
|
||||||
if (rc == -1) {
|
|
||||||
env[2] = NULL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
rc = asprintf(&env[3], "VDEV_ENC_SYSFS_PATH=%s",
|
|
||||||
data->vdev_enc_sysfs_path ?
|
|
||||||
data->vdev_enc_sysfs_path : "");
|
|
||||||
if (rc == -1) {
|
|
||||||
env[3] = NULL;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Run the command */
|
/* Run the command */
|
||||||
rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines,
|
rc = libzfs_run_process_get_stdout_nopath(cmd, argv, env, &lines,
|
||||||
&lines_cnt);
|
&lines_cnt);
|
||||||
|
|
||||||
|
zpool_vdev_script_free_env(env);
|
||||||
|
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
@@ -485,10 +470,6 @@ vdev_run_cmd(vdev_cmd_data_t *data, char *cmd)
|
|||||||
out:
|
out:
|
||||||
if (lines != NULL)
|
if (lines != NULL)
|
||||||
libzfs_free_str_array(lines, lines_cnt);
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
|
|
||||||
/* Start with i = 1 since env[0] was statically allocated */
|
|
||||||
for (i = 1; i < ARRAY_SIZE(env); i++)
|
|
||||||
free(env[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -126,6 +126,10 @@ vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv,
|
|||||||
|
|
||||||
void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl);
|
void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl);
|
||||||
|
|
||||||
|
void free_vdev_cmd_data(vdev_cmd_data_t *data);
|
||||||
|
|
||||||
|
int vdev_run_cmd_simple(char *path, char *cmd);
|
||||||
|
|
||||||
int check_device(const char *path, boolean_t force,
|
int check_device(const char *path, boolean_t force,
|
||||||
boolean_t isspare, boolean_t iswholedisk);
|
boolean_t isspare, boolean_t iswholedisk);
|
||||||
boolean_t check_sector_size_database(char *path, int *sector_size);
|
boolean_t check_sector_size_database(char *path, int *sector_size);
|
||||||
|
|||||||
+36
-7
@@ -936,6 +936,15 @@ zero_label(const char *path)
|
|||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Write each of the 'lines_cnt' strings in 'lines' to stderr, one per
 * line, in array order.
 */
static void
lines_to_stderr(char *lines[], int lines_cnt)
{
	int idx = 0;

	while (idx < lines_cnt) {
		(void) fprintf(stderr, "%s\n", lines[idx]);
		idx++;
	}
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Go through and find any whole disks in the vdev specification, labelling them
|
* Go through and find any whole disks in the vdev specification, labelling them
|
||||||
* as appropriate. When constructing the vdev spec, we were unable to open this
|
* as appropriate. When constructing the vdev spec, we were unable to open this
|
||||||
@@ -947,7 +956,7 @@ zero_label(const char *path)
|
|||||||
* need to get the devid after we label the disk.
|
* need to get the devid after we label the disk.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
make_disks(zpool_handle_t *zhp, nvlist_t *nv)
|
make_disks(zpool_handle_t *zhp, nvlist_t *nv, boolean_t replacing)
|
||||||
{
|
{
|
||||||
nvlist_t **child;
|
nvlist_t **child;
|
||||||
uint_t c, children;
|
uint_t c, children;
|
||||||
@@ -1032,6 +1041,8 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
|
|||||||
*/
|
*/
|
||||||
if (!is_exclusive && !is_spare(NULL, udevpath)) {
|
if (!is_exclusive && !is_spare(NULL, udevpath)) {
|
||||||
char *devnode = strrchr(devpath, '/') + 1;
|
char *devnode = strrchr(devpath, '/') + 1;
|
||||||
|
char **lines = NULL;
|
||||||
|
int lines_cnt = 0;
|
||||||
|
|
||||||
ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
|
ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
@@ -1043,9 +1054,27 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
|
|||||||
/*
|
/*
|
||||||
* When labeling a pool the raw device node name
|
* When labeling a pool the raw device node name
|
||||||
* is provided as it appears under /dev/.
|
* is provided as it appears under /dev/.
|
||||||
|
*
|
||||||
|
* Note that 'zhp' will be NULL when we're creating a
|
||||||
|
* pool.
|
||||||
*/
|
*/
|
||||||
if (zpool_label_disk(g_zfs, zhp, devnode) == -1)
|
if (zpool_prepare_and_label_disk(g_zfs, zhp, devnode,
|
||||||
|
nv, zhp == NULL ? "create" :
|
||||||
|
replacing ? "replace" : "add", &lines,
|
||||||
|
&lines_cnt) != 0) {
|
||||||
|
(void) fprintf(stderr,
|
||||||
|
gettext(
|
||||||
|
"Error preparing/labeling disk.\n"));
|
||||||
|
if (lines_cnt > 0) {
|
||||||
|
(void) fprintf(stderr,
|
||||||
|
gettext("zfs_prepare_disk output:\n"));
|
||||||
|
lines_to_stderr(lines, lines_cnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
return (-1);
|
return (-1);
|
||||||
|
}
|
||||||
|
libzfs_free_str_array(lines, lines_cnt);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for udev to signal the device is available
|
* Wait for udev to signal the device is available
|
||||||
@@ -1082,19 +1111,19 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (c = 0; c < children; c++)
|
for (c = 0; c < children; c++)
|
||||||
if ((ret = make_disks(zhp, child[c])) != 0)
|
if ((ret = make_disks(zhp, child[c], replacing)) != 0)
|
||||||
return (ret);
|
return (ret);
|
||||||
|
|
||||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
|
||||||
&child, &children) == 0)
|
&child, &children) == 0)
|
||||||
for (c = 0; c < children; c++)
|
for (c = 0; c < children; c++)
|
||||||
if ((ret = make_disks(zhp, child[c])) != 0)
|
if ((ret = make_disks(zhp, child[c], replacing)) != 0)
|
||||||
return (ret);
|
return (ret);
|
||||||
|
|
||||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
|
||||||
&child, &children) == 0)
|
&child, &children) == 0)
|
||||||
for (c = 0; c < children; c++)
|
for (c = 0; c < children; c++)
|
||||||
if ((ret = make_disks(zhp, child[c])) != 0)
|
if ((ret = make_disks(zhp, child[c], replacing)) != 0)
|
||||||
return (ret);
|
return (ret);
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
@@ -1752,7 +1781,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
|
|||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
|
if (!flags.dryrun && make_disks(zhp, newroot, B_FALSE) != 0) {
|
||||||
nvlist_free(newroot);
|
nvlist_free(newroot);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
@@ -1873,7 +1902,7 @@ make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
|
|||||||
/*
|
/*
|
||||||
* Run through the vdev specification and label any whole disks found.
|
* Run through the vdev specification and label any whole disks found.
|
||||||
*/
|
*/
|
||||||
if (!dryrun && make_disks(zhp, newroot) != 0) {
|
if (!dryrun && make_disks(zhp, newroot, replacing) != 0) {
|
||||||
nvlist_free(newroot);
|
nvlist_free(newroot);
|
||||||
return (NULL);
|
return (NULL);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ AM_CPPFLAGS += -D_REENTRANT
|
|||||||
AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64
|
AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64
|
||||||
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE
|
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE
|
||||||
AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
|
AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\"
|
||||||
|
AM_CPPFLAGS += -DZFSEXECDIR=\"$(zfsexecdir)\"
|
||||||
AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
|
AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\"
|
||||||
AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
|
AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\"
|
||||||
AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\"
|
AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\"
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
dnl #
|
||||||
|
dnl # 6.6 API change,
|
||||||
|
dnl # fsync_bdev was removed in favor of sync_blockdev
|
||||||
|
dnl #
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_SYNC_BDEV], [
|
||||||
|
ZFS_LINUX_TEST_SRC([fsync_bdev], [
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
],[
|
||||||
|
fsync_bdev(NULL);
|
||||||
|
])
|
||||||
|
|
||||||
|
ZFS_LINUX_TEST_SRC([sync_blockdev], [
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
],[
|
||||||
|
sync_blockdev(NULL);
|
||||||
|
])
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_DEFUN([ZFS_AC_KERNEL_SYNC_BDEV], [
|
||||||
|
AC_MSG_CHECKING([whether fsync_bdev() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([fsync_bdev], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_FSYNC_BDEV, 1,
|
||||||
|
[fsync_bdev() is declared in include/blkdev.h])
|
||||||
|
],[
|
||||||
|
AC_MSG_CHECKING([whether sync_blockdev() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([sync_blockdev], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_SYNC_BLOCKDEV, 1,
|
||||||
|
[sync_blockdev() is declared in include/blkdev.h])
|
||||||
|
],[
|
||||||
|
ZFS_LINUX_TEST_ERROR(
|
||||||
|
[neither fsync_bdev() nor sync_blockdev() exist])
|
||||||
|
])
|
||||||
|
])
|
||||||
|
])
|
||||||
@@ -7,6 +7,10 @@ dnl #
|
|||||||
dnl # 6.3 API
|
dnl # 6.3 API
|
||||||
dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
|
dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
|
||||||
dnl #
|
dnl #
|
||||||
|
dnl # 6.6 API
|
||||||
|
dnl # generic_fillattr() now takes u32 as second argument, representing a
|
||||||
|
dnl # request_mask for statx
|
||||||
|
dnl #
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
|
AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
|
||||||
ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
|
ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
@@ -25,22 +29,39 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
|
|||||||
struct kstat *k = NULL;
|
struct kstat *k = NULL;
|
||||||
generic_fillattr(idmap, in, k);
|
generic_fillattr(idmap, in, k);
|
||||||
])
|
])
|
||||||
|
|
||||||
|
ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap_reqmask], [
|
||||||
|
#include <linux/fs.h>
|
||||||
|
],[
|
||||||
|
struct mnt_idmap *idmap = NULL;
|
||||||
|
struct inode *in = NULL;
|
||||||
|
struct kstat *k = NULL;
|
||||||
|
generic_fillattr(idmap, 0, in, k);
|
||||||
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
|
AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
|
||||||
AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
|
AC_MSG_CHECKING(
|
||||||
ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
|
[whether generic_fillattr requires struct mnt_idmap* and request_mask])
|
||||||
|
ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap_reqmask], [
|
||||||
AC_MSG_RESULT([yes])
|
AC_MSG_RESULT([yes])
|
||||||
AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
|
AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK, 1,
|
||||||
[generic_fillattr requires struct mnt_idmap*])
|
[generic_fillattr requires struct mnt_idmap* and u32 request_mask])
|
||||||
],[
|
],[
|
||||||
AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
|
AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
|
||||||
ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
|
ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
|
||||||
AC_MSG_RESULT([yes])
|
AC_MSG_RESULT([yes])
|
||||||
AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
|
AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
|
||||||
[generic_fillattr requires struct user_namespace*])
|
[generic_fillattr requires struct mnt_idmap*])
|
||||||
],[
|
],[
|
||||||
AC_MSG_RESULT([no])
|
AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
|
||||||
|
ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
|
||||||
|
AC_MSG_RESULT([yes])
|
||||||
|
AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
|
||||||
|
[generic_fillattr requires struct user_namespace*])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT([no])
|
||||||
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|||||||
@@ -27,6 +27,31 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_TIMES], [
|
|||||||
memset(&ip, 0, sizeof(ip));
|
memset(&ip, 0, sizeof(ip));
|
||||||
ts = ip.i_mtime;
|
ts = ip.i_mtime;
|
||||||
])
|
])
|
||||||
|
|
||||||
|
dnl #
|
||||||
|
dnl # 6.6 API change
|
||||||
|
dnl # i_ctime no longer directly accessible, must use
|
||||||
|
dnl # inode_get_ctime(ip), inode_set_ctime*(ip) to
|
||||||
|
dnl # read/write.
|
||||||
|
dnl #
|
||||||
|
ZFS_LINUX_TEST_SRC([inode_get_ctime], [
|
||||||
|
#include <linux/fs.h>
|
||||||
|
],[
|
||||||
|
struct inode ip;
|
||||||
|
|
||||||
|
memset(&ip, 0, sizeof(ip));
|
||||||
|
inode_get_ctime(&ip);
|
||||||
|
])
|
||||||
|
|
||||||
|
ZFS_LINUX_TEST_SRC([inode_set_ctime_to_ts], [
|
||||||
|
#include <linux/fs.h>
|
||||||
|
],[
|
||||||
|
struct inode ip;
|
||||||
|
struct timespec64 ts = {0};
|
||||||
|
|
||||||
|
memset(&ip, 0, sizeof(ip));
|
||||||
|
inode_set_ctime_to_ts(&ip, ts);
|
||||||
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
|
AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
|
||||||
@@ -47,4 +72,22 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_TIMES], [
|
|||||||
AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1,
|
AC_DEFINE(HAVE_INODE_TIMESPEC64_TIMES, 1,
|
||||||
[inode->i_*time's are timespec64])
|
[inode->i_*time's are timespec64])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([whether inode_get_ctime() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([inode_get_ctime], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_INODE_GET_CTIME, 1,
|
||||||
|
[inode_get_ctime() exists in linux/fs.h])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([whether inode_set_ctime_to_ts() exists])
|
||||||
|
ZFS_LINUX_TEST_RESULT([inode_set_ctime_to_ts], [
|
||||||
|
AC_MSG_RESULT(yes)
|
||||||
|
AC_DEFINE(HAVE_INODE_SET_CTIME_TO_TS, 1,
|
||||||
|
[inode_set_ctime_to_ts() exists in linux/fs.h])
|
||||||
|
],[
|
||||||
|
AC_MSG_RESULT(no)
|
||||||
|
])
|
||||||
])
|
])
|
||||||
|
|||||||
@@ -162,6 +162,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
|||||||
ZFS_AC_KERNEL_SRC_RECLAIMED
|
ZFS_AC_KERNEL_SRC_RECLAIMED
|
||||||
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
|
ZFS_AC_KERNEL_SRC_REGISTER_SYSCTL_TABLE
|
||||||
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
|
ZFS_AC_KERNEL_SRC_COPY_SPLICE_READ
|
||||||
|
ZFS_AC_KERNEL_SRC_SYNC_BDEV
|
||||||
case "$host_cpu" in
|
case "$host_cpu" in
|
||||||
powerpc*)
|
powerpc*)
|
||||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||||
@@ -303,6 +304,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
|||||||
ZFS_AC_KERNEL_RECLAIMED
|
ZFS_AC_KERNEL_RECLAIMED
|
||||||
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
|
ZFS_AC_KERNEL_REGISTER_SYSCTL_TABLE
|
||||||
ZFS_AC_KERNEL_COPY_SPLICE_READ
|
ZFS_AC_KERNEL_COPY_SPLICE_READ
|
||||||
|
ZFS_AC_KERNEL_SYNC_BDEV
|
||||||
case "$host_cpu" in
|
case "$host_cpu" in
|
||||||
powerpc*)
|
powerpc*)
|
||||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||||
|
|||||||
@@ -67,6 +67,7 @@ ZFS_AC_DEBUG_INVARIANTS
|
|||||||
|
|
||||||
AC_CONFIG_FILES([
|
AC_CONFIG_FILES([
|
||||||
contrib/debian/rules
|
contrib/debian/rules
|
||||||
|
contrib/debian/changelog
|
||||||
Makefile
|
Makefile
|
||||||
include/Makefile
|
include/Makefile
|
||||||
lib/libzfs/libzfs.pc
|
lib/libzfs/libzfs.pc
|
||||||
|
|||||||
@@ -1,3 +1,9 @@
|
|||||||
|
openzfs-linux (@VERSION@-1) unstable; urgency=low
|
||||||
|
|
||||||
|
* OpenZFS @VERSION@ is tagged.
|
||||||
|
|
||||||
|
-- Umer Saleem <usaleem@ixsystems.com> Wed, 15 Nov 2023 15:00:00 +0500
|
||||||
|
|
||||||
openzfs-linux (2.2.0-0) unstable; urgency=low
|
openzfs-linux (2.2.0-0) unstable; urgency=low
|
||||||
|
|
||||||
* OpenZFS 2.2.0 is tagged.
|
* OpenZFS 2.2.0 is tagged.
|
||||||
@@ -197,7 +197,6 @@ Recommends: openzfs-zfs-zed, openzfs-zfsutils (>= ${source:Version}), ${linux:Re
|
|||||||
Suggests: debhelper
|
Suggests: debhelper
|
||||||
Breaks: spl-dkms (<< 0.8.0~rc1)
|
Breaks: spl-dkms (<< 0.8.0~rc1)
|
||||||
Replaces: spl-dkms, zfs-dkms
|
Replaces: spl-dkms, zfs-dkms
|
||||||
Conflicts: zfs-dkms
|
|
||||||
Provides: openzfs-zfs-modules
|
Provides: openzfs-zfs-modules
|
||||||
Description: OpenZFS filesystem kernel modules for Linux
|
Description: OpenZFS filesystem kernel modules for Linux
|
||||||
OpenZFS is a storage platform that encompasses the functionality of
|
OpenZFS is a storage platform that encompasses the functionality of
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ usr/bin/zvol_wait
|
|||||||
usr/lib/modules-load.d/ lib/
|
usr/lib/modules-load.d/ lib/
|
||||||
usr/lib/zfs-linux/zpool.d/
|
usr/lib/zfs-linux/zpool.d/
|
||||||
usr/lib/zfs-linux/zpool_influxdb
|
usr/lib/zfs-linux/zpool_influxdb
|
||||||
|
usr/lib/zfs-linux/zfs_prepare_disk
|
||||||
usr/sbin/arc_summary
|
usr/sbin/arc_summary
|
||||||
usr/sbin/arcstat
|
usr/sbin/arcstat
|
||||||
usr/sbin/dbufstat
|
usr/sbin/dbufstat
|
||||||
@@ -87,6 +88,7 @@ usr/share/man/man8/zfs-wait.8
|
|||||||
usr/share/man/man8/zfs-zone.8
|
usr/share/man/man8/zfs-zone.8
|
||||||
usr/share/man/man8/zfs.8
|
usr/share/man/man8/zfs.8
|
||||||
usr/share/man/man8/zfs_ids_to_path.8
|
usr/share/man/man8/zfs_ids_to_path.8
|
||||||
|
usr/share/man/man8/zfs_prepare_disk.8
|
||||||
usr/share/man/man7/zfsconcepts.7
|
usr/share/man/man7/zfsconcepts.7
|
||||||
usr/share/man/man7/zfsprops.7
|
usr/share/man/man7/zfsprops.7
|
||||||
usr/share/man/man8/zgenhostid.8
|
usr/share/man/man8/zgenhostid.8
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ COMMON_H = \
|
|||||||
sys/bqueue.h \
|
sys/bqueue.h \
|
||||||
sys/btree.h \
|
sys/btree.h \
|
||||||
sys/brt.h \
|
sys/brt.h \
|
||||||
|
sys/brt_impl.h \
|
||||||
sys/dataset_kstats.h \
|
sys/dataset_kstats.h \
|
||||||
sys/dbuf.h \
|
sys/dbuf.h \
|
||||||
sys/ddt.h \
|
sys/ddt.h \
|
||||||
|
|||||||
@@ -326,6 +326,15 @@ _LIBZFS_H nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
|
|||||||
boolean_t *, boolean_t *, boolean_t *);
|
boolean_t *, boolean_t *, boolean_t *);
|
||||||
_LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *,
|
_LIBZFS_H int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *,
|
||||||
const char *);
|
const char *);
|
||||||
|
_LIBZFS_H int zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv,
|
||||||
|
const char *prepare_str, char **lines[], int *lines_cnt);
|
||||||
|
_LIBZFS_H int zpool_prepare_and_label_disk(libzfs_handle_t *hdl,
|
||||||
|
zpool_handle_t *, const char *, nvlist_t *vdev_nv, const char *prepare_str,
|
||||||
|
char **lines[], int *lines_cnt);
|
||||||
|
_LIBZFS_H char ** zpool_vdev_script_alloc_env(const char *pool_name,
|
||||||
|
const char *vdev_path, const char *vdev_upath,
|
||||||
|
const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val);
|
||||||
|
_LIBZFS_H void zpool_vdev_script_free_env(char **env);
|
||||||
_LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp,
|
_LIBZFS_H uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp,
|
||||||
const char *path);
|
const char *path);
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -34,7 +34,7 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Default wait time for a device name to be created.
|
* Default wait time in milliseconds for a device name to be created.
|
||||||
*/
|
*/
|
||||||
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
|
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
|
||||||
|
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ typedef enum {
|
|||||||
} while (0)
|
} while (0)
|
||||||
#define mutex_destroy(lock) sx_destroy(lock)
|
#define mutex_destroy(lock) sx_destroy(lock)
|
||||||
#define mutex_enter(lock) sx_xlock(lock)
|
#define mutex_enter(lock) sx_xlock(lock)
|
||||||
|
#define mutex_enter_interruptible(lock) sx_xlock_sig(lock)
|
||||||
#define mutex_enter_nested(lock, type) sx_xlock(lock)
|
#define mutex_enter_nested(lock, type) sx_xlock(lock)
|
||||||
#define mutex_tryenter(lock) sx_try_xlock(lock)
|
#define mutex_tryenter(lock) sx_try_xlock(lock)
|
||||||
#define mutex_exit(lock) sx_xunlock(lock)
|
#define mutex_exit(lock) sx_xunlock(lock)
|
||||||
|
|||||||
@@ -30,9 +30,9 @@
|
|||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/proc.h>
|
#include <sys/proc.h>
|
||||||
|
#include <sys/queue.h>
|
||||||
#include <sys/taskqueue.h>
|
#include <sys/taskqueue.h>
|
||||||
#include <sys/thread.h>
|
#include <sys/thread.h>
|
||||||
#include <sys/ck.h>
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
@@ -48,16 +48,16 @@ typedef uintptr_t taskqid_t;
|
|||||||
typedef void (task_func_t)(void *);
|
typedef void (task_func_t)(void *);
|
||||||
|
|
||||||
typedef struct taskq_ent {
|
typedef struct taskq_ent {
|
||||||
struct task tqent_task;
|
union {
|
||||||
struct timeout_task tqent_timeout_task;
|
struct task tqent_task;
|
||||||
|
struct timeout_task tqent_timeout_task;
|
||||||
|
};
|
||||||
task_func_t *tqent_func;
|
task_func_t *tqent_func;
|
||||||
void *tqent_arg;
|
void *tqent_arg;
|
||||||
taskqid_t tqent_id;
|
taskqid_t tqent_id;
|
||||||
CK_LIST_ENTRY(taskq_ent) tqent_hash;
|
LIST_ENTRY(taskq_ent) tqent_hash;
|
||||||
uint8_t tqent_type;
|
uint_t tqent_type;
|
||||||
uint8_t tqent_registered;
|
volatile uint_t tqent_rc;
|
||||||
uint8_t tqent_cancelled;
|
|
||||||
volatile uint32_t tqent_rc;
|
|
||||||
} taskq_ent_t;
|
} taskq_ent_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ void vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
|
|||||||
void vfs_clearmntopt(vfs_t *vfsp, const char *name);
|
void vfs_clearmntopt(vfs_t *vfsp, const char *name);
|
||||||
int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
|
int vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp);
|
||||||
int mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype,
|
int mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype,
|
||||||
char *fspath, char *fspec, int fsflags);
|
char *fspath, char *fspec, int fsflags, vfs_t *parent_vfsp);
|
||||||
|
|
||||||
typedef uint64_t vfs_feature_t;
|
typedef uint64_t vfs_feature_t;
|
||||||
|
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ enum symfollow { NO_FOLLOW = NOFOLLOW };
|
|||||||
#ifndef IN_BASE
|
#ifndef IN_BASE
|
||||||
#include_next <sys/vnode.h>
|
#include_next <sys/vnode.h>
|
||||||
#endif
|
#endif
|
||||||
|
#include <sys/ccompat.h>
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
#include <sys/cred.h>
|
#include <sys/cred.h>
|
||||||
#include <sys/fcntl.h>
|
#include <sys/fcntl.h>
|
||||||
@@ -104,7 +105,7 @@ vn_flush_cached_data(vnode_t *vp, boolean_t sync)
|
|||||||
zfs_vmobject_wlock(vp->v_object);
|
zfs_vmobject_wlock(vp->v_object);
|
||||||
vm_object_page_clean(vp->v_object, 0, 0, flags);
|
vm_object_page_clean(vp->v_object, 0, 0, flags);
|
||||||
zfs_vmobject_wunlock(vp->v_object);
|
zfs_vmobject_wunlock(vp->v_object);
|
||||||
VOP_UNLOCK(vp);
|
VOP_UNLOCK1(vp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -286,6 +286,7 @@ typedef struct zfid_long {
|
|||||||
|
|
||||||
extern uint_t zfs_fsyncer_key;
|
extern uint_t zfs_fsyncer_key;
|
||||||
extern int zfs_super_owner;
|
extern int zfs_super_owner;
|
||||||
|
extern int zfs_bclone_enabled;
|
||||||
|
|
||||||
extern void zfs_init(void);
|
extern void zfs_init(void);
|
||||||
extern void zfs_fini(void);
|
extern void zfs_fini(void);
|
||||||
|
|||||||
@@ -461,10 +461,16 @@ zpl_is_32bit_api(void)
|
|||||||
* 6.3 API change
|
* 6.3 API change
|
||||||
* generic_fillattr() first arg is changed to struct mnt_idmap *
|
* generic_fillattr() first arg is changed to struct mnt_idmap *
|
||||||
*
|
*
|
||||||
|
* 6.6 API change
|
||||||
|
* generic_fillattr() gets new second arg request_mask, a u32 type
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
#ifdef HAVE_GENERIC_FILLATTR_IDMAP
|
#ifdef HAVE_GENERIC_FILLATTR_IDMAP
|
||||||
#define zpl_generic_fillattr(idmap, ip, sp) \
|
#define zpl_generic_fillattr(idmap, ip, sp) \
|
||||||
generic_fillattr(idmap, ip, sp)
|
generic_fillattr(idmap, ip, sp)
|
||||||
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
|
||||||
|
#define zpl_generic_fillattr(idmap, rqm, ip, sp) \
|
||||||
|
generic_fillattr(idmap, rqm, ip, sp)
|
||||||
#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
|
#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
|
||||||
#define zpl_generic_fillattr(user_ns, ip, sp) \
|
#define zpl_generic_fillattr(user_ns, ip, sp) \
|
||||||
generic_fillattr(user_ns, ip, sp)
|
generic_fillattr(user_ns, ip, sp)
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ typedef struct spl_kmem_magazine {
|
|||||||
uint32_t skm_refill; /* Batch refill size */
|
uint32_t skm_refill; /* Batch refill size */
|
||||||
struct spl_kmem_cache *skm_cache; /* Owned by cache */
|
struct spl_kmem_cache *skm_cache; /* Owned by cache */
|
||||||
unsigned int skm_cpu; /* Owned by cpu */
|
unsigned int skm_cpu; /* Owned by cpu */
|
||||||
void *skm_objs[0]; /* Object pointers */
|
void *skm_objs[]; /* Object pointers */
|
||||||
} spl_kmem_magazine_t;
|
} spl_kmem_magazine_t;
|
||||||
|
|
||||||
typedef struct spl_kmem_obj {
|
typedef struct spl_kmem_obj {
|
||||||
|
|||||||
@@ -128,7 +128,6 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
|
|||||||
|
|
||||||
#define NESTED_SINGLE 1
|
#define NESTED_SINGLE 1
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
||||||
#define mutex_enter_nested(mp, subclass) \
|
#define mutex_enter_nested(mp, subclass) \
|
||||||
{ \
|
{ \
|
||||||
ASSERT3P(mutex_owner(mp), !=, current); \
|
ASSERT3P(mutex_owner(mp), !=, current); \
|
||||||
@@ -137,16 +136,22 @@ spl_mutex_lockdep_on_maybe(kmutex_t *mp) \
|
|||||||
spl_mutex_lockdep_on_maybe(mp); \
|
spl_mutex_lockdep_on_maybe(mp); \
|
||||||
spl_mutex_set_owner(mp); \
|
spl_mutex_set_owner(mp); \
|
||||||
}
|
}
|
||||||
#else /* CONFIG_DEBUG_LOCK_ALLOC */
|
|
||||||
#define mutex_enter_nested(mp, subclass) \
|
#define mutex_enter_interruptible(mp) \
|
||||||
{ \
|
/* CSTYLED */ \
|
||||||
|
({ \
|
||||||
|
int _rc_; \
|
||||||
|
\
|
||||||
ASSERT3P(mutex_owner(mp), !=, current); \
|
ASSERT3P(mutex_owner(mp), !=, current); \
|
||||||
spl_mutex_lockdep_off_maybe(mp); \
|
spl_mutex_lockdep_off_maybe(mp); \
|
||||||
mutex_lock(MUTEX(mp)); \
|
_rc_ = mutex_lock_interruptible(MUTEX(mp)); \
|
||||||
spl_mutex_lockdep_on_maybe(mp); \
|
spl_mutex_lockdep_on_maybe(mp); \
|
||||||
spl_mutex_set_owner(mp); \
|
if (!_rc_) { \
|
||||||
}
|
spl_mutex_set_owner(mp); \
|
||||||
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
|
} \
|
||||||
|
\
|
||||||
|
_rc_; \
|
||||||
|
})
|
||||||
|
|
||||||
#define mutex_enter(mp) mutex_enter_nested((mp), 0)
|
#define mutex_enter(mp) mutex_enter_nested((mp), 0)
|
||||||
|
|
||||||
|
|||||||
@@ -73,13 +73,6 @@ typedef struct zfs_uio {
|
|||||||
size_t uio_skip;
|
size_t uio_skip;
|
||||||
|
|
||||||
struct request *rq;
|
struct request *rq;
|
||||||
|
|
||||||
/*
|
|
||||||
* Used for saving rq_for_each_segment() state between calls
|
|
||||||
* to zfs_uiomove_bvec_rq().
|
|
||||||
*/
|
|
||||||
struct req_iterator iter;
|
|
||||||
struct bio_vec bv;
|
|
||||||
} zfs_uio_t;
|
} zfs_uio_t;
|
||||||
|
|
||||||
|
|
||||||
@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
|
|||||||
} else {
|
} else {
|
||||||
uio->uio_bvec = NULL;
|
uio->uio_bvec = NULL;
|
||||||
uio->uio_iovcnt = 0;
|
uio->uio_iovcnt = 0;
|
||||||
memset(&uio->iter, 0, sizeof (uio->iter));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uio->uio_loffset = io_offset(bio, rq);
|
uio->uio_loffset = io_offset(bio, rq);
|
||||||
|
|||||||
@@ -45,6 +45,8 @@ extern "C" {
|
|||||||
typedef struct zfsvfs zfsvfs_t;
|
typedef struct zfsvfs zfsvfs_t;
|
||||||
struct znode;
|
struct znode;
|
||||||
|
|
||||||
|
extern int zfs_bclone_enabled;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This structure emulates the vfs_t from other platforms. Its purpose
|
* This structure emulates the vfs_t from other platforms. Its purpose
|
||||||
* is to facilitate the handling of mount options and minimize structural
|
* is to facilitate the handling of mount options and minimize structural
|
||||||
|
|||||||
@@ -56,7 +56,12 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
|
|||||||
extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
|
extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
|
||||||
cred_t *cr, int flags);
|
cred_t *cr, int flags);
|
||||||
extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
|
extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
|
||||||
|
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
|
||||||
|
extern int zfs_getattr_fast(zidmap_t *, u32 request_mask, struct inode *ip,
|
||||||
|
struct kstat *sp);
|
||||||
|
#else
|
||||||
extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
|
extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
|
||||||
|
#endif
|
||||||
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
|
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr,
|
||||||
zidmap_t *mnt_ns);
|
zidmap_t *mnt_ns);
|
||||||
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
|
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ extern const struct file_operations zpl_file_operations;
|
|||||||
extern const struct file_operations zpl_dir_file_operations;
|
extern const struct file_operations zpl_dir_file_operations;
|
||||||
|
|
||||||
/* zpl_super.c */
|
/* zpl_super.c */
|
||||||
extern void zpl_prune_sb(int64_t nr_to_scan, void *arg);
|
extern void zpl_prune_sb(uint64_t nr_to_scan, void *arg);
|
||||||
|
|
||||||
extern const struct super_operations zpl_super_operations;
|
extern const struct super_operations zpl_super_operations;
|
||||||
extern const struct export_operations zpl_export_operations;
|
extern const struct export_operations zpl_export_operations;
|
||||||
@@ -263,4 +263,15 @@ extern long zpl_ioctl_fideduperange(struct file *filp, void *arg);
|
|||||||
#define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia)
|
#define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(dentry, ia)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_INODE_GET_CTIME
|
||||||
|
#define zpl_inode_get_ctime(ip) inode_get_ctime(ip)
|
||||||
|
#else
|
||||||
|
#define zpl_inode_get_ctime(ip) (ip->i_ctime)
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_INODE_SET_CTIME_TO_TS
|
||||||
|
#define zpl_inode_set_ctime_to_ts(ip, ts) inode_set_ctime_to_ts(ip, ts)
|
||||||
|
#else
|
||||||
|
#define zpl_inode_set_ctime_to_ts(ip, ts) (ip->i_ctime = ts)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _SYS_ZPL_H */
|
#endif /* _SYS_ZPL_H */
|
||||||
|
|||||||
+1
-1
@@ -81,7 +81,7 @@ typedef struct arc_prune arc_prune_t;
|
|||||||
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
|
typedef void arc_read_done_func_t(zio_t *zio, const zbookmark_phys_t *zb,
|
||||||
const blkptr_t *bp, arc_buf_t *buf, void *priv);
|
const blkptr_t *bp, arc_buf_t *buf, void *priv);
|
||||||
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
|
typedef void arc_write_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
|
||||||
typedef void arc_prune_func_t(int64_t bytes, void *priv);
|
typedef void arc_prune_func_t(uint64_t bytes, void *priv);
|
||||||
|
|
||||||
/* Shared module parameters */
|
/* Shared module parameters */
|
||||||
extern uint_t zfs_arc_average_blocksize;
|
extern uint_t zfs_arc_average_blocksize;
|
||||||
|
|||||||
@@ -1065,7 +1065,6 @@ extern void arc_wait_for_eviction(uint64_t, boolean_t);
|
|||||||
|
|
||||||
extern void arc_lowmem_init(void);
|
extern void arc_lowmem_init(void);
|
||||||
extern void arc_lowmem_fini(void);
|
extern void arc_lowmem_fini(void);
|
||||||
extern void arc_prune_async(uint64_t);
|
|
||||||
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
|
extern int arc_memory_throttle(spa_t *spa, uint64_t reserve, uint64_t txg);
|
||||||
extern uint64_t arc_free_memory(void);
|
extern uint64_t arc_free_memory(void);
|
||||||
extern int64_t arc_available_memory(void);
|
extern int64_t arc_available_memory(void);
|
||||||
|
|||||||
@@ -0,0 +1,199 @@
|
|||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or https://opensource.org/licenses/CDDL-1.0.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2020, 2021, 2022 by Pawel Jakub Dawidek
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SYS_BRT_IMPL_H
|
||||||
|
#define _SYS_BRT_IMPL_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BRT - Block Reference Table.
|
||||||
|
*/
|
||||||
|
#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We divide each VDEV into 16MB chunks. Each chunk is represented in memory
|
||||||
|
* by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
|
||||||
|
* Each element in this array represents how many BRT entries we have in this
|
||||||
|
* chunk of storage. We always load this entire array into memory and update as
|
||||||
|
* needed. By having it in memory we can quickly tell (during zio_free()) if
|
||||||
|
* there are any BRT entries that we might need to update.
|
||||||
|
*
|
||||||
|
* This value cannot be larger than 16MB, at least as long as we support
|
||||||
|
* 512 byte block sizes. With 512 byte block size we can have exactly
|
||||||
|
* 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
|
||||||
|
* many for a 16bit counter.
|
||||||
|
*/
|
||||||
|
#define BRT_RANGESIZE (16 * 1024 * 1024)
|
||||||
|
_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
|
||||||
|
"BRT_RANGESIZE is too large.");
|
||||||
|
/*
|
||||||
|
* We don't want to update the whole structure every time. Maintain bitmap
|
||||||
|
* of dirty blocks within the regions, so that a single bit represents a
|
||||||
|
* block size of entcounts. For example if we have a 1PB vdev then all
|
||||||
|
* entcounts take 128MB of memory ((1PB / 16MB) * 2B). We can divide this
|
||||||
|
* 128MB array of entcounts into 32kB disk blocks, as we don't want to update
|
||||||
|
* the whole 128MB on disk when we have updated only a single entcount.
|
||||||
|
* We maintain a bitmap where each 32kB disk block within 128MB entcounts array
|
||||||
|
* is represented by a single bit. This gives us 4096 bits. A set bit in the
|
||||||
|
* bitmap means that we had a change in at least one of the 16384 entcounts
|
||||||
|
* that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
|
||||||
|
*/
|
||||||
|
#define BRT_BLOCKSIZE (32 * 1024)
|
||||||
|
#define BRT_RANGESIZE_TO_NBLOCKS(size) \
|
||||||
|
(((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
|
||||||
|
|
||||||
|
#define BRT_LITTLE_ENDIAN 0
|
||||||
|
#define BRT_BIG_ENDIAN 1
|
||||||
|
#ifdef _ZFS_LITTLE_ENDIAN
|
||||||
|
#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
|
||||||
|
#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
|
||||||
|
#else
|
||||||
|
#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
|
||||||
|
#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct brt_vdev_phys {
|
||||||
|
uint64_t bvp_mos_entries;
|
||||||
|
uint64_t bvp_size;
|
||||||
|
uint64_t bvp_byteorder;
|
||||||
|
uint64_t bvp_totalcount;
|
||||||
|
uint64_t bvp_rangesize;
|
||||||
|
uint64_t bvp_usedspace;
|
||||||
|
uint64_t bvp_savedspace;
|
||||||
|
} brt_vdev_phys_t;
|
||||||
|
|
||||||
|
typedef struct brt_vdev {
|
||||||
|
/*
|
||||||
|
* VDEV id.
|
||||||
|
*/
|
||||||
|
uint64_t bv_vdevid;
|
||||||
|
/*
|
||||||
|
* Is the structure initiated?
|
||||||
|
* (bv_entcount and bv_bitmap are allocated?)
|
||||||
|
*/
|
||||||
|
boolean_t bv_initiated;
|
||||||
|
/*
|
||||||
|
* Object number in the MOS for the entcount array and brt_vdev_phys.
|
||||||
|
*/
|
||||||
|
uint64_t bv_mos_brtvdev;
|
||||||
|
/*
|
||||||
|
* Object number in the MOS for the entries table.
|
||||||
|
*/
|
||||||
|
uint64_t bv_mos_entries;
|
||||||
|
/*
|
||||||
|
* Entries to sync.
|
||||||
|
*/
|
||||||
|
avl_tree_t bv_tree;
|
||||||
|
/*
|
||||||
|
* Does the bv_entcount[] array need byte swapping?
|
||||||
|
*/
|
||||||
|
boolean_t bv_need_byteswap;
|
||||||
|
/*
|
||||||
|
* Number of entries in the bv_entcount[] array.
|
||||||
|
*/
|
||||||
|
uint64_t bv_size;
|
||||||
|
/*
|
||||||
|
* This is the array with BRT entry count per BRT_RANGESIZE.
|
||||||
|
*/
|
||||||
|
uint16_t *bv_entcount;
|
||||||
|
/*
|
||||||
|
* Sum of all bv_entcount[]s.
|
||||||
|
*/
|
||||||
|
uint64_t bv_totalcount;
|
||||||
|
/*
|
||||||
|
* Space on disk occupied by cloned blocks (without compression).
|
||||||
|
*/
|
||||||
|
uint64_t bv_usedspace;
|
||||||
|
/*
|
||||||
|
* How much additional space would be occupied without block cloning.
|
||||||
|
*/
|
||||||
|
uint64_t bv_savedspace;
|
||||||
|
/*
|
||||||
|
* brt_vdev_phys needs updating on disk.
|
||||||
|
*/
|
||||||
|
boolean_t bv_meta_dirty;
|
||||||
|
/*
|
||||||
|
* bv_entcount[] needs updating on disk.
|
||||||
|
*/
|
||||||
|
boolean_t bv_entcount_dirty;
|
||||||
|
/*
|
||||||
|
* bv_entcount[] potentially can be a bit too big to synchronize it all
|
||||||
|
* when we just changed few entcounts. The fields below allow us to
|
||||||
|
* track updates to bv_entcount[] array since the last sync.
|
||||||
|
* A single bit in the bv_bitmap represents as many entcounts as can
|
||||||
|
* fit into a single BRT_BLOCKSIZE.
|
||||||
|
* For example we have 65536 entcounts in the bv_entcount array
|
||||||
|
* (so the whole array is 128kB). We updated bv_entcount[2] and
|
||||||
|
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
|
||||||
|
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
|
||||||
|
*/
|
||||||
|
ulong_t *bv_bitmap;
|
||||||
|
uint64_t bv_nblocks;
|
||||||
|
} brt_vdev_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In-core brt
|
||||||
|
*/
|
||||||
|
typedef struct brt {
|
||||||
|
krwlock_t brt_lock;
|
||||||
|
spa_t *brt_spa;
|
||||||
|
#define brt_mos brt_spa->spa_meta_objset
|
||||||
|
uint64_t brt_rangesize;
|
||||||
|
uint64_t brt_usedspace;
|
||||||
|
uint64_t brt_savedspace;
|
||||||
|
avl_tree_t brt_pending_tree[TXG_SIZE];
|
||||||
|
kmutex_t brt_pending_lock[TXG_SIZE];
|
||||||
|
/* Sum of all entries across all bv_trees. */
|
||||||
|
uint64_t brt_nentries;
|
||||||
|
brt_vdev_t *brt_vdevs;
|
||||||
|
uint64_t brt_nvdevs;
|
||||||
|
} brt_t;
|
||||||
|
|
||||||
|
/* Size of bre_offset / sizeof (uint64_t). */
|
||||||
|
#define BRT_KEY_WORDS (1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In-core brt entry.
|
||||||
|
* On-disk we use bre_offset as the key and bre_refcount as the value.
|
||||||
|
*/
|
||||||
|
typedef struct brt_entry {
|
||||||
|
uint64_t bre_offset;
|
||||||
|
uint64_t bre_refcount;
|
||||||
|
avl_node_t bre_node;
|
||||||
|
} brt_entry_t;
|
||||||
|
|
||||||
|
typedef struct brt_pending_entry {
|
||||||
|
blkptr_t bpe_bp;
|
||||||
|
int bpe_count;
|
||||||
|
avl_node_t bpe_node;
|
||||||
|
} brt_pending_entry_t;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _SYS_BRT_IMPL_H */
|
||||||
+1
-2
@@ -1072,8 +1072,7 @@ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole,
|
|||||||
int dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_read_l0_bps(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
uint64_t length, struct blkptr *bps, size_t *nbpsp);
|
uint64_t length, struct blkptr *bps, size_t *nbpsp);
|
||||||
int dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset,
|
int dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset,
|
||||||
uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps,
|
uint64_t length, dmu_tx_t *tx, const struct blkptr *bps, size_t nbps);
|
||||||
boolean_t replay);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initial setup and final teardown.
|
* Initial setup and final teardown.
|
||||||
|
|||||||
+1
-1
@@ -837,7 +837,7 @@ extern kmutex_t spa_namespace_lock;
|
|||||||
|
|
||||||
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
|
extern void spa_write_cachefile(spa_t *, boolean_t, boolean_t, boolean_t);
|
||||||
extern void spa_config_load(void);
|
extern void spa_config_load(void);
|
||||||
extern nvlist_t *spa_all_configs(uint64_t *);
|
extern int spa_all_configs(uint64_t *generation, nvlist_t **pools);
|
||||||
extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
extern void spa_config_set(spa_t *spa, nvlist_t *config);
|
||||||
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
|
extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg,
|
||||||
int getstats);
|
int getstats);
|
||||||
|
|||||||
@@ -73,8 +73,7 @@ struct tx_cpu {
|
|||||||
kcondvar_t tc_cv[TXG_SIZE];
|
kcondvar_t tc_cv[TXG_SIZE];
|
||||||
uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */
|
uint64_t tc_count[TXG_SIZE]; /* tx hold count on each txg */
|
||||||
list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
|
list_t tc_callbacks[TXG_SIZE]; /* commit cb list */
|
||||||
char tc_pad[8]; /* pad to fill 3 cache lines */
|
} ____cacheline_aligned;
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The tx_state structure maintains the state information about the different
|
* The tx_state structure maintains the state information about the different
|
||||||
|
|||||||
@@ -131,7 +131,10 @@ typedef const struct vdev_ops {
|
|||||||
* Virtual device properties
|
* Virtual device properties
|
||||||
*/
|
*/
|
||||||
typedef union vdev_queue_class {
|
typedef union vdev_queue_class {
|
||||||
list_t vqc_list;
|
struct {
|
||||||
|
ulong_t vqc_list_numnodes;
|
||||||
|
list_t vqc_list;
|
||||||
|
};
|
||||||
avl_tree_t vqc_tree;
|
avl_tree_t vqc_tree;
|
||||||
} vdev_queue_class_t;
|
} vdev_queue_class_t;
|
||||||
|
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ typedef struct raidz_row {
|
|||||||
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
uint64_t rr_offset; /* Logical offset for *_io_verify() */
|
||||||
uint64_t rr_size; /* Physical size for *_io_verify() */
|
uint64_t rr_size; /* Physical size for *_io_verify() */
|
||||||
#endif
|
#endif
|
||||||
raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
|
raidz_col_t rr_col[]; /* Flexible array of I/O columns */
|
||||||
} raidz_row_t;
|
} raidz_row_t;
|
||||||
|
|
||||||
typedef struct raidz_map {
|
typedef struct raidz_map {
|
||||||
@@ -139,7 +139,7 @@ typedef struct raidz_map {
|
|||||||
int rm_nskip; /* RAIDZ sectors skipped for padding */
|
int rm_nskip; /* RAIDZ sectors skipped for padding */
|
||||||
int rm_skipstart; /* Column index of padding start */
|
int rm_skipstart; /* Column index of padding start */
|
||||||
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
||||||
raidz_row_t *rm_row[0]; /* flexible array of rows */
|
raidz_row_t *rm_row[]; /* flexible array of rows */
|
||||||
} raidz_map_t;
|
} raidz_map_t;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -274,11 +274,13 @@ typedef struct kmutex {
|
|||||||
extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie);
|
extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie);
|
||||||
extern void mutex_destroy(kmutex_t *mp);
|
extern void mutex_destroy(kmutex_t *mp);
|
||||||
extern void mutex_enter(kmutex_t *mp);
|
extern void mutex_enter(kmutex_t *mp);
|
||||||
|
extern int mutex_enter_check_return(kmutex_t *mp);
|
||||||
extern void mutex_exit(kmutex_t *mp);
|
extern void mutex_exit(kmutex_t *mp);
|
||||||
extern int mutex_tryenter(kmutex_t *mp);
|
extern int mutex_tryenter(kmutex_t *mp);
|
||||||
|
|
||||||
#define NESTED_SINGLE 1
|
#define NESTED_SINGLE 1
|
||||||
#define mutex_enter_nested(mp, class) mutex_enter(mp)
|
#define mutex_enter_nested(mp, class) mutex_enter(mp)
|
||||||
|
#define mutex_enter_interruptible(mp) mutex_enter_check_return(mp)
|
||||||
/*
|
/*
|
||||||
* RW locks
|
* RW locks
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -515,6 +515,8 @@
|
|||||||
<elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_open' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_open_canfail' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_pool_state_to_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
|
<elf-symbol name='zpool_prepare_and_label_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
|
<elf-symbol name='zpool_prepare_disk' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_print_unsup_feat' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_prop_align_right' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_prop_column_name' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
@@ -562,6 +564,8 @@
|
|||||||
<elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_vdev_remove' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_vdev_remove_cancel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_vdev_remove_wanted' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
|
<elf-symbol name='zpool_vdev_script_alloc_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
|
<elf-symbol name='zpool_vdev_script_free_env' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_vdev_split' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_wait' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
<elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
<elf-symbol name='zpool_wait_status' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||||
|
|||||||
@@ -2071,3 +2071,196 @@ printf_color(const char *color, const char *format, ...)
|
|||||||
|
|
||||||
return (rc);
|
return (rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* PATH + 5 env vars + a NULL entry = 7 */
|
||||||
|
#define ZPOOL_VDEV_SCRIPT_ENV_COUNT 7
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are a few places where ZFS will call external scripts (like the script
|
||||||
|
* in zpool.d/ and `zfs_prepare_disk`). These scripts are called with a
|
||||||
|
* reduced $PATH, and some vdev specific environment vars set. This function
|
||||||
|
* will allocate and populate the environment variable array that is passed to
|
||||||
|
* these scripts. The user must free the array with zpool_vdev_script_free_env() when
|
||||||
|
* they are done.
|
||||||
|
*
|
||||||
|
* The following env vars will be set (but value could be blank):
|
||||||
|
*
|
||||||
|
* POOL_NAME
|
||||||
|
* VDEV_PATH
|
||||||
|
* VDEV_UPATH
|
||||||
|
* VDEV_ENC_SYSFS_PATH
|
||||||
|
*
|
||||||
|
* In addition, you can set an optional environment variable named 'opt_key'
|
||||||
|
* to 'opt_val' if you want.
|
||||||
|
*
|
||||||
|
* Returns allocated env[] array on success, NULL otherwise.
|
||||||
|
*/
|
||||||
|
char **
|
||||||
|
zpool_vdev_script_alloc_env(const char *pool_name,
|
||||||
|
const char *vdev_path, const char *vdev_upath,
|
||||||
|
const char *vdev_enc_sysfs_path, const char *opt_key, const char *opt_val)
|
||||||
|
{
|
||||||
|
char **env = NULL;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
env = calloc(ZPOOL_VDEV_SCRIPT_ENV_COUNT, sizeof (*env));
|
||||||
|
if (!env)
|
||||||
|
return (NULL);
|
||||||
|
|
||||||
|
env[0] = strdup("PATH=/bin:/sbin:/usr/bin:/usr/sbin");
|
||||||
|
if (!env[0])
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
/* Setup our custom environment variables */
|
||||||
|
rc = asprintf(&env[1], "POOL_NAME=%s", pool_name ? pool_name : "");
|
||||||
|
if (rc == -1) {
|
||||||
|
env[1] = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = asprintf(&env[2], "VDEV_PATH=%s", vdev_path ? vdev_path : "");
|
||||||
|
if (rc == -1) {
|
||||||
|
env[2] = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = asprintf(&env[3], "VDEV_UPATH=%s", vdev_upath ? vdev_upath : "");
|
||||||
|
if (rc == -1) {
|
||||||
|
env[3] = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = asprintf(&env[4], "VDEV_ENC_SYSFS_PATH=%s",
|
||||||
|
vdev_enc_sysfs_path ? vdev_enc_sysfs_path : "");
|
||||||
|
if (rc == -1) {
|
||||||
|
env[4] = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opt_key != NULL) {
|
||||||
|
rc = asprintf(&env[5], "%s=%s", opt_key,
|
||||||
|
opt_val ? opt_val : "");
|
||||||
|
if (rc == -1) {
|
||||||
|
env[5] = NULL;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (env);
|
||||||
|
|
||||||
|
error:
|
||||||
|
for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++)
|
||||||
|
free(env[i]);
|
||||||
|
|
||||||
|
free(env);
|
||||||
|
|
||||||
|
return (NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free the env[] array that was allocated by zpool_vdev_script_alloc_env().
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
zpool_vdev_script_free_env(char **env)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < ZPOOL_VDEV_SCRIPT_ENV_COUNT; i++)
|
||||||
|
free(env[i]);
|
||||||
|
|
||||||
|
free(env);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Prepare a disk by (optionally) running a program before labeling the disk.
|
||||||
|
* This can be useful for installing disk firmware or doing some pre-flight
|
||||||
|
* checks on the disk before it becomes part of the pool. The program run is
|
||||||
|
* located at ZFSEXECDIR/zfs_prepare_disk
|
||||||
|
* (e.g. /usr/local/libexec/zfs/zfs_prepare_disk).
|
||||||
|
*
|
||||||
|
* Return 0 on success, non-zero on failure.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
zpool_prepare_disk(zpool_handle_t *zhp, nvlist_t *vdev_nv,
|
||||||
|
const char *prepare_str, char **lines[], int *lines_cnt)
|
||||||
|
{
|
||||||
|
const char *script_path = ZFSEXECDIR "/zfs_prepare_disk";
|
||||||
|
const char *pool_name;
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
|
/* Path to script and a NULL entry */
|
||||||
|
char *argv[2] = {(char *)script_path};
|
||||||
|
char **env = NULL;
|
||||||
|
const char *path = NULL, *enc_sysfs_path = NULL;
|
||||||
|
char *upath;
|
||||||
|
*lines_cnt = 0;
|
||||||
|
|
||||||
|
if (access(script_path, X_OK) != 0) {
|
||||||
|
/* No script, nothing to do */
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
|
(void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_PATH, &path);
|
||||||
|
(void) nvlist_lookup_string(vdev_nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
|
||||||
|
&enc_sysfs_path);
|
||||||
|
|
||||||
|
upath = zfs_get_underlying_path(path);
|
||||||
|
pool_name = zhp ? zpool_get_name(zhp) : NULL;
|
||||||
|
|
||||||
|
env = zpool_vdev_script_alloc_env(pool_name, path, upath,
|
||||||
|
enc_sysfs_path, "VDEV_PREPARE", prepare_str);
|
||||||
|
|
||||||
|
free(upath);
|
||||||
|
|
||||||
|
if (env == NULL) {
|
||||||
|
return (ENOMEM);
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = libzfs_run_process_get_stdout(script_path, argv, env, lines,
|
||||||
|
lines_cnt);
|
||||||
|
|
||||||
|
zpool_vdev_script_free_env(env);
|
||||||
|
|
||||||
|
return (rc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Optionally run a script and then label a disk. The script can be used to
|
||||||
|
* prepare a disk for inclusion into the pool. For example, it might update
|
||||||
|
* the disk's firmware or check its health.
|
||||||
|
*
|
||||||
|
* The 'name' provided is the short name, stripped of any leading
|
||||||
|
* /dev path, and is passed to zpool_label_disk. vdev_nv is the nvlist for
|
||||||
|
* the vdev. prepare_str is a string that gets passed as the VDEV_PREPARE
|
||||||
|
* env variable to the script.
|
||||||
|
*
|
||||||
|
* The following env vars are passed to the script:
|
||||||
|
*
|
||||||
|
* POOL_NAME: The pool name (blank during zpool create)
|
||||||
|
* VDEV_PREPARE: Reason why the disk is being prepared for inclusion:
|
||||||
|
* "create", "add", "replace", or "autoreplace"
|
||||||
|
* VDEV_PATH: Path to the disk
|
||||||
|
* VDEV_UPATH: One of the 'underlying paths' to the disk. This is
|
||||||
|
* useful for DM devices.
|
||||||
|
* VDEV_ENC_SYSFS_PATH: Path to the disk's enclosure sysfs path, if available.
|
||||||
|
*
|
||||||
|
* Note, some of these values can be blank.
|
||||||
|
*
|
||||||
|
* Return 0 on success, non-zero otherwise.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
zpool_prepare_and_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp,
|
||||||
|
const char *name, nvlist_t *vdev_nv, const char *prepare_str,
|
||||||
|
char **lines[], int *lines_cnt)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
char vdev_path[MAXPATHLEN];
|
||||||
|
(void) snprintf(vdev_path, sizeof (vdev_path), "%s/%s", DISK_ROOT,
|
||||||
|
name);
|
||||||
|
|
||||||
|
/* zhp will be NULL when creating a pool */
|
||||||
|
rc = zpool_prepare_disk(zhp, vdev_nv, prepare_str, lines, lines_cnt);
|
||||||
|
if (rc != 0)
|
||||||
|
return (rc);
|
||||||
|
|
||||||
|
rc = zpool_label_disk(hdl, zhp, name);
|
||||||
|
return (rc);
|
||||||
|
}
|
||||||
|
|||||||
@@ -205,6 +205,15 @@ mutex_enter(kmutex_t *mp)
|
|||||||
mp->m_owner = pthread_self();
|
mp->m_owner = pthread_self();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
mutex_enter_check_return(kmutex_t *mp)
|
||||||
|
{
|
||||||
|
int error = pthread_mutex_lock(&mp->m_lock);
|
||||||
|
if (error == 0)
|
||||||
|
mp->m_owner = pthread_self();
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
mutex_tryenter(kmutex_t *mp)
|
mutex_tryenter(kmutex_t *mp)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -582,9 +582,8 @@ zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
|
|||||||
* Wait up to timeout_ms for udev to set up the device node. The device is
|
* Wait up to timeout_ms for udev to set up the device node. The device is
|
||||||
* considered ready when libudev determines it has been initialized, all of
|
* considered ready when libudev determines it has been initialized, all of
|
||||||
* the device links have been verified to exist, and it has been allowed to
|
* the device links have been verified to exist, and it has been allowed to
|
||||||
* settle. At this point the device the device can be accessed reliably.
|
* settle. At this point the device can be accessed reliably. Depending on
|
||||||
* Depending on the complexity of the udev rules this process could take
|
* the complexity of the udev rules this process could take several seconds.
|
||||||
* several seconds.
|
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
zpool_label_disk_wait(const char *path, int timeout_ms)
|
zpool_label_disk_wait(const char *path, int timeout_ms)
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ dist_man_MANS = \
|
|||||||
%D%/man8/zfs-userspace.8 \
|
%D%/man8/zfs-userspace.8 \
|
||||||
%D%/man8/zfs-wait.8 \
|
%D%/man8/zfs-wait.8 \
|
||||||
%D%/man8/zfs_ids_to_path.8 \
|
%D%/man8/zfs_ids_to_path.8 \
|
||||||
|
%D%/man8/zfs_prepare_disk.8 \
|
||||||
%D%/man8/zgenhostid.8 \
|
%D%/man8/zgenhostid.8 \
|
||||||
%D%/man8/zinject.8 \
|
%D%/man8/zinject.8 \
|
||||||
%D%/man8/zpool.8 \
|
%D%/man8/zpool.8 \
|
||||||
|
|||||||
+63
-1
@@ -1137,6 +1137,11 @@ Selecting any option other than
|
|||||||
results in vector instructions
|
results in vector instructions
|
||||||
from the respective CPU instruction set being used.
|
from the respective CPU instruction set being used.
|
||||||
.
|
.
|
||||||
|
.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
|
||||||
|
Enable the experimental block cloning feature.
|
||||||
|
If this setting is 0, then even if feature@block_cloning is enabled,
|
||||||
|
attempts to clone blocks will act as though the feature is disabled.
|
||||||
|
.
|
||||||
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
|
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
|
||||||
Select a BLAKE3 implementation.
|
Select a BLAKE3 implementation.
|
||||||
.Pp
|
.Pp
|
||||||
@@ -2172,7 +2177,7 @@ if a volatile out-of-order write cache is enabled.
|
|||||||
Disable intent logging replay.
|
Disable intent logging replay.
|
||||||
Can be disabled for recovery from corrupted ZIL.
|
Can be disabled for recovery from corrupted ZIL.
|
||||||
.
|
.
|
||||||
.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768 KiB Pc Pq u64
|
.It Sy zil_slog_bulk Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
|
||||||
Limit SLOG write size per commit executed with synchronous priority.
|
Limit SLOG write size per commit executed with synchronous priority.
|
||||||
Any writes above that will be executed with lower (asynchronous) priority
|
Any writes above that will be executed with lower (asynchronous) priority
|
||||||
to limit potential SLOG device abuse by single active ZIL writer.
|
to limit potential SLOG device abuse by single active ZIL writer.
|
||||||
@@ -2317,6 +2322,63 @@ If
|
|||||||
.Sy zvol_threads
|
.Sy zvol_threads
|
||||||
to the number of CPUs present or 32 (whichever is greater).
|
to the number of CPUs present or 32 (whichever is greater).
|
||||||
.
|
.
|
||||||
|
.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
|
||||||
|
The number of threads per zvol to use for queuing IO requests.
|
||||||
|
This parameter will only appear if your kernel supports
|
||||||
|
.Li blk-mq
|
||||||
|
and is only read and assigned to a zvol at zvol load time.
|
||||||
|
If
|
||||||
|
.Sy 0
|
||||||
|
(the default) then internally set
|
||||||
|
.Sy zvol_blk_mq_threads
|
||||||
|
to the number of CPUs present.
|
||||||
|
.
|
||||||
|
.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
|
||||||
|
Set to
|
||||||
|
.Sy 1
|
||||||
|
to use the
|
||||||
|
.Li blk-mq
|
||||||
|
API for zvols.
|
||||||
|
Set to
|
||||||
|
.Sy 0
|
||||||
|
(the default) to use the legacy zvol APIs.
|
||||||
|
This setting can give better or worse zvol performance depending on
|
||||||
|
the workload.
|
||||||
|
This parameter will only appear if your kernel supports
|
||||||
|
.Li blk-mq
|
||||||
|
and is only read and assigned to a zvol at zvol load time.
|
||||||
|
.
|
||||||
|
.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
|
||||||
|
If
|
||||||
|
.Sy zvol_use_blk_mq
|
||||||
|
is enabled, then process this number of
|
||||||
|
.Sy volblocksize Ns -sized blocks per zvol thread.
|
||||||
|
This tunable can be used to favor better performance for zvol reads (lower
|
||||||
|
values) or writes (higher values).
|
||||||
|
If set to
|
||||||
|
.Sy 0 ,
|
||||||
|
then the zvol layer will process the maximum number of blocks
|
||||||
|
per thread that it can.
|
||||||
|
This parameter will only appear if your kernel supports
|
||||||
|
.Li blk-mq
|
||||||
|
and is only applied at each zvol's load time.
|
||||||
|
.
|
||||||
|
.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
|
||||||
|
The queue_depth value for the zvol
|
||||||
|
.Li blk-mq
|
||||||
|
interface.
|
||||||
|
This parameter will only appear if your kernel supports
|
||||||
|
.Li blk-mq
|
||||||
|
and is only applied at each zvol's load time.
|
||||||
|
If
|
||||||
|
.Sy 0
|
||||||
|
(the default) then use the kernel's default queue depth.
|
||||||
|
Values are clamped to the kernel's
|
||||||
|
.Dv BLKDEV_MIN_RQ
|
||||||
|
and
|
||||||
|
.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
|
||||||
|
limits.
|
||||||
|
.
|
||||||
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
|
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
|
||||||
Defines zvol block devices behaviour when
|
Defines zvol block devices behaviour when
|
||||||
.Sy volmode Ns = Ns Sy default :
|
.Sy volmode Ns = Ns Sy default :
|
||||||
|
|||||||
@@ -219,8 +219,11 @@ to the end of the line is ignored.
|
|||||||
.Bd -literal -compact -offset 4n
|
.Bd -literal -compact -offset 4n
|
||||||
.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
|
.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
|
||||||
# Features which are supported by GRUB2
|
# Features which are supported by GRUB2
|
||||||
|
allocation_classes
|
||||||
async_destroy
|
async_destroy
|
||||||
|
block_cloning
|
||||||
bookmarks
|
bookmarks
|
||||||
|
device_rebuild
|
||||||
embedded_data
|
embedded_data
|
||||||
empty_bpobj
|
empty_bpobj
|
||||||
enabled_txg
|
enabled_txg
|
||||||
@@ -229,8 +232,14 @@ filesystem_limits
|
|||||||
hole_birth
|
hole_birth
|
||||||
large_blocks
|
large_blocks
|
||||||
livelist
|
livelist
|
||||||
|
log_spacemap
|
||||||
lz4_compress
|
lz4_compress
|
||||||
|
project_quota
|
||||||
|
resilver_defer
|
||||||
spacemap_histogram
|
spacemap_histogram
|
||||||
|
spacemap_v2
|
||||||
|
userobj_accounting
|
||||||
|
zilsaxattr
|
||||||
zpool_checkpoint
|
zpool_checkpoint
|
||||||
|
|
||||||
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
|
.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
|
||||||
|
|||||||
@@ -1,2 +1,3 @@
|
|||||||
/zed.8
|
/zed.8
|
||||||
/zfs-mount-generator.8
|
/zfs-mount-generator.8
|
||||||
|
/zfs_prepare_disk.8
|
||||||
|
|||||||
+9
-2
@@ -14,7 +14,7 @@
|
|||||||
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
|
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
|
||||||
.\" Copyright (c) 2017 Intel Corporation.
|
.\" Copyright (c) 2017 Intel Corporation.
|
||||||
.\"
|
.\"
|
||||||
.Dd June 27, 2023
|
.Dd November 18, 2023
|
||||||
.Dt ZDB 8
|
.Dt ZDB 8
|
||||||
.Os
|
.Os
|
||||||
.
|
.
|
||||||
@@ -23,7 +23,7 @@
|
|||||||
.Nd display ZFS storage pool debugging and consistency information
|
.Nd display ZFS storage pool debugging and consistency information
|
||||||
.Sh SYNOPSIS
|
.Sh SYNOPSIS
|
||||||
.Nm
|
.Nm
|
||||||
.Op Fl AbcdDFGhikLMNPsvXYy
|
.Op Fl AbcdDFGhikLMNPsTvXYy
|
||||||
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
|
.Op Fl e Oo Fl V Oc Oo Fl p Ar path Oc Ns …
|
||||||
.Op Fl I Ar inflight-I/O-ops
|
.Op Fl I Ar inflight-I/O-ops
|
||||||
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns …
|
.Oo Fl o Ar var Ns = Ns Ar value Oc Ns …
|
||||||
@@ -403,6 +403,13 @@ Display operation counts, bandwidth, and error counts of I/O to the pool from
|
|||||||
Simulate the effects of deduplication, constructing a DDT and then display
|
Simulate the effects of deduplication, constructing a DDT and then display
|
||||||
that DDT as with
|
that DDT as with
|
||||||
.Fl DD .
|
.Fl DD .
|
||||||
|
.It Fl T , -brt-stats
|
||||||
|
Display block reference table (BRT) statistics, including the size of unique
|
||||||
|
blocks cloned, the space saving as a result of cloning, and the saving ratio.
|
||||||
|
.It Fl TT
|
||||||
|
Display the per-vdev BRT statistics, including total references.
|
||||||
|
.It Fl TTT
|
||||||
|
Dump the contents of the block reference tables.
|
||||||
.It Fl u , -uberblock
|
.It Fl u , -uberblock
|
||||||
Display the current uberblock.
|
Display the current uberblock.
|
||||||
.El
|
.El
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
.\"
|
||||||
|
.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049).
|
||||||
|
.\" Copyright (C) 2023 Lawrence Livermore National Security, LLC.
|
||||||
|
.\" Refer to the OpenZFS git commit log for authoritative copyright attribution.
|
||||||
|
.\"
|
||||||
|
.\" The contents of this file are subject to the terms of the
|
||||||
|
.\" Common Development and Distribution License Version 1.0 (CDDL-1.0).
|
||||||
|
.\" You can obtain a copy of the license from the top-level file
|
||||||
|
.\" "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
|
||||||
|
.\" You may not use this file except in compliance with the license.
|
||||||
|
.\"
|
||||||
|
.\" Developed at Lawrence Livermore National Laboratory (LLNL-CODE-403049)
|
||||||
|
.\"
|
||||||
|
.Dd August 30, 2023
|
||||||
|
.Dt ZFS_PREPARE_DISK 8
|
||||||
|
.Os
|
||||||
|
.
|
||||||
|
.Sh NAME
|
||||||
|
.Nm zfs_prepare_disk
|
||||||
|
.Nd special script that gets run before bringing a disk into a pool
|
||||||
|
.Sh DESCRIPTION
|
||||||
|
.Nm
|
||||||
|
is an optional script that gets called by libzfs before bringing a disk into a
|
||||||
|
pool.
|
||||||
|
It can be modified by the user to run whatever commands are necessary to prepare
|
||||||
|
a disk for inclusion into the pool.
|
||||||
|
For example, users can add lines to
|
||||||
|
.Nm zfs_prepare_disk
|
||||||
|
to do things like update the drive's firmware or check the drive's health.
|
||||||
|
.Nm zfs_prepare_disk
|
||||||
|
is optional and can be removed if not needed.
|
||||||
|
libzfs will look for the script at @zfsexecdir@/zfs_prepare_disk.
|
||||||
|
.
|
||||||
|
.Ss Properties
|
||||||
|
.Nm zfs_prepare_disk
|
||||||
|
will be passed the following environment variables:
|
||||||
|
.sp
|
||||||
|
.Bl -tag -compact -width "VDEV_ENC_SYSFS_PATH"
|
||||||
|
.
|
||||||
|
.It Nm POOL_NAME
|
||||||
|
.No Name of the pool
|
||||||
|
.It Nm VDEV_PATH
|
||||||
|
.No Path to the disk (like /dev/sda)
|
||||||
|
.It Nm VDEV_PREPARE
|
||||||
|
.No Reason why the disk is being prepared for inclusion
|
||||||
|
('create', 'add', 'replace', or 'autoreplace').
|
||||||
|
This can be useful if you only want the script to be run under certain actions.
|
||||||
|
.It Nm VDEV_UPATH
|
||||||
|
.No Path to one of the underlying devices for the
|
||||||
|
disk.
|
||||||
|
For multipath this would return one of the /dev/sd* paths to the disk.
|
||||||
|
If the device is not a device mapper device, then
|
||||||
|
.Nm VDEV_UPATH
|
||||||
|
just returns the same value as
|
||||||
|
.Nm VDEV_PATH
|
||||||
|
.It Nm VDEV_ENC_SYSFS_PATH
|
||||||
|
.No Path to the disk's enclosure sysfs path, if available
|
||||||
|
.El
|
||||||
|
.Pp
|
||||||
|
Note that some of these variables may have a blank value.
|
||||||
|
.Nm POOL_NAME
|
||||||
|
is blank at pool creation time, for example.
|
||||||
|
.Sh ENVIRONMENT
|
||||||
|
.Nm zfs_prepare_disk
|
||||||
|
runs with a limited $PATH.
|
||||||
|
.Sh EXIT STATUS
|
||||||
|
.Nm zfs_prepare_disk
|
||||||
|
should return 0 on success, non-zero otherwise.
|
||||||
|
If non-zero is returned, the disk will not be included in the pool.
|
||||||
|
.
|
||||||
@@ -488,6 +488,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
|
|||||||
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||||
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||||
|
|
||||||
|
UBSAN_SANITIZE_zap_leaf.o := n
|
||||||
|
UBSAN_SANITIZE_zap_micro.o := n
|
||||||
|
UBSAN_SANITIZE_sa.o := n
|
||||||
|
|
||||||
# Suppress incorrect warnings from versions of objtool which are not
|
# Suppress incorrect warnings from versions of objtool which are not
|
||||||
# aware of x86 EVEX prefix instructions used for AVX512.
|
# aware of x86 EVEX prefix instructions used for AVX512.
|
||||||
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
|
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
|
||||||
|
|||||||
@@ -30,8 +30,6 @@
|
|||||||
__FBSDID("$FreeBSD$");
|
__FBSDID("$FreeBSD$");
|
||||||
|
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#include <sys/ck.h>
|
|
||||||
#include <sys/epoch.h>
|
|
||||||
#include <sys/kernel.h>
|
#include <sys/kernel.h>
|
||||||
#include <sys/kmem.h>
|
#include <sys/kmem.h>
|
||||||
#include <sys/lock.h>
|
#include <sys/lock.h>
|
||||||
@@ -66,11 +64,9 @@ taskq_t *dynamic_taskq = NULL;
|
|||||||
|
|
||||||
proc_t *system_proc;
|
proc_t *system_proc;
|
||||||
|
|
||||||
extern int uma_align_cache;
|
|
||||||
|
|
||||||
static MALLOC_DEFINE(M_TASKQ, "taskq", "taskq structures");
|
static MALLOC_DEFINE(M_TASKQ, "taskq", "taskq structures");
|
||||||
|
|
||||||
static CK_LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl;
|
static LIST_HEAD(tqenthashhead, taskq_ent) *tqenthashtbl;
|
||||||
static unsigned long tqenthash;
|
static unsigned long tqenthash;
|
||||||
static unsigned long tqenthashlock;
|
static unsigned long tqenthashlock;
|
||||||
static struct sx *tqenthashtbl_lock;
|
static struct sx *tqenthashtbl_lock;
|
||||||
@@ -80,8 +76,8 @@ static taskqid_t tqidnext;
|
|||||||
#define TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash])
|
#define TQIDHASH(tqid) (&tqenthashtbl[(tqid) & tqenthash])
|
||||||
#define TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)])
|
#define TQIDHASHLOCK(tqid) (&tqenthashtbl_lock[((tqid) & tqenthashlock)])
|
||||||
|
|
||||||
|
#define NORMAL_TASK 0
|
||||||
#define TIMEOUT_TASK 1
|
#define TIMEOUT_TASK 1
|
||||||
#define NORMAL_TASK 2
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
system_taskq_init(void *arg)
|
system_taskq_init(void *arg)
|
||||||
@@ -121,7 +117,7 @@ system_taskq_fini(void *arg)
|
|||||||
for (i = 0; i < tqenthashlock + 1; i++)
|
for (i = 0; i < tqenthashlock + 1; i++)
|
||||||
sx_destroy(&tqenthashtbl_lock[i]);
|
sx_destroy(&tqenthashtbl_lock[i]);
|
||||||
for (i = 0; i < tqenthash + 1; i++)
|
for (i = 0; i < tqenthash + 1; i++)
|
||||||
VERIFY(CK_LIST_EMPTY(&tqenthashtbl[i]));
|
VERIFY(LIST_EMPTY(&tqenthashtbl[i]));
|
||||||
free(tqenthashtbl_lock, M_TASKQ);
|
free(tqenthashtbl_lock, M_TASKQ);
|
||||||
free(tqenthashtbl, M_TASKQ);
|
free(tqenthashtbl, M_TASKQ);
|
||||||
}
|
}
|
||||||
@@ -162,27 +158,27 @@ taskq_lookup(taskqid_t tqid)
|
|||||||
{
|
{
|
||||||
taskq_ent_t *ent = NULL;
|
taskq_ent_t *ent = NULL;
|
||||||
|
|
||||||
sx_xlock(TQIDHASHLOCK(tqid));
|
if (tqid == 0)
|
||||||
CK_LIST_FOREACH(ent, TQIDHASH(tqid), tqent_hash) {
|
return (NULL);
|
||||||
|
sx_slock(TQIDHASHLOCK(tqid));
|
||||||
|
LIST_FOREACH(ent, TQIDHASH(tqid), tqent_hash) {
|
||||||
if (ent->tqent_id == tqid)
|
if (ent->tqent_id == tqid)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ent != NULL)
|
if (ent != NULL)
|
||||||
refcount_acquire(&ent->tqent_rc);
|
refcount_acquire(&ent->tqent_rc);
|
||||||
sx_xunlock(TQIDHASHLOCK(tqid));
|
sx_sunlock(TQIDHASHLOCK(tqid));
|
||||||
return (ent);
|
return (ent);
|
||||||
}
|
}
|
||||||
|
|
||||||
static taskqid_t
|
static taskqid_t
|
||||||
taskq_insert(taskq_ent_t *ent)
|
taskq_insert(taskq_ent_t *ent)
|
||||||
{
|
{
|
||||||
taskqid_t tqid;
|
taskqid_t tqid = __taskq_genid();
|
||||||
|
|
||||||
tqid = __taskq_genid();
|
|
||||||
ent->tqent_id = tqid;
|
ent->tqent_id = tqid;
|
||||||
ent->tqent_registered = B_TRUE;
|
|
||||||
sx_xlock(TQIDHASHLOCK(tqid));
|
sx_xlock(TQIDHASHLOCK(tqid));
|
||||||
CK_LIST_INSERT_HEAD(TQIDHASH(tqid), ent, tqent_hash);
|
LIST_INSERT_HEAD(TQIDHASH(tqid), ent, tqent_hash);
|
||||||
sx_xunlock(TQIDHASHLOCK(tqid));
|
sx_xunlock(TQIDHASHLOCK(tqid));
|
||||||
return (tqid);
|
return (tqid);
|
||||||
}
|
}
|
||||||
@@ -192,13 +188,14 @@ taskq_remove(taskq_ent_t *ent)
|
|||||||
{
|
{
|
||||||
taskqid_t tqid = ent->tqent_id;
|
taskqid_t tqid = ent->tqent_id;
|
||||||
|
|
||||||
if (!ent->tqent_registered)
|
if (tqid == 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
sx_xlock(TQIDHASHLOCK(tqid));
|
sx_xlock(TQIDHASHLOCK(tqid));
|
||||||
CK_LIST_REMOVE(ent, tqent_hash);
|
if (ent->tqent_id != 0) {
|
||||||
|
LIST_REMOVE(ent, tqent_hash);
|
||||||
|
ent->tqent_id = 0;
|
||||||
|
}
|
||||||
sx_xunlock(TQIDHASHLOCK(tqid));
|
sx_xunlock(TQIDHASHLOCK(tqid));
|
||||||
ent->tqent_registered = B_FALSE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -285,21 +282,22 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid)
|
|||||||
int rc;
|
int rc;
|
||||||
taskq_ent_t *ent;
|
taskq_ent_t *ent;
|
||||||
|
|
||||||
if (tid == 0)
|
|
||||||
return (0);
|
|
||||||
|
|
||||||
if ((ent = taskq_lookup(tid)) == NULL)
|
if ((ent = taskq_lookup(tid)) == NULL)
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
ent->tqent_cancelled = B_TRUE;
|
if (ent->tqent_type == NORMAL_TASK) {
|
||||||
if (ent->tqent_type == TIMEOUT_TASK) {
|
rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
|
||||||
|
if (rc == EBUSY)
|
||||||
|
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
|
||||||
|
} else {
|
||||||
rc = taskqueue_cancel_timeout(tq->tq_queue,
|
rc = taskqueue_cancel_timeout(tq->tq_queue,
|
||||||
&ent->tqent_timeout_task, &pend);
|
&ent->tqent_timeout_task, &pend);
|
||||||
} else
|
if (rc == EBUSY) {
|
||||||
rc = taskqueue_cancel(tq->tq_queue, &ent->tqent_task, &pend);
|
taskqueue_drain_timeout(tq->tq_queue,
|
||||||
if (rc == EBUSY) {
|
&ent->tqent_timeout_task);
|
||||||
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
|
}
|
||||||
} else if (pend) {
|
}
|
||||||
|
if (pend) {
|
||||||
/*
|
/*
|
||||||
* Tasks normally free themselves when run, but here the task
|
* Tasks normally free themselves when run, but here the task
|
||||||
* was cancelled so it did not free itself.
|
* was cancelled so it did not free itself.
|
||||||
@@ -312,12 +310,13 @@ taskq_cancel_id(taskq_t *tq, taskqid_t tid)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
taskq_run(void *arg, int pending __unused)
|
taskq_run(void *arg, int pending)
|
||||||
{
|
{
|
||||||
taskq_ent_t *task = arg;
|
taskq_ent_t *task = arg;
|
||||||
|
|
||||||
if (!task->tqent_cancelled)
|
if (pending == 0)
|
||||||
task->tqent_func(task->tqent_arg);
|
return;
|
||||||
|
task->tqent_func(task->tqent_arg);
|
||||||
taskq_free(task);
|
taskq_free(task);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -345,7 +344,6 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
|
|||||||
task->tqent_func = func;
|
task->tqent_func = func;
|
||||||
task->tqent_arg = arg;
|
task->tqent_arg = arg;
|
||||||
task->tqent_type = TIMEOUT_TASK;
|
task->tqent_type = TIMEOUT_TASK;
|
||||||
task->tqent_cancelled = B_FALSE;
|
|
||||||
refcount_init(&task->tqent_rc, 1);
|
refcount_init(&task->tqent_rc, 1);
|
||||||
tqid = taskq_insert(task);
|
tqid = taskq_insert(task);
|
||||||
TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0,
|
TIMEOUT_TASK_INIT(tq->tq_queue, &task->tqent_timeout_task, 0,
|
||||||
@@ -379,7 +377,6 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
|||||||
refcount_init(&task->tqent_rc, 1);
|
refcount_init(&task->tqent_rc, 1);
|
||||||
task->tqent_func = func;
|
task->tqent_func = func;
|
||||||
task->tqent_arg = arg;
|
task->tqent_arg = arg;
|
||||||
task->tqent_cancelled = B_FALSE;
|
|
||||||
task->tqent_type = NORMAL_TASK;
|
task->tqent_type = NORMAL_TASK;
|
||||||
tqid = taskq_insert(task);
|
tqid = taskq_insert(task);
|
||||||
TASK_INIT(&task->tqent_task, prio, taskq_run, task);
|
TASK_INIT(&task->tqent_task, prio, taskq_run, task);
|
||||||
@@ -388,10 +385,12 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
taskq_run_ent(void *arg, int pending __unused)
|
taskq_run_ent(void *arg, int pending)
|
||||||
{
|
{
|
||||||
taskq_ent_t *task = arg;
|
taskq_ent_t *task = arg;
|
||||||
|
|
||||||
|
if (pending == 0)
|
||||||
|
return;
|
||||||
task->tqent_func(task->tqent_arg);
|
task->tqent_func(task->tqent_arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -406,8 +405,6 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint32_t flags,
|
|||||||
* can go at the front of the queue.
|
* can go at the front of the queue.
|
||||||
*/
|
*/
|
||||||
prio = !!(flags & TQ_FRONT);
|
prio = !!(flags & TQ_FRONT);
|
||||||
task->tqent_cancelled = B_FALSE;
|
|
||||||
task->tqent_registered = B_FALSE;
|
|
||||||
task->tqent_id = 0;
|
task->tqent_id = 0;
|
||||||
task->tqent_func = func;
|
task->tqent_func = func;
|
||||||
task->tqent_arg = arg;
|
task->tqent_arg = arg;
|
||||||
@@ -427,12 +424,13 @@ taskq_wait_id(taskq_t *tq, taskqid_t tid)
|
|||||||
{
|
{
|
||||||
taskq_ent_t *ent;
|
taskq_ent_t *ent;
|
||||||
|
|
||||||
if (tid == 0)
|
|
||||||
return;
|
|
||||||
if ((ent = taskq_lookup(tid)) == NULL)
|
if ((ent = taskq_lookup(tid)) == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
|
if (ent->tqent_type == NORMAL_TASK)
|
||||||
|
taskqueue_drain(tq->tq_queue, &ent->tqent_task);
|
||||||
|
else
|
||||||
|
taskqueue_drain_timeout(tq->tq_queue, &ent->tqent_timeout_task);
|
||||||
taskq_free(ent);
|
taskq_free(ent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -120,7 +120,7 @@ vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
|
|||||||
|
|
||||||
int
|
int
|
||||||
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
|
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
|
||||||
char *fspec, int fsflags)
|
char *fspec, int fsflags, vfs_t *parent_vfsp)
|
||||||
{
|
{
|
||||||
struct vfsconf *vfsp;
|
struct vfsconf *vfsp;
|
||||||
struct mount *mp;
|
struct mount *mp;
|
||||||
@@ -220,6 +220,13 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
|
|||||||
mp->mnt_opt = mp->mnt_optnew;
|
mp->mnt_opt = mp->mnt_optnew;
|
||||||
(void) VFS_STATFS(mp, &mp->mnt_stat);
|
(void) VFS_STATFS(mp, &mp->mnt_stat);
|
||||||
|
|
||||||
|
#ifdef VFS_SUPPORTS_EXJAIL_CLONE
|
||||||
|
/*
|
||||||
|
* Clone the mnt_exjail credentials of the parent, as required.
|
||||||
|
*/
|
||||||
|
vfs_exjail_clone(parent_vfsp, mp);
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Prevent external consumers of mount options from reading
|
* Prevent external consumers of mount options from reading
|
||||||
* mnt_optnew.
|
* mnt_optnew.
|
||||||
|
|||||||
@@ -32,11 +32,7 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include <sys/kmem.h>
|
#include <sys/kmem.h>
|
||||||
#include <sys/kmem_cache.h>
|
#include <sys/kmem_cache.h>
|
||||||
#include <sys/zmod.h>
|
#include <sys/zmod.h>
|
||||||
#if __FreeBSD_version >= 1300041
|
|
||||||
#include <contrib/zlib/zlib.h>
|
#include <contrib/zlib/zlib.h>
|
||||||
#else
|
|
||||||
#include <sys/zlib.h>
|
|
||||||
#endif
|
|
||||||
#include <sys/kobj.h>
|
#include <sys/kobj.h>
|
||||||
|
|
||||||
|
|
||||||
@@ -90,11 +86,7 @@ zlib_inflateInit(z_stream *stream)
|
|||||||
static int
|
static int
|
||||||
zlib_inflate(z_stream *stream, int finish)
|
zlib_inflate(z_stream *stream, int finish)
|
||||||
{
|
{
|
||||||
#if __FreeBSD_version >= 1300024
|
|
||||||
return (inflate(stream, finish));
|
return (inflate(stream, finish));
|
||||||
#else
|
|
||||||
return (_zlib104_inflate(stream, finish));
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -52,11 +52,6 @@
|
|||||||
#include <sys/vm.h>
|
#include <sys/vm.h>
|
||||||
#include <sys/vmmeter.h>
|
#include <sys/vmmeter.h>
|
||||||
|
|
||||||
#if __FreeBSD_version >= 1300139
|
|
||||||
static struct sx arc_vnlru_lock;
|
|
||||||
static struct vnode *arc_vnlru_marker;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
extern struct vfsops zfs_vfsops;
|
extern struct vfsops zfs_vfsops;
|
||||||
|
|
||||||
uint_t zfs_arc_free_target = 0;
|
uint_t zfs_arc_free_target = 0;
|
||||||
@@ -131,53 +126,6 @@ arc_default_max(uint64_t min, uint64_t allmem)
|
|||||||
return (MAX(allmem * 5 / 8, size));
|
return (MAX(allmem * 5 / 8, size));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Helper function for arc_prune_async() it is responsible for safely
|
|
||||||
* handling the execution of a registered arc_prune_func_t.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
arc_prune_task(void *arg)
|
|
||||||
{
|
|
||||||
uint64_t nr_scan = (uintptr_t)arg;
|
|
||||||
|
|
||||||
#ifndef __ILP32__
|
|
||||||
if (nr_scan > INT_MAX)
|
|
||||||
nr_scan = INT_MAX;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __FreeBSD_version >= 1300139
|
|
||||||
sx_xlock(&arc_vnlru_lock);
|
|
||||||
vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
|
|
||||||
sx_xunlock(&arc_vnlru_lock);
|
|
||||||
#else
|
|
||||||
vnlru_free(nr_scan, &zfs_vfsops);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Notify registered consumers they must drop holds on a portion of the ARC
|
|
||||||
* buffered they reference. This provides a mechanism to ensure the ARC can
|
|
||||||
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
|
|
||||||
* is analogous to dnlc_reduce_cache() but more generic.
|
|
||||||
*
|
|
||||||
* This operation is performed asynchronously so it may be safely called
|
|
||||||
* in the context of the arc_reclaim_thread(). A reference is taken here
|
|
||||||
* for each registered arc_prune_t and the arc_prune_task() is responsible
|
|
||||||
* for releasing it once the registered arc_prune_func_t has completed.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
arc_prune_async(uint64_t adjust)
|
|
||||||
{
|
|
||||||
|
|
||||||
#ifndef __LP64__
|
|
||||||
if (adjust > UINTPTR_MAX)
|
|
||||||
adjust = UINTPTR_MAX;
|
|
||||||
#endif
|
|
||||||
taskq_dispatch(arc_prune_taskq, arc_prune_task,
|
|
||||||
(void *)(intptr_t)adjust, TQ_SLEEP);
|
|
||||||
ARCSTAT_BUMP(arcstat_prune);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
arc_all_memory(void)
|
arc_all_memory(void)
|
||||||
{
|
{
|
||||||
@@ -228,10 +176,6 @@ arc_lowmem_init(void)
|
|||||||
{
|
{
|
||||||
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
|
arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
|
||||||
EVENTHANDLER_PRI_FIRST);
|
EVENTHANDLER_PRI_FIRST);
|
||||||
#if __FreeBSD_version >= 1300139
|
|
||||||
arc_vnlru_marker = vnlru_alloc_marker();
|
|
||||||
sx_init(&arc_vnlru_lock, "arc vnlru lock");
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -239,12 +183,6 @@ arc_lowmem_fini(void)
|
|||||||
{
|
{
|
||||||
if (arc_event_lowmem != NULL)
|
if (arc_event_lowmem != NULL)
|
||||||
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
|
EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
|
||||||
#if __FreeBSD_version >= 1300139
|
|
||||||
if (arc_vnlru_marker != NULL) {
|
|
||||||
vnlru_free_marker(arc_vnlru_marker);
|
|
||||||
sx_destroy(&arc_vnlru_lock);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ knlist_sx_xunlock(void *arg)
|
|||||||
sx_xunlock((struct sx *)arg);
|
sx_xunlock((struct sx *)arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if __FreeBSD_version >= 1300128
|
||||||
static void
|
static void
|
||||||
knlist_sx_assert_lock(void *arg, int what)
|
knlist_sx_assert_lock(void *arg, int what)
|
||||||
{
|
{
|
||||||
@@ -55,11 +56,28 @@ knlist_sx_assert_lock(void *arg, int what)
|
|||||||
else
|
else
|
||||||
sx_assert((struct sx *)arg, SX_UNLOCKED);
|
sx_assert((struct sx *)arg, SX_UNLOCKED);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
static void
|
||||||
|
knlist_sx_assert_locked(void *arg)
|
||||||
|
{
|
||||||
|
sx_assert((struct sx *)arg, SX_LOCKED);
|
||||||
|
}
|
||||||
|
static void
|
||||||
|
knlist_sx_assert_unlocked(void *arg)
|
||||||
|
{
|
||||||
|
sx_assert((struct sx *)arg, SX_UNLOCKED);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void
|
void
|
||||||
knlist_init_sx(struct knlist *knl, struct sx *lock)
|
knlist_init_sx(struct knlist *knl, struct sx *lock)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
#if __FreeBSD_version >= 1300128
|
||||||
knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
|
knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
|
||||||
knlist_sx_assert_lock);
|
knlist_sx_assert_lock);
|
||||||
|
#else
|
||||||
|
knlist_init(knl, lock, knlist_sx_xlock, knlist_sx_xunlock,
|
||||||
|
knlist_sx_assert_locked, knlist_sx_assert_unlocked);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1026,7 +1026,8 @@ zfsctl_snapdir_lookup(struct vop_lookup_args *ap)
|
|||||||
"%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
|
"%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
|
||||||
dvp->v_vfsp->mnt_stat.f_mntonname, name);
|
dvp->v_vfsp->mnt_stat.f_mntonname, name);
|
||||||
|
|
||||||
err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
|
err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0,
|
||||||
|
dvp->v_vfsp);
|
||||||
kmem_free(mountpoint, mountpoint_len);
|
kmem_free(mountpoint, mountpoint_len);
|
||||||
if (err == 0) {
|
if (err == 0) {
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -89,6 +89,10 @@ int zfs_debug_level;
|
|||||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
|
||||||
"Debug level");
|
"Debug level");
|
||||||
|
|
||||||
|
int zfs_bclone_enabled = 0;
|
||||||
|
SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
|
||||||
|
&zfs_bclone_enabled, 0, "Enable block cloning");
|
||||||
|
|
||||||
struct zfs_jailparam {
|
struct zfs_jailparam {
|
||||||
int mount_snapshot;
|
int mount_snapshot;
|
||||||
};
|
};
|
||||||
@@ -2070,6 +2074,26 @@ zfs_vnodes_adjust_back(void)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if __FreeBSD_version >= 1300139
|
||||||
|
static struct sx zfs_vnlru_lock;
|
||||||
|
static struct vnode *zfs_vnlru_marker;
|
||||||
|
#endif
|
||||||
|
static arc_prune_t *zfs_prune;
|
||||||
|
|
||||||
|
static void
|
||||||
|
zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
|
||||||
|
{
|
||||||
|
if (nr_to_scan > INT_MAX)
|
||||||
|
nr_to_scan = INT_MAX;
|
||||||
|
#if __FreeBSD_version >= 1300139
|
||||||
|
sx_xlock(&zfs_vnlru_lock);
|
||||||
|
vnlru_free_vfsops(nr_to_scan, &zfs_vfsops, zfs_vnlru_marker);
|
||||||
|
sx_xunlock(&zfs_vnlru_lock);
|
||||||
|
#else
|
||||||
|
vnlru_free(nr_to_scan, &zfs_vfsops);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_init(void)
|
zfs_init(void)
|
||||||
{
|
{
|
||||||
@@ -2096,11 +2120,23 @@ zfs_init(void)
|
|||||||
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
|
dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
|
||||||
|
|
||||||
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
|
zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
|
||||||
|
|
||||||
|
#if __FreeBSD_version >= 1300139
|
||||||
|
zfs_vnlru_marker = vnlru_alloc_marker();
|
||||||
|
sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
|
||||||
|
#endif
|
||||||
|
zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zfs_fini(void)
|
zfs_fini(void)
|
||||||
{
|
{
|
||||||
|
arc_remove_prune_callback(zfs_prune);
|
||||||
|
#if __FreeBSD_version >= 1300139
|
||||||
|
vnlru_free_marker(zfs_vnlru_marker);
|
||||||
|
sx_destroy(&zfs_vnlru_lock);
|
||||||
|
#endif
|
||||||
|
|
||||||
taskq_destroy(zfsvfs_taskq);
|
taskq_destroy(zfsvfs_taskq);
|
||||||
zfsctl_fini();
|
zfsctl_fini();
|
||||||
zfs_znode_fini();
|
zfs_znode_fini();
|
||||||
|
|||||||
@@ -6213,6 +6213,7 @@ zfs_deallocate(struct vop_deallocate_args *ap)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if __FreeBSD_version >= 1300039
|
||||||
#ifndef _SYS_SYSPROTO_H_
|
#ifndef _SYS_SYSPROTO_H_
|
||||||
struct vop_copy_file_range_args {
|
struct vop_copy_file_range_args {
|
||||||
struct vnode *a_invp;
|
struct vnode *a_invp;
|
||||||
@@ -6243,6 +6244,11 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
|
|||||||
int error;
|
int error;
|
||||||
uint64_t len = *ap->a_lenp;
|
uint64_t len = *ap->a_lenp;
|
||||||
|
|
||||||
|
if (!zfs_bclone_enabled) {
|
||||||
|
mp = NULL;
|
||||||
|
goto bad_write_fallback;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TODO: If offset/length is not aligned to recordsize, use
|
* TODO: If offset/length is not aligned to recordsize, use
|
||||||
* vn_generic_copy_file_range() on this fragment.
|
* vn_generic_copy_file_range() on this fragment.
|
||||||
@@ -6314,6 +6320,7 @@ bad_write_fallback:
|
|||||||
ap->a_incred, ap->a_outcred, ap->a_fsizetd);
|
ap->a_incred, ap->a_outcred, ap->a_fsizetd);
|
||||||
return (error);
|
return (error);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
struct vop_vector zfs_vnodeops;
|
struct vop_vector zfs_vnodeops;
|
||||||
struct vop_vector zfs_fifoops;
|
struct vop_vector zfs_fifoops;
|
||||||
@@ -6378,7 +6385,9 @@ struct vop_vector zfs_vnodeops = {
|
|||||||
#if __FreeBSD_version >= 1400043
|
#if __FreeBSD_version >= 1400043
|
||||||
.vop_add_writecount = vop_stdadd_writecount_nomsync,
|
.vop_add_writecount = vop_stdadd_writecount_nomsync,
|
||||||
#endif
|
#endif
|
||||||
|
#if __FreeBSD_version >= 1300039
|
||||||
.vop_copy_file_range = zfs_freebsd_copy_file_range,
|
.vop_copy_file_range = zfs_freebsd_copy_file_range,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
|
VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
|
||||||
|
|
||||||
|
|||||||
@@ -1364,6 +1364,19 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
|
|||||||
vec++;
|
vec++;
|
||||||
total_len += crypt_len;
|
total_len += crypt_len;
|
||||||
}
|
}
|
||||||
|
} else if (txtype == TX_CLONE_RANGE) {
|
||||||
|
const size_t o = offsetof(lr_clone_range_t, lr_nbps);
|
||||||
|
crypt_len = o - sizeof (lr_t);
|
||||||
|
dst_iovecs[vec].iov_base = (char *)dlrp + sizeof (lr_t);
|
||||||
|
dst_iovecs[vec].iov_len = crypt_len;
|
||||||
|
|
||||||
|
/* copy the bps now since they will not be encrypted */
|
||||||
|
memcpy(dlrp + o, slrp + o, lr_len - o);
|
||||||
|
memcpy(aadp, slrp + o, lr_len - o);
|
||||||
|
aadp += lr_len - o;
|
||||||
|
aad_len += lr_len - o;
|
||||||
|
vec++;
|
||||||
|
total_len += crypt_len;
|
||||||
} else {
|
} else {
|
||||||
crypt_len = lr_len - sizeof (lr_t);
|
crypt_len = lr_len - sizeof (lr_t);
|
||||||
dst_iovecs[vec].iov_base = (char *)dlrp +
|
dst_iovecs[vec].iov_base = (char *)dlrp +
|
||||||
|
|||||||
@@ -489,56 +489,5 @@ arc_unregister_hotplug(void)
|
|||||||
}
|
}
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
/*
|
|
||||||
* Helper function for arc_prune_async() it is responsible for safely
|
|
||||||
* handling the execution of a registered arc_prune_func_t.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
arc_prune_task(void *ptr)
|
|
||||||
{
|
|
||||||
arc_prune_t *ap = (arc_prune_t *)ptr;
|
|
||||||
arc_prune_func_t *func = ap->p_pfunc;
|
|
||||||
|
|
||||||
if (func != NULL)
|
|
||||||
func(ap->p_adjust, ap->p_private);
|
|
||||||
|
|
||||||
zfs_refcount_remove(&ap->p_refcnt, func);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Notify registered consumers they must drop holds on a portion of the ARC
|
|
||||||
* buffered they reference. This provides a mechanism to ensure the ARC can
|
|
||||||
* honor the metadata limit and reclaim otherwise pinned ARC buffers. This
|
|
||||||
* is analogous to dnlc_reduce_cache() but more generic.
|
|
||||||
*
|
|
||||||
* This operation is performed asynchronously so it may be safely called
|
|
||||||
* in the context of the arc_reclaim_thread(). A reference is taken here
|
|
||||||
* for each registered arc_prune_t and the arc_prune_task() is responsible
|
|
||||||
* for releasing it once the registered arc_prune_func_t has completed.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
arc_prune_async(uint64_t adjust)
|
|
||||||
{
|
|
||||||
arc_prune_t *ap;
|
|
||||||
|
|
||||||
mutex_enter(&arc_prune_mtx);
|
|
||||||
for (ap = list_head(&arc_prune_list); ap != NULL;
|
|
||||||
ap = list_next(&arc_prune_list, ap)) {
|
|
||||||
|
|
||||||
if (zfs_refcount_count(&ap->p_refcnt) >= 2)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
|
|
||||||
ap->p_adjust = adjust;
|
|
||||||
if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
|
|
||||||
ap, TQ_SLEEP) == TASKQID_INVALID) {
|
|
||||||
zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
ARCSTAT_BUMP(arcstat_prune);
|
|
||||||
}
|
|
||||||
mutex_exit(&arc_prune_mtx);
|
|
||||||
}
|
|
||||||
|
|
||||||
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs_arc, zfs_arc_, shrinker_limit, INT, ZMOD_RW,
|
||||||
"Limit on number of pages that ARC shrinker can reclaim at once");
|
"Limit on number of pages that ARC shrinker can reclaim at once");
|
||||||
|
|||||||
@@ -522,7 +522,7 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
|
|||||||
ip->i_blkbits = SPA_MINBLOCKSHIFT;
|
ip->i_blkbits = SPA_MINBLOCKSHIFT;
|
||||||
ip->i_atime = now;
|
ip->i_atime = now;
|
||||||
ip->i_mtime = now;
|
ip->i_mtime = now;
|
||||||
ip->i_ctime = now;
|
zpl_inode_set_ctime_to_ts(ip, now);
|
||||||
ip->i_fop = fops;
|
ip->i_fop = fops;
|
||||||
ip->i_op = ops;
|
ip->i_op = ops;
|
||||||
#if defined(IOP_XATTR)
|
#if defined(IOP_XATTR)
|
||||||
|
|||||||
@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
|||||||
this_seg_start = orig_loffset;
|
this_seg_start = orig_loffset;
|
||||||
|
|
||||||
rq_for_each_segment(bv, rq, iter) {
|
rq_for_each_segment(bv, rq, iter) {
|
||||||
if (uio->iter.bio) {
|
|
||||||
/*
|
|
||||||
* If uio->iter.bio is present, then we know we've saved
|
|
||||||
* uio->iter from a previous call to this function, and
|
|
||||||
* we can skip ahead in this rq_for_each_segment() loop
|
|
||||||
* to where we last left off. That way, we don't need
|
|
||||||
* to iterate over tons of segments we've already
|
|
||||||
* processed - we can just restore the "saved state".
|
|
||||||
*/
|
|
||||||
iter = uio->iter;
|
|
||||||
bv = uio->bv;
|
|
||||||
this_seg_start = uio->uio_loffset;
|
|
||||||
memset(&uio->iter, 0, sizeof (uio->iter));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Lookup what the logical offset of the last byte of this
|
* Lookup what the logical offset of the last byte of this
|
||||||
* segment is.
|
* segment is.
|
||||||
@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
|||||||
copied = 1; /* We copied some data */
|
copied = 1; /* We copied some data */
|
||||||
}
|
}
|
||||||
|
|
||||||
if (n == 0) {
|
|
||||||
/*
|
|
||||||
* All done copying. Save our 'iter' value to the uio.
|
|
||||||
* This allows us to "save our state" and skip ahead in
|
|
||||||
* the rq_for_each_segment() loop the next time we call
|
|
||||||
* call zfs_uiomove_bvec_rq() on this uio (which we
|
|
||||||
* will be doing for any remaining data in the uio).
|
|
||||||
*/
|
|
||||||
uio->iter = iter; /* make a copy of the struct data */
|
|
||||||
uio->bv = bv;
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
this_seg_start = this_seg_end + 1;
|
this_seg_start = this_seg_end + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1488,7 +1488,7 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
|
|||||||
* read-only flag, pretend it was set, as done for snapshots.
|
* read-only flag, pretend it was set, as done for snapshots.
|
||||||
*/
|
*/
|
||||||
if (!canwrite)
|
if (!canwrite)
|
||||||
vfs->vfs_readonly = true;
|
vfs->vfs_readonly = B_TRUE;
|
||||||
|
|
||||||
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
|
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
|
||||||
if (error) {
|
if (error) {
|
||||||
|
|||||||
@@ -1652,7 +1652,12 @@ out:
|
|||||||
* RETURN: 0 (always succeeds)
|
* RETURN: 0 (always succeeds)
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
|
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
|
||||||
|
zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
|
||||||
|
struct kstat *sp)
|
||||||
|
#else
|
||||||
zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
|
zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
znode_t *zp = ITOZ(ip);
|
znode_t *zp = ITOZ(ip);
|
||||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||||
@@ -1665,7 +1670,11 @@ zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
|
|||||||
|
|
||||||
mutex_enter(&zp->z_lock);
|
mutex_enter(&zp->z_lock);
|
||||||
|
|
||||||
|
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
|
||||||
|
zpl_generic_fillattr(user_ns, request_mask, ip, sp);
|
||||||
|
#else
|
||||||
zpl_generic_fillattr(user_ns, ip, sp);
|
zpl_generic_fillattr(user_ns, ip, sp);
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
* +1 link count for root inode with visible '.zfs' directory.
|
* +1 link count for root inode with visible '.zfs' directory.
|
||||||
*/
|
*/
|
||||||
@@ -2442,8 +2451,8 @@ top:
|
|||||||
|
|
||||||
if (mask & (ATTR_CTIME | ATTR_SIZE)) {
|
if (mask & (ATTR_CTIME | ATTR_SIZE)) {
|
||||||
ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
|
ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
|
||||||
ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
|
zpl_inode_set_ctime_to_ts(ZTOI(zp),
|
||||||
ZTOI(zp));
|
zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
|
||||||
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
|
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
|
||||||
ctime, sizeof (ctime));
|
ctime, sizeof (ctime));
|
||||||
}
|
}
|
||||||
@@ -3648,6 +3657,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
|
|||||||
caddr_t va;
|
caddr_t va;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
uint64_t mtime[2], ctime[2];
|
uint64_t mtime[2], ctime[2];
|
||||||
|
inode_timespec_t tmp_ctime;
|
||||||
sa_bulk_attr_t bulk[3];
|
sa_bulk_attr_t bulk[3];
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
struct address_space *mapping;
|
struct address_space *mapping;
|
||||||
@@ -3812,7 +3822,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
|
|||||||
|
|
||||||
/* Preserve the mtime and ctime provided by the inode */
|
/* Preserve the mtime and ctime provided by the inode */
|
||||||
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
|
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
|
||||||
ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
|
tmp_ctime = zpl_inode_get_ctime(ip);
|
||||||
|
ZFS_TIME_ENCODE(&tmp_ctime, ctime);
|
||||||
zp->z_atime_dirty = B_FALSE;
|
zp->z_atime_dirty = B_FALSE;
|
||||||
zp->z_seq++;
|
zp->z_seq++;
|
||||||
|
|
||||||
@@ -3862,6 +3873,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
|
|||||||
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
zfsvfs_t *zfsvfs = ITOZSB(ip);
|
||||||
dmu_tx_t *tx;
|
dmu_tx_t *tx;
|
||||||
uint64_t mode, atime[2], mtime[2], ctime[2];
|
uint64_t mode, atime[2], mtime[2], ctime[2];
|
||||||
|
inode_timespec_t tmp_ctime;
|
||||||
sa_bulk_attr_t bulk[4];
|
sa_bulk_attr_t bulk[4];
|
||||||
int error = 0;
|
int error = 0;
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
@@ -3908,7 +3920,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
|
|||||||
/* Preserve the mode, mtime and ctime provided by the inode */
|
/* Preserve the mode, mtime and ctime provided by the inode */
|
||||||
ZFS_TIME_ENCODE(&ip->i_atime, atime);
|
ZFS_TIME_ENCODE(&ip->i_atime, atime);
|
||||||
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
|
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
|
||||||
ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
|
tmp_ctime = zpl_inode_get_ctime(ip);
|
||||||
|
ZFS_TIME_ENCODE(&tmp_ctime, ctime);
|
||||||
mode = ip->i_mode;
|
mode = ip->i_mode;
|
||||||
|
|
||||||
zp->z_mode = mode;
|
zp->z_mode = mode;
|
||||||
@@ -4058,8 +4071,8 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
|
|||||||
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
|
if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
|
||||||
return (error);
|
return (error);
|
||||||
|
|
||||||
if ((vm_flags & VM_WRITE) && (zp->z_pflags &
|
if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
|
||||||
(ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
|
(zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
|
||||||
zfs_exit(zfsvfs, FTAG);
|
zfs_exit(zfsvfs, FTAG);
|
||||||
return (SET_ERROR(EPERM));
|
return (SET_ERROR(EPERM));
|
||||||
}
|
}
|
||||||
@@ -4229,4 +4242,8 @@ EXPORT_SYMBOL(zfs_map);
|
|||||||
module_param(zfs_delete_blocks, ulong, 0644);
|
module_param(zfs_delete_blocks, ulong, 0644);
|
||||||
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
|
||||||
|
|
||||||
|
/* CSTYLED */
|
||||||
|
module_param(zfs_bclone_enabled, uint, 0644);
|
||||||
|
MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -542,6 +542,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
|
|||||||
uint64_t links;
|
uint64_t links;
|
||||||
uint64_t z_uid, z_gid;
|
uint64_t z_uid, z_gid;
|
||||||
uint64_t atime[2], mtime[2], ctime[2], btime[2];
|
uint64_t atime[2], mtime[2], ctime[2], btime[2];
|
||||||
|
inode_timespec_t tmp_ctime;
|
||||||
uint64_t projid = ZFS_DEFAULT_PROJID;
|
uint64_t projid = ZFS_DEFAULT_PROJID;
|
||||||
sa_bulk_attr_t bulk[12];
|
sa_bulk_attr_t bulk[12];
|
||||||
int count = 0;
|
int count = 0;
|
||||||
@@ -615,7 +616,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
|
|||||||
|
|
||||||
ZFS_TIME_DECODE(&ip->i_atime, atime);
|
ZFS_TIME_DECODE(&ip->i_atime, atime);
|
||||||
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
|
ZFS_TIME_DECODE(&ip->i_mtime, mtime);
|
||||||
ZFS_TIME_DECODE(&ip->i_ctime, ctime);
|
ZFS_TIME_DECODE(&tmp_ctime, ctime);
|
||||||
|
zpl_inode_set_ctime_to_ts(ip, tmp_ctime);
|
||||||
ZFS_TIME_DECODE(&zp->z_btime, btime);
|
ZFS_TIME_DECODE(&zp->z_btime, btime);
|
||||||
|
|
||||||
ip->i_ino = zp->z_id;
|
ip->i_ino = zp->z_id;
|
||||||
@@ -1195,6 +1197,7 @@ zfs_rezget(znode_t *zp)
|
|||||||
uint64_t gen;
|
uint64_t gen;
|
||||||
uint64_t z_uid, z_gid;
|
uint64_t z_uid, z_gid;
|
||||||
uint64_t atime[2], mtime[2], ctime[2], btime[2];
|
uint64_t atime[2], mtime[2], ctime[2], btime[2];
|
||||||
|
inode_timespec_t tmp_ctime;
|
||||||
uint64_t projid = ZFS_DEFAULT_PROJID;
|
uint64_t projid = ZFS_DEFAULT_PROJID;
|
||||||
znode_hold_t *zh;
|
znode_hold_t *zh;
|
||||||
|
|
||||||
@@ -1289,7 +1292,8 @@ zfs_rezget(znode_t *zp)
|
|||||||
|
|
||||||
ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
|
ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
|
||||||
ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
|
ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
|
||||||
ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
|
ZFS_TIME_DECODE(&tmp_ctime, ctime);
|
||||||
|
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
|
||||||
ZFS_TIME_DECODE(&zp->z_btime, btime);
|
ZFS_TIME_DECODE(&zp->z_btime, btime);
|
||||||
|
|
||||||
if ((uint32_t)gen != ZTOI(zp)->i_generation) {
|
if ((uint32_t)gen != ZTOI(zp)->i_generation) {
|
||||||
@@ -1397,7 +1401,7 @@ zfs_zinactive(znode_t *zp)
|
|||||||
boolean_t
|
boolean_t
|
||||||
zfs_relatime_need_update(const struct inode *ip)
|
zfs_relatime_need_update(const struct inode *ip)
|
||||||
{
|
{
|
||||||
inode_timespec_t now;
|
inode_timespec_t now, tmp_ctime;
|
||||||
|
|
||||||
gethrestime(&now);
|
gethrestime(&now);
|
||||||
/*
|
/*
|
||||||
@@ -1408,7 +1412,8 @@ zfs_relatime_need_update(const struct inode *ip)
|
|||||||
if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
|
if (zfs_compare_timespec(&ip->i_mtime, &ip->i_atime) >= 0)
|
||||||
return (B_TRUE);
|
return (B_TRUE);
|
||||||
|
|
||||||
if (zfs_compare_timespec(&ip->i_ctime, &ip->i_atime) >= 0)
|
tmp_ctime = zpl_inode_get_ctime(ip);
|
||||||
|
if (zfs_compare_timespec(&tmp_ctime, &ip->i_atime) >= 0)
|
||||||
return (B_TRUE);
|
return (B_TRUE);
|
||||||
|
|
||||||
if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
|
if ((hrtime_t)now.tv_sec - (hrtime_t)ip->i_atime.tv_sec >= 24*60*60)
|
||||||
@@ -1434,7 +1439,7 @@ void
|
|||||||
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
|
zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
|
||||||
uint64_t ctime[2])
|
uint64_t ctime[2])
|
||||||
{
|
{
|
||||||
inode_timespec_t now;
|
inode_timespec_t now, tmp_ctime;
|
||||||
|
|
||||||
gethrestime(&now);
|
gethrestime(&now);
|
||||||
|
|
||||||
@@ -1451,7 +1456,8 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
|
|||||||
|
|
||||||
if (flag & ATTR_CTIME) {
|
if (flag & ATTR_CTIME) {
|
||||||
ZFS_TIME_ENCODE(&now, ctime);
|
ZFS_TIME_ENCODE(&now, ctime);
|
||||||
ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
|
ZFS_TIME_DECODE(&tmp_ctime, ctime);
|
||||||
|
zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ctime);
|
||||||
if (ZTOZSB(zp)->z_use_fuids)
|
if (ZTOZSB(zp)->z_use_fuids)
|
||||||
zp->z_pflags |= ZFS_ARCHIVE;
|
zp->z_pflags |= ZFS_ARCHIVE;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1543,6 +1543,21 @@ zio_crypt_init_uios_zil(boolean_t encrypt, uint8_t *plainbuf,
|
|||||||
nr_iovecs++;
|
nr_iovecs++;
|
||||||
total_len += crypt_len;
|
total_len += crypt_len;
|
||||||
}
|
}
|
||||||
|
} else if (txtype == TX_CLONE_RANGE) {
|
||||||
|
const size_t o = offsetof(lr_clone_range_t, lr_nbps);
|
||||||
|
crypt_len = o - sizeof (lr_t);
|
||||||
|
src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
|
||||||
|
src_iovecs[nr_iovecs].iov_len = crypt_len;
|
||||||
|
dst_iovecs[nr_iovecs].iov_base = dlrp + sizeof (lr_t);
|
||||||
|
dst_iovecs[nr_iovecs].iov_len = crypt_len;
|
||||||
|
|
||||||
|
/* copy the bps now since they will not be encrypted */
|
||||||
|
memcpy(dlrp + o, slrp + o, lr_len - o);
|
||||||
|
memcpy(aadp, slrp + o, lr_len - o);
|
||||||
|
aadp += lr_len - o;
|
||||||
|
aad_len += lr_len - o;
|
||||||
|
nr_iovecs++;
|
||||||
|
total_len += crypt_len;
|
||||||
} else {
|
} else {
|
||||||
crypt_len = lr_len - sizeof (lr_t);
|
crypt_len = lr_len - sizeof (lr_t);
|
||||||
src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
|
src_iovecs[nr_iovecs].iov_base = slrp + sizeof (lr_t);
|
||||||
|
|||||||
@@ -124,6 +124,8 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
|
|||||||
generic_fillattr(user_ns, ip, stat);
|
generic_fillattr(user_ns, ip, stat);
|
||||||
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
|
||||||
generic_fillattr(user_ns, ip, stat);
|
generic_fillattr(user_ns, ip, stat);
|
||||||
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
|
||||||
|
generic_fillattr(user_ns, request_mask, ip, stat);
|
||||||
#else
|
#else
|
||||||
(void) user_ns;
|
(void) user_ns;
|
||||||
#endif
|
#endif
|
||||||
@@ -435,6 +437,8 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
|
|||||||
generic_fillattr(user_ns, ip, stat);
|
generic_fillattr(user_ns, ip, stat);
|
||||||
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
|
||||||
generic_fillattr(user_ns, ip, stat);
|
generic_fillattr(user_ns, ip, stat);
|
||||||
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
|
||||||
|
generic_fillattr(user_ns, request_mask, ip, stat);
|
||||||
#else
|
#else
|
||||||
(void) user_ns;
|
(void) user_ns;
|
||||||
#endif
|
#endif
|
||||||
@@ -609,6 +613,8 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
|
|||||||
generic_fillattr(user_ns, path->dentry->d_inode, stat);
|
generic_fillattr(user_ns, path->dentry->d_inode, stat);
|
||||||
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
|
||||||
generic_fillattr(user_ns, path->dentry->d_inode, stat);
|
generic_fillattr(user_ns, path->dentry->d_inode, stat);
|
||||||
|
#elif defined(HAVE_GENERIC_FILLATTR_IDMAP_REQMASK)
|
||||||
|
generic_fillattr(user_ns, request_mask, ip, stat);
|
||||||
#else
|
#else
|
||||||
(void) user_ns;
|
(void) user_ns;
|
||||||
#endif
|
#endif
|
||||||
@@ -623,7 +629,10 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
|
|||||||
|
|
||||||
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
|
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
|
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
|
||||||
|
error = -zfs_getattr_fast(user_ns, request_mask, ZTOI(dzp),
|
||||||
|
stat);
|
||||||
|
#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
|
||||||
error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
|
error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
|
||||||
#else
|
#else
|
||||||
error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
|
error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
|
||||||
|
|||||||
@@ -31,6 +31,8 @@
|
|||||||
#include <sys/zfs_vnops.h>
|
#include <sys/zfs_vnops.h>
|
||||||
#include <sys/zfeature.h>
|
#include <sys/zfeature.h>
|
||||||
|
|
||||||
|
int zfs_bclone_enabled = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clone part of a file via block cloning.
|
* Clone part of a file via block cloning.
|
||||||
*
|
*
|
||||||
@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
|
|||||||
fstrans_cookie_t cookie;
|
fstrans_cookie_t cookie;
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
if (!zfs_bclone_enabled)
|
||||||
|
return (-EOPNOTSUPP);
|
||||||
|
|
||||||
if (!spa_feature_is_enabled(
|
if (!spa_feature_is_enabled(
|
||||||
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
|
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
|
||||||
return (-EOPNOTSUPP);
|
return (-EOPNOTSUPP);
|
||||||
|
|||||||
@@ -435,7 +435,9 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
|
|||||||
* XXX query_flags currently ignored.
|
* XXX query_flags currently ignored.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
|
#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
|
||||||
|
error = -zfs_getattr_fast(user_ns, request_mask, ip, stat);
|
||||||
|
#elif (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
|
||||||
error = -zfs_getattr_fast(user_ns, ip, stat);
|
error = -zfs_getattr_fast(user_ns, ip, stat);
|
||||||
#else
|
#else
|
||||||
error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
|
error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
|
||||||
@@ -774,7 +776,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
|
|||||||
return (-EMLINK);
|
return (-EMLINK);
|
||||||
|
|
||||||
crhold(cr);
|
crhold(cr);
|
||||||
ip->i_ctime = current_time(ip);
|
zpl_inode_set_ctime_to_ts(ip, current_time(ip));
|
||||||
/* Must have an existing ref, so igrab() cannot return NULL */
|
/* Must have an existing ref, so igrab() cannot return NULL */
|
||||||
VERIFY3P(igrab(ip), !=, NULL);
|
VERIFY3P(igrab(ip), !=, NULL);
|
||||||
|
|
||||||
|
|||||||
@@ -375,7 +375,7 @@ zpl_kill_sb(struct super_block *sb)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zpl_prune_sb(int64_t nr_to_scan, void *arg)
|
zpl_prune_sb(uint64_t nr_to_scan, void *arg)
|
||||||
{
|
{
|
||||||
struct super_block *sb = (struct super_block *)arg;
|
struct super_block *sb = (struct super_block *)arg;
|
||||||
int objects = 0;
|
int objects = 0;
|
||||||
|
|||||||
@@ -513,7 +513,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
|
|||||||
error = -zfs_write_simple(xzp, value, size, pos, NULL);
|
error = -zfs_write_simple(xzp, value, size, pos, NULL);
|
||||||
out:
|
out:
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
ip->i_ctime = current_time(ip);
|
zpl_inode_set_ctime_to_ts(ip, current_time(ip));
|
||||||
zfs_mark_inode_dirty(ip);
|
zfs_mark_inode_dirty(ip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1011,7 +1011,8 @@ zpl_set_acl_impl(struct inode *ip, struct posix_acl *acl, int type)
|
|||||||
*/
|
*/
|
||||||
if (ip->i_mode != mode) {
|
if (ip->i_mode != mode) {
|
||||||
ip->i_mode = ITOZ(ip)->z_mode = mode;
|
ip->i_mode = ITOZ(ip)->z_mode = mode;
|
||||||
ip->i_ctime = current_time(ip);
|
zpl_inode_set_ctime_to_ts(ip,
|
||||||
|
current_time(ip));
|
||||||
zfs_mark_inode_dirty(ip);
|
zfs_mark_inode_dirty(ip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1170,7 +1171,7 @@ zpl_init_acl(struct inode *ip, struct inode *dir)
|
|||||||
return (PTR_ERR(acl));
|
return (PTR_ERR(acl));
|
||||||
if (!acl) {
|
if (!acl) {
|
||||||
ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
|
ITOZ(ip)->z_mode = (ip->i_mode &= ~current_umask());
|
||||||
ip->i_ctime = current_time(ip);
|
zpl_inode_set_ctime_to_ts(ip, current_time(ip));
|
||||||
zfs_mark_inode_dirty(ip);
|
zfs_mark_inode_dirty(ip);
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -873,7 +873,13 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
|
|||||||
|
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case BLKFLSBUF:
|
case BLKFLSBUF:
|
||||||
|
#ifdef HAVE_FSYNC_BDEV
|
||||||
fsync_bdev(bdev);
|
fsync_bdev(bdev);
|
||||||
|
#elif defined(HAVE_SYNC_BLOCKDEV)
|
||||||
|
sync_blockdev(bdev);
|
||||||
|
#else
|
||||||
|
#error "Neither fsync_bdev() nor sync_blockdev() found"
|
||||||
|
#endif
|
||||||
invalidate_bdev(bdev);
|
invalidate_bdev(bdev);
|
||||||
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||||
|
|
||||||
@@ -1620,6 +1626,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
|
|||||||
module_param(zvol_volmode, uint, 0644);
|
module_param(zvol_volmode, uint, 0644);
|
||||||
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
|
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
|
||||||
|
|
||||||
|
#ifdef HAVE_BLK_MQ
|
||||||
|
module_param(zvol_blk_mq_queue_depth, uint, 0644);
|
||||||
|
MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
|
||||||
|
|
||||||
|
module_param(zvol_use_blk_mq, uint, 0644);
|
||||||
|
MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
|
||||||
|
|
||||||
|
module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
|
||||||
|
MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
|
||||||
|
"Process volblocksize blocks per thread");
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||||
module_param(zvol_open_timeout_ms, uint, 0644);
|
module_param(zvol_open_timeout_ms, uint, 0644);
|
||||||
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
|
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
|
||||||
|
|||||||
+77
-21
@@ -886,6 +886,8 @@ static void l2arc_do_free_on_write(void);
|
|||||||
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
|
static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
|
||||||
boolean_t state_only);
|
boolean_t state_only);
|
||||||
|
|
||||||
|
static void arc_prune_async(uint64_t adjust);
|
||||||
|
|
||||||
#define l2arc_hdr_arcstats_increment(hdr) \
|
#define l2arc_hdr_arcstats_increment(hdr) \
|
||||||
l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
|
l2arc_hdr_arcstats_update((hdr), B_TRUE, B_FALSE)
|
||||||
#define l2arc_hdr_arcstats_decrement(hdr) \
|
#define l2arc_hdr_arcstats_decrement(hdr) \
|
||||||
@@ -1364,7 +1366,7 @@ arc_buf_is_shared(arc_buf_t *buf)
|
|||||||
abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) &&
|
abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) &&
|
||||||
buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd));
|
buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd));
|
||||||
IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr));
|
IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr));
|
||||||
IMPLY(shared, ARC_BUF_SHARED(buf));
|
EQUIV(shared, ARC_BUF_SHARED(buf));
|
||||||
IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf));
|
IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1998,7 +2000,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
|
|||||||
IMPLY(encrypted, HDR_ENCRYPTED(hdr));
|
IMPLY(encrypted, HDR_ENCRYPTED(hdr));
|
||||||
IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
|
IMPLY(encrypted, ARC_BUF_ENCRYPTED(buf));
|
||||||
IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
|
IMPLY(encrypted, ARC_BUF_COMPRESSED(buf));
|
||||||
IMPLY(encrypted, !ARC_BUF_SHARED(buf));
|
IMPLY(encrypted, !arc_buf_is_shared(buf));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the caller wanted encrypted data we just need to copy it from
|
* If the caller wanted encrypted data we just need to copy it from
|
||||||
@@ -2066,7 +2068,9 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (hdr_compressed == compressed) {
|
if (hdr_compressed == compressed) {
|
||||||
if (!arc_buf_is_shared(buf)) {
|
if (ARC_BUF_SHARED(buf)) {
|
||||||
|
ASSERT(arc_buf_is_shared(buf));
|
||||||
|
} else {
|
||||||
abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd,
|
abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd,
|
||||||
arc_buf_size(buf));
|
arc_buf_size(buf));
|
||||||
}
|
}
|
||||||
@@ -2078,7 +2082,7 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
|
|||||||
* If the buf is sharing its data with the hdr, unlink it and
|
* If the buf is sharing its data with the hdr, unlink it and
|
||||||
* allocate a new data buffer for the buf.
|
* allocate a new data buffer for the buf.
|
||||||
*/
|
*/
|
||||||
if (arc_buf_is_shared(buf)) {
|
if (ARC_BUF_SHARED(buf)) {
|
||||||
ASSERT(ARC_BUF_COMPRESSED(buf));
|
ASSERT(ARC_BUF_COMPRESSED(buf));
|
||||||
|
|
||||||
/* We need to give the buf its own b_data */
|
/* We need to give the buf its own b_data */
|
||||||
@@ -2090,6 +2094,8 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
|
|||||||
/* Previously overhead was 0; just add new overhead */
|
/* Previously overhead was 0; just add new overhead */
|
||||||
ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr));
|
ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr));
|
||||||
} else if (ARC_BUF_COMPRESSED(buf)) {
|
} else if (ARC_BUF_COMPRESSED(buf)) {
|
||||||
|
ASSERT(!arc_buf_is_shared(buf));
|
||||||
|
|
||||||
/* We need to reallocate the buf's b_data */
|
/* We need to reallocate the buf's b_data */
|
||||||
arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr),
|
arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr),
|
||||||
buf);
|
buf);
|
||||||
@@ -2217,7 +2223,7 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
|
|||||||
|
|
||||||
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
|
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
|
||||||
buf = buf->b_next) {
|
buf = buf->b_next) {
|
||||||
if (arc_buf_is_shared(buf))
|
if (ARC_BUF_SHARED(buf))
|
||||||
continue;
|
continue;
|
||||||
(void) zfs_refcount_add_many(&state->arcs_esize[type],
|
(void) zfs_refcount_add_many(&state->arcs_esize[type],
|
||||||
arc_buf_size(buf), buf);
|
arc_buf_size(buf), buf);
|
||||||
@@ -2256,7 +2262,7 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
|
|||||||
|
|
||||||
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
|
for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL;
|
||||||
buf = buf->b_next) {
|
buf = buf->b_next) {
|
||||||
if (arc_buf_is_shared(buf))
|
if (ARC_BUF_SHARED(buf))
|
||||||
continue;
|
continue;
|
||||||
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
|
(void) zfs_refcount_remove_many(&state->arcs_esize[type],
|
||||||
arc_buf_size(buf), buf);
|
arc_buf_size(buf), buf);
|
||||||
@@ -2481,7 +2487,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||||||
* add to the refcount if the arc_buf_t is
|
* add to the refcount if the arc_buf_t is
|
||||||
* not shared.
|
* not shared.
|
||||||
*/
|
*/
|
||||||
if (arc_buf_is_shared(buf))
|
if (ARC_BUF_SHARED(buf))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
(void) zfs_refcount_add_many(
|
(void) zfs_refcount_add_many(
|
||||||
@@ -2537,7 +2543,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr)
|
|||||||
* add to the refcount if the arc_buf_t is
|
* add to the refcount if the arc_buf_t is
|
||||||
* not shared.
|
* not shared.
|
||||||
*/
|
*/
|
||||||
if (arc_buf_is_shared(buf))
|
if (ARC_BUF_SHARED(buf))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
(void) zfs_refcount_remove_many(
|
(void) zfs_refcount_remove_many(
|
||||||
@@ -3061,9 +3067,10 @@ arc_buf_destroy_impl(arc_buf_t *buf)
|
|||||||
arc_cksum_verify(buf);
|
arc_cksum_verify(buf);
|
||||||
arc_buf_unwatch(buf);
|
arc_buf_unwatch(buf);
|
||||||
|
|
||||||
if (arc_buf_is_shared(buf)) {
|
if (ARC_BUF_SHARED(buf)) {
|
||||||
arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
|
arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
|
||||||
} else {
|
} else {
|
||||||
|
ASSERT(!arc_buf_is_shared(buf));
|
||||||
uint64_t size = arc_buf_size(buf);
|
uint64_t size = arc_buf_size(buf);
|
||||||
arc_free_data_buf(hdr, buf->b_data, size, buf);
|
arc_free_data_buf(hdr, buf->b_data, size, buf);
|
||||||
ARCSTAT_INCR(arcstat_overhead_size, -size);
|
ARCSTAT_INCR(arcstat_overhead_size, -size);
|
||||||
@@ -3104,9 +3111,9 @@ arc_buf_destroy_impl(arc_buf_t *buf)
|
|||||||
*/
|
*/
|
||||||
if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) {
|
if (lastbuf != NULL && !ARC_BUF_ENCRYPTED(lastbuf)) {
|
||||||
/* Only one buf can be shared at once */
|
/* Only one buf can be shared at once */
|
||||||
VERIFY(!arc_buf_is_shared(lastbuf));
|
ASSERT(!arc_buf_is_shared(lastbuf));
|
||||||
/* hdr is uncompressed so can't have compressed buf */
|
/* hdr is uncompressed so can't have compressed buf */
|
||||||
VERIFY(!ARC_BUF_COMPRESSED(lastbuf));
|
ASSERT(!ARC_BUF_COMPRESSED(lastbuf));
|
||||||
|
|
||||||
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
|
ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
|
||||||
arc_hdr_free_abd(hdr, B_FALSE);
|
arc_hdr_free_abd(hdr, B_FALSE);
|
||||||
@@ -5863,12 +5870,9 @@ top:
|
|||||||
* 3. This buffer isn't currently writing to the L2ARC.
|
* 3. This buffer isn't currently writing to the L2ARC.
|
||||||
* 4. The L2ARC entry wasn't evicted, which may
|
* 4. The L2ARC entry wasn't evicted, which may
|
||||||
* also have invalidated the vdev.
|
* also have invalidated the vdev.
|
||||||
* 5. This isn't prefetch or l2arc_noprefetch is 0.
|
|
||||||
*/
|
*/
|
||||||
if (HDR_HAS_L2HDR(hdr) &&
|
if (HDR_HAS_L2HDR(hdr) &&
|
||||||
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
|
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr)) {
|
||||||
!(l2arc_noprefetch &&
|
|
||||||
(*arc_flags & ARC_FLAG_PREFETCH))) {
|
|
||||||
l2arc_read_callback_t *cb;
|
l2arc_read_callback_t *cb;
|
||||||
abd_t *abd;
|
abd_t *abd;
|
||||||
uint64_t asize;
|
uint64_t asize;
|
||||||
@@ -6048,6 +6052,56 @@ arc_remove_prune_callback(arc_prune_t *p)
|
|||||||
kmem_free(p, sizeof (*p));
|
kmem_free(p, sizeof (*p));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper function for arc_prune_async(); it is responsible for safely
|
||||||
|
* handling the execution of a registered arc_prune_func_t.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
arc_prune_task(void *ptr)
|
||||||
|
{
|
||||||
|
arc_prune_t *ap = (arc_prune_t *)ptr;
|
||||||
|
arc_prune_func_t *func = ap->p_pfunc;
|
||||||
|
|
||||||
|
if (func != NULL)
|
||||||
|
func(ap->p_adjust, ap->p_private);
|
||||||
|
|
||||||
|
zfs_refcount_remove(&ap->p_refcnt, func);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Notify registered consumers they must drop holds on a portion of the ARC
|
||||||
|
* buffers they reference. This provides a mechanism to ensure the ARC can
|
||||||
|
* honor the metadata limit and reclaim otherwise pinned ARC buffers.
|
||||||
|
*
|
||||||
|
* This operation is performed asynchronously so it may be safely called
|
||||||
|
* in the context of the arc_reclaim_thread(). A reference is taken here
|
||||||
|
* for each registered arc_prune_t and the arc_prune_task() is responsible
|
||||||
|
* for releasing it once the registered arc_prune_func_t has completed.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
arc_prune_async(uint64_t adjust)
|
||||||
|
{
|
||||||
|
arc_prune_t *ap;
|
||||||
|
|
||||||
|
mutex_enter(&arc_prune_mtx);
|
||||||
|
for (ap = list_head(&arc_prune_list); ap != NULL;
|
||||||
|
ap = list_next(&arc_prune_list, ap)) {
|
||||||
|
|
||||||
|
if (zfs_refcount_count(&ap->p_refcnt) >= 2)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
zfs_refcount_add(&ap->p_refcnt, ap->p_pfunc);
|
||||||
|
ap->p_adjust = adjust;
|
||||||
|
if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
|
||||||
|
ap, TQ_SLEEP) == TASKQID_INVALID) {
|
||||||
|
zfs_refcount_remove(&ap->p_refcnt, ap->p_pfunc);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
ARCSTAT_BUMP(arcstat_prune);
|
||||||
|
}
|
||||||
|
mutex_exit(&arc_prune_mtx);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Notify the arc that a block was freed, and thus will never be used again.
|
* Notify the arc that a block was freed, and thus will never be used again.
|
||||||
*/
|
*/
|
||||||
@@ -6189,7 +6243,7 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||||||
ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
|
ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
|
||||||
VERIFY3S(remove_reference(hdr, tag), >, 0);
|
VERIFY3S(remove_reference(hdr, tag), >, 0);
|
||||||
|
|
||||||
if (arc_buf_is_shared(buf) && !ARC_BUF_COMPRESSED(buf)) {
|
if (ARC_BUF_SHARED(buf) && !ARC_BUF_COMPRESSED(buf)) {
|
||||||
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
|
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
|
||||||
ASSERT(ARC_BUF_LAST(buf));
|
ASSERT(ARC_BUF_LAST(buf));
|
||||||
}
|
}
|
||||||
@@ -6206,9 +6260,9 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||||||
* If the current arc_buf_t and the hdr are sharing their data
|
* If the current arc_buf_t and the hdr are sharing their data
|
||||||
* buffer, then we must stop sharing that block.
|
* buffer, then we must stop sharing that block.
|
||||||
*/
|
*/
|
||||||
if (arc_buf_is_shared(buf)) {
|
if (ARC_BUF_SHARED(buf)) {
|
||||||
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
|
ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
|
||||||
VERIFY(!arc_buf_is_shared(lastbuf));
|
ASSERT(!arc_buf_is_shared(lastbuf));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First, sever the block sharing relationship between
|
* First, sever the block sharing relationship between
|
||||||
@@ -6241,7 +6295,7 @@ arc_release(arc_buf_t *buf, const void *tag)
|
|||||||
*/
|
*/
|
||||||
ASSERT(arc_buf_is_shared(lastbuf) ||
|
ASSERT(arc_buf_is_shared(lastbuf) ||
|
||||||
arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
|
arc_hdr_get_compress(hdr) != ZIO_COMPRESS_OFF);
|
||||||
ASSERT(!ARC_BUF_SHARED(buf));
|
ASSERT(!arc_buf_is_shared(buf));
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
|
ASSERT(hdr->b_l1hdr.b_pabd != NULL || HDR_HAS_RABD(hdr));
|
||||||
@@ -6335,9 +6389,10 @@ arc_write_ready(zio_t *zio)
|
|||||||
arc_cksum_free(hdr);
|
arc_cksum_free(hdr);
|
||||||
arc_buf_unwatch(buf);
|
arc_buf_unwatch(buf);
|
||||||
if (hdr->b_l1hdr.b_pabd != NULL) {
|
if (hdr->b_l1hdr.b_pabd != NULL) {
|
||||||
if (arc_buf_is_shared(buf)) {
|
if (ARC_BUF_SHARED(buf)) {
|
||||||
arc_unshare_buf(hdr, buf);
|
arc_unshare_buf(hdr, buf);
|
||||||
} else {
|
} else {
|
||||||
|
ASSERT(!arc_buf_is_shared(buf));
|
||||||
arc_hdr_free_abd(hdr, B_FALSE);
|
arc_hdr_free_abd(hdr, B_FALSE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -6636,9 +6691,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
|||||||
* The hdr will remain with a NULL data pointer and the
|
* The hdr will remain with a NULL data pointer and the
|
||||||
* buf will take sole ownership of the block.
|
* buf will take sole ownership of the block.
|
||||||
*/
|
*/
|
||||||
if (arc_buf_is_shared(buf)) {
|
if (ARC_BUF_SHARED(buf)) {
|
||||||
arc_unshare_buf(hdr, buf);
|
arc_unshare_buf(hdr, buf);
|
||||||
} else {
|
} else {
|
||||||
|
ASSERT(!arc_buf_is_shared(buf));
|
||||||
arc_hdr_free_abd(hdr, B_FALSE);
|
arc_hdr_free_abd(hdr, B_FALSE);
|
||||||
}
|
}
|
||||||
VERIFY3P(buf->b_data, !=, NULL);
|
VERIFY3P(buf->b_data, !=, NULL);
|
||||||
|
|||||||
+8
-170
@@ -28,6 +28,7 @@
|
|||||||
#include <sys/spa_impl.h>
|
#include <sys/spa_impl.h>
|
||||||
#include <sys/zio.h>
|
#include <sys/zio.h>
|
||||||
#include <sys/brt.h>
|
#include <sys/brt.h>
|
||||||
|
#include <sys/brt_impl.h>
|
||||||
#include <sys/ddt.h>
|
#include <sys/ddt.h>
|
||||||
#include <sys/bitmap.h>
|
#include <sys/bitmap.h>
|
||||||
#include <sys/zap.h>
|
#include <sys/zap.h>
|
||||||
@@ -234,178 +235,15 @@
|
|||||||
* destination dataset is mounted and its ZIL replayed.
|
* destination dataset is mounted and its ZIL replayed.
|
||||||
* To address this situation we leverage zil_claim() mechanism where ZFS will
|
* To address this situation we leverage zil_claim() mechanism where ZFS will
|
||||||
* parse all the ZILs on pool import. When we come across TX_CLONE_RANGE
|
* parse all the ZILs on pool import. When we come across TX_CLONE_RANGE
|
||||||
* entries, we will bump reference counters for their BPs in the BRT and then
|
* entries, we will bump reference counters for their BPs in the BRT. Then
|
||||||
* on mount and ZIL replay we will just attach BPs to the file without
|
* on mount and ZIL replay we bump the reference counters once more, while the
|
||||||
* bumping reference counters.
|
* first references are dropped during ZIL destroy by zil_free_clone_range().
|
||||||
* Note it is still possible that after zil_claim() we never mount the
|
* It is possible that after zil_claim() we never mount the destination, so
|
||||||
* destination, so we never replay its ZIL and we destroy it. This way we would
|
* we never replay its ZIL and just destroy it. In this case the only taken
|
||||||
* end up with leaked references in BRT. We address that too as ZFS gives us
|
* references will be dropped by zil_free_clone_range(), since the cloning is
|
||||||
* a chance to clean this up on dataset destroy (see zil_free_clone_range()).
|
* not going to ever take place.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* BRT - Block Reference Table.
|
|
||||||
*/
|
|
||||||
#define BRT_OBJECT_VDEV_PREFIX "com.fudosecurity:brt:vdev:"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We divide each VDEV into 16MB chunks. Each chunk is represented in memory
|
|
||||||
* by a 16bit counter, thus 1TB VDEV requires 128kB of memory: (1TB / 16MB) * 2B
|
|
||||||
* Each element in this array represents how many BRT entries we have in this
|
|
||||||
* chunk of storage. We always load this entire array into memory and update as
|
|
||||||
* needed. By having it in memory we can quickly tell (during zio_free()) if
|
|
||||||
* there are any BRT entries that we might need to update.
|
|
||||||
*
|
|
||||||
* This value cannot be larger than 16MB, at least as long as we support
|
|
||||||
* 512 byte block sizes. With 512 byte block size we can have exactly
|
|
||||||
* 32768 blocks in 16MB. In 32MB we could have 65536 blocks, which is one too
|
|
||||||
* many for a 16bit counter.
|
|
||||||
*/
|
|
||||||
#define BRT_RANGESIZE (16 * 1024 * 1024)
|
|
||||||
_Static_assert(BRT_RANGESIZE / SPA_MINBLOCKSIZE <= UINT16_MAX,
|
|
||||||
"BRT_RANGESIZE is too large.");
|
|
||||||
/*
|
|
||||||
* We don't want to update the whole structure every time. Maintain bitmap
|
|
||||||
* of dirty blocks within the regions, so that a single bit represents a
|
|
||||||
* block size of entcounts. For example if we have a 1PB vdev then all
|
|
||||||
* entcounts take 128MB of memory ((1PB / 16MB) * 2B). We can divide this
|
|
||||||
* 128MB array of entcounts into 32kB disk blocks, as we don't want to update
|
|
||||||
* the whole 128MB on disk when we have updated only a single entcount.
|
|
||||||
* We maintain a bitmap where each 32kB disk block within 128MB entcounts array
|
|
||||||
* is represented by a single bit. This gives us 4096 bits. A set bit in the
|
|
||||||
* bitmap means that we had a change in at least one of the 16384 entcounts
|
|
||||||
* that reside on a 32kB disk block (32kB / sizeof (uint16_t)).
|
|
||||||
*/
|
|
||||||
#define BRT_BLOCKSIZE (32 * 1024)
|
|
||||||
#define BRT_RANGESIZE_TO_NBLOCKS(size) \
|
|
||||||
(((size) - 1) / BRT_BLOCKSIZE / sizeof (uint16_t) + 1)
|
|
||||||
|
|
||||||
#define BRT_LITTLE_ENDIAN 0
|
|
||||||
#define BRT_BIG_ENDIAN 1
|
|
||||||
#ifdef _ZFS_LITTLE_ENDIAN
|
|
||||||
#define BRT_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
|
|
||||||
#define BRT_NON_NATIVE_BYTEORDER BRT_BIG_ENDIAN
|
|
||||||
#else
|
|
||||||
#define BRT_NATIVE_BYTEORDER BRT_BIG_ENDIAN
|
|
||||||
#define BRT_NON_NATIVE_BYTEORDER BRT_LITTLE_ENDIAN
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef struct brt_vdev_phys {
|
|
||||||
uint64_t bvp_mos_entries;
|
|
||||||
uint64_t bvp_size;
|
|
||||||
uint64_t bvp_byteorder;
|
|
||||||
uint64_t bvp_totalcount;
|
|
||||||
uint64_t bvp_rangesize;
|
|
||||||
uint64_t bvp_usedspace;
|
|
||||||
uint64_t bvp_savedspace;
|
|
||||||
} brt_vdev_phys_t;
|
|
||||||
|
|
||||||
typedef struct brt_vdev {
|
|
||||||
/*
|
|
||||||
* VDEV id.
|
|
||||||
*/
|
|
||||||
uint64_t bv_vdevid;
|
|
||||||
/*
|
|
||||||
* Is the structure initiated?
|
|
||||||
* (bv_entcount and bv_bitmap are allocated?)
|
|
||||||
*/
|
|
||||||
boolean_t bv_initiated;
|
|
||||||
/*
|
|
||||||
* Object number in the MOS for the entcount array and brt_vdev_phys.
|
|
||||||
*/
|
|
||||||
uint64_t bv_mos_brtvdev;
|
|
||||||
/*
|
|
||||||
* Object number in the MOS for the entries table.
|
|
||||||
*/
|
|
||||||
uint64_t bv_mos_entries;
|
|
||||||
/*
|
|
||||||
* Entries to sync.
|
|
||||||
*/
|
|
||||||
avl_tree_t bv_tree;
|
|
||||||
/*
|
|
||||||
* Does the bv_entcount[] array need byte swapping?
|
|
||||||
*/
|
|
||||||
boolean_t bv_need_byteswap;
|
|
||||||
/*
|
|
||||||
* Number of entries in the bv_entcount[] array.
|
|
||||||
*/
|
|
||||||
uint64_t bv_size;
|
|
||||||
/*
|
|
||||||
* This is the array with BRT entry count per BRT_RANGESIZE.
|
|
||||||
*/
|
|
||||||
uint16_t *bv_entcount;
|
|
||||||
/*
|
|
||||||
* Sum of all bv_entcount[]s.
|
|
||||||
*/
|
|
||||||
uint64_t bv_totalcount;
|
|
||||||
/*
|
|
||||||
* Space on disk occupied by cloned blocks (without compression).
|
|
||||||
*/
|
|
||||||
uint64_t bv_usedspace;
|
|
||||||
/*
|
|
||||||
* How much additional space would be occupied without block cloning.
|
|
||||||
*/
|
|
||||||
uint64_t bv_savedspace;
|
|
||||||
/*
|
|
||||||
* brt_vdev_phys needs updating on disk.
|
|
||||||
*/
|
|
||||||
boolean_t bv_meta_dirty;
|
|
||||||
/*
|
|
||||||
* bv_entcount[] needs updating on disk.
|
|
||||||
*/
|
|
||||||
boolean_t bv_entcount_dirty;
|
|
||||||
/*
|
|
||||||
* bv_entcount[] potentially can be a bit too big to synchronize it all
|
|
||||||
* when we just changed few entcounts. The fields below allow us to
|
|
||||||
* track updates to bv_entcount[] array since the last sync.
|
|
||||||
* A single bit in the bv_bitmap represents as many entcounts as can
|
|
||||||
* fit into a single BRT_BLOCKSIZE.
|
|
||||||
* For example we have 65536 entcounts in the bv_entcount array
|
|
||||||
* (so the whole array is 128kB). We updated bv_entcount[2] and
|
|
||||||
* bv_entcount[5]. In that case only first bit in the bv_bitmap will
|
|
||||||
* be set and we will write only first BRT_BLOCKSIZE out of 128kB.
|
|
||||||
*/
|
|
||||||
ulong_t *bv_bitmap;
|
|
||||||
uint64_t bv_nblocks;
|
|
||||||
} brt_vdev_t;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In-core brt
|
|
||||||
*/
|
|
||||||
typedef struct brt {
|
|
||||||
krwlock_t brt_lock;
|
|
||||||
spa_t *brt_spa;
|
|
||||||
#define brt_mos brt_spa->spa_meta_objset
|
|
||||||
uint64_t brt_rangesize;
|
|
||||||
uint64_t brt_usedspace;
|
|
||||||
uint64_t brt_savedspace;
|
|
||||||
avl_tree_t brt_pending_tree[TXG_SIZE];
|
|
||||||
kmutex_t brt_pending_lock[TXG_SIZE];
|
|
||||||
/* Sum of all entries across all bv_trees. */
|
|
||||||
uint64_t brt_nentries;
|
|
||||||
brt_vdev_t *brt_vdevs;
|
|
||||||
uint64_t brt_nvdevs;
|
|
||||||
} brt_t;
|
|
||||||
|
|
||||||
/* Size of bre_offset / sizeof (uint64_t). */
|
|
||||||
#define BRT_KEY_WORDS (1)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In-core brt entry.
|
|
||||||
* On-disk we use bre_offset as the key and bre_refcount as the value.
|
|
||||||
*/
|
|
||||||
typedef struct brt_entry {
|
|
||||||
uint64_t bre_offset;
|
|
||||||
uint64_t bre_refcount;
|
|
||||||
avl_node_t bre_node;
|
|
||||||
} brt_entry_t;
|
|
||||||
|
|
||||||
typedef struct brt_pending_entry {
|
|
||||||
blkptr_t bpe_bp;
|
|
||||||
int bpe_count;
|
|
||||||
avl_node_t bpe_node;
|
|
||||||
} brt_pending_entry_t;
|
|
||||||
|
|
||||||
static kmem_cache_t *brt_entry_cache;
|
static kmem_cache_t *brt_entry_cache;
|
||||||
static kmem_cache_t *brt_pending_entry_cache;
|
static kmem_cache_t *brt_pending_entry_cache;
|
||||||
|
|
||||||
|
|||||||
+9
-1
@@ -2700,15 +2700,23 @@ dmu_buf_will_clone(dmu_buf_t *db_fake, dmu_tx_t *tx)
|
|||||||
* writes and clones into this block.
|
* writes and clones into this block.
|
||||||
*/
|
*/
|
||||||
mutex_enter(&db->db_mtx);
|
mutex_enter(&db->db_mtx);
|
||||||
|
DBUF_VERIFY(db);
|
||||||
VERIFY(!dbuf_undirty(db, tx));
|
VERIFY(!dbuf_undirty(db, tx));
|
||||||
ASSERT3P(dbuf_find_dirty_eq(db, tx->tx_txg), ==, NULL);
|
ASSERT3P(dbuf_find_dirty_eq(db, tx->tx_txg), ==, NULL);
|
||||||
if (db->db_buf != NULL) {
|
if (db->db_buf != NULL) {
|
||||||
arc_buf_destroy(db->db_buf, db);
|
arc_buf_destroy(db->db_buf, db);
|
||||||
db->db_buf = NULL;
|
db->db_buf = NULL;
|
||||||
|
dbuf_clear_data(db);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
db->db_state = DB_NOFILL;
|
||||||
|
DTRACE_SET_STATE(db, "allocating NOFILL buffer for clone");
|
||||||
|
|
||||||
|
DBUF_VERIFY(db);
|
||||||
mutex_exit(&db->db_mtx);
|
mutex_exit(&db->db_mtx);
|
||||||
|
|
||||||
dmu_buf_will_not_fill(db_fake, tx);
|
dbuf_noread(db);
|
||||||
|
(void) dbuf_dirty(db, tx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
+2
-4
@@ -2267,7 +2267,7 @@ out:
|
|||||||
|
|
||||||
int
|
int
|
||||||
dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
|
dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
|
||||||
dmu_tx_t *tx, const blkptr_t *bps, size_t nbps, boolean_t replay)
|
dmu_tx_t *tx, const blkptr_t *bps, size_t nbps)
|
||||||
{
|
{
|
||||||
spa_t *spa;
|
spa_t *spa;
|
||||||
dmu_buf_t **dbp, *dbuf;
|
dmu_buf_t **dbp, *dbuf;
|
||||||
@@ -2341,10 +2341,8 @@ dmu_brt_clone(objset_t *os, uint64_t object, uint64_t offset, uint64_t length,
|
|||||||
* When data is embedded into BP there is no need to create
|
* When data is embedded into BP there is no need to create
|
||||||
* BRT entry as there is no data block. Just copy the BP as
|
* BRT entry as there is no data block. Just copy the BP as
|
||||||
* it contains the data.
|
* it contains the data.
|
||||||
* Also, when replaying ZIL we don't want to bump references
|
|
||||||
* in the BRT as it was already done during ZIL claim.
|
|
||||||
*/
|
*/
|
||||||
if (!replay && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
||||||
brt_pending_add(spa, bp, tx);
|
brt_pending_add(spa, bp, tx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+5
-3
@@ -210,10 +210,12 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
|
|||||||
dmu_buf_impl_t *db;
|
dmu_buf_impl_t *db;
|
||||||
|
|
||||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||||
db = dbuf_hold_level(dn, level, blkid, FTAG);
|
err = dbuf_hold_impl(dn, level, blkid, TRUE, FALSE, FTAG, &db);
|
||||||
rw_exit(&dn->dn_struct_rwlock);
|
rw_exit(&dn->dn_struct_rwlock);
|
||||||
if (db == NULL)
|
if (err == ENOENT)
|
||||||
return (SET_ERROR(EIO));
|
return (0);
|
||||||
|
if (err != 0)
|
||||||
|
return (err);
|
||||||
/*
|
/*
|
||||||
* PARTIAL_FIRST allows caching for uncacheable blocks. It will
|
* PARTIAL_FIRST allows caching for uncacheable blocks. It will
|
||||||
* be cleared after dmu_buf_will_dirty() calls dbuf_read() again.
|
* be cleared after dmu_buf_will_dirty() calls dbuf_read() again.
|
||||||
|
|||||||
+10
-2
@@ -1764,7 +1764,14 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Checks if the dnode contains any uncommitted dirty records.
|
* Checks if the dnode itself is dirty, or is carrying any uncommitted records.
|
||||||
|
* It is important to check both conditions, as some operations (eg appending
|
||||||
|
* to a file) can dirty both as a single logical unit, but they are not synced
|
||||||
|
* out atomically, so checking one and not the other can result in an object
|
||||||
|
* appearing to be clean mid-way through a commit.
|
||||||
|
*
|
||||||
|
* Do not change this lightly! If you get it wrong, dmu_offset_next() can
|
||||||
|
* detect a hole where there is really data, leading to silent corruption.
|
||||||
*/
|
*/
|
||||||
boolean_t
|
boolean_t
|
||||||
dnode_is_dirty(dnode_t *dn)
|
dnode_is_dirty(dnode_t *dn)
|
||||||
@@ -1772,7 +1779,8 @@ dnode_is_dirty(dnode_t *dn)
|
|||||||
mutex_enter(&dn->dn_mtx);
|
mutex_enter(&dn->dn_mtx);
|
||||||
|
|
||||||
for (int i = 0; i < TXG_SIZE; i++) {
|
for (int i = 0; i < TXG_SIZE; i++) {
|
||||||
if (multilist_link_active(&dn->dn_dirty_link[i])) {
|
if (multilist_link_active(&dn->dn_dirty_link[i]) ||
|
||||||
|
!list_is_empty(&dn->dn_dirty_records[i])) {
|
||||||
mutex_exit(&dn->dn_mtx);
|
mutex_exit(&dn->dn_mtx);
|
||||||
return (B_TRUE);
|
return (B_TRUE);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -965,18 +965,18 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
|
|||||||
uint64_t delay_min_bytes =
|
uint64_t delay_min_bytes =
|
||||||
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
|
||||||
|
|
||||||
mutex_enter(&dp->dp_lock);
|
/*
|
||||||
uint64_t dirty = dp->dp_dirty_total;
|
* We are not taking the dp_lock here and in a few other places, since torn
|
||||||
mutex_exit(&dp->dp_lock);
|
* reads are unlikely: on 64-bit systems due to register size and on
|
||||||
|
* 32-bit due to memory constraints. Pool-wide locks in hot path may
|
||||||
return (dirty > delay_min_bytes);
|
* be too expensive, while we do not need a precise result here.
|
||||||
|
*/
|
||||||
|
return (dp->dp_dirty_total > delay_min_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean_t
|
static boolean_t
|
||||||
dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
|
dsl_pool_need_dirty_sync(dsl_pool_t *dp, uint64_t txg)
|
||||||
{
|
{
|
||||||
ASSERT(MUTEX_HELD(&dp->dp_lock));
|
|
||||||
|
|
||||||
uint64_t dirty_min_bytes =
|
uint64_t dirty_min_bytes =
|
||||||
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
zfs_dirty_data_max * zfs_dirty_data_sync_percent / 100;
|
||||||
uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
|
uint64_t dirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
|
||||||
|
|||||||
@@ -367,23 +367,24 @@ spa_write_cachefile(spa_t *target, boolean_t removing, boolean_t postsysevent,
|
|||||||
* So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
|
* So we have to invent the ZFS_IOC_CONFIG ioctl to grab the configuration
|
||||||
* information for all pools visible within the zone.
|
* information for all pools visible within the zone.
|
||||||
*/
|
*/
|
||||||
nvlist_t *
|
int
|
||||||
spa_all_configs(uint64_t *generation)
|
spa_all_configs(uint64_t *generation, nvlist_t **pools)
|
||||||
{
|
{
|
||||||
nvlist_t *pools;
|
|
||||||
spa_t *spa = NULL;
|
spa_t *spa = NULL;
|
||||||
|
|
||||||
if (*generation == spa_config_generation)
|
if (*generation == spa_config_generation)
|
||||||
return (NULL);
|
return (SET_ERROR(EEXIST));
|
||||||
|
|
||||||
pools = fnvlist_alloc();
|
int error = mutex_enter_interruptible(&spa_namespace_lock);
|
||||||
|
if (error)
|
||||||
|
return (SET_ERROR(EINTR));
|
||||||
|
|
||||||
mutex_enter(&spa_namespace_lock);
|
*pools = fnvlist_alloc();
|
||||||
while ((spa = spa_next(spa)) != NULL) {
|
while ((spa = spa_next(spa)) != NULL) {
|
||||||
if (INGLOBALZONE(curproc) ||
|
if (INGLOBALZONE(curproc) ||
|
||||||
zone_dataset_visible(spa_name(spa), NULL)) {
|
zone_dataset_visible(spa_name(spa), NULL)) {
|
||||||
mutex_enter(&spa->spa_props_lock);
|
mutex_enter(&spa->spa_props_lock);
|
||||||
fnvlist_add_nvlist(pools, spa_name(spa),
|
fnvlist_add_nvlist(*pools, spa_name(spa),
|
||||||
spa->spa_config);
|
spa->spa_config);
|
||||||
mutex_exit(&spa->spa_props_lock);
|
mutex_exit(&spa->spa_props_lock);
|
||||||
}
|
}
|
||||||
@@ -391,7 +392,7 @@ spa_all_configs(uint64_t *generation)
|
|||||||
*generation = spa_config_generation;
|
*generation = spa_config_generation;
|
||||||
mutex_exit(&spa_namespace_lock);
|
mutex_exit(&spa_namespace_lock);
|
||||||
|
|
||||||
return (pools);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
@@ -4215,6 +4215,7 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
|
|||||||
/* XXX - L2ARC 1.0 does not support expansion */
|
/* XXX - L2ARC 1.0 does not support expansion */
|
||||||
if (vd->vdev_aux)
|
if (vd->vdev_aux)
|
||||||
return (spa_vdev_state_exit(spa, vd, ENOTSUP));
|
return (spa_vdev_state_exit(spa, vd, ENOTSUP));
|
||||||
|
spa->spa_ccw_fail_time = 0;
|
||||||
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
|
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -273,8 +273,10 @@ vdev_queue_class_add(vdev_queue_t *vq, zio_t *zio)
|
|||||||
{
|
{
|
||||||
zio_priority_t p = zio->io_priority;
|
zio_priority_t p = zio->io_priority;
|
||||||
vq->vq_cqueued |= 1U << p;
|
vq->vq_cqueued |= 1U << p;
|
||||||
if (vdev_queue_class_fifo(p))
|
if (vdev_queue_class_fifo(p)) {
|
||||||
list_insert_tail(&vq->vq_class[p].vqc_list, zio);
|
list_insert_tail(&vq->vq_class[p].vqc_list, zio);
|
||||||
|
vq->vq_class[p].vqc_list_numnodes++;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
avl_add(&vq->vq_class[p].vqc_tree, zio);
|
avl_add(&vq->vq_class[p].vqc_tree, zio);
|
||||||
}
|
}
|
||||||
@@ -288,6 +290,7 @@ vdev_queue_class_remove(vdev_queue_t *vq, zio_t *zio)
|
|||||||
list_t *list = &vq->vq_class[p].vqc_list;
|
list_t *list = &vq->vq_class[p].vqc_list;
|
||||||
list_remove(list, zio);
|
list_remove(list, zio);
|
||||||
empty = list_is_empty(list);
|
empty = list_is_empty(list);
|
||||||
|
vq->vq_class[p].vqc_list_numnodes--;
|
||||||
} else {
|
} else {
|
||||||
avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
|
avl_tree_t *tree = &vq->vq_class[p].vqc_tree;
|
||||||
avl_remove(tree, zio);
|
avl_remove(tree, zio);
|
||||||
@@ -1069,7 +1072,7 @@ vdev_queue_class_length(vdev_t *vd, zio_priority_t p)
|
|||||||
{
|
{
|
||||||
vdev_queue_t *vq = &vd->vdev_queue;
|
vdev_queue_t *vq = &vd->vdev_queue;
|
||||||
if (vdev_queue_class_fifo(p))
|
if (vdev_queue_class_fifo(p))
|
||||||
return (list_is_empty(&vq->vq_class[p].vqc_list) == 0);
|
return (vq->vq_class[p].vqc_list_numnodes);
|
||||||
else
|
else
|
||||||
return (avl_numnodes(&vq->vq_class[p].vqc_tree));
|
return (avl_numnodes(&vq->vq_class[p].vqc_tree));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1582,8 +1582,9 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
|
|||||||
nvlist_t *configs;
|
nvlist_t *configs;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
|
error = spa_all_configs(&zc->zc_cookie, &configs);
|
||||||
return (SET_ERROR(EEXIST));
|
if (error)
|
||||||
|
return (error);
|
||||||
|
|
||||||
error = put_nvlist(zc, configs);
|
error = put_nvlist(zc, configs);
|
||||||
|
|
||||||
|
|||||||
+11
-2
@@ -1094,6 +1094,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
|||||||
|
|
||||||
ASSERT(!outzfsvfs->z_replay);
|
ASSERT(!outzfsvfs->z_replay);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Block cloning from an unencrypted dataset into an encrypted
|
||||||
|
* dataset and vice versa is not supported.
|
||||||
|
*/
|
||||||
|
if (inos->os_encrypted != outos->os_encrypted) {
|
||||||
|
zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
|
||||||
|
return (SET_ERROR(EXDEV));
|
||||||
|
}
|
||||||
|
|
||||||
error = zfs_verify_zp(inzp);
|
error = zfs_verify_zp(inzp);
|
||||||
if (error == 0)
|
if (error == 0)
|
||||||
error = zfs_verify_zp(outzp);
|
error = zfs_verify_zp(outzp);
|
||||||
@@ -1324,7 +1333,7 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
|
|||||||
}
|
}
|
||||||
|
|
||||||
error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
|
error = dmu_brt_clone(outos, outzp->z_id, outoff, size, tx,
|
||||||
bps, nbps, B_FALSE);
|
bps, nbps);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
break;
|
break;
|
||||||
@@ -1458,7 +1467,7 @@ zfs_clone_range_replay(znode_t *zp, uint64_t off, uint64_t len, uint64_t blksz,
|
|||||||
if (zp->z_blksz < blksz)
|
if (zp->z_blksz < blksz)
|
||||||
zfs_grow_blocksize(zp, blksz, tx);
|
zfs_grow_blocksize(zp, blksz, tx);
|
||||||
|
|
||||||
dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps, B_TRUE);
|
dmu_brt_clone(zfsvfs->z_os, zp->z_id, off, len, tx, bps, nbps);
|
||||||
|
|
||||||
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
|
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -145,7 +145,7 @@ static int zil_nocacheflush = 0;
|
|||||||
* Any writes above that will be executed with lower (asynchronous) priority
|
* Any writes above that will be executed with lower (asynchronous) priority
|
||||||
* to limit potential SLOG device abuse by single active ZIL writer.
|
* to limit potential SLOG device abuse by single active ZIL writer.
|
||||||
*/
|
*/
|
||||||
static uint64_t zil_slog_bulk = 768 * 1024;
|
static uint64_t zil_slog_bulk = 64 * 1024 * 1024;
|
||||||
|
|
||||||
static kmem_cache_t *zil_lwb_cache;
|
static kmem_cache_t *zil_lwb_cache;
|
||||||
static kmem_cache_t *zil_zcw_cache;
|
static kmem_cache_t *zil_zcw_cache;
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
|
|||||||
BuildArch: noarch
|
BuildArch: noarch
|
||||||
|
|
||||||
Requires: dkms >= 2.2.0.3
|
Requires: dkms >= 2.2.0.3
|
||||||
|
Requires(pre): dkms >= 2.2.0.3
|
||||||
Requires(post): dkms >= 2.2.0.3
|
Requires(post): dkms >= 2.2.0.3
|
||||||
Requires(preun): dkms >= 2.2.0.3
|
Requires(preun): dkms >= 2.2.0.3
|
||||||
Requires: gcc, make, perl, diffutils
|
Requires: gcc, make, perl, diffutils
|
||||||
@@ -68,9 +69,93 @@ fi
|
|||||||
%defattr(-,root,root)
|
%defattr(-,root,root)
|
||||||
/usr/src/%{module}-%{version}
|
/usr/src/%{module}-%{version}
|
||||||
|
|
||||||
|
%pre
|
||||||
|
echo "Running pre installation script: $0. Parameters: $*"
|
||||||
|
# We don't want any other versions lingering around in dkms.
|
||||||
|
# Tests with 'dnf' showed that in case of reinstall, or upgrade
|
||||||
|
# the preun scriptlet removed the version we are trying to install.
|
||||||
|
# Because of this, find all zfs dkms sources in /var/lib/dkms and
|
||||||
|
# remove them, if we find a matching version in dkms.
|
||||||
|
|
||||||
|
dkms_root=/var/lib/dkms
|
||||||
|
if [ -d ${dkms_root}/%{module} ]; then
|
||||||
|
cd ${dkms_root}/%{module}
|
||||||
|
for x in [[:digit:]]*; do
|
||||||
|
[ -d "$x" ] || continue
|
||||||
|
otherver="$x"
|
||||||
|
opath="${dkms_root}/%{module}/${otherver}"
|
||||||
|
if [ "$otherver" != %{version} ]; then
|
||||||
|
# This is a workaround for a broken 'dkms status' that we caused in a previous version.
|
||||||
|
# One day it might be not needed anymore, but it does not hurt to keep it.
|
||||||
|
if dkms status -m %{module} -v "$otherver" 2>&1 | grep "${opath}/source/dkms.conf does not exist"
|
||||||
|
then
|
||||||
|
echo "ERROR: dkms status is broken!" >&2
|
||||||
|
if [ -L "${opath}/source" -a ! -d "${opath}/source" ]
|
||||||
|
then
|
||||||
|
echo "Trying to fix it by removing the symlink: ${opath}/source" >&2
|
||||||
|
echo "You should manually remove ${opath}" >&2
|
||||||
|
rm -f "${opath}/source" || echo "Removal failed!" >&2
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
if [ `dkms status -m %{module} -v "$otherver" | grep -c %{module}` -gt 0 ]; then
|
||||||
|
echo "Removing old %{module} dkms modules version $otherver from all kernels."
|
||||||
|
dkms remove -m %{module} -v "$otherver" --all ||:
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
cd ${dkms_root}
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Uninstall this version of zfs dkms modules before installation of the package.
|
||||||
|
if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
|
||||||
|
echo "Removing %{module} dkms modules version %{version} from all kernels."
|
||||||
|
dkms remove -m %{module} -v %{version} --all ||:
|
||||||
|
fi
|
||||||
|
|
||||||
|
%post
|
||||||
|
echo "Running post installation script: $0. Parameters: $*"
|
||||||
|
# Add the module to dkms, as recommended in the dkms man page.
|
||||||
|
# This is generally rpm specific.
|
||||||
|
# But this also may help, if we have a broken 'dkms status'.
|
||||||
|
# Because, if the sources are available and only the symlink pointing
|
||||||
|
# to them is missing, this will resolve the situation
|
||||||
|
echo "Adding %{module} dkms modules version %{version} to dkms."
|
||||||
|
dkms add -m %{module} -v %{version} %{!?not_rpm:--rpm_safe_upgrade} ||:
|
||||||
|
|
||||||
|
# After installing the package, dkms install this zfs version for the current kernel.
|
||||||
|
# Force the overwriting of old modules to avoid diff warnings in dkms status.
|
||||||
|
# Or in case of a downgrade to overwrite newer versions.
|
||||||
|
# Or if some other backed up versions have been restored before.
|
||||||
|
echo "Installing %{module} dkms modules version %{version} for the current kernel."
|
||||||
|
dkms install --force -m %{module} -v %{version} ||:
|
||||||
|
|
||||||
%preun
|
%preun
|
||||||
dkms remove -m %{module} -v %{version} --all
|
dkms_root="/var/lib/dkms/%{module}/%{version}"
|
||||||
|
echo "Running pre uninstall script: $0. Parameters: $*"
|
||||||
|
# In case of upgrade we do nothing. See above comment in pre hook.
|
||||||
|
if [ "$1" = "1" -o "$1" = "upgrade" ] ; then
|
||||||
|
echo "This is an upgrade. Skipping pre uninstall action."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
%posttrans
|
# Check if we uninstall the package. In that case remove the dkms modules.
|
||||||
/usr/lib/dkms/common.postinst %{module} %{version}
|
# '0' is the value for the first parameter for rpm packages.
|
||||||
|
# 'remove' or 'purge' are the possible names for deb packages.
|
||||||
|
if [ "$1" = "0" -o "$1" = "remove" -o "$1" = "purge" ] ; then
|
||||||
|
if [ `dkms status -m %{module} -v %{version} | grep -c %{module}` -gt 0 ]; then
|
||||||
|
echo "Removing %{module} dkms modules version %{version} from all kernels."
|
||||||
|
dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade} && exit 0
|
||||||
|
fi
|
||||||
|
# If removing the modules failed, it might be because of the broken 'dkms status'.
|
||||||
|
if dkms status -m %{module} -v %{version} 2>&1 | grep "${dkms_root}/source/dkms.conf does not exist"
|
||||||
|
then
|
||||||
|
echo "ERROR: dkms status is broken!" >&2
|
||||||
|
echo "You should manually remove ${dkms_root}" >&2
|
||||||
|
echo "WARNING: installed modules in /lib/modules/`uname -r`/extra could not be removed automatically!" >&2
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "Script parameter $1 did not match any removal condition."
|
||||||
|
fi
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ scripts_scripts = \
|
|||||||
|
|
||||||
if CONFIG_USER
|
if CONFIG_USER
|
||||||
dist_scripts_SCRIPTS = $(scripts_scripts)
|
dist_scripts_SCRIPTS = $(scripts_scripts)
|
||||||
|
dist_zfsexec_SCRIPTS = \
|
||||||
|
%D%/zfs_prepare_disk
|
||||||
else
|
else
|
||||||
dist_noinst_SCRIPTS += $(scripts_scripts)
|
dist_noinst_SCRIPTS += $(scripts_scripts)
|
||||||
endif
|
endif
|
||||||
|
|||||||
Executable
+17
@@ -0,0 +1,17 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
#
|
||||||
|
# This is an optional helper script that is automatically called by libzfs
|
||||||
|
# before a disk is about to be added into the pool. It can be modified by
|
||||||
|
# the user to run whatever commands are necessary to prepare a disk for
|
||||||
|
# inclusion into the pool. For example, users can add lines to this
|
||||||
|
# script to do things like update the drive's firmware or check the drive's
|
||||||
|
# health. The script is optional and can be removed if it is not needed.
|
||||||
|
#
|
||||||
|
# See the zfs_prepare_disk(8) man page for details.
|
||||||
|
#
|
||||||
|
# Example:
|
||||||
|
#
|
||||||
|
# echo "Prepare disk $VDEV_PATH ($VDEV_UPATH) for $VDEV_PREPARE in $POOL_NAME"
|
||||||
|
#
|
||||||
|
|
||||||
|
exit 0
|
||||||
@@ -122,10 +122,10 @@ tags = ['functional', 'fallocate']
|
|||||||
|
|
||||||
[tests/functional/fault:Linux]
|
[tests/functional/fault:Linux]
|
||||||
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
|
tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
|
||||||
'auto_replace_001_pos', 'auto_spare_001_pos', 'auto_spare_002_pos',
|
'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
|
||||||
'auto_spare_multiple', 'auto_spare_ashift', 'auto_spare_shared',
|
'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
|
||||||
'decrypt_fault', 'decompress_fault', 'scrub_after_resilver',
|
'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
|
||||||
'zpool_status_-s']
|
'scrub_after_resilver', 'zpool_status_-s']
|
||||||
tags = ['functional', 'fault']
|
tags = ['functional', 'fault']
|
||||||
|
|
||||||
[tests/functional/features/large_dnode:Linux]
|
[tests/functional/features/large_dnode:Linux]
|
||||||
|
|||||||
@@ -328,6 +328,7 @@ if os.environ.get('CI') == 'true':
|
|||||||
'fault/auto_online_001_pos': ['SKIP', ci_reason],
|
'fault/auto_online_001_pos': ['SKIP', ci_reason],
|
||||||
'fault/auto_online_002_pos': ['SKIP', ci_reason],
|
'fault/auto_online_002_pos': ['SKIP', ci_reason],
|
||||||
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
|
'fault/auto_replace_001_pos': ['SKIP', ci_reason],
|
||||||
|
'fault/auto_replace_002_pos': ['SKIP', ci_reason],
|
||||||
'fault/auto_spare_ashift': ['SKIP', ci_reason],
|
'fault/auto_spare_ashift': ['SKIP', ci_reason],
|
||||||
'fault/auto_spare_shared': ['SKIP', ci_reason],
|
'fault/auto_spare_shared': ['SKIP', ci_reason],
|
||||||
'procfs/pool_state': ['SKIP', ci_reason],
|
'procfs/pool_state': ['SKIP', ci_reason],
|
||||||
|
|||||||
@@ -130,12 +130,14 @@ export SYSTEM_FILES_LINUX='attr
|
|||||||
chattr
|
chattr
|
||||||
exportfs
|
exportfs
|
||||||
fallocate
|
fallocate
|
||||||
|
flock
|
||||||
free
|
free
|
||||||
getfattr
|
getfattr
|
||||||
groupadd
|
groupadd
|
||||||
groupdel
|
groupdel
|
||||||
groupmod
|
groupmod
|
||||||
hostid
|
hostid
|
||||||
|
logger
|
||||||
losetup
|
losetup
|
||||||
lsattr
|
lsattr
|
||||||
lsblk
|
lsblk
|
||||||
@@ -145,21 +147,20 @@ export SYSTEM_FILES_LINUX='attr
|
|||||||
md5sum
|
md5sum
|
||||||
mkswap
|
mkswap
|
||||||
modprobe
|
modprobe
|
||||||
|
mountpoint
|
||||||
mpstat
|
mpstat
|
||||||
nsenter
|
nsenter
|
||||||
parted
|
parted
|
||||||
perf
|
perf
|
||||||
setfattr
|
setfattr
|
||||||
|
setpriv
|
||||||
sha256sum
|
sha256sum
|
||||||
udevadm
|
udevadm
|
||||||
unshare
|
unshare
|
||||||
useradd
|
useradd
|
||||||
userdel
|
userdel
|
||||||
usermod
|
usermod
|
||||||
setpriv
|
wipefs'
|
||||||
mountpoint
|
|
||||||
flock
|
|
||||||
logger'
|
|
||||||
|
|
||||||
export ZFS_FILES='zdb
|
export ZFS_FILES='zdb
|
||||||
zfs
|
zfs
|
||||||
|
|||||||
@@ -37,6 +37,12 @@
|
|||||||
. ${STF_SUITE}/include/math.shlib
|
. ${STF_SUITE}/include/math.shlib
|
||||||
. ${STF_SUITE}/include/blkdev.shlib
|
. ${STF_SUITE}/include/blkdev.shlib
|
||||||
|
|
||||||
|
# On AlmaLinux 9 we will see $PWD = '.' instead of the full path. This causes
|
||||||
|
# some tests to fail. Fix it up here.
|
||||||
|
if [ "$PWD" = "." ] ; then
|
||||||
|
PWD="$(readlink -f $PWD)"
|
||||||
|
fi
|
||||||
|
|
||||||
#
|
#
|
||||||
# Apply constrained path when available. This is required since the
|
# Apply constrained path when available. This is required since the
|
||||||
# PATH may have been modified by sudo's secure_path behavior.
|
# PATH may have been modified by sudo's secure_path behavior.
|
||||||
@@ -3334,6 +3340,21 @@ function set_tunable_impl
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function save_tunable
|
||||||
|
{
|
||||||
|
[[ ! -d $TEST_BASE_DIR ]] && return 1
|
||||||
|
[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
|
||||||
|
echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
|
||||||
|
}
|
||||||
|
|
||||||
|
function restore_tunable
|
||||||
|
{
|
||||||
|
[[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
|
||||||
|
val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
|
||||||
|
set_tunable64 "$1" "$val"
|
||||||
|
rm $TEST_BASE_DIR/tunable-$1
|
||||||
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# Get a global system tunable
|
# Get a global system tunable
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -89,7 +89,8 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip
|
|||||||
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
|
VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
|
||||||
VOL_MODE vol.mode zvol_volmode
|
VOL_MODE vol.mode zvol_volmode
|
||||||
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
VOL_RECURSIVE vol.recursive UNSUPPORTED
|
||||||
VOL_USE_BLK_MQ UNSUPPORTED UNSUPPORTED
|
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
|
||||||
|
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
|
||||||
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
XATTR_COMPAT xattr_compat zfs_xattr_compat
|
||||||
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
|
||||||
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
|
||||||
|
|||||||
@@ -1431,6 +1431,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
|
|||||||
functional/fault/auto_online_001_pos.ksh \
|
functional/fault/auto_online_001_pos.ksh \
|
||||||
functional/fault/auto_online_002_pos.ksh \
|
functional/fault/auto_online_002_pos.ksh \
|
||||||
functional/fault/auto_replace_001_pos.ksh \
|
functional/fault/auto_replace_001_pos.ksh \
|
||||||
|
functional/fault/auto_replace_002_pos.ksh \
|
||||||
functional/fault/auto_spare_001_pos.ksh \
|
functional/fault/auto_spare_001_pos.ksh \
|
||||||
functional/fault/auto_spare_002_pos.ksh \
|
functional/fault/auto_spare_002_pos.ksh \
|
||||||
functional/fault/auto_spare_ashift.ksh \
|
functional/fault/auto_spare_ashift.ksh \
|
||||||
|
|||||||
@@ -31,4 +31,8 @@ verify_runnable "global"
|
|||||||
|
|
||||||
default_cleanup_noexit
|
default_cleanup_noexit
|
||||||
|
|
||||||
|
if tunable_exists BCLONE_ENABLED ; then
|
||||||
|
log_must restore_tunable BCLONE_ENABLED
|
||||||
|
fi
|
||||||
|
|
||||||
log_pass
|
log_pass
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user