mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-26 18:04:22 +03:00
Prevent zevent list from consuming all of kernel memory
There are a couple of changes included here. The first introduces a cap on the size to which the ZED will grow the zevent list. The default cap of 1048576 (roughly one million) entries is more than enough for most use cases, and if you are overflowing that value, the problem needs to be addressed another way. The value is also tunable via the new -b option, for those who want the limit to be higher or lower. The other change adds a kernel module parameter that allows snapshot creation/deletion to be exempted from history logging; for most workloads, having these operations logged is valuable, but for some workloads it produces large quantities of log spam and isn't especially helpful. Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Paul Dagnelie <pcd@delphix.com> Issue #13374 Closes #13753
This commit is contained in:
parent
d22dd77c4d
commit
17e212652d
@ -48,6 +48,7 @@ zed_conf_init(struct zed_conf *zcp)
|
|||||||
zcp->zevent_fd = -1; /* opened in zed_event_init() */
|
zcp->zevent_fd = -1; /* opened in zed_event_init() */
|
||||||
|
|
||||||
zcp->max_jobs = 16;
|
zcp->max_jobs = 16;
|
||||||
|
zcp->max_zevent_buf_len = 1 << 20;
|
||||||
|
|
||||||
if (!(zcp->pid_file = strdup(ZED_PID_FILE)) ||
|
if (!(zcp->pid_file = strdup(ZED_PID_FILE)) ||
|
||||||
!(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) ||
|
!(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) ||
|
||||||
@ -141,6 +142,8 @@ _zed_conf_display_help(const char *prog, boolean_t got_err)
|
|||||||
.v = ZED_STATE_FILE },
|
.v = ZED_STATE_FILE },
|
||||||
{ .o = "-j JOBS", .d = "Start at most JOBS at once.",
|
{ .o = "-j JOBS", .d = "Start at most JOBS at once.",
|
||||||
.v = "16" },
|
.v = "16" },
|
||||||
|
{ .o = "-b LEN", .d = "Cap kernel event buffer at LEN entries.",
|
||||||
|
.v = "1048576" },
|
||||||
{},
|
{},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -230,7 +233,7 @@ _zed_conf_parse_path(char **resultp, const char *path)
|
|||||||
void
|
void
|
||||||
zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
|
zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
|
||||||
{
|
{
|
||||||
const char * const opts = ":hLVd:p:P:s:vfFMZIj:";
|
const char * const opts = ":hLVd:p:P:s:vfFMZIj:b:";
|
||||||
int opt;
|
int opt;
|
||||||
unsigned long raw;
|
unsigned long raw;
|
||||||
|
|
||||||
@ -291,6 +294,17 @@ zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
|
|||||||
zcp->max_jobs = raw;
|
zcp->max_jobs = raw;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'b':
|
||||||
|
errno = 0;
|
||||||
|
raw = strtoul(optarg, NULL, 0);
|
||||||
|
if (errno == ERANGE || raw > INT32_MAX) {
|
||||||
|
zed_log_die("%lu is too large", raw);
|
||||||
|
} if (raw == 0) {
|
||||||
|
zcp->max_zevent_buf_len = INT32_MAX;
|
||||||
|
} else {
|
||||||
|
zcp->max_zevent_buf_len = raw;
|
||||||
|
}
|
||||||
|
break;
|
||||||
case '?':
|
case '?':
|
||||||
default:
|
default:
|
||||||
if (optopt == '?')
|
if (optopt == '?')
|
||||||
|
@ -33,6 +33,7 @@ struct zed_conf {
|
|||||||
int zevent_fd; /* fd for access to zevents */
|
int zevent_fd; /* fd for access to zevents */
|
||||||
|
|
||||||
int16_t max_jobs; /* max zedlets to run at one time */
|
int16_t max_jobs; /* max zedlets to run at one time */
|
||||||
|
int32_t max_zevent_buf_len; /* max size of kernel event list */
|
||||||
|
|
||||||
boolean_t do_force:1; /* true if force enabled */
|
boolean_t do_force:1; /* true if force enabled */
|
||||||
boolean_t do_foreground:1; /* true if run in foreground */
|
boolean_t do_foreground:1; /* true if run in foreground */
|
||||||
|
@ -38,6 +38,8 @@
|
|||||||
|
|
||||||
#define MAXBUF 4096
|
#define MAXBUF 4096
|
||||||
|
|
||||||
|
/*
 * Upper bound used by _bump_event_queue_length() when it grows the
 * zfs_zevent_len_max module parameter: a computed length above this value
 * is clamped to it.  Defaults to 1 << 20 entries; zed_event_init() replaces
 * it with zcp->max_zevent_buf_len when that field is nonzero.
 */
static int max_zevent_buf_len = 1 << 20;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Open the libzfs interface.
|
* Open the libzfs interface.
|
||||||
*/
|
*/
|
||||||
@ -70,6 +72,9 @@ zed_event_init(struct zed_conf *zcp)
|
|||||||
zed_log_die("Failed to initialize disk events");
|
zed_log_die("Failed to initialize disk events");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (zcp->max_zevent_buf_len != 0)
|
||||||
|
max_zevent_buf_len = zcp->max_zevent_buf_len;
|
||||||
|
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -105,7 +110,7 @@ _bump_event_queue_length(void)
|
|||||||
{
|
{
|
||||||
int zzlm = -1, wr;
|
int zzlm = -1, wr;
|
||||||
char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */
|
char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */
|
||||||
long int qlen;
|
long int qlen, orig_qlen;
|
||||||
|
|
||||||
zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR);
|
zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR);
|
||||||
if (zzlm < 0)
|
if (zzlm < 0)
|
||||||
@ -116,7 +121,7 @@ _bump_event_queue_length(void)
|
|||||||
qlen_buf[sizeof (qlen_buf) - 1] = '\0';
|
qlen_buf[sizeof (qlen_buf) - 1] = '\0';
|
||||||
|
|
||||||
errno = 0;
|
errno = 0;
|
||||||
qlen = strtol(qlen_buf, NULL, 10);
|
orig_qlen = qlen = strtol(qlen_buf, NULL, 10);
|
||||||
if (errno == ERANGE)
|
if (errno == ERANGE)
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
@ -125,8 +130,14 @@ _bump_event_queue_length(void)
|
|||||||
else
|
else
|
||||||
qlen *= 2;
|
qlen *= 2;
|
||||||
|
|
||||||
if (qlen > INT_MAX)
|
/*
|
||||||
qlen = INT_MAX;
|
* Don't consume all of kernel memory with event logs if something
|
||||||
|
* goes wrong.
|
||||||
|
*/
|
||||||
|
if (qlen > max_zevent_buf_len)
|
||||||
|
qlen = max_zevent_buf_len;
|
||||||
|
if (qlen == orig_qlen)
|
||||||
|
goto done;
|
||||||
wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen);
|
wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen);
|
||||||
|
|
||||||
if (pwrite(zzlm, qlen_buf, wr, 0) < 0)
|
if (pwrite(zzlm, qlen_buf, wr, 0) < 0)
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
.Op Fl P Ar path
|
.Op Fl P Ar path
|
||||||
.Op Fl s Ar statefile
|
.Op Fl s Ar statefile
|
||||||
.Op Fl j Ar jobs
|
.Op Fl j Ar jobs
|
||||||
|
.Op Fl b Ar buflen
|
||||||
.
|
.
|
||||||
.Sh DESCRIPTION
|
.Sh DESCRIPTION
|
||||||
The
|
The
|
||||||
@ -96,6 +97,17 @@ ZEDLETs to run concurrently,
|
|||||||
delaying execution of new ones until they finish.
|
delaying execution of new ones until they finish.
|
||||||
Defaults to
|
Defaults to
|
||||||
.Sy 16 .
|
.Sy 16 .
|
||||||
|
.It Fl b Ar buflen
|
||||||
|
Cap kernel event buffer growth to
|
||||||
|
.Ar buflen
|
||||||
|
entries.
|
||||||
|
This buffer is grown when the daemon misses an event, but growing it results in
|
||||||
|
unreclaimable memory use in the kernel.
|
||||||
|
A value of
|
||||||
|
.Sy 0
|
||||||
|
removes the cap.
|
||||||
|
Defaults to
|
||||||
|
.Sy 1048576 .
|
||||||
.El
|
.El
|
||||||
.Sh ZEVENTS
|
.Sh ZEVENTS
|
||||||
A zevent is comprised of a list of nvpairs (name/value pairs).
|
A zevent is comprised of a list of nvpairs (name/value pairs).
|
||||||
|
@ -88,6 +88,8 @@ int zfs_max_recordsize = 16 * 1024 * 1024;
|
|||||||
#endif
|
#endif
|
||||||
static int zfs_allow_redacted_dataset_mount = 0;
|
static int zfs_allow_redacted_dataset_mount = 0;
|
||||||
|
|
||||||
|
/*
 * When nonzero (the default), snapshot creation and snapshot destroy /
 * deferred-destroy operations are recorded through
 * spa_history_log_internal_ds().  When zero, those log calls are skipped.
 * Exposed as a read-write module parameter and referenced from the snapshot
 * destroy code through an extern declaration, hence not static.
 */
int zfs_snapshot_history_enabled = 1;
|
||||||
|
|
||||||
#define SWITCH64(x, y) \
|
#define SWITCH64(x, y) \
|
||||||
{ \
|
{ \
|
||||||
uint64_t __tmp = (x); \
|
uint64_t __tmp = (x); \
|
||||||
@ -1867,7 +1869,8 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
|
|||||||
|
|
||||||
dsl_dir_snap_cmtime_update(ds->ds_dir, tx);
|
dsl_dir_snap_cmtime_update(ds->ds_dir, tx);
|
||||||
|
|
||||||
spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");
|
if (zfs_snapshot_history_enabled)
|
||||||
|
spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -4985,6 +4988,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW,
|
|||||||
ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
|
ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
|
||||||
"Allow mounting of redacted datasets");
|
"Allow mounting of redacted datasets");
|
||||||
|
|
||||||
|
ZFS_MODULE_PARAM(zfs, zfs_, snapshot_history_enabled, INT, ZMOD_RW,
|
||||||
|
"Include snapshot events in pool history/events");
|
||||||
|
|
||||||
EXPORT_SYMBOL(dsl_dataset_hold);
|
EXPORT_SYMBOL(dsl_dataset_hold);
|
||||||
EXPORT_SYMBOL(dsl_dataset_hold_flags);
|
EXPORT_SYMBOL(dsl_dataset_hold_flags);
|
||||||
EXPORT_SYMBOL(dsl_dataset_hold_obj);
|
EXPORT_SYMBOL(dsl_dataset_hold_obj);
|
||||||
|
@ -49,6 +49,8 @@
|
|||||||
#include <sys/zthr.h>
|
#include <sys/zthr.h>
|
||||||
#include <sys/spa_impl.h>
|
#include <sys/spa_impl.h>
|
||||||
|
|
||||||
|
extern int zfs_snapshot_history_enabled;
|
||||||
|
|
||||||
int
|
int
|
||||||
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
|
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
|
||||||
{
|
{
|
||||||
@ -321,14 +323,19 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
|
|||||||
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
|
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
|
||||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||||
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
|
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
|
||||||
spa_history_log_internal_ds(ds, "defer_destroy", tx, " ");
|
if (zfs_snapshot_history_enabled) {
|
||||||
|
spa_history_log_internal_ds(ds, "defer_destroy", tx,
|
||||||
|
" ");
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
|
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
|
||||||
|
|
||||||
/* We need to log before removing it from the namespace. */
|
if (zfs_snapshot_history_enabled) {
|
||||||
spa_history_log_internal_ds(ds, "destroy", tx, " ");
|
/* We need to log before removing it from the namespace. */
|
||||||
|
spa_history_log_internal_ds(ds, "destroy", tx, " ");
|
||||||
|
}
|
||||||
|
|
||||||
dsl_scan_ds_destroyed(ds, tx);
|
dsl_scan_ds_destroyed(ds, tx);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user