Prevent zevent list from consuming all of kernel memory

There are a couple changes included here. The first is to introduce 
a cap on the size the ZED will grow the zevent list to. One million 
entries is more than enough for most use cases, and if you are 
overflowing that value, the problem needs to be addressed another 
way. The value is also tunable, for those who want the limit to be 
higher or lower. 
 
The other change is to add a kernel module parameter that allows 
snapshot creation/deletion to be exempted from the history logging; 
for most workloads, having these things logged is valuable, but for 
some workloads it produces large quantities of log spam and isn't 
especially helpful.

Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Issue #13374 
Closes #13753
This commit is contained in:
Paul Dagnelie 2022-08-22 12:36:22 -07:00 committed by GitHub
parent d22dd77c4d
commit 17e212652d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 60 additions and 9 deletions

View File

@ -48,6 +48,7 @@ zed_conf_init(struct zed_conf *zcp)
zcp->zevent_fd = -1; /* opened in zed_event_init() */
zcp->max_jobs = 16;
zcp->max_zevent_buf_len = 1 << 20;
if (!(zcp->pid_file = strdup(ZED_PID_FILE)) ||
!(zcp->zedlet_dir = strdup(ZED_ZEDLET_DIR)) ||
@ -141,6 +142,8 @@ _zed_conf_display_help(const char *prog, boolean_t got_err)
.v = ZED_STATE_FILE },
{ .o = "-j JOBS", .d = "Start at most JOBS at once.",
.v = "16" },
{ .o = "-b LEN", .d = "Cap kernel event buffer at LEN entries.",
.v = "1048576" },
{},
};
@ -230,7 +233,7 @@ _zed_conf_parse_path(char **resultp, const char *path)
void
zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
{
const char * const opts = ":hLVd:p:P:s:vfFMZIj:";
const char * const opts = ":hLVd:p:P:s:vfFMZIj:b:";
int opt;
unsigned long raw;
@ -291,6 +294,17 @@ zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv)
zcp->max_jobs = raw;
}
break;
case 'b':
errno = 0;
raw = strtoul(optarg, NULL, 0);
if (errno == ERANGE || raw > INT32_MAX) {
zed_log_die("%lu is too large", raw);
} if (raw == 0) {
zcp->max_zevent_buf_len = INT32_MAX;
} else {
zcp->max_zevent_buf_len = raw;
}
break;
case '?':
default:
if (optopt == '?')

View File

@ -33,6 +33,7 @@ struct zed_conf {
int zevent_fd; /* fd for access to zevents */
int16_t max_jobs; /* max zedlets to run at one time */
int32_t max_zevent_buf_len; /* max size of kernel event list */
boolean_t do_force:1; /* true if force enabled */
boolean_t do_foreground:1; /* true if run in foreground */

View File

@ -38,6 +38,8 @@
#define MAXBUF 4096
static int max_zevent_buf_len = 1 << 20;
/*
* Open the libzfs interface.
*/
@ -70,6 +72,9 @@ zed_event_init(struct zed_conf *zcp)
zed_log_die("Failed to initialize disk events");
}
if (zcp->max_zevent_buf_len != 0)
max_zevent_buf_len = zcp->max_zevent_buf_len;
return (0);
}
@ -105,7 +110,7 @@ _bump_event_queue_length(void)
{
int zzlm = -1, wr;
char qlen_buf[12] = {0}; /* parameter is int => max "-2147483647\n" */
long int qlen;
long int qlen, orig_qlen;
zzlm = open("/sys/module/zfs/parameters/zfs_zevent_len_max", O_RDWR);
if (zzlm < 0)
@ -116,7 +121,7 @@ _bump_event_queue_length(void)
qlen_buf[sizeof (qlen_buf) - 1] = '\0';
errno = 0;
qlen = strtol(qlen_buf, NULL, 10);
orig_qlen = qlen = strtol(qlen_buf, NULL, 10);
if (errno == ERANGE)
goto done;
@ -125,8 +130,14 @@ _bump_event_queue_length(void)
else
qlen *= 2;
if (qlen > INT_MAX)
qlen = INT_MAX;
/*
* Don't consume all of kernel memory with event logs if something
* goes wrong.
*/
if (qlen > max_zevent_buf_len)
qlen = max_zevent_buf_len;
if (qlen == orig_qlen)
goto done;
wr = snprintf(qlen_buf, sizeof (qlen_buf), "%ld", qlen);
if (pwrite(zzlm, qlen_buf, wr, 0) < 0)

View File

@ -27,6 +27,7 @@
.Op Fl P Ar path
.Op Fl s Ar statefile
.Op Fl j Ar jobs
.Op Fl b Ar buflen
.
.Sh DESCRIPTION
The
@ -96,6 +97,17 @@ ZEDLETs to run concurrently,
delaying execution of new ones until they finish.
Defaults to
.Sy 16 .
.It Fl b Ar buflen
Cap kernel event buffer growth to
.Ar buflen
entries.
This buffer is grown when the daemon misses an event, but results in
unreclaimable memory use in the kernel.
A value of
.Sy 0
removes the cap.
Defaults to
.Sy 1048576 .
.El
.Sh ZEVENTS
A zevent is comprised of a list of nvpairs (name/value pairs).

View File

@ -88,6 +88,8 @@ int zfs_max_recordsize = 16 * 1024 * 1024;
#endif
static int zfs_allow_redacted_dataset_mount = 0;
int zfs_snapshot_history_enabled = 1;
#define SWITCH64(x, y) \
{ \
uint64_t __tmp = (x); \
@ -1867,6 +1869,7 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsl_dir_snap_cmtime_update(ds->ds_dir, tx);
if (zfs_snapshot_history_enabled)
spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, " ");
}
@ -4985,6 +4988,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, max_recordsize, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, allow_redacted_dataset_mount, INT, ZMOD_RW,
"Allow mounting of redacted datasets");
ZFS_MODULE_PARAM(zfs, zfs_, snapshot_history_enabled, INT, ZMOD_RW,
"Include snapshot events in pool history/events");
EXPORT_SYMBOL(dsl_dataset_hold);
EXPORT_SYMBOL(dsl_dataset_hold_flags);
EXPORT_SYMBOL(dsl_dataset_hold_obj);

View File

@ -49,6 +49,8 @@
#include <sys/zthr.h>
#include <sys/spa_impl.h>
extern int zfs_snapshot_history_enabled;
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
@ -321,14 +323,19 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
spa_history_log_internal_ds(ds, "defer_destroy", tx, " ");
if (zfs_snapshot_history_enabled) {
spa_history_log_internal_ds(ds, "defer_destroy", tx,
" ");
}
return;
}
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
if (zfs_snapshot_history_enabled) {
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, " ");
}
dsl_scan_ds_destroyed(ds, tx);