mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 19:50:25 +03:00
spa_load_verify() may consume too much memory
When a pool is imported it will scan the pool to verify the integrity of the data and metadata. The amount it scans will depend on the import flags provided. On systems with small amounts of memory or when importing a pool from the crash kernel, it's possible for spa_load_verify to issue too many I/Os that it consumes all the memory of the system resulting in an OOM message or a hang. To prevent this, we limit the amount of memory that the initial pool scan can consume. This change will, by default, use 1/16th of the ARC for scan I/Os to prevent running the system out of memory during import. Reviewed-by: Matt Ahrens <matt@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: Serapheim Dimitropoulos <serapheim@delphix.com> Signed-off-by: George Wilson george.wilson@delphix.com External-issue: DLPX-65237 External-issue: DLPX-65238 Closes #9146
This commit is contained in:
parent
a43570c5f3
commit
c8242a96ba
@ -111,7 +111,7 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
|
|||||||
|
|
||||||
uint64_t *zopt_object = NULL;
|
uint64_t *zopt_object = NULL;
|
||||||
static unsigned zopt_objects = 0;
|
static unsigned zopt_objects = 0;
|
||||||
uint64_t max_inflight = 1000;
|
uint64_t max_inflight_bytes = 256 * 1024 * 1024; /* 256MB */
|
||||||
static int leaked_objects = 0;
|
static int leaked_objects = 0;
|
||||||
static range_tree_t *mos_refd_objs;
|
static range_tree_t *mos_refd_objs;
|
||||||
|
|
||||||
@ -3806,7 +3806,7 @@ zdb_blkptr_done(zio_t *zio)
|
|||||||
abd_free(zio->io_abd);
|
abd_free(zio->io_abd);
|
||||||
|
|
||||||
mutex_enter(&spa->spa_scrub_lock);
|
mutex_enter(&spa->spa_scrub_lock);
|
||||||
spa->spa_load_verify_ios--;
|
spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
|
||||||
cv_broadcast(&spa->spa_scrub_io_cv);
|
cv_broadcast(&spa->spa_scrub_io_cv);
|
||||||
|
|
||||||
if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
||||||
@ -3877,9 +3877,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
flags |= ZIO_FLAG_SPECULATIVE;
|
flags |= ZIO_FLAG_SPECULATIVE;
|
||||||
|
|
||||||
mutex_enter(&spa->spa_scrub_lock);
|
mutex_enter(&spa->spa_scrub_lock);
|
||||||
while (spa->spa_load_verify_ios > max_inflight)
|
while (spa->spa_load_verify_bytes > max_inflight_bytes)
|
||||||
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
||||||
spa->spa_load_verify_ios++;
|
spa->spa_load_verify_bytes += size;
|
||||||
mutex_exit(&spa->spa_scrub_lock);
|
mutex_exit(&spa->spa_scrub_lock);
|
||||||
|
|
||||||
zio_nowait(zio_read(NULL, spa, bp, abd, size,
|
zio_nowait(zio_read(NULL, spa, bp, abd, size,
|
||||||
@ -4895,6 +4895,7 @@ dump_block_stats(spa_t *spa)
|
|||||||
ZIO_FLAG_GODFATHER);
|
ZIO_FLAG_GODFATHER);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
ASSERT0(spa->spa_load_verify_bytes);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Done after zio_wait() since zcb_haderrors is modified in
|
* Done after zio_wait() since zcb_haderrors is modified in
|
||||||
@ -6681,10 +6682,10 @@ main(int argc, char **argv)
|
|||||||
break;
|
break;
|
||||||
/* NB: Sort single match options below. */
|
/* NB: Sort single match options below. */
|
||||||
case 'I':
|
case 'I':
|
||||||
max_inflight = strtoull(optarg, NULL, 0);
|
max_inflight_bytes = strtoull(optarg, NULL, 0);
|
||||||
if (max_inflight == 0) {
|
if (max_inflight_bytes == 0) {
|
||||||
(void) fprintf(stderr, "maximum number "
|
(void) fprintf(stderr, "maximum number "
|
||||||
"of inflight I/Os must be greater "
|
"of inflight bytes must be greater "
|
||||||
"than 0\n");
|
"than 0\n");
|
||||||
usage();
|
usage();
|
||||||
}
|
}
|
||||||
|
@ -272,7 +272,9 @@ struct spa {
|
|||||||
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
|
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
|
||||||
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
kmutex_t spa_scrub_lock; /* resilver/scrub lock */
|
||||||
uint64_t spa_scrub_inflight; /* in-flight scrub bytes */
|
uint64_t spa_scrub_inflight; /* in-flight scrub bytes */
|
||||||
uint64_t spa_load_verify_ios; /* in-flight verification IOs */
|
|
||||||
|
/* in-flight verification bytes */
|
||||||
|
uint64_t spa_load_verify_bytes;
|
||||||
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
|
kcondvar_t spa_scrub_io_cv; /* scrub I/O completion */
|
||||||
uint8_t spa_scrub_active; /* active or suspended? */
|
uint8_t spa_scrub_active; /* active or suspended? */
|
||||||
uint8_t spa_scrub_type; /* type of scrub we're doing */
|
uint8_t spa_scrub_type; /* type of scrub we're doing */
|
||||||
|
@ -543,13 +543,13 @@ Default value: \fB1\fR.
|
|||||||
.sp
|
.sp
|
||||||
.ne 2
|
.ne 2
|
||||||
.na
|
.na
|
||||||
\fBspa_load_verify_maxinflight\fR (int)
|
\fBspa_load_verify_shift\fR (int)
|
||||||
.ad
|
.ad
|
||||||
.RS 12n
|
.RS 12n
|
||||||
Maximum concurrent I/Os during the traversal performed during an "extreme
|
Sets the maximum number of bytes to consume during pool import to the log2
|
||||||
rewind" (\fB-X\fR) pool import.
|
fraction of the target arc size.
|
||||||
.sp
|
.sp
|
||||||
Default value: \fB10000\fR.
|
Default value: \fB4\fR.
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
|
@ -90,6 +90,7 @@
|
|||||||
#include <sys/fm/util.h>
|
#include <sys/fm/util.h>
|
||||||
#include <sys/callb.h>
|
#include <sys/callb.h>
|
||||||
#include <sys/zone.h>
|
#include <sys/zone.h>
|
||||||
|
#include <sys/vmsystm.h>
|
||||||
#endif /* _KERNEL */
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
#include "zfs_prop.h"
|
#include "zfs_prop.h"
|
||||||
@ -2197,16 +2198,16 @@ spa_load_verify_done(zio_t *zio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
mutex_enter(&spa->spa_scrub_lock);
|
mutex_enter(&spa->spa_scrub_lock);
|
||||||
spa->spa_load_verify_ios--;
|
spa->spa_load_verify_bytes -= BP_GET_PSIZE(bp);
|
||||||
cv_broadcast(&spa->spa_scrub_io_cv);
|
cv_broadcast(&spa->spa_scrub_io_cv);
|
||||||
mutex_exit(&spa->spa_scrub_lock);
|
mutex_exit(&spa->spa_scrub_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maximum number of concurrent scrub i/os to create while verifying
|
* Maximum number of inflight bytes is the log2 faction of the arc size.
|
||||||
* a pool while importing it.
|
* By default, we set it to 1/16th of the arc.
|
||||||
*/
|
*/
|
||||||
int spa_load_verify_maxinflight = 10000;
|
int spa_load_verify_shift = 4;
|
||||||
int spa_load_verify_metadata = B_TRUE;
|
int spa_load_verify_metadata = B_TRUE;
|
||||||
int spa_load_verify_data = B_TRUE;
|
int spa_load_verify_data = B_TRUE;
|
||||||
|
|
||||||
@ -2228,13 +2229,14 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
|||||||
if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
|
if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
|
||||||
return (0);
|
return (0);
|
||||||
|
|
||||||
|
int maxinflight_bytes = arc_target_bytes() >> spa_load_verify_shift;
|
||||||
zio_t *rio = arg;
|
zio_t *rio = arg;
|
||||||
size_t size = BP_GET_PSIZE(bp);
|
size_t size = BP_GET_PSIZE(bp);
|
||||||
|
|
||||||
mutex_enter(&spa->spa_scrub_lock);
|
mutex_enter(&spa->spa_scrub_lock);
|
||||||
while (spa->spa_load_verify_ios >= spa_load_verify_maxinflight)
|
while (spa->spa_load_verify_bytes >= maxinflight_bytes)
|
||||||
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
||||||
spa->spa_load_verify_ios++;
|
spa->spa_load_verify_bytes += size;
|
||||||
mutex_exit(&spa->spa_scrub_lock);
|
mutex_exit(&spa->spa_scrub_lock);
|
||||||
|
|
||||||
zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size,
|
zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size,
|
||||||
@ -2287,12 +2289,14 @@ spa_load_verify(spa_t *spa)
|
|||||||
"spa_load_verify_metadata=%u)",
|
"spa_load_verify_metadata=%u)",
|
||||||
spa_load_verify_data, spa_load_verify_metadata);
|
spa_load_verify_data, spa_load_verify_metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
error = traverse_pool(spa, spa->spa_verify_min_txg,
|
error = traverse_pool(spa, spa->spa_verify_min_txg,
|
||||||
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
|
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA |
|
||||||
TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio);
|
TRAVERSE_NO_DECRYPT, spa_load_verify_cb, rio);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) zio_wait(rio);
|
(void) zio_wait(rio);
|
||||||
|
ASSERT0(spa->spa_load_verify_bytes);
|
||||||
|
|
||||||
spa->spa_load_meta_errors = sle.sle_meta_count;
|
spa->spa_load_meta_errors = sle.sle_meta_count;
|
||||||
spa->spa_load_data_errors = sle.sle_data_count;
|
spa->spa_load_data_errors = sle.sle_data_count;
|
||||||
@ -9309,9 +9313,11 @@ EXPORT_SYMBOL(spa_event_notify);
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_KERNEL)
|
#if defined(_KERNEL)
|
||||||
module_param(spa_load_verify_maxinflight, int, 0644);
|
/* BEGIN CSTYLED */
|
||||||
MODULE_PARM_DESC(spa_load_verify_maxinflight,
|
module_param(spa_load_verify_shift, int, 0644);
|
||||||
"Max concurrent traversal I/Os while verifying pool during import -X");
|
MODULE_PARM_DESC(spa_load_verify_shift, "log2(fraction of arc that can "
|
||||||
|
"be used by inflight I/Os when verifying pool during import");
|
||||||
|
/* END CSTYLED */
|
||||||
|
|
||||||
module_param(spa_load_verify_metadata, int, 0644);
|
module_param(spa_load_verify_metadata, int, 0644);
|
||||||
MODULE_PARM_DESC(spa_load_verify_metadata,
|
MODULE_PARM_DESC(spa_load_verify_metadata,
|
||||||
|
Loading…
Reference in New Issue
Block a user