mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-12 19:20:28 +03:00
600a02b884
The previous flushing algorithm limited only the total number of log blocks to the minimum of 256K and 4x the number of metaslabs in the pool. As a result, a system with 1500 disks with 1000 metaslabs each, touching several new metaslabs each TXG, could grow the spacemap log to a huge size without much benefit. We've observed one such system importing a pool for about 45 minutes. This patch improves the situation from five sides: - By limiting the maximum period for each metaslab to be flushed to 1000 TXGs, which effectively limits the maximum number of per-TXG spacemap logs to load to the same number. - By making flushing smoother via accounting for the number of metaslabs that were touched after the last flush and actually need another flush, not just an ms_unflushed_txg bump. - By applying zfs_unflushed_log_block_pct to the number of metaslabs that were touched after the last flush, not all metaslabs in the pool. - By aggressively prefetching per-TXG spacemap logs up to 16 TXGs in advance, making the log spacemap load process for a wide HDD pool CPU-bound, accelerating it many times over. - By reducing zfs_unflushed_log_block_max from 256K to 128K, reducing the inherently single-threaded log processing time from ~10 to ~5 minutes. As a further optimization we could skip bumping ms_unflushed_txg for metaslabs not touched since the last flush, but that would be an incompatible change, requiring a new pool feature. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored-By: iXsystems, Inc. Closes #12789
85 lines
2.9 KiB
C
85 lines
2.9 KiB
C
/*
|
|
* CDDL HEADER START
|
|
*
|
|
* The contents of this file are subject to the terms of the
|
|
* Common Development and Distribution License (the "License").
|
|
* You may not use this file except in compliance with the License.
|
|
*
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
* See the License for the specific language governing permissions
|
|
* and limitations under the License.
|
|
*
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
*
|
|
* CDDL HEADER END
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2018, 2019 by Delphix. All rights reserved.
|
|
*/
|
|
|
|
#ifndef _SYS_SPA_LOG_SPACEMAP_H
|
|
#define _SYS_SPA_LOG_SPACEMAP_H
|
|
|
|
#include <sys/avl.h>
|
|
|
|
typedef struct log_summary_entry {
|
|
uint64_t lse_start; /* start TXG */
|
|
uint64_t lse_end; /* last TXG */
|
|
uint64_t lse_txgcount; /* # of TXGs */
|
|
uint64_t lse_mscount; /* # of metaslabs needed to be flushed */
|
|
uint64_t lse_msdcount; /* # of dirty metaslabs needed to be flushed */
|
|
uint64_t lse_blkcount; /* blocks held by this entry */
|
|
list_node_t lse_node;
|
|
} log_summary_entry_t;
|
|
|
|
/*
 * Counters consulted by the two flushing heuristics: one driven by the
 * memory consumed by unflushed trees, the other by the number of blocks
 * currently held in log space maps relative to an allowed maximum.
 */
typedef struct spa_unflushed_stats {
	/* used for memory heuristic */
	uint64_t sus_memused;	/* current memory used for unflushed trees */

	/* used for block heuristic */
	uint64_t sus_blocklimit;	/* max # of log blocks allowed */
	uint64_t sus_nblocks;	/* # of blocks in log space maps currently */
} spa_unflushed_stats_t;
|
|
|
|
typedef struct spa_log_sm {
|
|
uint64_t sls_sm_obj; /* space map object ID */
|
|
uint64_t sls_txg; /* txg logged on the space map */
|
|
uint64_t sls_nblocks; /* number of blocks in this log */
|
|
uint64_t sls_mscount; /* # of metaslabs flushed in the log's txg */
|
|
avl_node_t sls_node; /* node in spa_sm_logs_by_txg */
|
|
space_map_t *sls_sm; /* space map pointer, if open */
|
|
} spa_log_sm_t;
|
|
|
|
int spa_ld_log_spacemaps(spa_t *);
|
|
|
|
void spa_generate_syncing_log_sm(spa_t *, dmu_tx_t *);
|
|
void spa_flush_metaslabs(spa_t *, dmu_tx_t *);
|
|
void spa_sync_close_syncing_log_sm(spa_t *);
|
|
|
|
void spa_cleanup_old_sm_logs(spa_t *, dmu_tx_t *);
|
|
|
|
uint64_t spa_log_sm_blocklimit(spa_t *);
|
|
void spa_log_sm_set_blocklimit(spa_t *);
|
|
uint64_t spa_log_sm_nblocks(spa_t *);
|
|
uint64_t spa_log_sm_memused(spa_t *);
|
|
|
|
void spa_log_sm_decrement_mscount(spa_t *, uint64_t);
|
|
void spa_log_sm_increment_current_mscount(spa_t *);
|
|
|
|
void spa_log_summary_add_flushed_metaslab(spa_t *, boolean_t);
|
|
void spa_log_summary_dirty_flushed_metaslab(spa_t *, uint64_t);
|
|
void spa_log_summary_decrement_mscount(spa_t *, uint64_t, boolean_t);
|
|
void spa_log_summary_decrement_blkcount(spa_t *, uint64_t);
|
|
|
|
boolean_t spa_flush_all_logs_requested(spa_t *);
|
|
|
|
extern int zfs_keep_log_spacemaps_at_export;
|
|
|
|
#endif /* _SYS_SPA_LOG_SPACEMAP_H */
|