mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Improve resilver ETAs
When resilvering the estimated time remaining is calculated using the average issue rate over the current pass. Where the current pass starts when a scan was started, or restarted, if the pool was exported/imported. For dRAID pools in particular this can result in wildly optimistic estimates since the issue rate will be very high while scanning when non-degraded regions of the pool are scanned. Once repair I/O starts being issued performance drops to a realistic number but the estimated performance is still significantly skewed. To address this we redefine a pass such that it starts after a scanning phase completes so the issue rate is more reflective of recent performance. Additionally, the zfs_scan_report_txgs module option can be set to reset the pass statistics more often. Reviewed-by: Akash B <akash-b@hpe.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #14410
This commit is contained in:
		
							parent
							
								
									a68dfdb88c
								
							
						
					
					
						commit
						9fe3da9364
					
				| @ -7549,19 +7549,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) | |||||||
| 
 | 
 | ||||||
| 	zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf)); | 	zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf)); | ||||||
| 
 | 
 | ||||||
| 	assert(ps->pss_func == POOL_SCAN_SCRUB || | 	int is_resilver = ps->pss_func == POOL_SCAN_RESILVER; | ||||||
| 	    ps->pss_func == POOL_SCAN_RESILVER); | 	int is_scrub = ps->pss_func == POOL_SCAN_SCRUB; | ||||||
|  | 	assert(is_resilver || is_scrub); | ||||||
| 
 | 
 | ||||||
| 	/* Scan is finished or canceled. */ | 	/* Scan is finished or canceled. */ | ||||||
| 	if (ps->pss_state == DSS_FINISHED) { | 	if (ps->pss_state == DSS_FINISHED) { | ||||||
| 		secs_to_dhms(end - start, time_buf); | 		secs_to_dhms(end - start, time_buf); | ||||||
| 
 | 
 | ||||||
| 		if (ps->pss_func == POOL_SCAN_SCRUB) { | 		if (is_scrub) { | ||||||
| 			(void) printf(gettext("scrub repaired %s " | 			(void) printf(gettext("scrub repaired %s " | ||||||
| 			    "in %s with %llu errors on %s"), processed_buf, | 			    "in %s with %llu errors on %s"), processed_buf, | ||||||
| 			    time_buf, (u_longlong_t)ps->pss_errors, | 			    time_buf, (u_longlong_t)ps->pss_errors, | ||||||
| 			    ctime(&end)); | 			    ctime(&end)); | ||||||
| 		} else if (ps->pss_func == POOL_SCAN_RESILVER) { | 		} else if (is_resilver) { | ||||||
| 			(void) printf(gettext("resilvered %s " | 			(void) printf(gettext("resilvered %s " | ||||||
| 			    "in %s with %llu errors on %s"), processed_buf, | 			    "in %s with %llu errors on %s"), processed_buf, | ||||||
| 			    time_buf, (u_longlong_t)ps->pss_errors, | 			    time_buf, (u_longlong_t)ps->pss_errors, | ||||||
| @ -7569,10 +7570,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) | |||||||
| 		} | 		} | ||||||
| 		return; | 		return; | ||||||
| 	} else if (ps->pss_state == DSS_CANCELED) { | 	} else if (ps->pss_state == DSS_CANCELED) { | ||||||
| 		if (ps->pss_func == POOL_SCAN_SCRUB) { | 		if (is_scrub) { | ||||||
| 			(void) printf(gettext("scrub canceled on %s"), | 			(void) printf(gettext("scrub canceled on %s"), | ||||||
| 			    ctime(&end)); | 			    ctime(&end)); | ||||||
| 		} else if (ps->pss_func == POOL_SCAN_RESILVER) { | 		} else if (is_resilver) { | ||||||
| 			(void) printf(gettext("resilver canceled on %s"), | 			(void) printf(gettext("resilver canceled on %s"), | ||||||
| 			    ctime(&end)); | 			    ctime(&end)); | ||||||
| 		} | 		} | ||||||
| @ -7582,7 +7583,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) | |||||||
| 	assert(ps->pss_state == DSS_SCANNING); | 	assert(ps->pss_state == DSS_SCANNING); | ||||||
| 
 | 
 | ||||||
| 	/* Scan is in progress. Resilvers can't be paused. */ | 	/* Scan is in progress. Resilvers can't be paused. */ | ||||||
| 	if (ps->pss_func == POOL_SCAN_SCRUB) { | 	if (is_scrub) { | ||||||
| 		if (pause == 0) { | 		if (pause == 0) { | ||||||
| 			(void) printf(gettext("scrub in progress since %s"), | 			(void) printf(gettext("scrub in progress since %s"), | ||||||
| 			    ctime(&start)); | 			    ctime(&start)); | ||||||
| @ -7592,7 +7593,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) | |||||||
| 			(void) printf(gettext("\tscrub started on %s"), | 			(void) printf(gettext("\tscrub started on %s"), | ||||||
| 			    ctime(&start)); | 			    ctime(&start)); | ||||||
| 		} | 		} | ||||||
| 	} else if (ps->pss_func == POOL_SCAN_RESILVER) { | 	} else if (is_resilver) { | ||||||
| 		(void) printf(gettext("resilver in progress since %s"), | 		(void) printf(gettext("resilver in progress since %s"), | ||||||
| 		    ctime(&start)); | 		    ctime(&start)); | ||||||
| 	} | 	} | ||||||
| @ -7634,17 +7635,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) | |||||||
| 		    scanned_buf, issued_buf, total_buf); | 		    scanned_buf, issued_buf, total_buf); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (ps->pss_func == POOL_SCAN_RESILVER) { | 	if (is_resilver) { | ||||||
| 		(void) printf(gettext("\t%s resilvered, %.2f%% done"), | 		(void) printf(gettext("\t%s resilvered, %.2f%% done"), | ||||||
| 		    processed_buf, 100 * fraction_done); | 		    processed_buf, 100 * fraction_done); | ||||||
| 	} else if (ps->pss_func == POOL_SCAN_SCRUB) { | 	} else if (is_scrub) { | ||||||
| 		(void) printf(gettext("\t%s repaired, %.2f%% done"), | 		(void) printf(gettext("\t%s repaired, %.2f%% done"), | ||||||
| 		    processed_buf, 100 * fraction_done); | 		    processed_buf, 100 * fraction_done); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (pause == 0) { | 	if (pause == 0) { | ||||||
|  | 		/*
 | ||||||
|  | 		 * Only provide an estimate iff: | ||||||
|  | 		 * 1) the time remaining is valid, and | ||||||
|  | 		 * 2) the issue rate exceeds 10 MB/s, and | ||||||
|  | 		 * 3) it's either: | ||||||
|  | 		 *    a) a resilver which has started repairs, or | ||||||
|  | 		 *    b) a scrub which has entered the issue phase. | ||||||
|  | 		 */ | ||||||
| 		if (total_secs_left != UINT64_MAX && | 		if (total_secs_left != UINT64_MAX && | ||||||
| 		    issue_rate >= 10 * 1024 * 1024) { | 		    issue_rate >= 10 * 1024 * 1024 && | ||||||
|  | 		    ((is_resilver && ps->pss_processed > 0) || | ||||||
|  | 		    (is_scrub && issued > 0))) { | ||||||
| 			(void) printf(gettext(", %s to go\n"), time_buf); | 			(void) printf(gettext(", %s to go\n"), time_buf); | ||||||
| 		} else { | 		} else { | ||||||
| 			(void) printf(gettext(", no estimated " | 			(void) printf(gettext(", no estimated " | ||||||
|  | |||||||
| @ -1831,6 +1831,13 @@ When we cross this limit from above it is because we are issuing verification I/ | |||||||
| In this case (unless the metadata scan is done) we stop issuing verification I/O | In this case (unless the metadata scan is done) we stop issuing verification I/O | ||||||
| and start scanning metadata again until we get to the hard limit. | and start scanning metadata again until we get to the hard limit. | ||||||
| . | . | ||||||
|  | .It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint | ||||||
|  | When reporting resilver throughput and estimated completion time use the | ||||||
|  | performance observed over roughly the last | ||||||
|  | .Sy zfs_scan_report_txgs | ||||||
|  | TXGs. | ||||||
|  | When set to zero performance is calculated over the time between checkpoints. | ||||||
|  | . | ||||||
| .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int | .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int | ||||||
| Enforce tight memory limits on pool scans when a sequential scan is in progress. | Enforce tight memory limits on pool scans when a sequential scan is in progress. | ||||||
| When disabled, the memory limit may be exceeded by fast disks. | When disabled, the memory limit may be exceeded by fast disks. | ||||||
|  | |||||||
| @ -131,6 +131,15 @@ static uint64_t dsl_scan_count_data_disks(vdev_t *vd); | |||||||
| extern int zfs_vdev_async_write_active_min_dirty_percent; | extern int zfs_vdev_async_write_active_min_dirty_percent; | ||||||
| static int zfs_scan_blkstats = 0; | static int zfs_scan_blkstats = 0; | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * 'zpool status' uses bytes processed per pass to report throughput and | ||||||
|  |  * estimate time remaining.  We define a pass to start when the scanning | ||||||
|  |  * phase completes for a sequential resilver.  Optionally, this value | ||||||
|  |  * may be used to reset the pass statistics every N txgs to provide an | ||||||
|  |  * estimated completion time based on currently observed performance. | ||||||
|  |  */ | ||||||
|  | static uint_t zfs_scan_report_txgs = 0; | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * By default zfs will check to ensure it is not over the hard memory |  * By default zfs will check to ensure it is not over the hard memory | ||||||
|  * limit before each txg. If finer-grained control of this is needed |  * limit before each txg. If finer-grained control of this is needed | ||||||
| @ -584,6 +593,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	spa_scan_stat_init(spa); | 	spa_scan_stat_init(spa); | ||||||
|  | 	vdev_scan_stat_init(spa->spa_root_vdev); | ||||||
|  | 
 | ||||||
| 	return (0); | 	return (0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -742,6 +753,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) | |||||||
| 	scn->scn_last_checkpoint = 0; | 	scn->scn_last_checkpoint = 0; | ||||||
| 	scn->scn_checkpointing = B_FALSE; | 	scn->scn_checkpointing = B_FALSE; | ||||||
| 	spa_scan_stat_init(spa); | 	spa_scan_stat_init(spa); | ||||||
|  | 	vdev_scan_stat_init(spa->spa_root_vdev); | ||||||
| 
 | 
 | ||||||
| 	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { | 	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { | ||||||
| 		scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; | 		scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; | ||||||
| @ -3637,6 +3649,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) | |||||||
| 		return; | 		return; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Disabled by default, set zfs_scan_report_txgs to report | ||||||
|  | 	 * average performance over the last zfs_scan_report_txgs TXGs. | ||||||
|  | 	 */ | ||||||
|  | 	if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 && | ||||||
|  | 	    tx->tx_txg % zfs_scan_report_txgs == 0) { | ||||||
|  | 		scn->scn_issued_before_pass += spa->spa_scan_pass_issued; | ||||||
|  | 		spa_scan_stat_init(spa); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * It is possible to switch from unsorted to sorted at any time, | 	 * It is possible to switch from unsorted to sorted at any time, | ||||||
| 	 * but afterwards the scan will remain sorted unless reloaded from | 	 * but afterwards the scan will remain sorted unless reloaded from | ||||||
| @ -3759,6 +3781,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) | |||||||
| 			if (scn->scn_is_sorted) { | 			if (scn->scn_is_sorted) { | ||||||
| 				scn->scn_checkpointing = B_TRUE; | 				scn->scn_checkpointing = B_TRUE; | ||||||
| 				scn->scn_clearing = B_TRUE; | 				scn->scn_clearing = B_TRUE; | ||||||
|  | 				scn->scn_issued_before_pass += | ||||||
|  | 				    spa->spa_scan_pass_issued; | ||||||
|  | 				spa_scan_stat_init(spa); | ||||||
| 			} | 			} | ||||||
| 			zfs_dbgmsg("scan complete txg %llu", | 			zfs_dbgmsg("scan complete txg %llu", | ||||||
| 			    (longlong_t)tx->tx_txg); | 			    (longlong_t)tx->tx_txg); | ||||||
| @ -4485,6 +4510,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW, | |||||||
| ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW, | ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW, | ||||||
| 	"Tunable to adjust bias towards more filled segments during scans"); | 	"Tunable to adjust bias towards more filled segments during scans"); | ||||||
| 
 | 
 | ||||||
|  | ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW, | ||||||
|  | 	"Tunable to report resilver performance over the last N txgs"); | ||||||
|  | 
 | ||||||
| ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, | ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, | ||||||
| 	"Process all resilvers immediately"); | 	"Process all resilvers immediately"); | ||||||
| /* END CSTYLED */ | /* END CSTYLED */ | ||||||
|  | |||||||
| @ -2564,7 +2564,6 @@ spa_scan_stat_init(spa_t *spa) | |||||||
| 	spa->spa_scan_pass_scrub_spent_paused = 0; | 	spa->spa_scan_pass_scrub_spent_paused = 0; | ||||||
| 	spa->spa_scan_pass_exam = 0; | 	spa->spa_scan_pass_exam = 0; | ||||||
| 	spa->spa_scan_pass_issued = 0; | 	spa->spa_scan_pass_issued = 0; | ||||||
| 	vdev_scan_stat_init(spa->spa_root_vdev); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Brian Behlendorf
						Brian Behlendorf