mirror of https://git.proxmox.com/git/mirror_zfs.git
synced 2025-10-26 18:05:04 +03:00
	Increase default zfs_rebuild_vdev_limit to 64MB
When testing distributed rebuild performance with more capable hardware
it was observed that increasing the zfs_rebuild_vdev_limit to 64MB
reduced the rebuild time by 17%.  Beyond 64MB there was some improvement
(~2%) but it was not significant when weighed against the increased
memory usage.  Memory usage is capped at 1/4 of arc_c_max.

Additionally, vr_bytes_inflight_max has been moved so it's updated
per-metaslab to allow the size to be adjusted while a rebuild is
running.

Reviewed-by: Akash B <akash-b@hpe.com>
Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com>
Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #14428
parent fa28e26e42
commit cdbe1d65c4
man/man4/zfs.4
@@ -1712,7 +1712,7 @@ completes in order to verify the checksums of all blocks which have been
 resilvered.
 This is enabled by default and strongly recommended.
 .
-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong
 Maximum amount of I/O that can be concurrently issued for a sequential
 resilver per leaf device, given in bytes.
 .
module/zfs/vdev_rebuild.c
@@ -34,6 +34,7 @@
 #include <sys/zio.h>
 #include <sys/dmu_tx.h>
 #include <sys/arc.h>
+#include <sys/arc_impl.h>
 #include <sys/zap.h>
 
 /*
@@ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = 1024 * 1024;
  * segment size is also large (zfs_rebuild_max_segment=1M).  This helps keep
  * the queue depth short.
  *
- * 32MB was selected as the default value to achieve good performance with
- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
- * rebuild was unable to saturate all of the drives using smaller values.
- * With a value of 32MB the sequential resilver write rate was measured at
- * 800MB/s sustained while rebuilding to a distributed spare.
+ * 64MB was observed to deliver the best performance and set as the default.
+ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
+ * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
+ * Smaller values were unable to fully saturate the available pool I/O.
  */
-unsigned long zfs_rebuild_vdev_limit = 32 << 20;
+unsigned long zfs_rebuild_vdev_limit = 64 << 20;
 
 /*
  * Automatically start a pool scrub when the last active sequential resilver
@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
 {
 	vdev_t *vd = arg;
 	spa_t *spa = vd->vdev_spa;
+	vdev_t *rvd = spa->spa_root_vdev;
 	int error = 0;
 
 	/*
@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
 	vr->vr_pass_bytes_scanned = 0;
 	vr->vr_pass_bytes_issued = 0;
 
-	vr->vr_bytes_inflight_max = MAX(1ULL << 20,
-	    zfs_rebuild_vdev_limit * vd->vdev_children);
-
 	uint64_t update_est_time = gethrtime();
 	vdev_rebuild_update_bytes_est(vd, 0);
 
@@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg)
 		metaslab_t *msp = vd->vdev_ms[i];
 		vr->vr_scan_msp = msp;
 
+		/*
+		 * Calculate the max number of in-flight bytes for top-level
+		 * vdev scanning operations (minimum 1MB, maximum 1/4 of
+		 * arc_c_max shared by all top-level vdevs).  Limits for the
+		 * issuing phase are done per top-level vdev and are handled
+		 * separately.
+		 */
+		uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
+		vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
+		    zfs_rebuild_vdev_limit * vd->vdev_children));
+
 		/*
 		 * Removal of vdevs from the vdev tree may eliminate the need
 		 * for the rebuild, in which case it should be canceled.  The
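To make the new clamping rule concrete, here is a standalone sketch of the arithmetic introduced above; arc_c_max and the vdev counts are made-up example values, and MIN/MAX are redefined locally rather than taken from the ZFS headers:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))
#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	uint64_t zfs_rebuild_vdev_limit = 64ULL << 20;	/* new 64MB default */
	uint64_t arc_c_max = 16ULL << 30;	/* example: 16GiB ARC maximum */
	uint64_t root_children = 4;		/* example: top-level vdevs */
	uint64_t leaf_children = 10;		/* example: leaves in this vdev */

	/* 1/4 of arc_c_max, shared evenly by all top-level vdevs. */
	uint64_t limit = (arc_c_max / 4) / MAX(root_children, 1);

	/* Scale by leaf count, floor at 1MB, then apply the ARC-based cap. */
	uint64_t inflight = MIN(limit, MAX(1ULL << 20,
	    zfs_rebuild_vdev_limit * leaf_children));

	(void) printf("per top-level vdev cap: %" PRIu64 " bytes\n", limit);
	(void) printf("vr_bytes_inflight_max:  %" PRIu64 " bytes\n", inflight);
	return (0);
}

With these inputs each top-level vdev is capped at 1GiB (16GiB / 4 / 4 vdevs), and the 640MB requested for 10 leaves at 64MB apiece fits under that cap, so vr_bytes_inflight_max comes out to 671088640 bytes.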