mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Increase default zfs_rebuild_vdev_limit to 64MB
When testing distributed rebuild performance with more capable hardware it was observed that increasing the zfs_rebuild_vdev_limit to 64MB reduced the rebuild time by 17%. Beyond 64MB there was some improvement (~2%) but it was not significant when weighed against the increased memory usage. Memory usage is capped at 1/4 of arc_c_max. Additionally, vr_bytes_inflight_max has been moved so it's updated per-metaslab to allow the size to be adjusted while a rebuild is running. Reviewed-by: Akash B <akash-b@hpe.com> Reviewed-by: Tony Nguyen <tony.nguyen@delphix.com> Reviewed-by: Alexander Motin <mav@FreeBSD.org> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #14428
This commit is contained in:
		
							parent
							
								
									fa28e26e42
								
							
						
					
					
						commit
						cdbe1d65c4
					
				| @ -1712,7 +1712,7 @@ completes in order to verify the checksums of all blocks which have been | ||||
| resilvered. | ||||
| This is enabled by default and strongly recommended. | ||||
| . | ||||
| .It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong | ||||
| .It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong | ||||
| Maximum amount of I/O that can be concurrently issued for a sequential | ||||
| resilver per leaf device, given in bytes. | ||||
| . | ||||
|  | ||||
| @ -34,6 +34,7 @@ | ||||
| #include <sys/zio.h> | ||||
| #include <sys/dmu_tx.h> | ||||
| #include <sys/arc.h> | ||||
| #include <sys/arc_impl.h> | ||||
| #include <sys/zap.h> | ||||
| 
 | ||||
| /*
 | ||||
| @ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = 1024 * 1024; | ||||
|  * segment size is also large (zfs_rebuild_max_segment=1M).  This helps keep | ||||
|  * the queue depth short. | ||||
|  * | ||||
|  * 32MB was selected as the default value to achieve good performance with | ||||
|  * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential | ||||
|  * rebuild was unable to saturate all of the drives using smaller values. | ||||
|  * With a value of 32MB the sequential resilver write rate was measured at | ||||
|  * 800MB/s sustained while rebuilding to a distributed spare. | ||||
|  * 64MB was observed to deliver the best performance and set as the default. | ||||
|  * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c) | ||||
|  * and a rebuild rate of 1.2GB/s was measured to the distributed spare. | ||||
|  * Smaller values were unable to fully saturate the available pool I/O. | ||||
|  */ | ||||
| unsigned long zfs_rebuild_vdev_limit = 32 << 20; | ||||
| unsigned long zfs_rebuild_vdev_limit = 64 << 20; | ||||
| 
 | ||||
| /*
 | ||||
|  * Automatically start a pool scrub when the last active sequential resilver | ||||
| @ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg) | ||||
| { | ||||
| 	vdev_t *vd = arg; | ||||
| 	spa_t *spa = vd->vdev_spa; | ||||
| 	vdev_t *rvd = spa->spa_root_vdev; | ||||
| 	int error = 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg) | ||||
| 	vr->vr_pass_bytes_scanned = 0; | ||||
| 	vr->vr_pass_bytes_issued = 0; | ||||
| 
 | ||||
| 	vr->vr_bytes_inflight_max = MAX(1ULL << 20, | ||||
| 	    zfs_rebuild_vdev_limit * vd->vdev_children); | ||||
| 
 | ||||
| 	uint64_t update_est_time = gethrtime(); | ||||
| 	vdev_rebuild_update_bytes_est(vd, 0); | ||||
| 
 | ||||
| @ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg) | ||||
| 		metaslab_t *msp = vd->vdev_ms[i]; | ||||
| 		vr->vr_scan_msp = msp; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Calculate the max number of in-flight bytes for top-level | ||||
| 		 * vdev scanning operations (minimum 1MB, maximum 1/4 of | ||||
| 		 * arc_c_max shared by all top-level vdevs).  Limits for the | ||||
| 		 * issuing phase are done per top-level vdev and are handled | ||||
| 		 * separately. | ||||
| 		 */ | ||||
| 		uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1); | ||||
| 		vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, | ||||
| 		    zfs_rebuild_vdev_limit * vd->vdev_children)); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Removal of vdevs from the vdev tree may eliminate the need | ||||
| 		 * for the rebuild, in which case it should be canceled.  The | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Brian Behlendorf
						Brian Behlendorf