mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Several B-tree optimizations
- Introduce a first-element offset within a leaf. This reduces the average memmove() size by ~50% when adding/removing elements: if the added/removed element is in the first half of the leaf, we can shift the elements before it and adjust bth_first instead of moving the larger number of elements after it. - Use memcpy() instead of memmove() when we know there is no overlap. - Switch from uint64_t to uint32_t. It does not limit anything, but 32-bit arches should appreciate it greatly in hot paths. - Store leaf capacity in struct btree to avoid 64-bit divisions. - Adjust zfs_btree_insert_into_leaf() to always result in balanced leaves after splitting, no matter where the new element was inserted. Not that we care about it much, but it should also allow B-trees with as few as two elements per leaf instead of the previous minimum of 4. When scrubbing a pool of 12 SSDs storing 1.5TB of 4KB zvol blocks, this reduces the amount of time spent in memmove() inside the scan thread from 13.7% to 5.7% and the total scrub time by ~15 seconds out of 9 minutes. It should also reduce spacemap load time, but I haven't measured it. Reviewed-by: Paul Dagnelie <pcd@delphix.com> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored-By: iXsystems, Inc. Closes #13582
This commit is contained in:
		
							parent
							
								
									a861aa2b9e
								
							
						
					
					
						commit
						dc91a6a660
					
				| @ -72,7 +72,11 @@ extern kmem_cache_t *zfs_btree_leaf_cache; | ||||
| 
 | ||||
| typedef struct zfs_btree_hdr { | ||||
| 	struct zfs_btree_core	*bth_parent; | ||||
| 	boolean_t		bth_core; | ||||
| 	/*
 | ||||
| 	 * Set to -1 to indicate core nodes. Other values represent first | ||||
| 	 * valid element offset for leaf nodes. | ||||
| 	 */ | ||||
| 	uint32_t		bth_first; | ||||
| 	/*
 | ||||
| 	 * For both leaf and core nodes, represents the number of elements in | ||||
| 	 * the node. For core nodes, they will have bth_count + 1 children. | ||||
| @ -91,9 +95,12 @@ typedef struct zfs_btree_leaf { | ||||
| 	uint8_t		btl_elems[]; | ||||
| } zfs_btree_leaf_t; | ||||
| 
 | ||||
| #define	BTREE_LEAF_ESIZE	(BTREE_LEAF_SIZE - \ | ||||
|     offsetof(zfs_btree_leaf_t, btl_elems)) | ||||
| 
 | ||||
| typedef struct zfs_btree_index { | ||||
| 	zfs_btree_hdr_t	*bti_node; | ||||
| 	uint64_t	bti_offset; | ||||
| 	uint32_t	bti_offset; | ||||
| 	/*
 | ||||
| 	 * True if the location is before the listed offset, false if it's at | ||||
| 	 * the listed offset. | ||||
| @ -105,6 +112,7 @@ typedef struct btree { | ||||
| 	zfs_btree_hdr_t		*bt_root; | ||||
| 	int64_t			bt_height; | ||||
| 	size_t			bt_elem_size; | ||||
| 	uint32_t		bt_leaf_cap; | ||||
| 	uint64_t		bt_num_elems; | ||||
| 	uint64_t		bt_num_nodes; | ||||
| 	zfs_btree_leaf_t	*bt_bulk; // non-null if bulk loading
 | ||||
|  | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Alexander Motin
						Alexander Motin