mirror of https://git.proxmox.com/git/mirror_zfs.git
synced 2025-10-26 18:05:04 +03:00
Combine OS-independent ABD Code into Common Source File

Reorganize the ABD code base so that OS-independent ABD code is placed
into a common abd.c file. OS-dependent ABD code is left in each OS's ABD
source files, and these source files have been renamed to abd_os.c.

The OS-independent ABD code is now under:
module/zfs/abd.c
With the OS-dependent code in:
module/os/linux/zfs/abd_os.c
module/os/freebsd/zfs/abd_os.c

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
Closes #10293
parent bd95f00d4b
commit fc551d7efb
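The heart of the change is a policy/mechanism split: the common module/zfs/abd.c keeps the OS-independent allocation and iteration logic and calls into a small set of per-OS hooks, declared in the new include/sys/abd_impl.h and implemented by each OS's abd_os.c. Below is a minimal, hypothetical userland sketch of that dispatch pattern; the hook name mirrors abd_impl.h, but the stub bodies and thresholds are invented for illustration.

/*
 * sketch.c -- hypothetical illustration of the common/per-OS split.
 * The hook name mirrors abd_impl.h; the bodies are invented stubs.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Per-OS hook: the real versions live in each OS's abd_os.c. */
static bool
abd_size_alloc_linear(size_t size)
{
	/* FreeBSD answers size <= zfs_abd_chunk_size (4096 by default). */
	return (size <= 4096);
}

/* Common policy code: the real version lives in module/zfs/abd.c. */
static void *
abd_alloc(size_t size)
{
	printf("%zu bytes -> %s ABD\n", size,
	    abd_size_alloc_linear(size) ? "linear" : "scattered");
	return (malloc(size));	/* stand-in for the real chunk allocator */
}

int
main(void)
{
	free(abd_alloc(512));		/* small: linear */
	free(abd_alloc(131072));	/* large: scattered */
	return (0);
}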
@@ -2,6 +2,7 @@ SUBDIRS = fm fs crypto lua sysevent
 
 COMMON_H = \
 	$(top_srcdir)/include/sys/abd.h \
+	$(top_srcdir)/include/sys/abd_impl.h \
 	$(top_srcdir)/include/sys/aggsum.h \
 	$(top_srcdir)/include/sys/arc.h \
 	$(top_srcdir)/include/sys/arc_impl.h \
@@ -35,56 +35,14 @@
 extern "C" {
 #endif
 
-typedef enum abd_flags {
-	ABD_FLAG_LINEAR	= 1 << 0,	/* is buffer linear (or scattered)? */
-	ABD_FLAG_OWNER	= 1 << 1,	/* does it own its data buffers? */
-	ABD_FLAG_META	= 1 << 2,	/* does this represent FS metadata? */
-	ABD_FLAG_MULTI_ZONE  = 1 << 3,	/* pages split over memory zones */
-	ABD_FLAG_MULTI_CHUNK = 1 << 4,	/* pages split over multiple chunks */
-	ABD_FLAG_LINEAR_PAGE = 1 << 5,	/* linear but allocd from page */
-} abd_flags_t;
-
-typedef struct abd {
-	abd_flags_t	abd_flags;
-	uint_t		abd_size;	/* excludes scattered abd_offset */
-	struct abd	*abd_parent;
-	zfs_refcount_t	abd_children;
-	union {
-		struct abd_scatter {
-			uint_t		abd_offset;
-#if defined(__FreeBSD__) && defined(_KERNEL)
-			uint_t  abd_chunk_size;
-			void    *abd_chunks[];
-#else
-			uint_t		abd_nents;
-			struct scatterlist *abd_sgl;
-#endif
-		} abd_scatter;
-		struct abd_linear {
-			void		*abd_buf;
-			struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
-		} abd_linear;
-	} abd_u;
-} abd_t;
+struct abd; /* forward declaration */
+typedef struct abd abd_t;
 
 typedef int abd_iter_func_t(void *buf, size_t len, void *private);
 typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);
 
 extern int zfs_abd_scatter_enabled;
 
-static inline boolean_t
-abd_is_linear(abd_t *abd)
-{
-	return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
-}
-
-static inline boolean_t
-abd_is_linear_page(abd_t *abd)
-{
-	return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ?
-	    B_TRUE : B_FALSE);
-}
-
 /*
  * Allocations and deallocations
  */
@@ -124,12 +82,8 @@ void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
 int abd_cmp(abd_t *, abd_t *);
 int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
 void abd_zero_off(abd_t *, size_t, size_t);
-
-#if defined(_KERNEL)
-unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
-		size_t);
-unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
-#endif
+void abd_verify(abd_t *);
+uint_t abd_get_size(abd_t *);
 
 void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
 	ssize_t csize, ssize_t dsize, const unsigned parity,
@@ -174,13 +128,29 @@ abd_zero(abd_t *abd, size_t size)
 	abd_zero_off(abd, 0, size);
 }
 
+/*
+ * ABD type check functions
+ */
+boolean_t abd_is_linear(abd_t *);
+boolean_t abd_is_linear_page(abd_t *);
+
 /*
  * Module lifecycle
+ * Defined in each specific OS's abd_os.c
  */
 
 void abd_init(void);
 void abd_fini(void);
 
+/*
+ * Linux ABD bio functions
+ */
+#if defined(__linux__) && defined(_KERNEL)
+unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
+    size_t);
+unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
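Note what this hunk does to the public header: abd_t becomes an opaque handle (forward declaration only), the full struct moves to abd_impl.h, and the inline type checks become out-of-line functions. A small, self-contained sketch of that opaque-handle idiom follows; it reuses names from the diff, but the single-file layout and simplified types are invented for illustration.

/* opaque.c -- hypothetical single-file demo of the opaque-handle idiom. */
#include <stdbool.h>
#include <stdio.h>

/* Public view (as in abd.h after this commit): declaration only. */
struct abd;
typedef struct abd abd_t;
bool abd_is_linear(abd_t *);

/* Implementation view (as in abd_impl.h): the full layout. */
#define	ABD_FLAG_LINEAR	(1 << 0)
struct abd {
	unsigned abd_flags;
};

bool
abd_is_linear(abd_t *abd)
{
	return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0);
}

int
main(void)
{
	abd_t a = { .abd_flags = ABD_FLAG_LINEAR };
	printf("linear: %d\n", abd_is_linear(&a));	/* prints 1 */
	return (0);
}

Consumers that only hold an abd_t * are unaffected; only code that includes abd_impl.h may touch the layout, which is what lets each OS keep its own scatter representation.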
include/sys/abd_impl.h (new file, 126 lines)

@@ -0,0 +1,126 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2016, 2019 by Delphix. All rights reserved.
+ */
+
+#ifndef _ABD_IMPL_H
+#define	_ABD_IMPL_H
+
+#include <sys/abd.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum abd_flags {
+	ABD_FLAG_LINEAR		= 1 << 0, /* is buffer linear (or scattered)? */
+	ABD_FLAG_OWNER		= 1 << 1, /* does it own its data buffers? */
+	ABD_FLAG_META		= 1 << 2, /* does this represent FS metadata? */
+	ABD_FLAG_MULTI_ZONE	= 1 << 3, /* pages split over memory zones */
+	ABD_FLAG_MULTI_CHUNK	= 1 << 4, /* pages split over multiple chunks */
+	ABD_FLAG_LINEAR_PAGE	= 1 << 5, /* linear but allocd from page */
+} abd_flags_t;
+
+typedef enum abd_stats_op {
+	ABDSTAT_INCR, /* Increase abdstat values */
+	ABDSTAT_DECR  /* Decrease abdstat values */
+} abd_stats_op_t;
+
+struct abd {
+	abd_flags_t	abd_flags;
+	uint_t		abd_size;	/* excludes scattered abd_offset */
+	struct abd	*abd_parent;
+	zfs_refcount_t	abd_children;
+	union {
+		struct abd_scatter {
+			uint_t		abd_offset;
+#if defined(__FreeBSD__) && defined(_KERNEL)
+			uint_t  abd_chunk_size;
+			void    *abd_chunks[];
+#else
+			uint_t		abd_nents;
+			struct scatterlist *abd_sgl;
+#endif
+		} abd_scatter;
+		struct abd_linear {
+			void		*abd_buf;
+			struct scatterlist *abd_sgl; /* for LINEAR_PAGE */
+		} abd_linear;
+	} abd_u;
+};
+
+struct scatterlist; /* forward declaration */
+
+struct abd_iter {
+	/* public interface */
+	void		*iter_mapaddr;	/* addr corresponding to iter_pos */
+	size_t		iter_mapsize;	/* length of data valid at mapaddr */
+
+	/* private */
+	abd_t		*iter_abd;	/* ABD being iterated through */
+	size_t		iter_pos;
+	size_t		iter_offset;	/* offset in current sg/abd_buf, */
+					/* abd_offset included */
+	struct scatterlist *iter_sg;	/* current sg */
+};
+
+/*
+ * OS specific functions
+ */
+
+abd_t *abd_alloc_struct(size_t);
+abd_t *abd_get_offset_scatter(abd_t *, size_t);
+void abd_free_struct(abd_t *);
+void abd_alloc_chunks(abd_t *, size_t);
+void abd_free_chunks(abd_t *);
+boolean_t abd_size_alloc_linear(size_t);
+void abd_update_scatter_stats(abd_t *, abd_stats_op_t);
+void abd_update_linear_stats(abd_t *, abd_stats_op_t);
+void abd_verify_scatter(abd_t *);
+void abd_free_linear_page(abd_t *);
+void abd_enter_critical(unsigned long);
+void abd_exit_critical(unsigned long);
+/* OS specific abd_iter functions */
+void abd_iter_init(struct abd_iter *, abd_t *);
+boolean_t abd_iter_at_end(struct abd_iter *);
+void abd_iter_advance(struct abd_iter *, size_t);
+void abd_iter_map(struct abd_iter *);
+void abd_iter_unmap(struct abd_iter *);
+
+/*
+ * Helper macros
+ */
+#define	ABDSTAT(stat)		(abd_stats.stat.value.ui64)
+#define	ABDSTAT_INCR(stat, val) \
+	atomic_add_64(&abd_stats.stat.value.ui64, (val))
+#define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1)
+#define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1)
+
+#define	ABD_SCATTER(abd)	(abd->abd_u.abd_scatter)
+#define	ABD_LINEAR_BUF(abd)	(abd->abd_u.abd_linear.abd_buf)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* _ABD_IMPL_H */
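The five abd_iter hooks above are the whole contract between the common iteration code and each OS. The sketch below shows how they compose, closely following the iteration loop in the common module/zfs/abd.c (abd_iterate_func()); the function name example_iterate is hypothetical, and the snippet only compiles inside the ZFS build environment where abd_impl.h and MIN are available.

/* Hypothetical restatement of the common iteration loop. */
#include <sys/abd_impl.h>

static int
example_iterate(abd_t *abd, size_t size, abd_iter_func_t *func, void *priv)
{
	struct abd_iter aiter;
	int ret = 0;

	abd_iter_init(&aiter, abd);		/* OS-specific setup */
	while (size > 0) {
		abd_iter_map(&aiter);		/* map the current chunk */
		size_t len = MIN(aiter.iter_mapsize, size);
		ret = func(aiter.iter_mapaddr, len, priv);
		abd_iter_unmap(&aiter);		/* unmap before advancing */
		if (ret != 0)
			break;
		size -= len;
		abd_iter_advance(&aiter, len);	/* legal: no chunk mapped */
	}
	return (ret);
}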
@@ -39,6 +39,7 @@ KERNEL_C = \
 	zpool_prop.c \
 	zprop_common.c \
 	abd.c \
+	abd_os.c \
 	aggsum.c \
 	arc.c \
 	arc_os.c \
@@ -127,7 +127,7 @@ SRCS+= spl_atomic.c
 .endif
 
 #os/freebsd/zfs
-SRCS+=	abd.c \
+SRCS+=	abd_os.c \
 	crypto_os.c \
 	dmu_os.c \
 	hkdf.c \
@@ -169,7 +169,8 @@ SRCS+=	zfeature_common.c \
 	zprop_common.c
 
 #zfs
-SRCS+=	aggsum.c \
+SRCS+=	abd.c \
+	aggsum.c \
 	arc.c \
 	arc_os.c \
 	blkptr.c \
module/os/freebsd/zfs/abd_os.c (new file, 433 lines)

@@ -0,0 +1,433 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+/*
+ * See abd.c for a general overview of the arc buffered data (ABD).
+ *
+ * Using a large proportion of scattered ABDs decreases ARC fragmentation since
+ * when we are at the limit of allocatable space, using equal-size chunks will
+ * allow us to quickly reclaim enough space for a new large allocation (assuming
+ * it is also scattered).
+ *
+ * ABDs are allocated scattered by default unless the caller uses
+ * abd_alloc_linear() or zfs_abd_scatter_enabled is disabled.
+ */
+
+#include <sys/abd_impl.h>
+#include <sys/param.h>
+#include <sys/zio.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_znode.h>
+
+typedef struct abd_stats {
+	kstat_named_t abdstat_struct_size;
+	kstat_named_t abdstat_scatter_cnt;
+	kstat_named_t abdstat_scatter_data_size;
+	kstat_named_t abdstat_scatter_chunk_waste;
+	kstat_named_t abdstat_linear_cnt;
+	kstat_named_t abdstat_linear_data_size;
+} abd_stats_t;
+
+static abd_stats_t abd_stats = {
+	/* Amount of memory occupied by all of the abd_t struct allocations */
+	{ "struct_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The number of scatter ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset()).
+	 */
+	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
+	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The amount of space wasted at the end of the last chunk across all
+	 * scatter ABDs tracked by scatter_cnt.
+	 */
+	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
+	/*
+	 * The number of linear ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
+	 * ABD takes ownership of its buf then it will become tracked.
+	 */
+	{ "linear_cnt",				KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
+	{ "linear_data_size",			KSTAT_DATA_UINT64 },
+};
+
+/*
+ * The size of the chunks ABD allocates. Because the sizes allocated from the
+ * kmem_cache can't change, this tunable can only be modified at boot. Changing
+ * it at runtime would cause ABD iteration to work incorrectly for ABDs which
+ * were allocated with the old size, so a safeguard has been put in place which
+ * will cause the machine to panic if you change it and try to access the data
+ * within a scattered ABD.
+ */
+size_t zfs_abd_chunk_size = 4096;
+
+#if defined(_KERNEL)
+SYSCTL_DECL(_vfs_zfs);
+
+SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN,
+	&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers");
+SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN,
+	&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates");
+#endif
+
+kmem_cache_t *abd_chunk_cache;
+static kstat_t *abd_ksp;
+
+static void
+abd_free_chunk(void *c)
+{
+	kmem_cache_free(abd_chunk_cache, c);
+}
+
+static size_t
+abd_chunkcnt_for_bytes(size_t size)
+{
+	return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size);
+}
+
+static inline size_t
+abd_scatter_chunkcnt(abd_t *abd)
+{
+	ASSERT(!abd_is_linear(abd));
+	return (abd_chunkcnt_for_bytes(
+	    ABD_SCATTER(abd).abd_offset + abd->abd_size));
+}
+
+boolean_t
+abd_size_alloc_linear(size_t size)
+{
+	return (size <= zfs_abd_chunk_size ? B_TRUE : B_FALSE);
+}
+
+void
+abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op)
+{
+	size_t n = abd_scatter_chunkcnt(abd);
+	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
+	if (op == ABDSTAT_INCR) {
+		ABDSTAT_BUMP(abdstat_scatter_cnt);
+		ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size);
+		ABDSTAT_INCR(abdstat_scatter_chunk_waste,
+		    n * zfs_abd_chunk_size - abd->abd_size);
+	} else {
+		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
+		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
+		ABDSTAT_INCR(abdstat_scatter_chunk_waste,
+		    abd->abd_size - n * zfs_abd_chunk_size);
+	}
+}
+
+void
+abd_update_linear_stats(abd_t *abd, abd_stats_op_t op)
+{
+	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR);
+	if (op == ABDSTAT_INCR) {
+		ABDSTAT_BUMP(abdstat_linear_cnt);
+		ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
+	} else {
+		ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
+		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+	}
+}
+
+void
+abd_verify_scatter(abd_t *abd)
+{
+	/*
+	 * There are no linear-page scatter ABDs in FreeBSD, so it is an
+	 * error if the ABD has been marked as a linear page.
+	 */
+	VERIFY(!abd_is_linear_page(abd));
+	ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
+	    zfs_abd_chunk_size);
+	size_t n = abd_scatter_chunkcnt(abd);
+	for (int i = 0; i < n; i++) {
+		ASSERT3P(
+		    ABD_SCATTER(abd).abd_chunks[i], !=, NULL);
+	}
+}
+
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	size_t n = abd_chunkcnt_for_bytes(size);
+	for (int i = 0; i < n; i++) {
+		void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE);
+		ASSERT3P(c, !=, NULL);
+		ABD_SCATTER(abd).abd_chunks[i] = c;
+	}
+	ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
+}
+
+void
+abd_free_chunks(abd_t *abd)
+{
+	size_t n = abd_scatter_chunkcnt(abd);
+	for (int i = 0; i < n; i++) {
+		abd_free_chunk(ABD_SCATTER(abd).abd_chunks[i]);
+	}
+}
+
+abd_t *
+abd_alloc_struct(size_t size)
+{
+	size_t chunkcnt = abd_chunkcnt_for_bytes(size);
+	size_t abd_size = offsetof(abd_t,
+	    abd_u.abd_scatter.abd_chunks[chunkcnt]);
+	abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
+	ASSERT3P(abd, !=, NULL);
+	ABDSTAT_INCR(abdstat_struct_size, abd_size);
+
+	return (abd);
+}
+
+void
+abd_free_struct(abd_t *abd)
+{
+	size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd);
+	int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]);
+	kmem_free(abd, size);
+	ABDSTAT_INCR(abdstat_struct_size, -size);
+}
+
+void
+abd_init(void)
+{
+	abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
+	    NULL, NULL, NULL, NULL, 0, KMC_NOTOUCH | KMC_NODEBUG);
+
+	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
+	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
+	if (abd_ksp != NULL) {
+		abd_ksp->ks_data = &abd_stats;
+		kstat_install(abd_ksp);
+	}
+}
+
+void
+abd_fini(void)
+{
+	if (abd_ksp != NULL) {
+		kstat_delete(abd_ksp);
+		abd_ksp = NULL;
+	}
+
+	kmem_cache_destroy(abd_chunk_cache);
+	abd_chunk_cache = NULL;
+}
+
+void
+abd_free_linear_page(abd_t *abd)
+{
+	/*
+	 * FreeBSD does not have linear-page scatter ABDs, so reaching
+	 * this function is always an error.
+	 */
+	VERIFY(0);
+}
+
+/*
+ * If we're going to use this ABD for doing I/O using the block layer, the
+ * consumer of the ABD data doesn't care if it's scattered or not, and we don't
+ * plan to store this ABD in memory for a long period of time, we should
+ * allocate the ABD type that requires the least data copying to do the I/O.
+ *
+ * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os
+ * using a scatter/gather list we should switch to that and replace this call
+ * with vanilla abd_alloc().
+ */
+abd_t *
+abd_alloc_for_io(size_t size, boolean_t is_metadata)
+{
+	return (abd_alloc_linear(size, is_metadata));
+}
+
+/*
+ * This is just a helper function to abd_get_offset_scatter() to alloc a
+ * scatter ABD using the calculated chunkcnt based on the offset within the
+ * parent ABD.
+ */
+static abd_t *
+abd_alloc_scatter_offset_chunkcnt(size_t chunkcnt)
+{
+	size_t abd_size = offsetof(abd_t,
+	    abd_u.abd_scatter.abd_chunks[chunkcnt]);
+	abd_t *abd = kmem_alloc(abd_size, KM_PUSHPAGE);
+	ASSERT3P(abd, !=, NULL);
+	ABDSTAT_INCR(abdstat_struct_size, abd_size);
+
+	return (abd);
+}
+
+abd_t *
+abd_get_offset_scatter(abd_t *sabd, size_t off)
+{
+	abd_t *abd = NULL;
+
+	abd_verify(sabd);
+	ASSERT3U(off, <=, sabd->abd_size);
+
+	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off;
+	size_t chunkcnt = abd_scatter_chunkcnt(sabd) -
+	    (new_offset / zfs_abd_chunk_size);
+
+	abd = abd_alloc_scatter_offset_chunkcnt(chunkcnt);
+
+	/*
+	 * Even if this buf is filesystem metadata, we only track that
+	 * if we own the underlying data buffer, which is not true in
+	 * this case. Therefore, we don't ever use ABD_FLAG_META here.
+	 */
+	abd->abd_flags = 0;
+
+	ABD_SCATTER(abd).abd_offset = new_offset % zfs_abd_chunk_size;
+	ABD_SCATTER(abd).abd_chunk_size = zfs_abd_chunk_size;
+
+	/* Copy the scatterlist starting at the correct offset */
+	(void) memcpy(&ABD_SCATTER(abd).abd_chunks,
+	    &ABD_SCATTER(sabd).abd_chunks[new_offset /
+	    zfs_abd_chunk_size],
+	    chunkcnt * sizeof (void *));
+
+	return (abd);
+}
+
+static inline size_t
+abd_iter_scatter_chunk_offset(struct abd_iter *aiter)
+{
+	ASSERT(!abd_is_linear(aiter->iter_abd));
+	return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
+	    aiter->iter_pos) % zfs_abd_chunk_size);
+}
+
+static inline size_t
+abd_iter_scatter_chunk_index(struct abd_iter *aiter)
+{
+	ASSERT(!abd_is_linear(aiter->iter_abd));
+	return ((ABD_SCATTER(aiter->iter_abd).abd_offset +
+	    aiter->iter_pos) / zfs_abd_chunk_size);
+}
+
+/*
+ * Initialize the abd_iter.
+ */
+void
+abd_iter_init(struct abd_iter *aiter, abd_t *abd)
+{
+	abd_verify(abd);
+	aiter->iter_abd = abd;
+	aiter->iter_pos = 0;
+	aiter->iter_mapaddr = NULL;
+	aiter->iter_mapsize = 0;
+}
+
+/*
+ * This is just a helper function to see if we have exhausted the
+ * abd_iter and reached the end.
+ */
+boolean_t
+abd_iter_at_end(struct abd_iter *aiter)
+{
+	return (aiter->iter_pos == aiter->iter_abd->abd_size);
+}
+
+/*
+ * Advance the iterator by a certain amount. Cannot be called when a chunk is
+ * in use. This can be safely called when the aiter has already exhausted, in
+ * which case this does nothing.
+ */
+void
+abd_iter_advance(struct abd_iter *aiter, size_t amount)
+{
+	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0(aiter->iter_mapsize);
+
+	/* There's nothing left to advance to, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	aiter->iter_pos += amount;
+}
+
+/*
+ * Map the current chunk into aiter. This can be safely called when the aiter
+ * has already exhausted, in which case this does nothing.
+ */
+void
+abd_iter_map(struct abd_iter *aiter)
+{
+	void *paddr;
+	size_t offset = 0;
+
+	ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+	ASSERT0(aiter->iter_mapsize);
+
+	/* Panic if someone has changed zfs_abd_chunk_size */
+	IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size ==
+	    ABD_SCATTER(aiter->iter_abd).abd_chunk_size);
+
+	/* There's nothing left to iterate over, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	if (abd_is_linear(aiter->iter_abd)) {
+		offset = aiter->iter_pos;
+		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
+		paddr = ABD_LINEAR_BUF(aiter->iter_abd);
+	} else {
+		size_t index = abd_iter_scatter_chunk_index(aiter);
+		offset = abd_iter_scatter_chunk_offset(aiter);
+		aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset,
+		    aiter->iter_abd->abd_size - aiter->iter_pos);
+		paddr = ABD_SCATTER(aiter->iter_abd).abd_chunks[index];
+	}
+	aiter->iter_mapaddr = (char *)paddr + offset;
+}
+
+/*
+ * Unmap the current chunk from aiter. This can be safely called when the aiter
+ * has already exhausted, in which case this does nothing.
+ */
+void
+abd_iter_unmap(struct abd_iter *aiter)
+{
+	/* There's nothing left to unmap, so do nothing */
+	if (abd_iter_at_end(aiter))
+		return;
+
+	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
+	ASSERT3U(aiter->iter_mapsize, >, 0);
+
+	aiter->iter_mapaddr = NULL;
+	aiter->iter_mapsize = 0;
+}
+
+void
+abd_enter_critical(unsigned long flags)
+{
+	critical_enter();
+}
+
+void
+abd_exit_critical(unsigned long flags)
+{
+	critical_exit();
+}
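To make the chunk arithmetic in abd_get_offset_scatter() concrete, here is a worked example with the default zfs_abd_chunk_size of 4096 (the values are illustrative):

parent: abd_offset = 512, abd_size = 16384
        chunkcnt   = P2ROUNDUP(512 + 16384, 4096) / 4096 = 20480 / 4096 = 5

child at off = 6000:
        new_offset    = 512 + 6000 = 6512
        chunks copied = 5 - (6512 / 4096) = 4, starting at parent chunk 1
        child offset  = 6512 % 4096 = 2416 bytes into its first chunk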
@@ -7,7 +7,7 @@ ccflags-$(CONFIG_SPARC64) += -Wno-unused-value
 
 ccflags-y += -I@abs_top_srcdir@/module/os/linux/zfs
 
-$(MODULE)-objs += ../os/linux/zfs/abd.o
+$(MODULE)-objs += ../os/linux/zfs/abd_os.o
 $(MODULE)-objs += ../os/linux/zfs/arc_os.o
 $(MODULE)-objs += ../os/linux/zfs/mmp_os.o
 $(MODULE)-objs += ../os/linux/zfs/policy.o
(File diff suppressed because it is too large)
module/os/linux/zfs/abd_os.c (new file, 891 lines)

@@ -0,0 +1,891 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2019 by Delphix. All rights reserved.
+ */
+
+/*
+ * See abd.c for a general overview of the arc buffered data (ABD).
+ *
+ * Linear buffers act exactly like normal buffers and are always mapped into the
+ * kernel's virtual memory space, while scattered ABD data chunks are allocated
+ * as physical pages and then mapped in only while they are actually being
+ * accessed through one of the abd_* library functions. Using scattered ABDs
+ * provides several benefits:
+ *
+ *  (1) They avoid use of kmem_*, preventing performance problems where running
+ *      kmem_reap on very large memory systems never finishes and causes
+ *      constant TLB shootdowns.
+ *
+ *  (2) Fragmentation is less of an issue since when we are at the limit of
+ *      allocatable space, we won't have to search around for a long free
+ *      hole in the VA space for large ARC allocations. Each chunk is mapped in
+ *      individually, so even if we are using HIGHMEM (see next point) we
+ *      wouldn't need to worry about finding a contiguous address range.
+ *
+ *  (3) If we are not using HIGHMEM, then all physical memory is always
+ *      mapped into the kernel's address space, so we also avoid the map /
+ *      unmap costs on each ABD access.
+ *
+ * If we are not using HIGHMEM, scattered buffers which have only one chunk
+ * can be treated as linear buffers, because they are contiguous in the
+ * kernel's virtual address space. See abd_alloc_chunks() for details.
+ */
+
+#include <sys/abd_impl.h>
+#include <sys/param.h>
+#include <sys/zio.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_znode.h>
+#ifdef _KERNEL
+#include <linux/kmap_compat.h>
+#include <linux/scatterlist.h>
+#else
+#define	MAX_ORDER	1
+#endif
+
+typedef struct abd_stats {
+	kstat_named_t abdstat_struct_size;
+	kstat_named_t abdstat_linear_cnt;
+	kstat_named_t abdstat_linear_data_size;
+	kstat_named_t abdstat_scatter_cnt;
+	kstat_named_t abdstat_scatter_data_size;
+	kstat_named_t abdstat_scatter_chunk_waste;
+	kstat_named_t abdstat_scatter_orders[MAX_ORDER];
+	kstat_named_t abdstat_scatter_page_multi_chunk;
+	kstat_named_t abdstat_scatter_page_multi_zone;
+	kstat_named_t abdstat_scatter_page_alloc_retry;
+	kstat_named_t abdstat_scatter_sg_table_retry;
+} abd_stats_t;
+
+static abd_stats_t abd_stats = {
+	/* Amount of memory occupied by all of the abd_t struct allocations */
+	{ "struct_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The number of linear ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset() and abd_get_from_buf()). If an
+	 * ABD takes ownership of its buf then it will become tracked.
+	 */
+	{ "linear_cnt",				KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all linear ABDs tracked by linear_cnt */
+	{ "linear_data_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The number of scatter ABDs which are currently allocated, excluding
+	 * ABDs which don't own their data (for instance the ones which were
+	 * allocated through abd_get_offset()).
+	 */
+	{ "scatter_cnt",			KSTAT_DATA_UINT64 },
+	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
+	{ "scatter_data_size",			KSTAT_DATA_UINT64 },
+	/*
+	 * The amount of space wasted at the end of the last chunk across all
+	 * scatter ABDs tracked by scatter_cnt.
+	 */
+	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 },
+	/*
+	 * The number of compound allocations of a given order.  These
+	 * allocations are spread over all currently allocated ABDs, and
+	 * act as a measure of memory fragmentation.
+	 */
+	{ { "scatter_order_N",			KSTAT_DATA_UINT64 } },
+	/*
+	 * The number of scatter ABDs which contain multiple chunks.
+	 * ABDs are preferentially allocated from the minimum number of
+	 * contiguous multi-page chunks, a single chunk is optimal.
+	 */
+	{ "scatter_page_multi_chunk",		KSTAT_DATA_UINT64 },
+	/*
+	 * The number of scatter ABDs which are split across memory zones.
+	 * ABDs are preferentially allocated using pages from a single zone.
+	 */
+	{ "scatter_page_multi_zone",		KSTAT_DATA_UINT64 },
+	/*
+	 *  The total number of retries encountered when attempting to
+	 *  allocate the pages to populate the scatter ABD.
+	 */
+	{ "scatter_page_alloc_retry",		KSTAT_DATA_UINT64 },
+	/*
+	 *  The total number of retries encountered when attempting to
+	 *  allocate the sg table for an ABD.
+	 */
+	{ "scatter_sg_table_retry",		KSTAT_DATA_UINT64 },
+};
+
+#define	abd_for_each_sg(abd, sg, n, i)	\
+	for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
+
+unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;
+
+/*
+ * zfs_abd_scatter_min_size is the minimum allocation size to use scatter
+ * ABD's.  Smaller allocations will use linear ABD's which uses
+ * zio_[data_]buf_alloc().
+ *
+ * Scatter ABD's use at least one page each, so sub-page allocations waste
+ * some space when allocated as scatter (e.g. 2KB scatter allocation wastes
+ * half of each page).  Using linear ABD's for small allocations means that
+ * they will be put on slabs which contain many allocations.  This can
+ * improve memory efficiency, but it also makes it much harder for ARC
+ * evictions to actually free pages, because all the buffers on one slab need
+ * to be freed in order for the slab (and underlying pages) to be freed.
+ * Typically, 512B and 1KB kmem caches have 16 buffers per slab, so it's
+ * possible for them to actually waste more memory than scatter (one page per
+ * buf = wasting 3/4 or 7/8th; one buf per slab = wasting 15/16th).
+ *
+ * Spill blocks are typically 512B and are heavily used on systems running
+ * selinux with the default dnode size and the `xattr=sa` property set.
+ *
+ * By default we use linear allocations for 512B and 1KB, and scatter
+ * allocations for larger (1.5KB and up).
+ */
+int zfs_abd_scatter_min_size = 512 * 3;
+
+static kmem_cache_t *abd_cache = NULL;
+static kstat_t *abd_ksp;
+
+static size_t
+abd_chunkcnt_for_bytes(size_t size)
+{
+	return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
+}
+
+abd_t *
+abd_alloc_struct(size_t size)
+{
+	/*
+	 * In Linux we do not use the size passed in during ABD
+	 * allocation, so we just ignore it.
+	 */
+	abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
+	ASSERT3P(abd, !=, NULL);
+	ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
+
+	return (abd);
+}
+
+void
+abd_free_struct(abd_t *abd)
+{
+	kmem_cache_free(abd_cache, abd);
+	ABDSTAT_INCR(abdstat_struct_size, -(int)sizeof (abd_t));
+}
+
+#ifdef _KERNEL
+/*
+ * Mark zfs data pages so they can be excluded from kernel crash dumps
+ */
+#ifdef _LP64
+#define	ABD_FILE_CACHE_PAGE	0x2F5ABDF11ECAC4E
+
+static inline void
+abd_mark_zfs_page(struct page *page)
+{
+	get_page(page);
+	SetPagePrivate(page);
+	set_page_private(page, ABD_FILE_CACHE_PAGE);
+}
+
+static inline void
+abd_unmark_zfs_page(struct page *page)
+{
+	set_page_private(page, 0UL);
+	ClearPagePrivate(page);
+	put_page(page);
+}
+#else
+#define	abd_mark_zfs_page(page)
+#define	abd_unmark_zfs_page(page)
+#endif /* _LP64 */
+
+#ifndef CONFIG_HIGHMEM
+
+#ifndef __GFP_RECLAIM
+#define	__GFP_RECLAIM		__GFP_WAIT
+#endif
+
+/*
+ * The goal is to minimize fragmentation by preferentially populating ABDs
+ * with higher order compound pages from a single zone.  Allocation size is
+ * progressively decreased until it can be satisfied without performing
+ * reclaim or compaction.  When necessary this function will degenerate to
+ * allocating individual pages and allowing reclaim to satisfy allocations.
+ */
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	struct list_head pages;
+	struct sg_table table;
+	struct scatterlist *sg;
+	struct page *page, *tmp_page = NULL;
+	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
+	int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
+	int nr_pages = abd_chunkcnt_for_bytes(size);
+	int chunks = 0, zones = 0;
+	size_t remaining_size;
+	int nid = NUMA_NO_NODE;
+	int alloc_pages = 0;
+
+	INIT_LIST_HEAD(&pages);
+
+	while (alloc_pages < nr_pages) {
+		unsigned chunk_pages;
+		int order;
+
+		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
+		chunk_pages = (1U << order);
+
+		page = alloc_pages_node(nid, order ? gfp_comp : gfp, order);
+		if (page == NULL) {
+			if (order == 0) {
+				ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+				schedule_timeout_interruptible(1);
+			} else {
+				max_order = MAX(0, order - 1);
+			}
+			continue;
+		}
+
+		list_add_tail(&page->lru, &pages);
+
+		if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
+			zones++;
+
+		nid = page_to_nid(page);
+		ABDSTAT_BUMP(abdstat_scatter_orders[order]);
+		chunks++;
+		alloc_pages += chunk_pages;
+	}
+
+	ASSERT3S(alloc_pages, ==, nr_pages);
+
+	while (sg_alloc_table(&table, chunks, gfp)) {
+		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+		schedule_timeout_interruptible(1);
+	}
+
+	sg = table.sgl;
+	remaining_size = size;
+	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
+		size_t sg_size = MIN(PAGESIZE << compound_order(page),
+		    remaining_size);
+		sg_set_page(sg, page, sg_size, 0);
+		abd_mark_zfs_page(page);
+		remaining_size -= sg_size;
+
+		sg = sg_next(sg);
+		list_del(&page->lru);
+	}
+
+	/*
+	 * These conditions ensure that a possible transformation to a linear
+	 * ABD would be valid.
+	 */
+	ASSERT(!PageHighMem(sg_page(table.sgl)));
+	ASSERT0(ABD_SCATTER(abd).abd_offset);
+
+	if (table.nents == 1) {
+		/*
+		 * Since there is only one entry, this ABD can be represented
+		 * as a linear buffer.  All single-page (4K) ABD's can be
+		 * represented this way.  Some multi-page ABD's can also be
+		 * represented this way, if we were able to allocate a single
+		 * "chunk" (higher-order "page" which represents a power-of-2
+		 * series of physically-contiguous pages).  This is often the
+		 * case for 2-page (8K) ABD's.
+		 *
+		 * Representing a single-entry scatter ABD as a linear ABD
+		 * has the performance advantage of avoiding the copy (and
+		 * allocation) in abd_borrow_buf_copy / abd_return_buf_copy.
+		 * A performance increase of around 5% has been observed for
+		 * ARC-cached reads (of small blocks which can take advantage
+		 * of this).
+		 *
+		 * Note that this optimization is only possible because the
+		 * pages are always mapped into the kernel's address space.
+		 * This is not the case for highmem pages, so the
+		 * optimization can not be made there.
+		 */
+		abd->abd_flags |= ABD_FLAG_LINEAR;
+		abd->abd_flags |= ABD_FLAG_LINEAR_PAGE;
+		abd->abd_u.abd_linear.abd_sgl = table.sgl;
+		ABD_LINEAR_BUF(abd) = page_address(sg_page(table.sgl));
+	} else if (table.nents > 1) {
+		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
+
+		if (zones) {
+			ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
+			abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
+		}
+
+		ABD_SCATTER(abd).abd_sgl = table.sgl;
+		ABD_SCATTER(abd).abd_nents = table.nents;
+	}
+}
+#else
+
+/*
+ * Allocate N individual pages to construct a scatter ABD.  This function
+ * makes no attempt to request contiguous pages and requires the minimal
+ * number of kernel interfaces.  It's designed for maximum compatibility.
+ */
+void
+abd_alloc_chunks(abd_t *abd, size_t size)
+{
+	struct scatterlist *sg = NULL;
+	struct sg_table table;
+	struct page *page;
+	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+	int nr_pages = abd_chunkcnt_for_bytes(size);
+	int i = 0;
+
+	while (sg_alloc_table(&table, nr_pages, gfp)) {
+		ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+		schedule_timeout_interruptible(1);
+	}
+
+	ASSERT3U(table.nents, ==, nr_pages);
+	ABD_SCATTER(abd).abd_sgl = table.sgl;
+	ABD_SCATTER(abd).abd_nents = nr_pages;
+
+	abd_for_each_sg(abd, sg, nr_pages, i) {
+		while ((page = __page_cache_alloc(gfp)) == NULL) {
+			ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+			schedule_timeout_interruptible(1);
+		}
+
+		ABDSTAT_BUMP(abdstat_scatter_orders[0]);
+		sg_set_page(sg, page, PAGESIZE, 0);
+		abd_mark_zfs_page(page);
+	}
+
+	if (nr_pages > 1) {
+		ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+		abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
+	}
+}
+#endif /* !CONFIG_HIGHMEM */
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * This must be called if any of the sg_table allocation functions | ||||||
|  |  * are called. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | abd_free_sg_table(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	struct sg_table table; | ||||||
|  | 
 | ||||||
|  | 	table.sgl = ABD_SCATTER(abd).abd_sgl; | ||||||
|  | 	table.nents = table.orig_nents = ABD_SCATTER(abd).abd_nents; | ||||||
|  | 	sg_free_table(&table); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_free_chunks(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	struct scatterlist *sg = NULL; | ||||||
|  | 	struct page *page; | ||||||
|  | 	int nr_pages = ABD_SCATTER(abd).abd_nents; | ||||||
|  | 	int order, i = 0; | ||||||
|  | 
 | ||||||
|  | 	if (abd->abd_flags & ABD_FLAG_MULTI_ZONE) | ||||||
|  | 		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone); | ||||||
|  | 
 | ||||||
|  | 	if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK) | ||||||
|  | 		ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk); | ||||||
|  | 
 | ||||||
|  | 	abd_for_each_sg(abd, sg, nr_pages, i) { | ||||||
|  | 		page = sg_page(sg); | ||||||
|  | 		abd_unmark_zfs_page(page); | ||||||
|  | 		order = compound_order(page); | ||||||
|  | 		__free_pages(page, order); | ||||||
|  | 		ASSERT3U(sg->length, <=, PAGE_SIZE << order); | ||||||
|  | 		ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]); | ||||||
|  | 	} | ||||||
|  | 	abd_free_sg_table(abd); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #else /* _KERNEL */ | ||||||
|  | 
 | ||||||
|  | #ifndef PAGE_SHIFT | ||||||
|  | #define	PAGE_SHIFT (highbit64(PAGESIZE)-1) | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
|  | struct page; | ||||||
|  | 
 | ||||||
|  | #define	zfs_kmap_atomic(chunk, km)	((void *)chunk) | ||||||
|  | #define	zfs_kunmap_atomic(addr, km)	do { (void)(addr); } while (0) | ||||||
|  | #define	local_irq_save(flags)		do { (void)(flags); } while (0) | ||||||
|  | #define	local_irq_restore(flags)	do { (void)(flags); } while (0) | ||||||
|  | #define	nth_page(pg, i) \ | ||||||
|  | 	((struct page *)((void *)(pg) + (i) * PAGESIZE)) | ||||||
|  | 
 | ||||||
|  | struct scatterlist { | ||||||
|  | 	struct page *page; | ||||||
|  | 	int length; | ||||||
|  | 	int end; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | static void | ||||||
|  | sg_init_table(struct scatterlist *sg, int nr) | ||||||
|  | { | ||||||
|  | 	memset(sg, 0, nr * sizeof (struct scatterlist)); | ||||||
|  | 	sg[nr - 1].end = 1; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * This must be called if any of the sg_table allocation functions | ||||||
|  |  * are called. | ||||||
|  |  */ | ||||||
|  | static void | ||||||
|  | abd_free_sg_table(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	int nents = ABD_SCATTER(abd).abd_nents; | ||||||
|  | 	vmem_free(ABD_SCATTER(abd).abd_sgl, | ||||||
|  | 	    nents * sizeof (struct scatterlist)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #define	for_each_sg(sgl, sg, nr, i)	\ | ||||||
|  | 	for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg)) | ||||||
|  | 
 | ||||||
|  | static inline void | ||||||
|  | sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len, | ||||||
|  |     unsigned int offset) | ||||||
|  | { | ||||||
|  | 	/* currently we don't use offset */ | ||||||
|  | 	ASSERT(offset == 0); | ||||||
|  | 	sg->page = page; | ||||||
|  | 	sg->length = len; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline struct page * | ||||||
|  | sg_page(struct scatterlist *sg) | ||||||
|  | { | ||||||
|  | 	return (sg->page); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static inline struct scatterlist * | ||||||
|  | sg_next(struct scatterlist *sg) | ||||||
|  | { | ||||||
|  | 	if (sg->end) | ||||||
|  | 		return (NULL); | ||||||
|  | 
 | ||||||
|  | 	return (sg + 1); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_alloc_chunks(abd_t *abd, size_t size) | ||||||
|  | { | ||||||
|  | 	unsigned nr_pages = abd_chunkcnt_for_bytes(size); | ||||||
|  | 	struct scatterlist *sg; | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages * | ||||||
|  | 	    sizeof (struct scatterlist), KM_SLEEP); | ||||||
|  | 	sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages); | ||||||
|  | 
 | ||||||
|  | 	abd_for_each_sg(abd, sg, nr_pages, i) { | ||||||
|  | 		struct page *p = umem_alloc_aligned(PAGESIZE, 64, KM_SLEEP); | ||||||
|  | 		sg_set_page(sg, p, PAGESIZE, 0); | ||||||
|  | 	} | ||||||
|  | 	ABD_SCATTER(abd).abd_nents = nr_pages; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_free_chunks(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	int i, n = ABD_SCATTER(abd).abd_nents; | ||||||
|  | 	struct scatterlist *sg; | ||||||
|  | 
 | ||||||
|  | 	abd_for_each_sg(abd, sg, n, i) { | ||||||
|  | 		for (int j = 0; j < sg->length; j += PAGESIZE) { | ||||||
|  | 			struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT); | ||||||
|  | 			umem_free(p, PAGESIZE); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	abd_free_sg_table(abd); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #endif /* _KERNEL */ | ||||||
|  | 
 | ||||||
|  | boolean_t | ||||||
|  | abd_size_alloc_linear(size_t size) | ||||||
|  | { | ||||||
|  | 	return (size < zfs_abd_scatter_min_size ? B_TRUE : B_FALSE); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_update_scatter_stats(abd_t *abd, abd_stats_op_t op) | ||||||
|  | { | ||||||
|  | 	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); | ||||||
|  | 	if (op == ABDSTAT_INCR) { | ||||||
|  | 		ABDSTAT_BUMP(abdstat_scatter_cnt); | ||||||
|  | 		ABDSTAT_INCR(abdstat_scatter_data_size, abd->abd_size); | ||||||
|  | 		ABDSTAT_INCR(abdstat_scatter_chunk_waste, | ||||||
|  | 		    P2ROUNDUP(abd->abd_size, PAGESIZE) - abd->abd_size); | ||||||
|  | 	} else { | ||||||
|  | 		ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); | ||||||
|  | 		ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); | ||||||
|  | 		ABDSTAT_INCR(abdstat_scatter_chunk_waste, | ||||||
|  | 		    (int)abd->abd_size | ||||||
|  | 		    -(int)P2ROUNDUP(abd->abd_size, PAGESIZE)); | ||||||
|  | 	} | ||||||
|  | } | ||||||
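
A quick check of the waste accounting above, assuming 4 KiB pages: a 5000-byte scatter ABD is backed by P2ROUNDUP(5000, 4096) = 8192 bytes of pages, so ABDSTAT_INCR adds 8192 - 5000 = 3192 bytes to abdstat_scatter_chunk_waste and ABDSTAT_DECR subtracts the same 3192 bytes, keeping the counter balanced across alloc/free pairs.
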
|  | 
 | ||||||
|  | void | ||||||
|  | abd_update_linear_stats(abd_t *abd, abd_stats_op_t op) | ||||||
|  | { | ||||||
|  | 	ASSERT(op == ABDSTAT_INCR || op == ABDSTAT_DECR); | ||||||
|  | 	if (op == ABDSTAT_INCR) { | ||||||
|  | 		ABDSTAT_BUMP(abdstat_linear_cnt); | ||||||
|  | 		ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size); | ||||||
|  | 	} else { | ||||||
|  | 		ABDSTAT_BUMPDOWN(abdstat_linear_cnt); | ||||||
|  | 		ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_verify_scatter(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	size_t n; | ||||||
|  | 	int i = 0; | ||||||
|  | 	struct scatterlist *sg = NULL; | ||||||
|  | 
 | ||||||
|  | 	ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0); | ||||||
|  | 	ASSERT3U(ABD_SCATTER(abd).abd_offset, <, | ||||||
|  | 	    ABD_SCATTER(abd).abd_sgl->length); | ||||||
|  | 	n = ABD_SCATTER(abd).abd_nents; | ||||||
|  | 	abd_for_each_sg(abd, sg, n, i) { | ||||||
|  | 		ASSERT3P(sg_page(sg), !=, NULL); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_init(void) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 
 | ||||||
|  | 	abd_cache = kmem_cache_create("abd_t", sizeof (abd_t), | ||||||
|  | 	    0, NULL, NULL, NULL, NULL, NULL, 0); | ||||||
|  | 
 | ||||||
|  | 	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, | ||||||
|  | 	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); | ||||||
|  | 	if (abd_ksp != NULL) { | ||||||
|  | 		for (i = 0; i < MAX_ORDER; i++) { | ||||||
|  | 			snprintf(abd_stats.abdstat_scatter_orders[i].name, | ||||||
|  | 			    KSTAT_STRLEN, "scatter_order_%d", i); | ||||||
|  | 			abd_stats.abdstat_scatter_orders[i].data_type = | ||||||
|  | 			    KSTAT_DATA_UINT64; | ||||||
|  | 		} | ||||||
|  | 		abd_ksp->ks_data = &abd_stats; | ||||||
|  | 		kstat_install(abd_ksp); | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_fini(void) | ||||||
|  | { | ||||||
|  | 	if (abd_ksp != NULL) { | ||||||
|  | 		kstat_delete(abd_ksp); | ||||||
|  | 		abd_ksp = NULL; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	if (abd_cache) { | ||||||
|  | 		kmem_cache_destroy(abd_cache); | ||||||
|  | 		abd_cache = NULL; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_free_linear_page(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	/* Transform it back into a scatter ABD for freeing */ | ||||||
|  | 	struct scatterlist *sg = abd->abd_u.abd_linear.abd_sgl; | ||||||
|  | 	abd->abd_flags &= ~ABD_FLAG_LINEAR; | ||||||
|  | 	abd->abd_flags &= ~ABD_FLAG_LINEAR_PAGE; | ||||||
|  | 	ABD_SCATTER(abd).abd_nents = 1; | ||||||
|  | 	ABD_SCATTER(abd).abd_offset = 0; | ||||||
|  | 	ABD_SCATTER(abd).abd_sgl = sg; | ||||||
|  | 	abd_free_chunks(abd); | ||||||
|  | 
 | ||||||
|  | 	zfs_refcount_destroy(&abd->abd_children); | ||||||
|  | 	abd_update_scatter_stats(abd, ABDSTAT_DECR); | ||||||
|  | 	abd_free_struct(abd); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * If we're going to use this ABD for doing I/O using the block layer, the | ||||||
|  |  * consumer of the ABD data doesn't care if it's scattered or not, and we don't | ||||||
|  |  * plan to store this ABD in memory for a long period of time, we should | ||||||
|  |  * allocate the ABD type that requires the least data copying to do the I/O. | ||||||
|  |  * | ||||||
|  |  * On Linux the optimal thing to do would be to use abd_get_offset() and | ||||||
|  |  * construct a new ABD which shares the original pages thereby eliminating | ||||||
|  |  * the copy.  But for the moment a new linear ABD is allocated until this | ||||||
|  |  * performance optimization can be implemented. | ||||||
|  |  */ | ||||||
|  | abd_t * | ||||||
|  | abd_alloc_for_io(size_t size, boolean_t is_metadata) | ||||||
|  | { | ||||||
|  | 	return (abd_alloc(size, is_metadata)); | ||||||
|  | } | ||||||
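
The comment above hints at a zero-copy variant; a minimal sketch of what that could look like follows, assuming only abd_get_offset()/abd_put() from this code base. The helper name is hypothetical and not part of this commit:

	/*
	 * Hypothetical zero-copy alternative: share sabd's existing pages
	 * instead of allocating a new linear buffer and copying into it.
	 * The returned view does not own its data and must be released
	 * with abd_put() before sabd itself is freed.
	 */
	static abd_t *
	abd_get_for_io(abd_t *sabd)
	{
		return (abd_get_offset(sabd, 0));
	}
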
|  | 
 | ||||||
|  | abd_t * | ||||||
|  | abd_get_offset_scatter(abd_t *sabd, size_t off) | ||||||
|  | { | ||||||
|  | 	abd_t *abd = NULL; | ||||||
|  | 	int i = 0; | ||||||
|  | 	struct scatterlist *sg = NULL; | ||||||
|  | 
 | ||||||
|  | 	abd_verify(sabd); | ||||||
|  | 	ASSERT3U(off, <=, sabd->abd_size); | ||||||
|  | 
 | ||||||
|  | 	size_t new_offset = ABD_SCATTER(sabd).abd_offset + off; | ||||||
|  | 
 | ||||||
|  | 	abd = abd_alloc_struct(0); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Even if this buf is filesystem metadata, we only track that | ||||||
|  | 	 * if we own the underlying data buffer, which is not true in | ||||||
|  | 	 * this case. Therefore, we don't ever use ABD_FLAG_META here. | ||||||
|  | 	 */ | ||||||
|  | 	abd->abd_flags = 0; | ||||||
|  | 
 | ||||||
|  | 	abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) { | ||||||
|  | 		if (new_offset < sg->length) | ||||||
|  | 			break; | ||||||
|  | 		new_offset -= sg->length; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	ABD_SCATTER(abd).abd_sgl = sg; | ||||||
|  | 	ABD_SCATTER(abd).abd_offset = new_offset; | ||||||
|  | 	ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i; | ||||||
|  | 
 | ||||||
|  | 	return (abd); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Initialize the abd_iter. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | abd_iter_init(struct abd_iter *aiter, abd_t *abd) | ||||||
|  | { | ||||||
|  | 	abd_verify(abd); | ||||||
|  | 	aiter->iter_abd = abd; | ||||||
|  | 	aiter->iter_mapaddr = NULL; | ||||||
|  | 	aiter->iter_mapsize = 0; | ||||||
|  | 	aiter->iter_pos = 0; | ||||||
|  | 	if (abd_is_linear(abd)) { | ||||||
|  | 		aiter->iter_offset = 0; | ||||||
|  | 		aiter->iter_sg = NULL; | ||||||
|  | 	} else { | ||||||
|  | 		aiter->iter_offset = ABD_SCATTER(abd).abd_offset; | ||||||
|  | 		aiter->iter_sg = ABD_SCATTER(abd).abd_sgl; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Helper to check whether the abd_iter has been exhausted, i.e. | ||||||
|  |  * whether iteration has reached the end of the ABD. | ||||||
|  |  */ | ||||||
|  | boolean_t | ||||||
|  | abd_iter_at_end(struct abd_iter *aiter) | ||||||
|  | { | ||||||
|  | 	return (aiter->iter_pos == aiter->iter_abd->abd_size); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Advance the iterator by a certain amount. Cannot be called when a chunk is | ||||||
|  |  * in use. This can safely be called when the aiter has already been | ||||||
|  |  * exhausted, in which case this does nothing. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | abd_iter_advance(struct abd_iter *aiter, size_t amount) | ||||||
|  | { | ||||||
|  | 	ASSERT3P(aiter->iter_mapaddr, ==, NULL); | ||||||
|  | 	ASSERT0(aiter->iter_mapsize); | ||||||
|  | 
 | ||||||
|  | 	/* There's nothing left to advance to, so do nothing */ | ||||||
|  | 	if (abd_iter_at_end(aiter)) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	aiter->iter_pos += amount; | ||||||
|  | 	aiter->iter_offset += amount; | ||||||
|  | 	if (!abd_is_linear(aiter->iter_abd)) { | ||||||
|  | 		while (aiter->iter_offset >= aiter->iter_sg->length) { | ||||||
|  | 			aiter->iter_offset -= aiter->iter_sg->length; | ||||||
|  | 			aiter->iter_sg = sg_next(aiter->iter_sg); | ||||||
|  | 			if (aiter->iter_sg == NULL) { | ||||||
|  | 				ASSERT0(aiter->iter_offset); | ||||||
|  | 				break; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
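
A worked trace of the scatter branch above, assuming two 4 KiB sg entries and a fresh iterator on an ABD whose abd_offset is 0:

	/*
	 * abd_iter_advance(aiter, 6000):
	 *   iter_pos    = 0 -> 6000
	 *   iter_offset = 6000; the first entry's length (4096) is
	 *   consumed, leaving iter_offset = 6000 - 4096 = 1904, and
	 *   iter_sg now points at the second scatterlist entry.
	 */
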
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Map the current chunk into aiter. This can safely be called when the | ||||||
|  |  * aiter has already been exhausted, in which case this does nothing. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | abd_iter_map(struct abd_iter *aiter) | ||||||
|  | { | ||||||
|  | 	void *paddr; | ||||||
|  | 	size_t offset = 0; | ||||||
|  | 
 | ||||||
|  | 	ASSERT3P(aiter->iter_mapaddr, ==, NULL); | ||||||
|  | 	ASSERT0(aiter->iter_mapsize); | ||||||
|  | 
 | ||||||
|  | 	/* There's nothing left to iterate over, so do nothing */ | ||||||
|  | 	if (abd_iter_at_end(aiter)) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	if (abd_is_linear(aiter->iter_abd)) { | ||||||
|  | 		ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset); | ||||||
|  | 		offset = aiter->iter_offset; | ||||||
|  | 		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; | ||||||
|  | 		paddr = ABD_LINEAR_BUF(aiter->iter_abd); | ||||||
|  | 	} else { | ||||||
|  | 		offset = aiter->iter_offset; | ||||||
|  | 		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset, | ||||||
|  | 		    aiter->iter_abd->abd_size - aiter->iter_pos); | ||||||
|  | 
 | ||||||
|  | 		paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg), | ||||||
|  | 		    km_table[aiter->iter_km]); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	aiter->iter_mapaddr = (char *)paddr + offset; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Unmap the current chunk from aiter. This can safely be called when the | ||||||
|  |  * aiter has already been exhausted, in which case this does nothing. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | abd_iter_unmap(struct abd_iter *aiter) | ||||||
|  | { | ||||||
|  | 	/* There's nothing left to unmap, so do nothing */ | ||||||
|  | 	if (abd_iter_at_end(aiter)) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	if (!abd_is_linear(aiter->iter_abd)) { | ||||||
|  | 		/* LINTED E_FUNC_SET_NOT_USED */ | ||||||
|  | 		zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset, | ||||||
|  | 		    km_table[aiter->iter_km]); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	ASSERT3P(aiter->iter_mapaddr, !=, NULL); | ||||||
|  | 	ASSERT3U(aiter->iter_mapsize, >, 0); | ||||||
|  | 
 | ||||||
|  | 	aiter->iter_mapaddr = NULL; | ||||||
|  | 	aiter->iter_mapsize = 0; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_enter_critical(unsigned long flags) | ||||||
|  | { | ||||||
|  | 	local_irq_save(flags); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | void | ||||||
|  | abd_exit_critical(unsigned long flags) | ||||||
|  | { | ||||||
|  | 	local_irq_restore(flags); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | #if defined(_KERNEL) | ||||||
|  | /*
 | ||||||
|  |  * bio_nr_pages for ABD. | ||||||
|  |  * @off is the offset in @abd | ||||||
|  |  */ | ||||||
|  | unsigned long | ||||||
|  | abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off) | ||||||
|  | { | ||||||
|  | 	unsigned long pos; | ||||||
|  | 
 | ||||||
|  | 	if (abd_is_linear(abd)) | ||||||
|  | 		pos = (unsigned long)abd_to_buf(abd) + off; | ||||||
|  | 	else | ||||||
|  | 		pos = ABD_SCATTER(abd).abd_offset + off; | ||||||
|  | 
 | ||||||
|  | 	return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) - | ||||||
|  | 	    (pos >> PAGE_SHIFT); | ||||||
|  | } | ||||||
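
A worked example of the arithmetic above, assuming 4 KiB pages (PAGE_SHIFT = 12): for pos = 3000 and size = 6000, the result is ((3000 + 6000 + 4095) >> 12) - (3000 >> 12) = 3 - 0 = 3; the 6000-byte span is shorter than two pages but still touches three of them because of its starting offset.
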
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * bio_map for scatter ABD. | ||||||
|  |  * @off is the offset in @abd | ||||||
|  |  * The remaining (unmapped) IO size is returned. | ||||||
|  |  */ | ||||||
|  | unsigned int | ||||||
|  | abd_scatter_bio_map_off(struct bio *bio, abd_t *abd, | ||||||
|  |     unsigned int io_size, size_t off) | ||||||
|  | { | ||||||
|  | 	int i; | ||||||
|  | 	struct abd_iter aiter; | ||||||
|  | 
 | ||||||
|  | 	ASSERT(!abd_is_linear(abd)); | ||||||
|  | 	ASSERT3U(io_size, <=, abd->abd_size - off); | ||||||
|  | 
 | ||||||
|  | 	abd_iter_init(&aiter, abd); | ||||||
|  | 	abd_iter_advance(&aiter, off); | ||||||
|  | 
 | ||||||
|  | 	for (i = 0; i < bio->bi_max_vecs; i++) { | ||||||
|  | 		struct page *pg; | ||||||
|  | 		size_t len, sgoff, pgoff; | ||||||
|  | 		struct scatterlist *sg; | ||||||
|  | 
 | ||||||
|  | 		if (io_size <= 0) | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		sg = aiter.iter_sg; | ||||||
|  | 		sgoff = aiter.iter_offset; | ||||||
|  | 		pgoff = sgoff & (PAGESIZE - 1); | ||||||
|  | 		len = MIN(io_size, PAGESIZE - pgoff); | ||||||
|  | 		ASSERT(len > 0); | ||||||
|  | 
 | ||||||
|  | 		pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT); | ||||||
|  | 		if (bio_add_page(bio, pg, len, pgoff) != len) | ||||||
|  | 			break; | ||||||
|  | 
 | ||||||
|  | 		io_size -= len; | ||||||
|  | 		abd_iter_advance(&aiter, len); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return (io_size); | ||||||
|  | } | ||||||
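
A hedged caller sketch showing how the returned remainder is meant to be consumed; the surrounding logic is an assumption, only abd_scatter_bio_map_off() itself is from this commit:

	/* Map as much as fits; a nonzero remainder means the bio filled up. */
	unsigned int remaining = abd_scatter_bio_map_off(bio, abd, io_size, off);
	if (remaining != 0) {
		/*
		 * Submit this bio and continue mapping the rest,
		 * starting at off + (io_size - remaining).
		 */
	}
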
|  | 
 | ||||||
|  | /* Tunable Parameters */ | ||||||
|  | module_param(zfs_abd_scatter_enabled, int, 0644); | ||||||
|  | MODULE_PARM_DESC(zfs_abd_scatter_enabled, | ||||||
|  | 	"Toggle whether ABD allocations must be linear."); | ||||||
|  | module_param(zfs_abd_scatter_min_size, int, 0644); | ||||||
|  | MODULE_PARM_DESC(zfs_abd_scatter_min_size, | ||||||
|  | 	"Minimum size of scatter allocations."); | ||||||
|  | /* CSTYLED */ | ||||||
|  | module_param(zfs_abd_scatter_max_order, uint, 0644); | ||||||
|  | MODULE_PARM_DESC(zfs_abd_scatter_max_order, | ||||||
|  | 	"Maximum order allocation used for a scatter ABD."); | ||||||
|  | #endif | ||||||
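
Because these parameters are registered with mode 0644, on Linux they can typically be read and changed at runtime under /sys/module/zfs/parameters/ as well as being set at module load time.
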
| @ -14,6 +14,7 @@ ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE) | |||||||
| # Suppress unused-value warnings in sparc64 architecture headers
 | # Suppress unused-value warnings in sparc64 architecture headers
 | ||||||
| ccflags-$(CONFIG_SPARC64) += -Wno-unused-value | ccflags-$(CONFIG_SPARC64) += -Wno-unused-value | ||||||
| 
 | 
 | ||||||
|  | $(MODULE)-objs += abd.o | ||||||
| $(MODULE)-objs += aggsum.o | $(MODULE)-objs += aggsum.o | ||||||
| $(MODULE)-objs += arc.o | $(MODULE)-objs += arc.o | ||||||
| $(MODULE)-objs += blkptr.o | $(MODULE)-objs += blkptr.o | ||||||
|  | |||||||
| @ -1,17 +1,26 @@ | |||||||
| /*
 | /*
 | ||||||
|  * This file and its contents are supplied under the terms of the |  * CDDL HEADER START | ||||||
|  * Common Development and Distribution License ("CDDL"), version 1.0. |  | ||||||
|  * You may only use this file in accordance with the terms of version |  | ||||||
|  * 1.0 of the CDDL. |  | ||||||
|  * |  * | ||||||
|  * A full copy of the text of the CDDL should have accompanied this |  * The contents of this file are subject to the terms of the | ||||||
|  * source.  A copy of the CDDL is also available via the Internet at |  * Common Development and Distribution License (the "License"). | ||||||
|  * http://www.illumos.org/license/CDDL.
 |  * You may not use this file except in compliance with the License. | ||||||
|  |  * | ||||||
|  |  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | ||||||
|  |  * or http://www.opensolaris.org/os/licensing.
 | ||||||
|  |  * See the License for the specific language governing permissions | ||||||
|  |  * and limitations under the License. | ||||||
|  |  * | ||||||
|  |  * When distributing Covered Code, include this CDDL HEADER in each | ||||||
|  |  * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | ||||||
|  |  * If applicable, add the following below this CDDL HEADER, with the | ||||||
|  |  * fields enclosed by brackets "[]" replaced with your own identifying | ||||||
|  |  * information: Portions Copyright [yyyy] [name of copyright owner] | ||||||
|  |  * | ||||||
|  |  * CDDL HEADER END | ||||||
|  */ |  */ | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Copyright (c) 2014 by Chunwei Chen. All rights reserved. |  * Copyright (c) 2014 by Chunwei Chen. All rights reserved. | ||||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. |  * Copyright (c) 2019 by Delphix. All rights reserved. | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -50,11 +59,6 @@ | |||||||
|  *                                      +----------------->| chunk N-1 | |  *                                      +----------------->| chunk N-1 | | ||||||
|  *                                                         +-----------+ |  *                                                         +-----------+ | ||||||
|  * |  * | ||||||
|  * Using a large proportion of scattered ABDs decreases ARC fragmentation since |  | ||||||
|  * when we are at the limit of allocatable space, using equal-size chunks will |  | ||||||
|  * allow us to quickly reclaim enough space for a new large allocation (assuming |  | ||||||
|  * it is also scattered). |  | ||||||
|  * |  | ||||||
|  * In addition to directly allocating a linear or scattered ABD, it is also |  * In addition to directly allocating a linear or scattered ABD, it is also | ||||||
|  * possible to create an ABD by requesting the "sub-ABD" starting at an offset |  * possible to create an ABD by requesting the "sub-ABD" starting at an offset | ||||||
|  * within an existing ABD. In linear buffers this is simple (set abd_buf of |  * within an existing ABD. In linear buffers this is simple (set abd_buf of | ||||||
| @ -83,186 +87,55 @@ | |||||||
|  * compare, copy, read, write, and fill with zeroes. If you need a custom |  * compare, copy, read, write, and fill with zeroes. If you need a custom | ||||||
|  * function which progressively accesses the whole ABD, use the abd_iterate_* |  * function which progressively accesses the whole ABD, use the abd_iterate_* | ||||||
|  * functions. |  * functions. | ||||||
|  |  * | ||||||
|  |  * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to | ||||||
|  |  * B_FALSE. | ||||||
|  */ |  */ | ||||||
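
For concreteness, a hedged sketch of the abd_iterate_* pattern the comment refers to; the abd_iterate_func() prototype (taking an abd_iter_func_t callback) is assumed from the wider code base and is not part of this diff:

	/* Count non-zero bytes without ever linearizing the ABD. */
	static int
	count_nonzero_cb(void *buf, size_t len, void *private)
	{
		uint64_t *cnt = private;
		const uint8_t *p = buf;

		for (size_t i = 0; i < len; i++) {
			if (p[i] != 0)
				(*cnt)++;
		}
		return (0);	/* a nonzero return stops the iteration */
	}

	/* ...in a consumer: */
	uint64_t nz = 0;
	(void) abd_iterate_func(abd, 0, abd_get_size(abd), count_nonzero_cb, &nz);
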
| 
 | 
 | ||||||
| #include <sys/abd.h> | #include <sys/abd_impl.h> | ||||||
| #include <sys/param.h> | #include <sys/param.h> | ||||||
| #include <sys/zio.h> | #include <sys/zio.h> | ||||||
| #include <sys/zfs_context.h> | #include <sys/zfs_context.h> | ||||||
| #include <sys/zfs_znode.h> | #include <sys/zfs_znode.h> | ||||||
| 
 | 
 | ||||||
| typedef struct abd_stats { | /* see block comment above for description */ | ||||||
| 	kstat_named_t abdstat_struct_size; | int zfs_abd_scatter_enabled = B_TRUE; | ||||||
| 	kstat_named_t abdstat_scatter_cnt; |  | ||||||
| 	kstat_named_t abdstat_scatter_data_size; |  | ||||||
| 	kstat_named_t abdstat_scatter_chunk_waste; |  | ||||||
| 	kstat_named_t abdstat_linear_cnt; |  | ||||||
| 	kstat_named_t abdstat_linear_data_size; |  | ||||||
| } abd_stats_t; |  | ||||||
| 
 | 
 | ||||||
| static abd_stats_t abd_stats = { | boolean_t | ||||||
| 	/* Amount of memory occupied by all of the abd_t struct allocations */ | abd_is_linear(abd_t *abd) | ||||||
| 	{ "struct_size",			KSTAT_DATA_UINT64 }, |  | ||||||
| 	/*
 |  | ||||||
| 	 * The number of scatter ABDs which are currently allocated, excluding |  | ||||||
| 	 * ABDs which don't own their data (for instance the ones which were |  | ||||||
| 	 * allocated through abd_get_offset()). |  | ||||||
| 	 */ |  | ||||||
| 	{ "scatter_cnt",			KSTAT_DATA_UINT64 }, |  | ||||||
| 	/* Amount of data stored in all scatter ABDs tracked by scatter_cnt */ |  | ||||||
| 	{ "scatter_data_size",			KSTAT_DATA_UINT64 }, |  | ||||||
| 	/*
 |  | ||||||
| 	 * The amount of space wasted at the end of the last chunk across all |  | ||||||
| 	 * scatter ABDs tracked by scatter_cnt. |  | ||||||
| 	 */ |  | ||||||
| 	{ "scatter_chunk_waste",		KSTAT_DATA_UINT64 }, |  | ||||||
| 	/*
 |  | ||||||
| 	 * The number of linear ABDs which are currently allocated, excluding |  | ||||||
| 	 * ABDs which don't own their data (for instance the ones which were |  | ||||||
| 	 * allocated through abd_get_offset() and abd_get_from_buf()). If an |  | ||||||
| 	 * ABD takes ownership of its buf then it will become tracked. |  | ||||||
| 	 */ |  | ||||||
| 	{ "linear_cnt",				KSTAT_DATA_UINT64 }, |  | ||||||
| 	/* Amount of data stored in all linear ABDs tracked by linear_cnt */ |  | ||||||
| 	{ "linear_data_size",			KSTAT_DATA_UINT64 }, |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| #define	ABDSTAT(stat)		(abd_stats.stat.value.ui64) |  | ||||||
| #define	ABDSTAT_INCR(stat, val) \ |  | ||||||
| 	atomic_add_64(&abd_stats.stat.value.ui64, (val)) |  | ||||||
| #define	ABDSTAT_BUMP(stat)	ABDSTAT_INCR(stat, 1) |  | ||||||
| #define	ABDSTAT_BUMPDOWN(stat)	ABDSTAT_INCR(stat, -1) |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * It is possible to make all future ABDs be linear by setting this to B_FALSE. |  | ||||||
|  * Otherwise, ABDs are allocated scattered by default unless the caller uses |  | ||||||
|  * abd_alloc_linear(). |  | ||||||
|  */ |  | ||||||
| boolean_t zfs_abd_scatter_enabled = B_TRUE; |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * The size of the chunks ABD allocates. Because the sizes allocated from the |  | ||||||
|  * kmem_cache can't change, this tunable can only be modified at boot. Changing |  | ||||||
|  * it at runtime would cause ABD iteration to work incorrectly for ABDs which |  | ||||||
|  * were allocated with the old size, so a safeguard has been put in place which |  | ||||||
|  * will cause the machine to panic if you change it and try to access the data |  | ||||||
|  * within a scattered ABD. |  | ||||||
|  */ |  | ||||||
| size_t zfs_abd_chunk_size = 4096; |  | ||||||
| 
 |  | ||||||
| #if defined(_KERNEL) |  | ||||||
| SYSCTL_DECL(_vfs_zfs); |  | ||||||
| 
 |  | ||||||
| SYSCTL_INT(_vfs_zfs, OID_AUTO, abd_scatter_enabled, CTLFLAG_RWTUN, |  | ||||||
| 	&zfs_abd_scatter_enabled, 0, "Enable scattered ARC data buffers"); |  | ||||||
| SYSCTL_ULONG(_vfs_zfs, OID_AUTO, abd_chunk_size, CTLFLAG_RDTUN, |  | ||||||
| 	&zfs_abd_chunk_size, 0, "The size of the chunks ABD allocates"); |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| kmem_cache_t *abd_chunk_cache; |  | ||||||
| static kstat_t *abd_ksp; |  | ||||||
| 
 |  | ||||||
| extern inline boolean_t abd_is_linear(abd_t *abd); |  | ||||||
| extern inline void abd_copy(abd_t *dabd, abd_t *sabd, size_t size); |  | ||||||
| extern inline void abd_copy_from_buf(abd_t *abd, const void *buf, size_t size); |  | ||||||
| extern inline void abd_copy_to_buf(void* buf, abd_t *abd, size_t size); |  | ||||||
| extern inline int abd_cmp_buf(abd_t *abd, const void *buf, size_t size); |  | ||||||
| extern inline void abd_zero(abd_t *abd, size_t size); |  | ||||||
| 
 |  | ||||||
| static void * |  | ||||||
| abd_alloc_chunk() |  | ||||||
| { | { | ||||||
| 	void *c = kmem_cache_alloc(abd_chunk_cache, KM_PUSHPAGE); | 	return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE); | ||||||
| 	ASSERT3P(c, !=, NULL); |  | ||||||
| 	return (c); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void | boolean_t | ||||||
| abd_free_chunk(void *c) | abd_is_linear_page(abd_t *abd) | ||||||
| { | { | ||||||
| 	kmem_cache_free(abd_chunk_cache, c); | 	return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ? | ||||||
|  | 	    B_TRUE : B_FALSE); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void | void | ||||||
| abd_init(void) |  | ||||||
| { |  | ||||||
| 	abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0, |  | ||||||
| 	    NULL, NULL, NULL, NULL, 0, KMC_NOTOUCH | KMC_NODEBUG); |  | ||||||
| 
 |  | ||||||
| 	abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED, |  | ||||||
| 	    sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); |  | ||||||
| 	if (abd_ksp != NULL) { |  | ||||||
| 		abd_ksp->ks_data = &abd_stats; |  | ||||||
| 		kstat_install(abd_ksp); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void |  | ||||||
| abd_fini(void) |  | ||||||
| { |  | ||||||
| 	if (abd_ksp != NULL) { |  | ||||||
| 		kstat_delete(abd_ksp); |  | ||||||
| 		abd_ksp = NULL; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	kmem_cache_destroy(abd_chunk_cache); |  | ||||||
| 	abd_chunk_cache = NULL; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline size_t |  | ||||||
| abd_chunkcnt_for_bytes(size_t size) |  | ||||||
| { |  | ||||||
| 	return (P2ROUNDUP(size, zfs_abd_chunk_size) / zfs_abd_chunk_size); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline size_t |  | ||||||
| abd_scatter_chunkcnt(abd_t *abd) |  | ||||||
| { |  | ||||||
| 	ASSERT(!abd_is_linear(abd)); |  | ||||||
| 	return (abd_chunkcnt_for_bytes( |  | ||||||
| 	    abd->abd_u.abd_scatter.abd_offset + abd->abd_size)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void |  | ||||||
| abd_verify(abd_t *abd) | abd_verify(abd_t *abd) | ||||||
| { | { | ||||||
| 	ASSERT3U(abd->abd_size, >, 0); | 	ASSERT3U(abd->abd_size, >, 0); | ||||||
| 	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); | 	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); | ||||||
| 	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | | 	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | | ||||||
| 	    ABD_FLAG_OWNER | ABD_FLAG_META)); | 	    ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | | ||||||
|  | 	    ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE)); | ||||||
| 	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); | 	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); | ||||||
| 	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); | 	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); | ||||||
| 	if (abd_is_linear(abd)) { | 	if (abd_is_linear(abd)) { | ||||||
| 		ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL); | 		ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL); | ||||||
| 	} else { | 	} else { | ||||||
| 		ASSERT3U(abd->abd_u.abd_scatter.abd_offset, <, | 		abd_verify_scatter(abd); | ||||||
| 		    zfs_abd_chunk_size); |  | ||||||
| 		size_t n = abd_scatter_chunkcnt(abd); |  | ||||||
| 		for (int i = 0; i < n; i++) { |  | ||||||
| 			ASSERT3P( |  | ||||||
| 			    abd->abd_u.abd_scatter.abd_chunks[i], !=, NULL); |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline abd_t * | uint_t | ||||||
| abd_alloc_struct(size_t chunkcnt) | abd_get_size(abd_t *abd) | ||||||
| { | { | ||||||
| 	size_t size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); | 	abd_verify(abd); | ||||||
| 	abd_t *abd = kmem_alloc(size, KM_PUSHPAGE); | 	return (abd->abd_size); | ||||||
| 	ASSERT3P(abd, !=, NULL); |  | ||||||
| 	ABDSTAT_INCR(abdstat_struct_size, size); |  | ||||||
| 
 |  | ||||||
| 	return (abd); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline void |  | ||||||
| abd_free_struct(abd_t *abd) |  | ||||||
| { |  | ||||||
| 	size_t chunkcnt = abd_is_linear(abd) ? 0 : abd_scatter_chunkcnt(abd); |  | ||||||
| 	int size = offsetof(abd_t, abd_u.abd_scatter.abd_chunks[chunkcnt]); |  | ||||||
| 	kmem_free(abd, size); |  | ||||||
| 	ABDSTAT_INCR(abdstat_struct_size, -size); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -272,15 +145,16 @@ abd_free_struct(abd_t *abd) | |||||||
| abd_t * | abd_t * | ||||||
| abd_alloc(size_t size, boolean_t is_metadata) | abd_alloc(size_t size, boolean_t is_metadata) | ||||||
| { | { | ||||||
| 	if (!zfs_abd_scatter_enabled || size <= zfs_abd_chunk_size) | 	if (!zfs_abd_scatter_enabled || abd_size_alloc_linear(size)) | ||||||
| 		return (abd_alloc_linear(size, is_metadata)); | 		return (abd_alloc_linear(size, is_metadata)); | ||||||
| 
 | 
 | ||||||
| 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); | 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); | ||||||
| 
 | 
 | ||||||
| 	size_t n = abd_chunkcnt_for_bytes(size); | 	abd_t *abd = abd_alloc_struct(size); | ||||||
| 	abd_t *abd = abd_alloc_struct(n); |  | ||||||
| 
 |  | ||||||
| 	abd->abd_flags = ABD_FLAG_OWNER; | 	abd->abd_flags = ABD_FLAG_OWNER; | ||||||
|  | 	abd->abd_u.abd_scatter.abd_offset = 0; | ||||||
|  | 	abd_alloc_chunks(abd, size); | ||||||
|  | 
 | ||||||
| 	if (is_metadata) { | 	if (is_metadata) { | ||||||
| 		abd->abd_flags |= ABD_FLAG_META; | 		abd->abd_flags |= ABD_FLAG_META; | ||||||
| 	} | 	} | ||||||
| @ -288,19 +162,7 @@ abd_alloc(size_t size, boolean_t is_metadata) | |||||||
| 	abd->abd_parent = NULL; | 	abd->abd_parent = NULL; | ||||||
| 	zfs_refcount_create(&abd->abd_children); | 	zfs_refcount_create(&abd->abd_children); | ||||||
| 
 | 
 | ||||||
| 	abd->abd_u.abd_scatter.abd_offset = 0; | 	abd_update_scatter_stats(abd, ABDSTAT_INCR); | ||||||
| 	abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size; |  | ||||||
| 
 |  | ||||||
| 	for (int i = 0; i < n; i++) { |  | ||||||
| 		void *c = abd_alloc_chunk(); |  | ||||||
| 		ASSERT3P(c, !=, NULL); |  | ||||||
| 		abd->abd_u.abd_scatter.abd_chunks[i] = c; |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	ABDSTAT_BUMP(abdstat_scatter_cnt); |  | ||||||
| 	ABDSTAT_INCR(abdstat_scatter_data_size, size); |  | ||||||
| 	ABDSTAT_INCR(abdstat_scatter_chunk_waste, |  | ||||||
| 	    n * zfs_abd_chunk_size - size); |  | ||||||
| 
 | 
 | ||||||
| 	return (abd); | 	return (abd); | ||||||
| } | } | ||||||
| @ -308,17 +170,32 @@ abd_alloc(size_t size, boolean_t is_metadata) | |||||||
| static void | static void | ||||||
| abd_free_scatter(abd_t *abd) | abd_free_scatter(abd_t *abd) | ||||||
| { | { | ||||||
| 	size_t n = abd_scatter_chunkcnt(abd); | 	abd_free_chunks(abd); | ||||||
| 	for (int i = 0; i < n; i++) { | 
 | ||||||
| 		abd_free_chunk(abd->abd_u.abd_scatter.abd_chunks[i]); | 	zfs_refcount_destroy(&abd->abd_children); | ||||||
|  | 	abd_update_scatter_stats(abd, ABDSTAT_DECR); | ||||||
|  | 	abd_free_struct(abd); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not | ||||||
|  |  * free the underlying scatterlist or buffer. | ||||||
|  |  */ | ||||||
|  | void | ||||||
|  | abd_put(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	if (abd == NULL) | ||||||
|  | 		return; | ||||||
|  | 
 | ||||||
|  | 	abd_verify(abd); | ||||||
|  | 	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); | ||||||
|  | 
 | ||||||
|  | 	if (abd->abd_parent != NULL) { | ||||||
|  | 		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children, | ||||||
|  | 		    abd->abd_size, abd); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	zfs_refcount_destroy(&abd->abd_children); | 	zfs_refcount_destroy(&abd->abd_children); | ||||||
| 	ABDSTAT_BUMPDOWN(abdstat_scatter_cnt); |  | ||||||
| 	ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size); |  | ||||||
| 	ABDSTAT_INCR(abdstat_scatter_chunk_waste, |  | ||||||
| 	    abd->abd_size - n * zfs_abd_chunk_size); |  | ||||||
| 
 |  | ||||||
| 	abd_free_struct(abd); | 	abd_free_struct(abd); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -343,13 +220,12 @@ abd_alloc_linear(size_t size, boolean_t is_metadata) | |||||||
| 	zfs_refcount_create(&abd->abd_children); | 	zfs_refcount_create(&abd->abd_children); | ||||||
| 
 | 
 | ||||||
| 	if (is_metadata) { | 	if (is_metadata) { | ||||||
| 		abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size); | 		ABD_LINEAR_BUF(abd) = zio_buf_alloc(size); | ||||||
| 	} else { | 	} else { | ||||||
| 		abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size); | 		ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	ABDSTAT_BUMP(abdstat_linear_cnt); | 	abd_update_linear_stats(abd, ABDSTAT_INCR); | ||||||
| 	ABDSTAT_INCR(abdstat_linear_data_size, size); |  | ||||||
| 
 | 
 | ||||||
| 	return (abd); | 	return (abd); | ||||||
| } | } | ||||||
| @ -357,15 +233,18 @@ abd_alloc_linear(size_t size, boolean_t is_metadata) | |||||||
| static void | static void | ||||||
| abd_free_linear(abd_t *abd) | abd_free_linear(abd_t *abd) | ||||||
| { | { | ||||||
|  | 	if (abd_is_linear_page(abd)) { | ||||||
|  | 		abd_free_linear_page(abd); | ||||||
|  | 		return; | ||||||
|  | 	} | ||||||
| 	if (abd->abd_flags & ABD_FLAG_META) { | 	if (abd->abd_flags & ABD_FLAG_META) { | ||||||
| 		zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); | 		zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); | ||||||
| 	} else { | 	} else { | ||||||
| 		zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size); | 		zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	zfs_refcount_destroy(&abd->abd_children); | 	zfs_refcount_destroy(&abd->abd_children); | ||||||
| 	ABDSTAT_BUMPDOWN(abdstat_linear_cnt); | 	abd_update_linear_stats(abd, ABDSTAT_DECR); | ||||||
| 	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); |  | ||||||
| 
 | 
 | ||||||
| 	abd_free_struct(abd); | 	abd_free_struct(abd); | ||||||
| } | } | ||||||
| @ -397,39 +276,23 @@ abd_t * | |||||||
| abd_alloc_sametype(abd_t *sabd, size_t size) | abd_alloc_sametype(abd_t *sabd, size_t size) | ||||||
| { | { | ||||||
| 	boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; | 	boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; | ||||||
| 	if (abd_is_linear(sabd)) { | 	if (abd_is_linear(sabd) && | ||||||
|  | 	    !abd_is_linear_page(sabd)) { | ||||||
| 		return (abd_alloc_linear(size, is_metadata)); | 		return (abd_alloc_linear(size, is_metadata)); | ||||||
| 	} else { | 	} else { | ||||||
| 		return (abd_alloc(size, is_metadata)); | 		return (abd_alloc(size, is_metadata)); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * If we're going to use this ABD for doing I/O using the block layer, the |  | ||||||
|  * consumer of the ABD data doesn't care if it's scattered or not, and we don't |  | ||||||
|  * plan to store this ABD in memory for a long period of time, we should |  | ||||||
|  * allocate the ABD type that requires the least data copying to do the I/O. |  | ||||||
|  * |  | ||||||
|  * Currently this is linear ABDs, however if ldi_strategy() can ever issue I/Os |  | ||||||
|  * using a scatter/gather list we should switch to that and replace this call |  | ||||||
|  * with vanilla abd_alloc(). |  | ||||||
|  */ |  | ||||||
| abd_t * |  | ||||||
| abd_alloc_for_io(size_t size, boolean_t is_metadata) |  | ||||||
| { |  | ||||||
| 	return (abd_alloc_linear(size, is_metadata)); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Allocate a new ABD to point to offset off of sabd. It shares the underlying |  * Allocate a new ABD to point to offset off of sabd. It shares the underlying | ||||||
|  * buffer data with sabd. Use abd_put() to free. sabd must not be freed while |  * buffer data with sabd. Use abd_put() to free. sabd must not be freed while | ||||||
|  * any derived ABDs exist. |  * any derived ABDs exist. | ||||||
|  */ |  */ | ||||||
| /* ARGSUSED */ | static abd_t * | ||||||
| static inline abd_t * |  | ||||||
| abd_get_offset_impl(abd_t *sabd, size_t off, size_t size) | abd_get_offset_impl(abd_t *sabd, size_t off, size_t size) | ||||||
| { | { | ||||||
| 	abd_t *abd; | 	abd_t *abd = NULL; | ||||||
| 
 | 
 | ||||||
| 	abd_verify(sabd); | 	abd_verify(sabd); | ||||||
| 	ASSERT3U(off, <=, sabd->abd_size); | 	ASSERT3U(off, <=, sabd->abd_size); | ||||||
| @ -444,60 +307,33 @@ abd_get_offset_impl(abd_t *sabd, size_t off, size_t size) | |||||||
| 		 */ | 		 */ | ||||||
| 		abd->abd_flags = ABD_FLAG_LINEAR; | 		abd->abd_flags = ABD_FLAG_LINEAR; | ||||||
| 
 | 
 | ||||||
| 		abd->abd_u.abd_linear.abd_buf = | 		ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off; | ||||||
| 		    (char *)sabd->abd_u.abd_linear.abd_buf + off; |  | ||||||
| 	} else { | 	} else { | ||||||
| 		size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off; | 		abd = abd_get_offset_scatter(sabd, off); | ||||||
| 		size_t chunkcnt = abd_scatter_chunkcnt(sabd) - |  | ||||||
| 		    (new_offset / zfs_abd_chunk_size); |  | ||||||
| 
 |  | ||||||
| 		abd = abd_alloc_struct(chunkcnt); |  | ||||||
| 
 |  | ||||||
| 		/*
 |  | ||||||
| 		 * Even if this buf is filesystem metadata, we only track that |  | ||||||
| 		 * if we own the underlying data buffer, which is not true in |  | ||||||
| 		 * this case. Therefore, we don't ever use ABD_FLAG_META here. |  | ||||||
| 		 */ |  | ||||||
| 		abd->abd_flags = 0; |  | ||||||
| 
 |  | ||||||
| 		abd->abd_u.abd_scatter.abd_offset = |  | ||||||
| 		    new_offset % zfs_abd_chunk_size; |  | ||||||
| 		abd->abd_u.abd_scatter.abd_chunk_size = zfs_abd_chunk_size; |  | ||||||
| 
 |  | ||||||
| 		/* Copy the scatterlist starting at the correct offset */ |  | ||||||
| 		(void) memcpy(&abd->abd_u.abd_scatter.abd_chunks, |  | ||||||
| 		    &sabd->abd_u.abd_scatter.abd_chunks[new_offset / |  | ||||||
| 		    zfs_abd_chunk_size], |  | ||||||
| 		    chunkcnt * sizeof (void *)); |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (size == 0) | 	abd->abd_size = size; | ||||||
| 		abd->abd_size = sabd->abd_size - off; |  | ||||||
| 	else |  | ||||||
| 		abd->abd_size = size; |  | ||||||
| 	abd->abd_parent = sabd; | 	abd->abd_parent = sabd; | ||||||
| 	zfs_refcount_create(&abd->abd_children); | 	zfs_refcount_create(&abd->abd_children); | ||||||
| 	(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd); | 	(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd); | ||||||
| 
 |  | ||||||
| 	return (abd); | 	return (abd); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| abd_t * | abd_t * | ||||||
| abd_get_offset(abd_t *sabd, size_t off) | abd_get_offset(abd_t *sabd, size_t off) | ||||||
| { | { | ||||||
| 
 | 	size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0; | ||||||
| 	return (abd_get_offset_impl(sabd, off, 0)); | 	VERIFY3U(size, >, 0); | ||||||
|  | 	return (abd_get_offset_impl(sabd, off, size)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| abd_t * | abd_t * | ||||||
| abd_get_offset_size(abd_t *sabd, size_t off, size_t size) | abd_get_offset_size(abd_t *sabd, size_t off, size_t size) | ||||||
| { | { | ||||||
| 	ASSERT3U(off + size, <=, sabd->abd_size); | 	ASSERT3U(off + size, <=, sabd->abd_size); | ||||||
| 
 |  | ||||||
| 	return (abd_get_offset_impl(sabd, off, size)); | 	return (abd_get_offset_impl(sabd, off, size)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Allocate a linear ABD structure for buf. You must free this with abd_put() |  * Allocate a linear ABD structure for buf. You must free this with abd_put() | ||||||
|  * since the resulting ABD doesn't own its own buffer. |  * since the resulting ABD doesn't own its own buffer. | ||||||
| @ -519,32 +355,11 @@ abd_get_from_buf(void *buf, size_t size) | |||||||
| 	abd->abd_parent = NULL; | 	abd->abd_parent = NULL; | ||||||
| 	zfs_refcount_create(&abd->abd_children); | 	zfs_refcount_create(&abd->abd_children); | ||||||
| 
 | 
 | ||||||
| 	abd->abd_u.abd_linear.abd_buf = buf; | 	ABD_LINEAR_BUF(abd) = buf; | ||||||
| 
 | 
 | ||||||
| 	return (abd); | 	return (abd); | ||||||
| } | } | ||||||
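
A hedged usage sketch of the wrap/release pairing described above; the buffer and its size are illustrative:

	char raw[512];
	abd_t *wrapped = abd_get_from_buf(raw, sizeof (raw));
	abd_zero(wrapped, sizeof (raw));	/* zeroes raw[] in place */
	abd_put(wrapped);			/* frees the wrapper, not raw[] */
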
| 
 | 
 | ||||||
| /*
 |  | ||||||
|  * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not |  | ||||||
|  * free the underlying scatterlist or buffer. |  | ||||||
|  */ |  | ||||||
| void |  | ||||||
| abd_put(abd_t *abd) |  | ||||||
| { |  | ||||||
| 	if (abd == NULL) |  | ||||||
| 		return; |  | ||||||
| 	abd_verify(abd); |  | ||||||
| 	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); |  | ||||||
| 
 |  | ||||||
| 	if (abd->abd_parent != NULL) { |  | ||||||
| 		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children, |  | ||||||
| 		    abd->abd_size, abd); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	zfs_refcount_destroy(&abd->abd_children); |  | ||||||
| 	abd_free_struct(abd); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 | /*
 | ||||||
|  * Get the raw buffer associated with a linear ABD. |  * Get the raw buffer associated with a linear ABD. | ||||||
|  */ |  */ | ||||||
| @ -553,7 +368,7 @@ abd_to_buf(abd_t *abd) | |||||||
| { | { | ||||||
| 	ASSERT(abd_is_linear(abd)); | 	ASSERT(abd_is_linear(abd)); | ||||||
| 	abd_verify(abd); | 	abd_verify(abd); | ||||||
| 	return (abd->abd_u.abd_linear.abd_buf); | 	return (ABD_LINEAR_BUF(abd)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -574,7 +389,6 @@ abd_borrow_buf(abd_t *abd, size_t n) | |||||||
| 		buf = zio_buf_alloc(n); | 		buf = zio_buf_alloc(n); | ||||||
| 	} | 	} | ||||||
| 	(void) zfs_refcount_add_many(&abd->abd_children, n, buf); | 	(void) zfs_refcount_add_many(&abd->abd_children, n, buf); | ||||||
| 
 |  | ||||||
| 	return (buf); | 	return (buf); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -617,6 +431,31 @@ abd_return_buf_copy(abd_t *abd, void *buf, size_t n) | |||||||
| 	abd_return_buf(abd, buf, n); | 	abd_return_buf(abd, buf, n); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | void | ||||||
|  | abd_release_ownership_of_buf(abd_t *abd) | ||||||
|  | { | ||||||
|  | 	ASSERT(abd_is_linear(abd)); | ||||||
|  | 	ASSERT(abd->abd_flags & ABD_FLAG_OWNER); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * abd_free() needs to handle LINEAR_PAGE ABD's specially. | ||||||
|  | 	 * Since that flag does not survive the | ||||||
|  | 	 * abd_release_ownership_of_buf() -> abd_get_from_buf() -> | ||||||
|  | 	 * abd_take_ownership_of_buf() sequence, we don't allow releasing | ||||||
|  | 	 * these "linear but not zio_[data_]buf_alloc()'ed" ABD's. | ||||||
|  | 	 */ | ||||||
|  | 	ASSERT(!abd_is_linear_page(abd)); | ||||||
|  | 
 | ||||||
|  | 	abd_verify(abd); | ||||||
|  | 
 | ||||||
|  | 	abd->abd_flags &= ~ABD_FLAG_OWNER; | ||||||
|  | 	/* Disable this flag since we no longer own the data buffer */ | ||||||
|  | 	abd->abd_flags &= ~ABD_FLAG_META; | ||||||
|  | 
 | ||||||
|  | 	abd_update_linear_stats(abd, ABDSTAT_DECR); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Give this ABD ownership of the buffer that it's storing. Can only be used on |  * Give this ABD ownership of the buffer that it's storing. Can only be used on | ||||||
|  * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated |  * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated | ||||||
| @ -635,130 +474,7 @@ abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) | |||||||
| 		abd->abd_flags |= ABD_FLAG_META; | 		abd->abd_flags |= ABD_FLAG_META; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	ABDSTAT_BUMP(abdstat_linear_cnt); | 	abd_update_linear_stats(abd, ABDSTAT_INCR); | ||||||
| 	ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void |  | ||||||
| abd_release_ownership_of_buf(abd_t *abd) |  | ||||||
| { |  | ||||||
| 	ASSERT(abd_is_linear(abd)); |  | ||||||
| 	ASSERT(abd->abd_flags & ABD_FLAG_OWNER); |  | ||||||
| 	abd_verify(abd); |  | ||||||
| 
 |  | ||||||
| 	abd->abd_flags &= ~ABD_FLAG_OWNER; |  | ||||||
| 	/* Disable this flag since we no longer own the data buffer */ |  | ||||||
| 	abd->abd_flags &= ~ABD_FLAG_META; |  | ||||||
| 
 |  | ||||||
| 	ABDSTAT_BUMPDOWN(abdstat_linear_cnt); |  | ||||||
| 	ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| struct abd_iter { |  | ||||||
| 	abd_t		*iter_abd;	/* ABD being iterated through */ |  | ||||||
| 	size_t		iter_pos;	/* position (relative to abd_offset) */ |  | ||||||
| 	void		*iter_mapaddr;	/* addr corresponding to iter_pos */ |  | ||||||
| 	size_t		iter_mapsize;	/* length of data valid at mapaddr */ |  | ||||||
| }; |  | ||||||
| 
 |  | ||||||
| static inline size_t |  | ||||||
| abd_iter_scatter_chunk_offset(struct abd_iter *aiter) |  | ||||||
| { |  | ||||||
| 	ASSERT(!abd_is_linear(aiter->iter_abd)); |  | ||||||
| 	return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset + |  | ||||||
| 	    aiter->iter_pos) % zfs_abd_chunk_size); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static inline size_t |  | ||||||
| abd_iter_scatter_chunk_index(struct abd_iter *aiter) |  | ||||||
| { |  | ||||||
| 	ASSERT(!abd_is_linear(aiter->iter_abd)); |  | ||||||
| 	return ((aiter->iter_abd->abd_u.abd_scatter.abd_offset + |  | ||||||
| 	    aiter->iter_pos) / zfs_abd_chunk_size); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Initialize the abd_iter. |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| abd_iter_init(struct abd_iter *aiter, abd_t *abd) |  | ||||||
| { |  | ||||||
| 	abd_verify(abd); |  | ||||||
| 	aiter->iter_abd = abd; |  | ||||||
| 	aiter->iter_pos = 0; |  | ||||||
| 	aiter->iter_mapaddr = NULL; |  | ||||||
| 	aiter->iter_mapsize = 0; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Advance the iterator by a certain amount. Cannot be called when a chunk is |  | ||||||
|  * in use. This can be safely called when the aiter has already exhausted, in |  | ||||||
|  * which case this does nothing. |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| abd_iter_advance(struct abd_iter *aiter, size_t amount) |  | ||||||
| { |  | ||||||
| 	ASSERT3P(aiter->iter_mapaddr, ==, NULL); |  | ||||||
| 	ASSERT0(aiter->iter_mapsize); |  | ||||||
| 
 |  | ||||||
| 	/* There's nothing left to advance to, so do nothing */ |  | ||||||
| 	if (aiter->iter_pos == aiter->iter_abd->abd_size) |  | ||||||
| 		return; |  | ||||||
| 
 |  | ||||||
| 	aiter->iter_pos += amount; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Map the current chunk into aiter. This can be safely called when the aiter |  | ||||||
|  * has already exhausted, in which case this does nothing. |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| abd_iter_map(struct abd_iter *aiter) |  | ||||||
| { |  | ||||||
| 	void *paddr; |  | ||||||
| 	size_t offset = 0; |  | ||||||
| 
 |  | ||||||
| 	ASSERT3P(aiter->iter_mapaddr, ==, NULL); |  | ||||||
| 	ASSERT0(aiter->iter_mapsize); |  | ||||||
| 
 |  | ||||||
| 	/* Panic if someone has changed zfs_abd_chunk_size */ |  | ||||||
| 	IMPLY(!abd_is_linear(aiter->iter_abd), zfs_abd_chunk_size == |  | ||||||
| 	    aiter->iter_abd->abd_u.abd_scatter.abd_chunk_size); |  | ||||||
| 
 |  | ||||||
| 	/* There's nothing left to iterate over, so do nothing */ |  | ||||||
| 	if (aiter->iter_pos == aiter->iter_abd->abd_size) |  | ||||||
| 		return; |  | ||||||
| 
 |  | ||||||
| 	if (abd_is_linear(aiter->iter_abd)) { |  | ||||||
| 		offset = aiter->iter_pos; |  | ||||||
| 		aiter->iter_mapsize = aiter->iter_abd->abd_size - offset; |  | ||||||
| 		paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf; |  | ||||||
| 	} else { |  | ||||||
| 		size_t index = abd_iter_scatter_chunk_index(aiter); |  | ||||||
| 		offset = abd_iter_scatter_chunk_offset(aiter); |  | ||||||
| 		aiter->iter_mapsize = MIN(zfs_abd_chunk_size - offset, |  | ||||||
| 		    aiter->iter_abd->abd_size - aiter->iter_pos); |  | ||||||
| 		paddr = aiter->iter_abd->abd_u.abd_scatter.abd_chunks[index]; |  | ||||||
| 	} |  | ||||||
| 	aiter->iter_mapaddr = (char *)paddr + offset; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /*
 |  | ||||||
|  * Unmap the current chunk from aiter. This can be safely called when the aiter |  | ||||||
|  * has already exhausted, in which case this does nothing. |  | ||||||
|  */ |  | ||||||
| static void |  | ||||||
| abd_iter_unmap(struct abd_iter *aiter) |  | ||||||
| { |  | ||||||
| 	/* There's nothing left to unmap, so do nothing */ |  | ||||||
| 	if (aiter->iter_pos == aiter->iter_abd->abd_size) |  | ||||||
| 		return; |  | ||||||
| 
 |  | ||||||
| 	ASSERT3P(aiter->iter_mapaddr, !=, NULL); |  | ||||||
| 	ASSERT3U(aiter->iter_mapsize, >, 0); |  | ||||||
| 
 |  | ||||||
| 	aiter->iter_mapaddr = NULL; |  | ||||||
| 	aiter->iter_mapsize = 0; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int | int | ||||||
| @ -987,6 +703,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, | |||||||
| 	struct abd_iter caiters[3]; | 	struct abd_iter caiters[3]; | ||||||
| 	struct abd_iter daiter = {0}; | 	struct abd_iter daiter = {0}; | ||||||
| 	void *caddrs[3]; | 	void *caddrs[3]; | ||||||
|  | 	unsigned long flags = 0; | ||||||
| 
 | 
 | ||||||
| 	ASSERT3U(parity, <=, 3); | 	ASSERT3U(parity, <=, 3); | ||||||
| 
 | 
 | ||||||
| @ -998,7 +715,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, | |||||||
| 
 | 
 | ||||||
| 	ASSERT3S(dsize, >=, 0); | 	ASSERT3S(dsize, >=, 0); | ||||||
| 
 | 
 | ||||||
| 	critical_enter(); | 	abd_enter_critical(flags); | ||||||
| 	while (csize > 0) { | 	while (csize > 0) { | ||||||
| 		len = csize; | 		len = csize; | ||||||
| 
 | 
 | ||||||
| @ -1010,11 +727,14 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, | |||||||
| 			caddrs[i] = caiters[i].iter_mapaddr; | 			caddrs[i] = caiters[i].iter_mapaddr; | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| 		switch (parity) { | 		switch (parity) { | ||||||
| 			case 3: | 			case 3: | ||||||
| 				len = MIN(caiters[2].iter_mapsize, len); | 				len = MIN(caiters[2].iter_mapsize, len); | ||||||
|  | 				/* falls through */ | ||||||
| 			case 2: | 			case 2: | ||||||
| 				len = MIN(caiters[1].iter_mapsize, len); | 				len = MIN(caiters[1].iter_mapsize, len); | ||||||
|  | 				/* falls through */ | ||||||
| 			case 1: | 			case 1: | ||||||
| 				len = MIN(caiters[0].iter_mapsize, len); | 				len = MIN(caiters[0].iter_mapsize, len); | ||||||
| 		} | 		} | ||||||
| @ -1055,7 +775,7 @@ abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, | |||||||
| 		ASSERT3S(dsize, >=, 0); | 		ASSERT3S(dsize, >=, 0); | ||||||
| 		ASSERT3S(csize, >=, 0); | 		ASSERT3S(csize, >=, 0); | ||||||
| 	} | 	} | ||||||
| 	critical_exit(); | 	abd_exit_critical(flags); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
| @ -1080,6 +800,7 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, | |||||||
| 	struct abd_iter citers[3]; | 	struct abd_iter citers[3]; | ||||||
| 	struct abd_iter xiters[3]; | 	struct abd_iter xiters[3]; | ||||||
| 	void *caddrs[3], *xaddrs[3]; | 	void *caddrs[3], *xaddrs[3]; | ||||||
|  | 	unsigned long flags = 0; | ||||||
| 
 | 
 | ||||||
| 	ASSERT3U(parity, <=, 3); | 	ASSERT3U(parity, <=, 3); | ||||||
| 
 | 
 | ||||||
| @ -1088,7 +809,7 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, | |||||||
| 		abd_iter_init(&xiters[i], tabds[i]); | 		abd_iter_init(&xiters[i], tabds[i]); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	critical_enter(); | 	abd_enter_critical(flags); | ||||||
| 	while (tsize > 0) { | 	while (tsize > 0) { | ||||||
| 
 | 
 | ||||||
| 		for (i = 0; i < parity; i++) { | 		for (i = 0; i < parity; i++) { | ||||||
| @ -1103,9 +824,11 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, | |||||||
| 			case 3: | 			case 3: | ||||||
| 				len = MIN(xiters[2].iter_mapsize, len); | 				len = MIN(xiters[2].iter_mapsize, len); | ||||||
| 				len = MIN(citers[2].iter_mapsize, len); | 				len = MIN(citers[2].iter_mapsize, len); | ||||||
|  | 				/* falls through */ | ||||||
| 			case 2: | 			case 2: | ||||||
| 				len = MIN(xiters[1].iter_mapsize, len); | 				len = MIN(xiters[1].iter_mapsize, len); | ||||||
| 				len = MIN(citers[1].iter_mapsize, len); | 				len = MIN(citers[1].iter_mapsize, len); | ||||||
|  | 				/* falls through */ | ||||||
| 			case 1: | 			case 1: | ||||||
| 				len = MIN(xiters[0].iter_mapsize, len); | 				len = MIN(xiters[0].iter_mapsize, len); | ||||||
| 				len = MIN(citers[0].iter_mapsize, len); | 				len = MIN(citers[0].iter_mapsize, len); | ||||||
| @ -1130,5 +853,5 @@ abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, | |||||||
| 		tsize -= len; | 		tsize -= len; | ||||||
| 		ASSERT3S(tsize, >=, 0); | 		ASSERT3S(tsize, >=, 0); | ||||||
| 	} | 	} | ||||||
| 	critical_exit(); | 	abd_exit_critical(flags); | ||||||
| } | } | ||||||
| @ -1638,7 +1638,7 @@ vdev_indirect_splits_damage(indirect_vsd_t *iv, zio_t *zio) | |||||||
| 			if (ic->ic_data == NULL) | 			if (ic->ic_data == NULL) | ||||||
| 				continue; | 				continue; | ||||||
| 
 | 
 | ||||||
| 			abd_zero(ic->ic_data, ic->ic_data->abd_size); | 			abd_zero(ic->ic_data, abd_get_size(ic->ic_data)); | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		iv->iv_attempts_max *= 2; | 		iv->iv_attempts_max *= 2; | ||||||
|  | |||||||