mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 09:54:59 +03:00 
			
		
		
		
	DLPX-44812 integrate EP-220 large memory scalability
This commit is contained in:
		
							parent
							
								
									616fa7c02b
								
							
						
					
					
						commit
						a6255b7fce
					
				| @ -23,6 +23,8 @@ | ||||
|  * Copyright (C) 2016 Gvozden Nešković. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef _ABD_READY_ | ||||
| 
 | ||||
| #include <sys/zfs_context.h> | ||||
| #include <sys/time.h> | ||||
| #include <sys/wait.h> | ||||
| @ -225,3 +227,4 @@ run_raidz_benchmark(void) | ||||
| 
 | ||||
| 	bench_fini_raidz_maps(); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @ -32,6 +32,16 @@ | ||||
| #include <sys/vdev_raidz_impl.h> | ||||
| #include <assert.h> | ||||
| #include <stdio.h> | ||||
| 
 | ||||
| #ifndef _ABD_READY_ | ||||
/*
 * Placeholder entry point compiled when _ABD_READY_ is not defined:
 * performs no work and terminates the program with status 0.
 */
/* ARGSUSED */
int
main(int argc, char **argv)
{
	return (0);
}
| 
 | ||||
| #else | ||||
| 
 | ||||
| #include "raidz_test.h" | ||||
| 
 | ||||
| static int *rand_data; | ||||
| @ -782,3 +792,4 @@ main(int argc, char **argv) | ||||
| 
 | ||||
| 	return (err); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @ -59,6 +59,7 @@ | ||||
| #include <sys/arc.h> | ||||
| #include <sys/ddt.h> | ||||
| #include <sys/zfeature.h> | ||||
| #include <sys/abd.h> | ||||
| #include <zfs_comutil.h> | ||||
| #include <libzfs.h> | ||||
| 
 | ||||
| @ -2464,7 +2465,7 @@ zdb_blkptr_done(zio_t *zio) | ||||
| 	zdb_cb_t *zcb = zio->io_private; | ||||
| 	zbookmark_phys_t *zb = &zio->io_bookmark; | ||||
| 
 | ||||
| 	zio_data_buf_free(zio->io_data, zio->io_size); | ||||
| 	abd_free(zio->io_abd); | ||||
| 
 | ||||
| 	mutex_enter(&spa->spa_scrub_lock); | ||||
| 	spa->spa_scrub_inflight--; | ||||
| @ -2530,7 +2531,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, | ||||
| 	if (!BP_IS_EMBEDDED(bp) && | ||||
| 	    (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) { | ||||
| 		size_t size = BP_GET_PSIZE(bp); | ||||
| 		void *data = zio_data_buf_alloc(size); | ||||
| 		abd_t *abd = abd_alloc(size, B_FALSE); | ||||
| 		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; | ||||
| 
 | ||||
| 		/* If it's an intent log block, failure is expected. */ | ||||
| @ -2543,7 +2544,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, | ||||
| 		spa->spa_scrub_inflight++; | ||||
| 		mutex_exit(&spa->spa_scrub_lock); | ||||
| 
 | ||||
| 		zio_nowait(zio_read(NULL, spa, bp, data, size, | ||||
| 		zio_nowait(zio_read(NULL, spa, bp, abd, size, | ||||
| 		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); | ||||
| 	} | ||||
| 
 | ||||
| @ -3321,6 +3322,13 @@ name: | ||||
| 	return (NULL); | ||||
| } | ||||
| 
 | ||||
/*
 * Adapter that gives random_get_pseudo_bytes() the (buf, len, private)
 * callback shape expected by abd_iterate_func(): fills the 'len' bytes at
 * 'buf' and hands back random_get_pseudo_bytes()'s return value unchanged.
 * The third argument is ignored.
 */
/* ARGSUSED */
static int
random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
{
	int error;

	error = random_get_pseudo_bytes(buf, len);
	return (error);
}
| 
 | ||||
| /*
 | ||||
|  * Read a block from a pool and print it out.  The syntax of the | ||||
|  * block descriptor is: | ||||
| @ -3352,7 +3360,8 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; | ||||
| 	zio_t *zio; | ||||
| 	vdev_t *vd; | ||||
| 	void *pbuf, *lbuf, *buf; | ||||
| 	abd_t *pabd; | ||||
| 	void *lbuf, *buf; | ||||
| 	char *s, *p, *dup, *vdev, *flagstr; | ||||
| 	int i, error; | ||||
| 
 | ||||
| @ -3425,8 +3434,7 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 	psize = size; | ||||
| 	lsize = size; | ||||
| 
 | ||||
| 	/* Some 4K native devices require 4K buffer alignment */ | ||||
| 	pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, PAGESIZE, UMEM_NOFAIL); | ||||
| 	pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE); | ||||
| 	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); | ||||
| 
 | ||||
| 	BP_ZERO(bp); | ||||
| @ -3454,15 +3462,15 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 		/*
 | ||||
| 		 * Treat this as a normal block read. | ||||
| 		 */ | ||||
| 		zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL, | ||||
| 		zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL, | ||||
| 		    ZIO_PRIORITY_SYNC_READ, | ||||
| 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); | ||||
| 	} else { | ||||
| 		/*
 | ||||
| 		 * Treat this as a vdev child I/O. | ||||
| 		 */ | ||||
| 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize, | ||||
| 		    ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, | ||||
| 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd, | ||||
| 		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, | ||||
| 		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | | ||||
| 		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | | ||||
| 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL)); | ||||
| @ -3485,13 +3493,13 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); | ||||
| 		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); | ||||
| 
 | ||||
| 		bcopy(pbuf, pbuf2, psize); | ||||
| 		abd_copy_to_buf(pbuf2, pabd, psize); | ||||
| 
 | ||||
| 		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize, | ||||
| 		    SPA_MAXBLOCKSIZE - psize) == 0); | ||||
| 		VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize, | ||||
| 		    random_get_pseudo_bytes_cb, NULL)); | ||||
| 
 | ||||
| 		VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, | ||||
| 		    SPA_MAXBLOCKSIZE - psize) == 0); | ||||
| 		VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, | ||||
| 		    SPA_MAXBLOCKSIZE - psize)); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * XXX - On the one hand, with SPA_MAXBLOCKSIZE at 16MB, | ||||
| @ -3506,10 +3514,10 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 				    "Trying %05llx -> %05llx (%s)\n", | ||||
| 				    (u_longlong_t)psize, (u_longlong_t)lsize, | ||||
| 				    zio_compress_table[c].ci_name); | ||||
| 				if (zio_decompress_data(c, pbuf, lbuf, | ||||
| 				    psize, lsize) == 0 && | ||||
| 				    zio_decompress_data(c, pbuf2, lbuf2, | ||||
| 				    psize, lsize) == 0 && | ||||
| 				if (zio_decompress_data(c, pabd, | ||||
| 				    lbuf, psize, lsize) == 0 && | ||||
| 				    zio_decompress_data_buf(c, pbuf2, | ||||
| 				    lbuf2, psize, lsize) == 0 && | ||||
| 				    bcmp(lbuf, lbuf2, lsize) == 0) | ||||
| 					break; | ||||
| 			} | ||||
| @ -3527,7 +3535,7 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 		buf = lbuf; | ||||
| 		size = lsize; | ||||
| 	} else { | ||||
| 		buf = pbuf; | ||||
| 		buf = abd_to_buf(pabd); | ||||
| 		size = psize; | ||||
| 	} | ||||
| 
 | ||||
| @ -3545,7 +3553,7 @@ zdb_read_block(char *thing, spa_t *spa) | ||||
| 		zdb_dump_block(thing, buf, size, flags); | ||||
| 
 | ||||
| out: | ||||
| 	umem_free(pbuf, SPA_MAXBLOCKSIZE); | ||||
| 	abd_free(pabd); | ||||
| 	umem_free(lbuf, SPA_MAXBLOCKSIZE); | ||||
| 	free(dup); | ||||
| } | ||||
|  | ||||
| @ -25,7 +25,7 @@ | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Copyright (c) 2013, 2014 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2013, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
| @ -42,6 +42,7 @@ | ||||
| #include <sys/resource.h> | ||||
| #include <sys/zil.h> | ||||
| #include <sys/zil_impl.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| extern uint8_t dump_opt[256]; | ||||
| 
 | ||||
| @ -119,14 +120,30 @@ zil_prt_rec_rename(zilog_t *zilog, int txtype, lr_rename_t *lr) | ||||
| 	(void) printf("%ssrc %s tgt %s\n", prefix, snm, tnm); | ||||
| } | ||||
| 
 | ||||
/*
 * abd_iterate_func() callback used by zil_prt_rec_write(): print the
 * 'len' bytes starting at 'data' to stdout.  A printable byte is written
 * as the character followed by a space; any other byte is written as
 * uppercase hex padded to two columns.  The third argument is ignored.
 * Always returns 0.
 *
 * The bytes are deliberately read as unsigned char.  Passing a negative
 * plain char to isprint() is undefined behavior, and a sign-extended
 * value formatted with %X prints as "FFFFFFxx" rather than two digits.
 */
/* ARGSUSED */
static int
zil_prt_rec_write_cb(void *data, size_t len, void *unused)
{
	const unsigned char *cdata = data;
	size_t i;

	for (i = 0; i < len; i++) {
		if (isprint(cdata[i]))
			(void) printf("%c ", cdata[i]);
		else
			(void) printf("%2X", (unsigned int)cdata[i]);
	}
	return (0);
}
| 
 | ||||
| /* ARGSUSED */ | ||||
| static void | ||||
| zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) | ||||
| { | ||||
| 	char *data, *dlimit; | ||||
| 	abd_t *data; | ||||
| 	blkptr_t *bp = &lr->lr_blkptr; | ||||
| 	zbookmark_phys_t zb; | ||||
| 	char *buf; | ||||
| 	int verbose = MAX(dump_opt['d'], dump_opt['i']); | ||||
| 	int error; | ||||
| 
 | ||||
| @ -137,9 +154,6 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) | ||||
| 	if (txtype == TX_WRITE2 || verbose < 5) | ||||
| 		return; | ||||
| 
 | ||||
| 	if ((buf = malloc(SPA_MAXBLOCKSIZE)) == NULL) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { | ||||
| 		(void) printf("%shas blkptr, %s\n", prefix, | ||||
| 		    !BP_IS_HOLE(bp) && | ||||
| @ -150,43 +164,38 @@ zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr) | ||||
| 		if (BP_IS_HOLE(bp)) { | ||||
| 			(void) printf("\t\t\tLSIZE 0x%llx\n", | ||||
| 			    (u_longlong_t)BP_GET_LSIZE(bp)); | ||||
| 			bzero(buf, SPA_MAXBLOCKSIZE); | ||||
| 			(void) printf("%s<hole>\n", prefix); | ||||
| 			goto exit; | ||||
| 			return; | ||||
| 		} | ||||
| 		if (bp->blk_birth < zilog->zl_header->zh_claim_txg) { | ||||
| 			(void) printf("%s<block already committed>\n", prefix); | ||||
| 			goto exit; | ||||
| 			return; | ||||
| 		} | ||||
| 
 | ||||
| 		SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), | ||||
| 		    lr->lr_foid, ZB_ZIL_LEVEL, | ||||
| 		    lr->lr_offset / BP_GET_LSIZE(bp)); | ||||
| 
 | ||||
| 		data = abd_alloc(BP_GET_LSIZE(bp), B_FALSE); | ||||
| 		error = zio_wait(zio_read(NULL, zilog->zl_spa, | ||||
| 		    bp, buf, BP_GET_LSIZE(bp), NULL, NULL, | ||||
| 		    bp, data, BP_GET_LSIZE(bp), NULL, NULL, | ||||
| 		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb)); | ||||
| 		if (error) | ||||
| 			goto exit; | ||||
| 		data = buf; | ||||
| 			goto out; | ||||
| 	} else { | ||||
| 		data = (char *)(lr + 1); | ||||
| 		/* data is stored after the end of the lr_write record */ | ||||
| 		data = abd_alloc(lr->lr_length, B_FALSE); | ||||
| 		abd_copy_from_buf(data, lr + 1, lr->lr_length); | ||||
| 	} | ||||
| 
 | ||||
| 	dlimit = data + MIN(lr->lr_length, | ||||
| 	    (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)); | ||||
| 
 | ||||
| 	(void) printf("%s", prefix); | ||||
| 	while (data < dlimit) { | ||||
| 		if (isprint(*data)) | ||||
| 			(void) printf("%c ", *data); | ||||
| 		else | ||||
| 			(void) printf("%2hhX", *data); | ||||
| 		data++; | ||||
| 	} | ||||
| 	(void) abd_iterate_func(data, | ||||
| 	    0, MIN(lr->lr_length, (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE)), | ||||
| 	    zil_prt_rec_write_cb, NULL); | ||||
| 	(void) printf("\n"); | ||||
| exit: | ||||
| 	free(buf); | ||||
| 
 | ||||
| out: | ||||
| 	abd_free(data); | ||||
| } | ||||
| 
 | ||||
| /* ARGSUSED */ | ||||
|  | ||||
| @ -114,6 +114,7 @@ | ||||
| #include <sys/refcount.h> | ||||
| #include <sys/zfeature.h> | ||||
| #include <sys/dsl_userhold.h> | ||||
| #include <sys/abd.h> | ||||
| #include <stdio.h> | ||||
| #include <stdio_ext.h> | ||||
| #include <stdlib.h> | ||||
| @ -193,6 +194,7 @@ extern uint64_t metaslab_gang_bang; | ||||
| extern uint64_t metaslab_df_alloc_threshold; | ||||
| extern int metaslab_preload_limit; | ||||
| extern boolean_t zfs_compressed_arc_enabled; | ||||
| extern int  zfs_abd_scatter_enabled; | ||||
| 
 | ||||
| static ztest_shared_opts_t *ztest_shared_opts; | ||||
| static ztest_shared_opts_t ztest_opts; | ||||
| @ -5444,7 +5446,7 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) | ||||
| 	enum zio_checksum checksum = spa_dedup_checksum(spa); | ||||
| 	dmu_buf_t *db; | ||||
| 	dmu_tx_t *tx; | ||||
| 	void *buf; | ||||
| 	abd_t *abd; | ||||
| 	blkptr_t blk; | ||||
| 	int copies = 2 * ZIO_DEDUPDITTO_MIN; | ||||
| 	int i; | ||||
| @ -5525,14 +5527,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) | ||||
| 	 * Damage the block.  Dedup-ditto will save us when we read it later. | ||||
| 	 */ | ||||
| 	psize = BP_GET_PSIZE(&blk); | ||||
| 	buf = zio_buf_alloc(psize); | ||||
| 	ztest_pattern_set(buf, psize, ~pattern); | ||||
| 	abd = abd_alloc_linear(psize, B_TRUE); | ||||
| 	ztest_pattern_set(abd_to_buf(abd), psize, ~pattern); | ||||
| 
 | ||||
| 	(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, | ||||
| 	    buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, | ||||
| 	    abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, | ||||
| 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); | ||||
| 
 | ||||
| 	zio_buf_free(buf, psize); | ||||
| 	abd_free(abd); | ||||
| 
 | ||||
| 	(void) rw_unlock(&ztest_name_lock); | ||||
| 	umem_free(od, sizeof (ztest_od_t)); | ||||
| @ -5965,6 +5967,12 @@ ztest_resume_thread(void *arg) | ||||
| 		 */ | ||||
| 		if (ztest_random(10) == 0) | ||||
| 			zfs_compressed_arc_enabled = ztest_random(2); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Periodically change the zfs_abd_scatter_enabled setting. | ||||
| 		 */ | ||||
| 		if (ztest_random(10) == 0) | ||||
| 			zfs_abd_scatter_enabled = ztest_random(2); | ||||
| 	} | ||||
| 
 | ||||
| 	thread_exit(); | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| SUBDIRS = fm fs crypto sysevent | ||||
| 
 | ||||
| COMMON_H = \
 | ||||
| 	$(top_srcdir)/include/sys/abd.h \
 | ||||
| 	$(top_srcdir)/include/sys/arc.h \
 | ||||
| 	$(top_srcdir)/include/sys/arc_impl.h \
 | ||||
| 	$(top_srcdir)/include/sys/avl.h \
 | ||||
|  | ||||
							
								
								
									
										160
									
								
								include/sys/abd.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										160
									
								
								include/sys/abd.h
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,160 @@ | ||||
| /*
 | ||||
|  * CDDL HEADER START | ||||
|  * | ||||
|  * The contents of this file are subject to the terms of the | ||||
|  * Common Development and Distribution License (the "License"). | ||||
|  * You may not use this file except in compliance with the License. | ||||
|  * | ||||
|  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | ||||
|  * or http://www.opensolaris.org/os/licensing.
 | ||||
|  * See the License for the specific language governing permissions | ||||
|  * and limitations under the License. | ||||
|  * | ||||
|  * When distributing Covered Code, include this CDDL HEADER in each | ||||
|  * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | ||||
|  * If applicable, add the following below this CDDL HEADER, with the | ||||
|  * fields enclosed by brackets "[]" replaced with your own identifying | ||||
|  * information: Portions Copyright [yyyy] [name of copyright owner] | ||||
|  * | ||||
|  * CDDL HEADER END | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2014 by Chunwei Chen. All rights reserved. | ||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #ifndef _ABD_H | ||||
| #define	_ABD_H | ||||
| 
 | ||||
| #include <sys/isa_defs.h> | ||||
| #include <sys/int_types.h> | ||||
| #include <sys/debug.h> | ||||
| #include <sys/refcount.h> | ||||
| #ifdef _KERNEL | ||||
| #include <linux/mm.h> | ||||
| #include <sys/uio.h> | ||||
| #endif | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
/*
 * Flag bits kept in an ABD's abd_flags field.  Every constant is a
 * distinct single bit, so a flag is tested with a mask (as
 * abd_is_linear() does for ABD_FLAG_LINEAR).
 */
typedef enum abd_flags {
	ABD_FLAG_LINEAR	= 0x1,	/* buffer is linear rather than scattered */
	ABD_FLAG_OWNER	= 0x2,	/* the ABD owns its data buffers */
	ABD_FLAG_META	= 0x4	/* the ABD represents FS metadata */
} abd_flags_t;
| 
 | ||||
/*
 * ABD descriptor.  Carries the flag bits, the size, a parent pointer and
 * a refcount of children, plus a union (abd_u) that holds one of two data
 * descriptions:
 *   - abd_scatter: an offset, a chunk size and a trailing array of page
 *     pointers;
 *   - abd_linear:  a single buffer pointer.
 *
 * NOTE(review): the valid union member is presumably selected by
 * ABD_FLAG_LINEAR in abd_flags (see abd_is_linear()) -- confirm against
 * the implementation.
 * NOTE(review): abd_parent/abd_children look like the bookkeeping for
 * ABDs derived from another ABD (e.g. via abd_get_offset()) -- confirm.
 */
| typedef struct abd { | ||||
| 	abd_flags_t	abd_flags; | ||||
| 	uint_t		abd_size;	/* excludes scattered abd_offset */ | ||||
| 	struct abd	*abd_parent; | ||||
| 	refcount_t	abd_children; | ||||
| 	union { | ||||
| 		struct abd_scatter { | ||||
| 			uint_t		abd_offset; | ||||
| 			uint_t		abd_chunk_size; | ||||
| 			struct page	*abd_chunks[]; | ||||
| 		} abd_scatter; | ||||
| 		struct abd_linear { | ||||
| 			void		*abd_buf; | ||||
| 		} abd_linear; | ||||
| 	} abd_u; | ||||
| } abd_t; | ||||
| 
 | ||||
| typedef int abd_iter_func_t(void *buf, size_t len, void *private); | ||||
| typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private); | ||||
| 
 | ||||
| extern int zfs_abd_scatter_enabled; | ||||
| 
 | ||||
| static inline boolean_t | ||||
| abd_is_linear(abd_t *abd) | ||||
| { | ||||
| 	return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Allocations and deallocations | ||||
|  */ | ||||
| 
 | ||||
| abd_t *abd_alloc(size_t, boolean_t); | ||||
| abd_t *abd_alloc_linear(size_t, boolean_t); | ||||
| abd_t *abd_alloc_for_io(size_t, boolean_t); | ||||
| abd_t *abd_alloc_sametype(abd_t *, size_t); | ||||
| void abd_free(abd_t *); | ||||
| abd_t *abd_get_offset(abd_t *, size_t); | ||||
| abd_t *abd_get_from_buf(void *, size_t); | ||||
| void abd_put(abd_t *); | ||||
| 
 | ||||
| /*
 | ||||
|  * Conversion to and from a normal buffer | ||||
|  */ | ||||
| 
 | ||||
| void *abd_to_buf(abd_t *); | ||||
| void *abd_borrow_buf(abd_t *, size_t); | ||||
| void *abd_borrow_buf_copy(abd_t *, size_t); | ||||
| void abd_return_buf(abd_t *, void *, size_t); | ||||
| void abd_return_buf_copy(abd_t *, void *, size_t); | ||||
| void abd_take_ownership_of_buf(abd_t *, boolean_t); | ||||
| void abd_release_ownership_of_buf(abd_t *); | ||||
| 
 | ||||
| /*
 | ||||
|  * ABD operations | ||||
|  */ | ||||
| 
 | ||||
| int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *); | ||||
| int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t, | ||||
|     abd_iter_func2_t *, void *); | ||||
| void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t); | ||||
| void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t); | ||||
| void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t); | ||||
| int abd_cmp(abd_t *, abd_t *); | ||||
| int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t); | ||||
| void abd_zero_off(abd_t *, size_t, size_t); | ||||
| 
 | ||||
| /*
 | ||||
|  * Wrappers for calls with offsets of 0 | ||||
|  */ | ||||
| 
 | ||||
| static inline void | ||||
| abd_copy(abd_t *dabd, abd_t *sabd, size_t size) | ||||
| { | ||||
| 	abd_copy_off(dabd, sabd, 0, 0, size); | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| abd_copy_from_buf(abd_t *abd, void *buf, size_t size) | ||||
| { | ||||
| 	abd_copy_from_buf_off(abd, buf, 0, size); | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| abd_copy_to_buf(void* buf, abd_t *abd, size_t size) | ||||
| { | ||||
| 	abd_copy_to_buf_off(buf, abd, 0, size); | ||||
| } | ||||
| 
 | ||||
| static inline int | ||||
| abd_cmp_buf(abd_t *abd, void *buf, size_t size) | ||||
| { | ||||
| 	return (abd_cmp_buf_off(abd, buf, 0, size)); | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| abd_zero(abd_t *abd, size_t size) | ||||
| { | ||||
| 	abd_zero_off(abd, 0, size); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Module lifecycle | ||||
|  */ | ||||
| 
 | ||||
| void abd_init(void); | ||||
| void abd_fini(void); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif	/* _ABD_H */ | ||||
| @ -166,7 +166,7 @@ typedef struct l1arc_buf_hdr { | ||||
| 	refcount_t		b_refcnt; | ||||
| 
 | ||||
| 	arc_callback_t		*b_acb; | ||||
| 	void			*b_pdata; | ||||
| 	abd_t			*b_pabd; | ||||
| } l1arc_buf_hdr_t; | ||||
| 
 | ||||
| typedef struct l2arc_dev { | ||||
|  | ||||
| @ -20,6 +20,7 @@ | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. | ||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #ifndef _SYS_DDT_H | ||||
| @ -35,6 +36,8 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| struct abd; | ||||
| 
 | ||||
| /*
 | ||||
|  * On-disk DDT formats, in the desired search order (newest version first). | ||||
|  */ | ||||
| @ -108,7 +111,7 @@ struct ddt_entry { | ||||
| 	ddt_key_t	dde_key; | ||||
| 	ddt_phys_t	dde_phys[DDT_PHYS_TYPES]; | ||||
| 	zio_t		*dde_lead_zio[DDT_PHYS_TYPES]; | ||||
| 	void		*dde_repair_data; | ||||
| 	struct abd	*dde_repair_abd; | ||||
| 	enum ddt_type	dde_type; | ||||
| 	enum ddt_class	dde_class; | ||||
| 	uint8_t		dde_loading; | ||||
|  | ||||
| @ -416,15 +416,17 @@ _NOTE(CONSTCOND) } while (0) | ||||
| 
 | ||||
| #define	BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill) | ||||
| 
 | ||||
| #define	BP_IS_METADATA(bp)	\ | ||||
| 	(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) | ||||
| 
 | ||||
| #define	BP_GET_ASIZE(bp)	\ | ||||
| 	(BP_IS_EMBEDDED(bp) ? 0 : \ | ||||
| 	DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ | ||||
| 	DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ | ||||
| 	DVA_GET_ASIZE(&(bp)->blk_dva[2])) | ||||
| 
 | ||||
| #define	BP_GET_UCSIZE(bp) \ | ||||
| 	((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \ | ||||
| 	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) | ||||
| #define	BP_GET_UCSIZE(bp)	\ | ||||
| 	(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) | ||||
| 
 | ||||
| #define	BP_GET_NDVAS(bp)	\ | ||||
| 	(BP_IS_EMBEDDED(bp) ? 0 : \ | ||||
| @ -569,8 +571,7 @@ _NOTE(CONSTCOND) } while (0) | ||||
| } | ||||
| 
 | ||||
| #define	BP_GET_BUFC_TYPE(bp)						\ | ||||
| 	(((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \ | ||||
| 	ARC_BUFC_METADATA : ARC_BUFC_DATA) | ||||
| 	(BP_IS_METADATA(bp) ? ARC_BUFC_METADATA : ARC_BUFC_DATA) | ||||
| 
 | ||||
| typedef enum spa_import_type { | ||||
| 	SPA_IMPORT_EXISTING, | ||||
|  | ||||
| @ -53,6 +53,7 @@ extern "C" { | ||||
| typedef struct vdev_queue vdev_queue_t; | ||||
| typedef struct vdev_cache vdev_cache_t; | ||||
| typedef struct vdev_cache_entry vdev_cache_entry_t; | ||||
| struct abd; | ||||
| 
 | ||||
| extern int zfs_vdev_queue_depth_pct; | ||||
| extern uint32_t zfs_vdev_async_write_max_active; | ||||
| @ -87,7 +88,7 @@ typedef const struct vdev_ops { | ||||
|  * Virtual device properties | ||||
|  */ | ||||
| struct vdev_cache_entry { | ||||
| 	char		*ve_data; | ||||
| 	struct abd	*ve_abd; | ||||
| 	uint64_t	ve_offset; | ||||
| 	clock_t		ve_lastused; | ||||
| 	avl_node_t	ve_offset_node; | ||||
|  | ||||
| @ -28,6 +28,7 @@ | ||||
| #include <sys/types.h> | ||||
| #include <sys/debug.h> | ||||
| #include <sys/kstat.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| #ifdef  __cplusplus | ||||
| extern "C" { | ||||
| @ -104,7 +105,7 @@ typedef struct raidz_col { | ||||
| 	size_t rc_devidx;		/* child device index for I/O */ | ||||
| 	size_t rc_offset;		/* device offset */ | ||||
| 	size_t rc_size;			/* I/O size */ | ||||
| 	void *rc_data;			/* I/O data */ | ||||
| 	abd_t *rc_abd;			/* I/O data */ | ||||
| 	void *rc_gdata;			/* used to store the "good" version */ | ||||
| 	int rc_error;			/* I/O error for this device */ | ||||
| 	unsigned int rc_tried;		/* Did we attempt this I/O column? */ | ||||
| @ -121,7 +122,7 @@ typedef struct raidz_map { | ||||
| 	size_t rm_firstdatacol;		/* First data column/parity count */ | ||||
| 	size_t rm_nskip;		/* Skipped sectors for padding */ | ||||
| 	size_t rm_skipstart;		/* Column index of padding start */ | ||||
| 	void *rm_datacopy;		/* rm_asize-buffer of copied data */ | ||||
| 	abd_t *rm_abd_copy;		/* rm_asize-buffer of copied data */ | ||||
| 	size_t rm_reports;		/* # of referencing checksum reports */ | ||||
| 	unsigned int rm_freed;		/* map no longer has referencing ZIO */ | ||||
| 	unsigned int rm_ecksuminjected;	/* checksum error was injected */ | ||||
|  | ||||
| @ -301,6 +301,7 @@ typedef void zio_cksum_free_f(void *cbdata, size_t size); | ||||
| 
 | ||||
| struct zio_bad_cksum;				/* defined in zio_checksum.h */ | ||||
| struct dnode_phys; | ||||
| struct abd; | ||||
| 
 | ||||
| struct zio_cksum_report { | ||||
| 	struct zio_cksum_report *zcr_next; | ||||
| @ -333,12 +334,12 @@ typedef struct zio_gang_node { | ||||
| } zio_gang_node_t; | ||||
| 
 | ||||
| typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp, | ||||
|     zio_gang_node_t *gn, void *data); | ||||
|     zio_gang_node_t *gn, struct abd *data, uint64_t offset); | ||||
| 
 | ||||
| typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size); | ||||
| typedef void zio_transform_func_t(zio_t *zio, struct abd *data, uint64_t size); | ||||
| 
 | ||||
| typedef struct zio_transform { | ||||
| 	void			*zt_orig_data; | ||||
| 	struct abd		*zt_orig_abd; | ||||
| 	uint64_t		zt_orig_size; | ||||
| 	uint64_t		zt_bufsize; | ||||
| 	zio_transform_func_t	*zt_transform; | ||||
| @ -396,8 +397,8 @@ struct zio { | ||||
| 	uint64_t	io_lsize; | ||||
| 
 | ||||
| 	/* Data represented by this I/O */ | ||||
| 	void		*io_data; | ||||
| 	void		*io_orig_data; | ||||
| 	struct abd	*io_abd; | ||||
| 	struct abd	*io_orig_abd; | ||||
| 	uint64_t	io_size; | ||||
| 	uint64_t	io_orig_size; | ||||
| 
 | ||||
| @ -455,19 +456,19 @@ extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, | ||||
| extern zio_t *zio_root(spa_t *spa, | ||||
|     zio_done_func_t *done, void *private, enum zio_flag flags); | ||||
| 
 | ||||
| extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data, | ||||
|     uint64_t lsize, zio_done_func_t *done, void *private, | ||||
| extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, | ||||
|     struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private, | ||||
|     zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb); | ||||
| 
 | ||||
| extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, | ||||
|     void *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, | ||||
|     struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp, | ||||
|     zio_done_func_t *ready, zio_done_func_t *children_ready, | ||||
|     zio_done_func_t *physdone, zio_done_func_t *done, | ||||
|     void *private, zio_priority_t priority, enum zio_flag flags, | ||||
|     const zbookmark_phys_t *zb); | ||||
| 
 | ||||
| extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, | ||||
|     void *data, uint64_t size, zio_done_func_t *done, void *private, | ||||
|     struct abd *data, uint64_t size, zio_done_func_t *done, void *private, | ||||
|     zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb); | ||||
| 
 | ||||
| extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies, | ||||
| @ -483,12 +484,12 @@ extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, | ||||
|     zio_done_func_t *done, void *private, enum zio_flag flags); | ||||
| 
 | ||||
| extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, | ||||
|     uint64_t size, void *data, int checksum, | ||||
|     uint64_t size, struct abd *data, int checksum, | ||||
|     zio_done_func_t *done, void *private, zio_priority_t priority, | ||||
|     enum zio_flag flags, boolean_t labels); | ||||
| 
 | ||||
| extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, | ||||
|     uint64_t size, void *data, int checksum, | ||||
|     uint64_t size, struct abd *data, int checksum, | ||||
|     zio_done_func_t *done, void *private, zio_priority_t priority, | ||||
|     enum zio_flag flags, boolean_t labels); | ||||
| 
 | ||||
| @ -517,21 +518,20 @@ extern void *zio_buf_alloc(size_t size); | ||||
| extern void zio_buf_free(void *buf, size_t size); | ||||
| extern void *zio_data_buf_alloc(size_t size); | ||||
| extern void zio_data_buf_free(void *buf, size_t size); | ||||
| extern void *zio_buf_alloc_flags(size_t size, int flags); | ||||
| 
 | ||||
| extern void zio_push_transform(zio_t *zio, void *data, uint64_t size, | ||||
| extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size, | ||||
|     uint64_t bufsize, zio_transform_func_t *transform); | ||||
| extern void zio_pop_transforms(zio_t *zio); | ||||
| 
 | ||||
| extern void zio_resubmit_stage_async(void *); | ||||
| 
 | ||||
| extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd, | ||||
|     uint64_t offset, void *data, uint64_t size, int type, | ||||
|     uint64_t offset, struct abd *data, uint64_t size, int type, | ||||
|     zio_priority_t priority, enum zio_flag flags, | ||||
|     zio_done_func_t *done, void *private); | ||||
| 
 | ||||
| extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, | ||||
|     void *data, uint64_t size, int type, zio_priority_t priority, | ||||
|     struct abd *data, uint64_t size, int type, zio_priority_t priority, | ||||
|     enum zio_flag flags, zio_done_func_t *done, void *private); | ||||
| 
 | ||||
| extern void zio_vdev_io_bypass(zio_t *zio); | ||||
|  | ||||
| @ -20,7 +20,7 @@ | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | ||||
|  * Copyright (c) 2014, 2015 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2014, 2016 by Delphix. All rights reserved. | ||||
|  * Copyright Saso Kiselkov 2013, All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| @ -34,12 +34,12 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| struct abd; | ||||
| 
 | ||||
| /*
 | ||||
|  * Signature for checksum functions. | ||||
|  */ | ||||
| typedef void zio_checksum_func_t(const void *, uint64_t, const void *, | ||||
|     zio_cksum_t *); | ||||
| typedef void zio_checksum_t(const void *data, uint64_t size, | ||||
| typedef void zio_checksum_t(struct abd *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp); | ||||
| typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt); | ||||
| typedef void zio_checksum_tmpl_free_t(void *ctx_template); | ||||
| @ -83,28 +83,28 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS]; | ||||
| /*
 | ||||
|  * Checksum routines. | ||||
|  */ | ||||
| extern zio_checksum_t zio_checksum_SHA256; | ||||
| extern zio_checksum_t zio_checksum_SHA512_native; | ||||
| extern zio_checksum_t zio_checksum_SHA512_byteswap; | ||||
| extern zio_checksum_t abd_checksum_SHA256; | ||||
| extern zio_checksum_t abd_checksum_SHA512_native; | ||||
| extern zio_checksum_t abd_checksum_SHA512_byteswap; | ||||
| 
 | ||||
| /* Skein */ | ||||
| extern zio_checksum_t zio_checksum_skein_native; | ||||
| extern zio_checksum_t zio_checksum_skein_byteswap; | ||||
| extern zio_checksum_tmpl_init_t zio_checksum_skein_tmpl_init; | ||||
| extern zio_checksum_tmpl_free_t zio_checksum_skein_tmpl_free; | ||||
| extern zio_checksum_t abd_checksum_skein_native; | ||||
| extern zio_checksum_t abd_checksum_skein_byteswap; | ||||
| extern zio_checksum_tmpl_init_t abd_checksum_skein_tmpl_init; | ||||
| extern zio_checksum_tmpl_free_t abd_checksum_skein_tmpl_free; | ||||
| 
 | ||||
| /* Edon-R */ | ||||
| extern zio_checksum_t zio_checksum_edonr_native; | ||||
| extern zio_checksum_t zio_checksum_edonr_byteswap; | ||||
| extern zio_checksum_tmpl_init_t zio_checksum_edonr_tmpl_init; | ||||
| extern zio_checksum_tmpl_free_t zio_checksum_edonr_tmpl_free; | ||||
| extern zio_checksum_t abd_checksum_edonr_native; | ||||
| extern zio_checksum_t abd_checksum_edonr_byteswap; | ||||
| extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init; | ||||
| extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free; | ||||
| 
 | ||||
| extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum, | ||||
|     void *, uint64_t, uint64_t, zio_bad_cksum_t *); | ||||
| extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, | ||||
|     void *data, uint64_t size); | ||||
| extern void zio_checksum_compute(zio_t *, enum zio_checksum, | ||||
|     struct abd *, uint64_t); | ||||
| extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum, | ||||
|     void *, uint64_t, uint64_t, zio_bad_cksum_t *); | ||||
|     struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *); | ||||
| extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out); | ||||
| extern enum zio_checksum spa_dedup_checksum(spa_t *spa); | ||||
| extern void zio_checksum_templates_free(spa_t *spa); | ||||
|  | ||||
| @ -22,12 +22,14 @@ | ||||
| /*
 | ||||
|  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved. | ||||
|  * Use is subject to license terms. | ||||
|  * Copyright (c) 2015 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2015, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #ifndef _SYS_ZIO_COMPRESS_H | ||||
| #define	_SYS_ZIO_COMPRESS_H | ||||
| 
 | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| #ifdef	__cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| @ -59,14 +61,21 @@ typedef size_t zio_compress_func_t(void *src, void *dst, | ||||
| typedef int zio_decompress_func_t(void *src, void *dst, | ||||
|     size_t s_len, size_t d_len, int); | ||||
| 
 | ||||
| /*
 | ||||
|  * Common signature for all zio decompress functions using an ABD as input. | ||||
|  * This is helpful if you have both compressed ARC and scatter ABDs enabled, | ||||
|  * but is not a requirement for all compression algorithms. | ||||
|  */ | ||||
| typedef int zio_decompress_abd_func_t(abd_t *src, void *dst, | ||||
|     size_t s_len, size_t d_len, int); | ||||
| /*
 | ||||
|  * Information about each compression function. | ||||
|  */ | ||||
| typedef const struct zio_compress_info { | ||||
| 	zio_compress_func_t	*ci_compress;	/* compression function */ | ||||
| 	zio_decompress_func_t	*ci_decompress;	/* decompression function */ | ||||
| 	int			ci_level;	/* level parameter */ | ||||
| 	char			*ci_name;	/* algorithm name */ | ||||
| 	char				*ci_name; | ||||
| 	int				ci_level; | ||||
| 	zio_compress_func_t		*ci_compress; | ||||
| 	zio_decompress_func_t		*ci_decompress; | ||||
| } zio_compress_info_t; | ||||
| 
 | ||||
| extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS]; | ||||
| @ -96,13 +105,16 @@ extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len, | ||||
|     int level); | ||||
| extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len, | ||||
|     int level); | ||||
| 
 | ||||
| extern int lz4_decompress_abd(abd_t *src, void *dst, size_t s_len, size_t d_len, | ||||
|     int level); | ||||
| /*
 | ||||
|  * Compress and decompress data if necessary. | ||||
|  */ | ||||
| extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst, | ||||
| extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void *dst, | ||||
|     size_t s_len); | ||||
| extern int zio_decompress_data(enum zio_compress c, void *src, void *dst, | ||||
| extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, | ||||
|     size_t s_len, size_t d_len); | ||||
| extern int zio_decompress_data_buf(enum zio_compress c, void *src, void *dst, | ||||
|     size_t s_len, size_t d_len); | ||||
| 
 | ||||
| #ifdef	__cplusplus | ||||
|  | ||||
| @ -48,15 +48,16 @@ extern "C" { | ||||
|  * checksum method is added. This method will ignore last (size % 4) bytes of | ||||
|  * the data buffer. | ||||
|  */ | ||||
| void fletcher_init(zio_cksum_t *); | ||||
| void fletcher_2_native(const void *, uint64_t, const void *, zio_cksum_t *); | ||||
| void fletcher_2_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); | ||||
| void fletcher_4_native(const void *, uint64_t, const void *, zio_cksum_t *); | ||||
| int fletcher_2_incremental_native(void *, size_t, void *); | ||||
| int fletcher_2_incremental_byteswap(void *, size_t, void *); | ||||
| void fletcher_4_native_varsize(const void *, uint64_t, zio_cksum_t *); | ||||
| void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); | ||||
| void fletcher_4_incremental_native(const void *, uint64_t, | ||||
|     zio_cksum_t *); | ||||
| void fletcher_4_incremental_byteswap(const void *, uint64_t, | ||||
|     zio_cksum_t *); | ||||
| int fletcher_4_incremental_native(void *, size_t, void *); | ||||
| int fletcher_4_incremental_byteswap(void *, size_t, void *); | ||||
| int fletcher_4_impl_set(const char *selector); | ||||
| void fletcher_4_init(void); | ||||
| void fletcher_4_fini(void); | ||||
|  | ||||
| @ -366,11 +366,12 @@ cksummer(void *arg) | ||||
| 			if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, | ||||
| 			    zero_cksum) || | ||||
| 			    !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { | ||||
| 				SHA256_CTX ctx; | ||||
| 				SHA2_CTX ctx; | ||||
| 				zio_cksum_t tmpsha256; | ||||
| 
 | ||||
| 				zio_checksum_SHA256(buf, | ||||
| 				    payload_size, &ctx, &tmpsha256); | ||||
| 				SHA2Init(SHA256, &ctx); | ||||
| 				SHA2Update(&ctx, buf, payload_size); | ||||
| 				SHA2Final(&tmpsha256, &ctx); | ||||
| 
 | ||||
| 				drrw->drr_key.ddk_cksum.zc_word[0] = | ||||
| 				    BE_64(tmpsha256.zc_word[0]); | ||||
|  | ||||
| @ -33,6 +33,7 @@ KERNEL_C = \ | ||||
| 	zfs_uio.c \
 | ||||
| 	zpool_prop.c \
 | ||||
| 	zprop_common.c \
 | ||||
| 	abd.c \
 | ||||
| 	arc.c \
 | ||||
| 	blkptr.c \
 | ||||
| 	bplist.c \
 | ||||
|  | ||||
| @ -27,6 +27,10 @@ | ||||
|  * Copyright 2013 Saso Kiselkov. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Fletcher Checksums | ||||
|  * ------------------ | ||||
| @ -219,14 +223,26 @@ static boolean_t fletcher_4_initialized = B_FALSE; | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| fletcher_2_native(const void *buf, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| fletcher_init(zio_cksum_t *zcp) | ||||
| { | ||||
| 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); | ||||
| } | ||||
| 
 | ||||
| int | ||||
| fletcher_2_incremental_native(void *buf, size_t size, void *data) | ||||
| { | ||||
| 	zio_cksum_t *zcp = data; | ||||
| 
 | ||||
| 	const uint64_t *ip = buf; | ||||
| 	const uint64_t *ipend = ip + (size / sizeof (uint64_t)); | ||||
| 	uint64_t a0, b0, a1, b1; | ||||
| 
 | ||||
| 	for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { | ||||
| 	a0 = zcp->zc_word[0]; | ||||
| 	a1 = zcp->zc_word[1]; | ||||
| 	b0 = zcp->zc_word[2]; | ||||
| 	b1 = zcp->zc_word[3]; | ||||
| 
 | ||||
| 	for (; ip < ipend; ip += 2) { | ||||
| 		a0 += ip[0]; | ||||
| 		a1 += ip[1]; | ||||
| 		b0 += a0; | ||||
| @ -234,18 +250,33 @@ fletcher_2_native(const void *buf, uint64_t size, | ||||
| 	} | ||||
| 
 | ||||
| 	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| fletcher_2_byteswap(const void *buf, uint64_t size, | ||||
| fletcher_2_native(const void *buf, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	fletcher_init(zcp); | ||||
| 	(void) fletcher_2_incremental_native((void *) buf, size, zcp); | ||||
| } | ||||
| 
 | ||||
| int | ||||
| fletcher_2_incremental_byteswap(void *buf, size_t size, void *data) | ||||
| { | ||||
| 	zio_cksum_t *zcp = data; | ||||
| 
 | ||||
| 	const uint64_t *ip = buf; | ||||
| 	const uint64_t *ipend = ip + (size / sizeof (uint64_t)); | ||||
| 	uint64_t a0, b0, a1, b1; | ||||
| 
 | ||||
| 	for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) { | ||||
| 	a0 = zcp->zc_word[0]; | ||||
| 	a1 = zcp->zc_word[1]; | ||||
| 	b0 = zcp->zc_word[2]; | ||||
| 	b1 = zcp->zc_word[3]; | ||||
| 
 | ||||
| 	for (; ip < ipend; ip += 2) { | ||||
| 		a0 += BSWAP_64(ip[0]); | ||||
| 		a1 += BSWAP_64(ip[1]); | ||||
| 		b0 += a0; | ||||
| @ -253,6 +284,16 @@ fletcher_2_byteswap(const void *buf, uint64_t size, | ||||
| 	} | ||||
| 
 | ||||
| 	ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1); | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| fletcher_2_byteswap(const void *buf, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	fletcher_init(zcp); | ||||
| 	(void) fletcher_2_incremental_byteswap((void *) buf, size, zcp); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| @ -523,25 +564,28 @@ fletcher_4_incremental_impl(boolean_t native, const void *buf, uint64_t size, | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| void | ||||
| fletcher_4_incremental_native(const void *buf, uint64_t size, zio_cksum_t *zcp) | ||||
| int | ||||
| fletcher_4_incremental_native(void *buf, size_t size, void *data) | ||||
| { | ||||
| 	zio_cksum_t *zcp = data; | ||||
| 	/* Use scalar impl to directly update cksum of small blocks */ | ||||
| 	if (size < SPA_MINBLOCKSIZE) | ||||
| 		fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, buf, size); | ||||
| 	else | ||||
| 		fletcher_4_incremental_impl(B_TRUE, buf, size, zcp); | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| void | ||||
| fletcher_4_incremental_byteswap(const void *buf, uint64_t size, | ||||
|     zio_cksum_t *zcp) | ||||
| int | ||||
| fletcher_4_incremental_byteswap(void *buf, size_t size, void *data) | ||||
| { | ||||
| 	zio_cksum_t *zcp = data; | ||||
| 	/* Use scalar impl to directly update cksum of small blocks */ | ||||
| 	if (size < SPA_MINBLOCKSIZE) | ||||
| 		fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp, buf, size); | ||||
| 	else | ||||
| 		fletcher_4_incremental_impl(B_FALSE, buf, size, zcp); | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| @ -607,6 +651,9 @@ fletcher_4_kstat_addr(kstat_t *ksp, loff_t n) | ||||
| 
 | ||||
| #define	FLETCHER_4_BENCH_NS	(MSEC2NSEC(50))		/* 50ms */ | ||||
| 
 | ||||
| typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *, | ||||
| 					zio_cksum_t *); | ||||
| 
 | ||||
| static void | ||||
| fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) | ||||
| { | ||||
| @ -618,8 +665,9 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size) | ||||
| 	zio_cksum_t zc; | ||||
| 	uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen); | ||||
| 
 | ||||
| 	zio_checksum_func_t *fletcher_4_test = native ? fletcher_4_native : | ||||
| 	    fletcher_4_byteswap; | ||||
| 
 | ||||
| 	fletcher_checksum_func_t *fletcher_4_test = native ? | ||||
| 	    fletcher_4_native : fletcher_4_byteswap; | ||||
| 
 | ||||
| 	for (i = 0; i < fletcher_4_supp_impls_cnt; i++) { | ||||
| 		struct fletcher_4_kstat *stat = &fletcher_4_stat_data[i]; | ||||
| @ -769,6 +817,9 @@ module_param_call(zfs_fletcher_4_impl, | ||||
|     fletcher_4_param_set, fletcher_4_param_get, NULL, 0644); | ||||
| MODULE_PARM_DESC(zfs_fletcher_4_impl, "Select fletcher 4 implementation."); | ||||
| 
 | ||||
| EXPORT_SYMBOL(fletcher_init); | ||||
| EXPORT_SYMBOL(fletcher_2_incremental_native); | ||||
| EXPORT_SYMBOL(fletcher_2_incremental_byteswap); | ||||
| EXPORT_SYMBOL(fletcher_4_init); | ||||
| EXPORT_SYMBOL(fletcher_4_fini); | ||||
| EXPORT_SYMBOL(fletcher_2_native); | ||||
|  | ||||
| @ -7,6 +7,7 @@ EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@ | ||||
| 
 | ||||
| obj-$(CONFIG_ZFS) := $(MODULE).o | ||||
| 
 | ||||
| $(MODULE)-objs += abd.o | ||||
| $(MODULE)-objs += arc.o | ||||
| $(MODULE)-objs += blkptr.o | ||||
| $(MODULE)-objs += bplist.o | ||||
|  | ||||
							
								
								
									
										1008
									
								
								module/zfs/abd.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1008
									
								
								module/zfs/abd.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										388
									
								
								module/zfs/arc.c
									
									
									
									
									
								
							
							
						
						
									
										388
									
								
								module/zfs/arc.c
									
									
									
									
									
								
							| @ -136,14 +136,14 @@ | ||||
|  * the arc_buf_hdr_t that will point to the data block in memory. A block can | ||||
|  * only be read by a consumer if it has an l1arc_buf_hdr_t. The L1ARC | ||||
|  * caches data in two ways -- in a list of ARC buffers (arc_buf_t) and | ||||
|  * also in the arc_buf_hdr_t's private physical data block pointer (b_pdata). | ||||
|  * also in the arc_buf_hdr_t's private physical data block pointer (b_pabd). | ||||
|  * | ||||
|  * The L1ARC's data pointer may or may not be uncompressed. The ARC has the | ||||
|  * ability to store the physical data (b_pdata) associated with the DVA of the | ||||
|  * arc_buf_hdr_t. Since the b_pdata is a copy of the on-disk physical block, | ||||
|  * ability to store the physical data (b_pabd) associated with the DVA of the | ||||
|  * arc_buf_hdr_t. Since the b_pabd is a copy of the on-disk physical block, | ||||
|  * it will match its on-disk compression characteristics. This behavior can be | ||||
|  * disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the | ||||
|  * compressed ARC functionality is disabled, the b_pdata will point to an | ||||
|  * compressed ARC functionality is disabled, the b_pabd will point to an | ||||
|  * uncompressed version of the on-disk data. | ||||
|  * | ||||
|  * Data in the L1ARC is not accessed by consumers of the ARC directly. Each | ||||
| @ -182,7 +182,7 @@ | ||||
|  *   | l1arc_buf_hdr_t | ||||
|  *   |           |              arc_buf_t | ||||
|  *   | b_buf     +------------>+-----------+      arc_buf_t | ||||
|  *   | b_pdata   +-+           |b_next     +---->+-----------+ | ||||
|  *   | b_pabd    +-+           |b_next     +---->+-----------+ | ||||
|  *   +-----------+ |           |-----------|     |b_next     +-->NULL | ||||
|  *                 |           |b_comp = T |     +-----------+ | ||||
|  *                 |           |b_data     +-+   |b_comp = F | | ||||
| @ -199,8 +199,8 @@ | ||||
|  * When a consumer reads a block, the ARC must first look to see if the | ||||
|  * arc_buf_hdr_t is cached. If the hdr is cached then the ARC allocates a new | ||||
|  * arc_buf_t and either copies uncompressed data into a new data buffer from an | ||||
|  * existing uncompressed arc_buf_t, decompresses the hdr's b_pdata buffer into a | ||||
|  * new data buffer, or shares the hdr's b_pdata buffer, depending on whether the | ||||
|  * existing uncompressed arc_buf_t, decompresses the hdr's b_pabd buffer into a | ||||
|  * new data buffer, or shares the hdr's b_pabd buffer, depending on whether the | ||||
|  * hdr is compressed and the desired compression characteristics of the | ||||
|  * arc_buf_t consumer. If the arc_buf_t ends up sharing data with the | ||||
|  * arc_buf_hdr_t and both of them are uncompressed then the arc_buf_t must be | ||||
| @ -224,7 +224,7 @@ | ||||
|  *                |           |                 arc_buf_t    (shared) | ||||
|  *                |    b_buf  +------------>+---------+      arc_buf_t | ||||
|  *                |           |             |b_next   +---->+---------+ | ||||
|  *                |  b_pdata  +-+           |---------|     |b_next   +-->NULL | ||||
|  *                |  b_pabd   +-+           |---------|     |b_next   +-->NULL | ||||
|  *                +-----------+ |           |         |     +---------+ | ||||
|  *                              |           |b_data   +-+   |         | | ||||
|  *                              |           +---------+ |   |b_data   +-+ | ||||
| @ -238,19 +238,19 @@ | ||||
|  *                                    |                    +------+     | | ||||
|  *                                    +---------------------------------+ | ||||
|  * | ||||
|  * Writing to the ARC requires that the ARC first discard the hdr's b_pdata | ||||
|  * Writing to the ARC requires that the ARC first discard the hdr's b_pabd | ||||
|  * since the physical block is about to be rewritten. The new data contents | ||||
|  * will be contained in the arc_buf_t. As the I/O pipeline performs the write, | ||||
|  * it may compress the data before writing it to disk. The ARC will be called | ||||
|  * with the transformed data and will bcopy the transformed on-disk block into | ||||
|  * a newly allocated b_pdata. Writes are always done into buffers which have | ||||
|  * a newly allocated b_pabd. Writes are always done into buffers which have | ||||
|  * either been loaned (and hence are new and don't have other readers) or | ||||
|  * buffers which have been released (and hence have their own hdr, if there | ||||
|  * were originally other readers of the buf's original hdr). This ensures that | ||||
|  * the ARC only needs to update a single buf and its hdr after a write occurs. | ||||
|  * | ||||
|  * When the L2ARC is in use, it will also take advantage of the b_pdata. The | ||||
|  * L2ARC will always write the contents of b_pdata to the L2ARC. This means | ||||
|  * When the L2ARC is in use, it will also take advantage of the b_pabd. The | ||||
|  * L2ARC will always write the contents of b_pabd to the L2ARC. This means | ||||
|  * that when compressed ARC is enabled that the L2ARC blocks are identical | ||||
|  * to the on-disk block in the main data pool. This provides a significant | ||||
|  * advantage since the ARC can leverage the bp's checksum when reading from the | ||||
| @ -271,7 +271,9 @@ | ||||
| #include <sys/vdev.h> | ||||
| #include <sys/vdev_impl.h> | ||||
| #include <sys/dsl_pool.h> | ||||
| #include <sys/zio_checksum.h> | ||||
| #include <sys/multilist.h> | ||||
| #include <sys/abd.h> | ||||
| #ifdef _KERNEL | ||||
| #include <sys/vmsystm.h> | ||||
| #include <vm/anon.h> | ||||
| @ -315,7 +317,7 @@ int zfs_arc_num_sublists_per_state = 0; | ||||
| /* number of seconds before growing cache again */ | ||||
| static int		arc_grow_retry = 5; | ||||
| 
 | ||||
| /* shift of arc_c for calculating overflow limit in arc_get_data_buf */ | ||||
| /* shift of arc_c for calculating overflow limit in arc_get_data_impl */ | ||||
| int		zfs_arc_overflow_shift = 8; | ||||
| 
 | ||||
| /* shift of arc_c for calculating both min and max arc_p */ | ||||
| @ -455,13 +457,13 @@ typedef struct arc_stats { | ||||
| 	kstat_named_t arcstat_c_max; | ||||
| 	kstat_named_t arcstat_size; | ||||
| 	/*
 | ||||
| 	 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pdata. | ||||
| 	 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd. | ||||
| 	 * Note that the compressed bytes may match the uncompressed bytes | ||||
| 	 * if the block is either not compressed or compressed arc is disabled. | ||||
| 	 */ | ||||
| 	kstat_named_t arcstat_compressed_size; | ||||
| 	/*
 | ||||
| 	 * Uncompressed size of the data stored in b_pdata. If compressed | ||||
| 	 * Uncompressed size of the data stored in b_pabd. If compressed | ||||
| 	 * arc is disabled then this value will be identical to the stat | ||||
| 	 * above. | ||||
| 	 */ | ||||
| @ -960,7 +962,7 @@ typedef struct l2arc_read_callback { | ||||
| 
 | ||||
| typedef struct l2arc_data_free { | ||||
| 	/* protected by l2arc_free_on_write_mtx */ | ||||
| 	void		*l2df_data; | ||||
| 	abd_t		*l2df_abd; | ||||
| 	size_t		l2df_size; | ||||
| 	arc_buf_contents_t l2df_type; | ||||
| 	list_node_t	l2df_list_node; | ||||
| @ -970,10 +972,14 @@ static kmutex_t l2arc_feed_thr_lock; | ||||
| static kcondvar_t l2arc_feed_thr_cv; | ||||
| static uint8_t l2arc_thread_exit; | ||||
| 
 | ||||
| static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *); | ||||
| static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *); | ||||
| static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *); | ||||
| static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *); | ||||
| static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *); | ||||
| static void arc_hdr_free_pdata(arc_buf_hdr_t *hdr); | ||||
| static void arc_hdr_alloc_pdata(arc_buf_hdr_t *); | ||||
| static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag); | ||||
| static void arc_hdr_free_pabd(arc_buf_hdr_t *); | ||||
| static void arc_hdr_alloc_pabd(arc_buf_hdr_t *); | ||||
| static void arc_access(arc_buf_hdr_t *, kmutex_t *); | ||||
| static boolean_t arc_is_overflowing(void); | ||||
| static void arc_buf_watch(arc_buf_t *); | ||||
| @ -1336,7 +1342,9 @@ static inline boolean_t | ||||
| arc_buf_is_shared(arc_buf_t *buf) | ||||
| { | ||||
| 	boolean_t shared = (buf->b_data != NULL && | ||||
| 	    buf->b_data == buf->b_hdr->b_l1hdr.b_pdata); | ||||
| 	    buf->b_hdr->b_l1hdr.b_pabd != NULL && | ||||
| 	    abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) && | ||||
| 	    buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd)); | ||||
| 	IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr)); | ||||
| 	IMPLY(shared, ARC_BUF_SHARED(buf)); | ||||
| 	IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf)); | ||||
| @ -1376,8 +1384,6 @@ arc_cksum_verify(arc_buf_t *buf) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (ARC_BUF_COMPRESSED(buf)) { | ||||
| 		ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || | ||||
| 		    hdr->b_l1hdr.b_bufcnt > 1); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| @ -1424,7 +1430,8 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) | ||||
| 
 | ||||
| 		cbuf = zio_buf_alloc(HDR_GET_PSIZE(hdr)); | ||||
| 		lsize = HDR_GET_LSIZE(hdr); | ||||
| 		csize = zio_compress_data(compress, zio->io_data, cbuf, lsize); | ||||
| 		csize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); | ||||
| 
 | ||||
| 		ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr)); | ||||
| 		if (csize < HDR_GET_PSIZE(hdr)) { | ||||
| 			/*
 | ||||
| @ -1459,7 +1466,7 @@ arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio) | ||||
| 	 * logical I/O size and not just a gang fragment. | ||||
| 	 */ | ||||
| 	valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp, | ||||
| 	    BP_GET_CHECKSUM(zio->io_bp), zio->io_data, zio->io_size, | ||||
| 	    BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size, | ||||
| 	    zio->io_offset, NULL) == 0); | ||||
| 	zio_pop_transforms(zio); | ||||
| 	return (valid_cksum); | ||||
| @ -1483,18 +1490,9 @@ arc_cksum_compute(arc_buf_t *buf) | ||||
| 
 | ||||
| 	mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); | ||||
| 	if (hdr->b_l1hdr.b_freeze_cksum != NULL) { | ||||
| 		ASSERT(!ARC_BUF_COMPRESSED(buf) || hdr->b_l1hdr.b_bufcnt > 1); | ||||
| 		mutex_exit(&hdr->b_l1hdr.b_freeze_lock); | ||||
| 		return; | ||||
| 	} else if (ARC_BUF_COMPRESSED(buf)) { | ||||
| 		/*
 | ||||
| 		 * Since the checksum doesn't apply to compressed buffers, we | ||||
| 		 * only keep a checksum if there are uncompressed buffers. | ||||
| 		 * Therefore there must be another buffer, which is | ||||
| 		 * uncompressed. | ||||
| 		 */ | ||||
| 		IMPLY(hdr->b_l1hdr.b_freeze_cksum != NULL, | ||||
| 		    hdr->b_l1hdr.b_bufcnt > 1); | ||||
| 		mutex_exit(&hdr->b_l1hdr.b_freeze_lock); | ||||
| 		return; | ||||
| 	} | ||||
| @ -1589,8 +1587,6 @@ arc_buf_thaw(arc_buf_t *buf) | ||||
| 	 * allocate b_thawed. | ||||
| 	 */ | ||||
| 	if (ARC_BUF_COMPRESSED(buf)) { | ||||
| 		ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || | ||||
| 		    hdr->b_l1hdr.b_bufcnt > 1); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| @ -1609,8 +1605,6 @@ arc_buf_freeze(arc_buf_t *buf) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (ARC_BUF_COMPRESSED(buf)) { | ||||
| 		ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL || | ||||
| 		    hdr->b_l1hdr.b_bufcnt > 1); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| @ -1740,7 +1734,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) | ||||
| 
 | ||||
| 	if (hdr_compressed == compressed) { | ||||
| 		if (!arc_buf_is_shared(buf)) { | ||||
| 			bcopy(hdr->b_l1hdr.b_pdata, buf->b_data, | ||||
| 			abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd, | ||||
| 			    arc_buf_size(buf)); | ||||
| 		} | ||||
| 	} else { | ||||
| @ -1792,7 +1786,7 @@ arc_buf_fill(arc_buf_t *buf, boolean_t compressed) | ||||
| 			return (0); | ||||
| 		} else { | ||||
| 			int error = zio_decompress_data(HDR_GET_COMPRESS(hdr), | ||||
| 			    hdr->b_l1hdr.b_pdata, buf->b_data, | ||||
| 			    hdr->b_l1hdr.b_pabd, buf->b_data, | ||||
| 			    HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr)); | ||||
| 
 | ||||
| 			/*
 | ||||
| @ -1829,7 +1823,7 @@ arc_decompress(arc_buf_t *buf) | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return the size of the block, b_pdata, that is stored in the arc_buf_hdr_t. | ||||
|  * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t. | ||||
|  */ | ||||
| static uint64_t | ||||
| arc_hdr_size(arc_buf_hdr_t *hdr) | ||||
| @ -1862,14 +1856,14 @@ arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state) | ||||
| 	if (GHOST_STATE(state)) { | ||||
| 		ASSERT0(hdr->b_l1hdr.b_bufcnt); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 		(void) refcount_add_many(&state->arcs_esize[type], | ||||
| 		    HDR_GET_LSIZE(hdr), hdr); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	ASSERT(!GHOST_STATE(state)); | ||||
| 	if (hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 	if (hdr->b_l1hdr.b_pabd != NULL) { | ||||
| 		(void) refcount_add_many(&state->arcs_esize[type], | ||||
| 		    arc_hdr_size(hdr), hdr); | ||||
| 	} | ||||
| @ -1897,14 +1891,14 @@ arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state) | ||||
| 	if (GHOST_STATE(state)) { | ||||
| 		ASSERT0(hdr->b_l1hdr.b_bufcnt); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 		(void) refcount_remove_many(&state->arcs_esize[type], | ||||
| 		    HDR_GET_LSIZE(hdr), hdr); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	ASSERT(!GHOST_STATE(state)); | ||||
| 	if (hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 	if (hdr->b_l1hdr.b_pabd != NULL) { | ||||
| 		(void) refcount_remove_many(&state->arcs_esize[type], | ||||
| 		    arc_hdr_size(hdr), hdr); | ||||
| 	} | ||||
| @ -2051,7 +2045,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, | ||||
| 		old_state = hdr->b_l1hdr.b_state; | ||||
| 		refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt); | ||||
| 		bufcnt = hdr->b_l1hdr.b_bufcnt; | ||||
| 		update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pdata != NULL); | ||||
| 		update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL); | ||||
| 	} else { | ||||
| 		old_state = arc_l2c_only; | ||||
| 		refcnt = 0; | ||||
| @ -2120,7 +2114,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, | ||||
| 			 */ | ||||
| 			(void) refcount_add_many(&new_state->arcs_size, | ||||
| 			    HDR_GET_LSIZE(hdr), hdr); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 		} else { | ||||
| 			arc_buf_t *buf; | ||||
| 			uint32_t buffers = 0; | ||||
| @ -2150,7 +2144,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, | ||||
| 			} | ||||
| 			ASSERT3U(bufcnt, ==, buffers); | ||||
| 
 | ||||
| 			if (hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 			if (hdr->b_l1hdr.b_pabd != NULL) { | ||||
| 				(void) refcount_add_many(&new_state->arcs_size, | ||||
| 				    arc_hdr_size(hdr), hdr); | ||||
| 			} else { | ||||
| @ -2163,7 +2157,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, | ||||
| 		ASSERT(HDR_HAS_L1HDR(hdr)); | ||||
| 		if (GHOST_STATE(old_state)) { | ||||
| 			ASSERT0(bufcnt); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * When moving a header off of a ghost state, | ||||
| @ -2204,7 +2198,7 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, | ||||
| 				    buf); | ||||
| 			} | ||||
| 			ASSERT3U(bufcnt, ==, buffers); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 			(void) refcount_remove_many( | ||||
| 			    &old_state->arcs_size, arc_hdr_size(hdr), hdr); | ||||
| 		} | ||||
| @ -2302,7 +2296,7 @@ arc_space_return(uint64_t space, arc_space_type_t type) | ||||
| 
 | ||||
| /*
 | ||||
|  * Given a hdr and a buf, returns whether that buf can share its b_data buffer | ||||
|  * with the hdr's b_pdata. | ||||
|  * with the hdr's b_pabd. | ||||
|  */ | ||||
| static boolean_t | ||||
| arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf) | ||||
| @ -2397,17 +2391,20 @@ arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed, | ||||
| 	 * set the appropriate bit in the hdr's b_flags to indicate the hdr is | ||||
| 	 * allocate a new buffer to store the buf's data. | ||||
| 	 * | ||||
| 	 * There is one additional restriction here because we're sharing | ||||
| 	 * hdr -> buf instead of the usual buf -> hdr: the hdr can't be actively | ||||
| 	 * involved in an L2ARC write, because if this buf is used by an | ||||
| 	 * arc_write() then the hdr's data buffer will be released when the | ||||
| 	 * There are two additional restrictions here because we're sharing | ||||
| 	 * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be | ||||
| 	 * actively involved in an L2ARC write, because if this buf is used by | ||||
| 	 * an arc_write() then the hdr's data buffer will be released when the | ||||
| 	 * write completes, even though the L2ARC write might still be using it. | ||||
| 	 * Second, the hdr's ABD must be linear so that the buf's user doesn't | ||||
| 	 * need to be ABD-aware. | ||||
| 	 */ | ||||
| 	can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr); | ||||
| 	can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) && | ||||
| 	    abd_is_linear(hdr->b_l1hdr.b_pabd); | ||||
| 
 | ||||
| 	/* Set up b_data and sharing */ | ||||
| 	if (can_share) { | ||||
| 		buf->b_data = hdr->b_l1hdr.b_pdata; | ||||
| 		buf->b_data = abd_to_buf(hdr->b_l1hdr.b_pabd); | ||||
| 		buf->b_flags |= ARC_BUF_FLAG_SHARED; | ||||
| 		arc_hdr_set_flags(hdr, ARC_FLAG_SHARED_DATA); | ||||
| 	} else { | ||||
| @ -2492,11 +2489,11 @@ arc_loan_inuse_buf(arc_buf_t *buf, void *tag) | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| l2arc_free_data_on_write(void *data, size_t size, arc_buf_contents_t type) | ||||
| l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type) | ||||
| { | ||||
| 	l2arc_data_free_t *df = kmem_alloc(sizeof (*df), KM_SLEEP); | ||||
| 
 | ||||
| 	df->l2df_data = data; | ||||
| 	df->l2df_abd = abd; | ||||
| 	df->l2df_size = size; | ||||
| 	df->l2df_type = type; | ||||
| 	mutex_enter(&l2arc_free_on_write_mtx); | ||||
| @ -2521,7 +2518,7 @@ arc_hdr_free_on_write(arc_buf_hdr_t *hdr) | ||||
| 	} | ||||
| 	(void) refcount_remove_many(&state->arcs_size, size, hdr); | ||||
| 
 | ||||
| 	l2arc_free_data_on_write(hdr->b_l1hdr.b_pdata, size, type); | ||||
| 	l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -2533,7 +2530,7 @@ static void | ||||
| arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) | ||||
| { | ||||
| 	ASSERT(arc_can_share(hdr, buf)); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -2542,7 +2539,9 @@ arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) | ||||
| 	 * the refcount whenever an arc_buf_t is shared. | ||||
| 	 */ | ||||
| 	refcount_transfer_ownership(&hdr->b_l1hdr.b_state->arcs_size, buf, hdr); | ||||
| 	hdr->b_l1hdr.b_pdata = buf->b_data; | ||||
| 	hdr->b_l1hdr.b_pabd = abd_get_from_buf(buf->b_data, arc_buf_size(buf)); | ||||
| 	abd_take_ownership_of_buf(hdr->b_l1hdr.b_pabd, | ||||
| 	    HDR_ISTYPE_METADATA(hdr)); | ||||
| 	arc_hdr_set_flags(hdr, ARC_FLAG_SHARED_DATA); | ||||
| 	buf->b_flags |= ARC_BUF_FLAG_SHARED; | ||||
| 
 | ||||
| @ -2560,7 +2559,7 @@ static void | ||||
| arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) | ||||
| { | ||||
| 	ASSERT(arc_buf_is_shared(buf)); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 	ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr)); | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -2569,7 +2568,9 @@ arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf) | ||||
| 	 */ | ||||
| 	refcount_transfer_ownership(&hdr->b_l1hdr.b_state->arcs_size, hdr, buf); | ||||
| 	arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA); | ||||
| 	hdr->b_l1hdr.b_pdata = NULL; | ||||
| 	abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd); | ||||
| 	abd_put(hdr->b_l1hdr.b_pabd); | ||||
| 	hdr->b_l1hdr.b_pabd = NULL; | ||||
| 	buf->b_flags &= ~ARC_BUF_FLAG_SHARED; | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -2665,7 +2666,7 @@ arc_buf_destroy_impl(arc_buf_t *buf) | ||||
| 	if (ARC_BUF_SHARED(buf) && !ARC_BUF_COMPRESSED(buf)) { | ||||
| 		/*
 | ||||
| 		 * If the current arc_buf_t is sharing its data buffer with the | ||||
| 		 * hdr, then reassign the hdr's b_pdata to share it with the new | ||||
| 		 * hdr, then reassign the hdr's b_pabd to share it with the new | ||||
| 		 * buffer at the end of the list. The shared buffer is always | ||||
| 		 * the last one on the hdr's buffer list. | ||||
| 		 * | ||||
| @ -2680,8 +2681,8 @@ arc_buf_destroy_impl(arc_buf_t *buf) | ||||
| 			/* hdr is uncompressed so can't have compressed buf */ | ||||
| 			VERIFY(!ARC_BUF_COMPRESSED(lastbuf)); | ||||
| 
 | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 			arc_hdr_free_pdata(hdr); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 			arc_hdr_free_pabd(hdr); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * We must setup a new shared block between the | ||||
| @ -2714,26 +2715,26 @@ arc_buf_destroy_impl(arc_buf_t *buf) | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| arc_hdr_alloc_pdata(arc_buf_hdr_t *hdr) | ||||
| arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr) | ||||
| { | ||||
| 	ASSERT3U(HDR_GET_LSIZE(hdr), >, 0); | ||||
| 	ASSERT(HDR_HAS_L1HDR(hdr)); | ||||
| 	ASSERT(!HDR_SHARED_DATA(hdr)); | ||||
| 
 | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 	hdr->b_l1hdr.b_pdata = arc_get_data_buf(hdr, arc_hdr_size(hdr), hdr); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 	hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr); | ||||
| 	hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 
 | ||||
| 	ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr)); | ||||
| 	ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr)); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| arc_hdr_free_pdata(arc_buf_hdr_t *hdr) | ||||
| arc_hdr_free_pabd(arc_buf_hdr_t *hdr) | ||||
| { | ||||
| 	ASSERT(HDR_HAS_L1HDR(hdr)); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the hdr is currently being written to the l2arc then | ||||
| @ -2745,10 +2746,10 @@ arc_hdr_free_pdata(arc_buf_hdr_t *hdr) | ||||
| 		arc_hdr_free_on_write(hdr); | ||||
| 		ARCSTAT_BUMP(arcstat_l2_free_on_write); | ||||
| 	} else { | ||||
| 		arc_free_data_buf(hdr, hdr->b_l1hdr.b_pdata, | ||||
| 		arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd, | ||||
| 		    arc_hdr_size(hdr), hdr); | ||||
| 	} | ||||
| 	hdr->b_l1hdr.b_pdata = NULL; | ||||
| 	hdr->b_l1hdr.b_pabd = NULL; | ||||
| 	hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS; | ||||
| 
 | ||||
| 	ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr)); | ||||
| @ -2784,7 +2785,7 @@ arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize, | ||||
| 	 * the compressed or uncompressed data depending on the block | ||||
| 	 * it references and compressed arc enablement. | ||||
| 	 */ | ||||
| 	arc_hdr_alloc_pdata(hdr); | ||||
| 	arc_hdr_alloc_pabd(hdr); | ||||
| 	ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); | ||||
| 
 | ||||
| 	return (hdr); | ||||
| @ -2824,7 +2825,7 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) | ||||
| 		nhdr->b_l1hdr.b_state = arc_l2c_only; | ||||
| 
 | ||||
| 		/* Verify previous threads set to NULL before freeing */ | ||||
| 		ASSERT3P(nhdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 		ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 	} else { | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); | ||||
| 		ASSERT0(hdr->b_l1hdr.b_bufcnt); | ||||
| @ -2842,11 +2843,11 @@ arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) | ||||
| 		/*
 | ||||
| 		 * A buffer must not be moved into the arc_l2c_only | ||||
| 		 * state if it's not finished being written out to the | ||||
| 		 * l2arc device. Otherwise, the b_l1hdr.b_pdata field | ||||
| 		 * l2arc device. Otherwise, the b_l1hdr.b_pabd field | ||||
| 		 * might try to be accessed, even though it was removed. | ||||
| 		 */ | ||||
| 		VERIFY(!HDR_L2_WRITING(hdr)); | ||||
| 		VERIFY3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 		VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 
 | ||||
| 		arc_hdr_clear_flags(nhdr, ARC_FLAG_HAS_L1HDR); | ||||
| 	} | ||||
| @ -2931,6 +2932,18 @@ arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize, | ||||
| 	arc_buf_thaw(buf); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL); | ||||
| 
 | ||||
| 	if (!arc_buf_is_shared(buf)) { | ||||
| 		/*
 | ||||
| 		 * To ensure that the hdr has the correct data in it if we call | ||||
| 		 * arc_decompress() on this buf before it's been written to | ||||
| 		 * disk, it's easiest if we just set up sharing between the | ||||
| 		 * buf and the hdr. | ||||
| 		 */ | ||||
| 		ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd)); | ||||
| 		arc_hdr_free_pabd(hdr); | ||||
| 		arc_share_buf(hdr, buf); | ||||
| 	} | ||||
| 
 | ||||
| 	return (buf); | ||||
| } | ||||
| 
 | ||||
| @ -2999,9 +3012,8 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr) | ||||
| 		while (hdr->b_l1hdr.b_buf != NULL) | ||||
| 			arc_buf_destroy_impl(hdr->b_l1hdr.b_buf); | ||||
| 
 | ||||
| 		if (hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 			arc_hdr_free_pdata(hdr); | ||||
| 		} | ||||
| 		if (hdr->b_l1hdr.b_pabd != NULL) | ||||
| 			arc_hdr_free_pabd(hdr); | ||||
| 	} | ||||
| 
 | ||||
| 	ASSERT3P(hdr->b_hash_next, ==, NULL); | ||||
| @ -3068,7 +3080,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * l2arc_write_buffers() relies on a header's L1 portion | ||||
| 		 * (i.e. its b_pdata field) during its write phase. | ||||
| 		 * (i.e. its b_pabd field) during its write phase. | ||||
| 		 * Thus, we cannot push a header onto the arc_l2c_only | ||||
| 		 * state (removing its L1 piece) until the header is | ||||
| 		 * done being written to the l2arc. | ||||
| @ -3084,7 +3096,7 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) | ||||
| 		DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); | ||||
| 
 | ||||
| 		if (HDR_HAS_L2HDR(hdr)) { | ||||
| 			ASSERT(hdr->b_l1hdr.b_pdata == NULL); | ||||
| 			ASSERT(hdr->b_l1hdr.b_pabd == NULL); | ||||
| 			/*
 | ||||
| 			 * This buffer is cached on the 2nd Level ARC; | ||||
| 			 * don't destroy the header. | ||||
| @ -3149,9 +3161,9 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) | ||||
| 		 * If this hdr is being evicted and has a compressed | ||||
| 		 * buffer then we discard it here before we change states. | ||||
| 		 * This ensures that the accounting is updated correctly | ||||
| 		 * in arc_free_data_buf(). | ||||
| 		 * in arc_free_data_impl(). | ||||
| 		 */ | ||||
| 		arc_hdr_free_pdata(hdr); | ||||
| 		arc_hdr_free_pabd(hdr); | ||||
| 
 | ||||
| 		arc_change_state(evicted_state, hdr, hash_lock); | ||||
| 		ASSERT(HDR_IN_HASH_TABLE(hdr)); | ||||
| @ -3249,7 +3261,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, | ||||
| 			 * thread. If we used cv_broadcast, we could | ||||
| 			 * wake up "too many" threads causing arc_size | ||||
| 			 * to significantly overflow arc_c; since | ||||
| 			 * arc_get_data_buf() doesn't check for overflow | ||||
| 			 * arc_get_data_impl() doesn't check for overflow | ||||
| 			 * when it's woken up (it doesn't because it's | ||||
| 			 * possible for the ARC to be overflowing while | ||||
| 			 * full of un-evictable buffers, and the | ||||
| @ -4154,13 +4166,13 @@ arc_kmem_reap_now(void) | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Threads can block in arc_get_data_buf() waiting for this thread to evict | ||||
|  * Threads can block in arc_get_data_impl() waiting for this thread to evict | ||||
|  * enough data and signal them to proceed. When this happens, the threads in | ||||
|  * arc_get_data_buf() are sleeping while holding the hash lock for their | ||||
|  * arc_get_data_impl() are sleeping while holding the hash lock for their | ||||
|  * particular arc header. Thus, we must be careful to never sleep on a | ||||
|  * hash lock in this thread. This is to prevent the following deadlock: | ||||
|  * | ||||
|  *  - Thread A sleeps on CV in arc_get_data_buf() holding hash lock "L", | ||||
|  *  - Thread A sleeps on CV in arc_get_data_impl() holding hash lock "L", | ||||
|  *    waiting for the reclaim thread to signal it. | ||||
|  * | ||||
|  *  - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter, | ||||
| @ -4509,18 +4521,45 @@ arc_is_overflowing(void) | ||||
| 	return (arc_size >= arc_c + overflow); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Run the ARC space accounting for `size' bytes on behalf of hdr by | ||||
|  * calling arc_get_data_impl(), then return a newly allocated ABD of | ||||
|  * that size.  The hdr's buffer type, sampled before the accounting | ||||
|  * call, picks the flag handed to abd_alloc(): B_TRUE for | ||||
|  * ARC_BUFC_METADATA, B_FALSE for ARC_BUFC_DATA.  Any other type is | ||||
|  * caught by the ASSERT. | ||||
|  */ | ||||
| static abd_t * | ||||
| arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| { | ||||
| 	arc_buf_contents_t type = arc_buf_type(hdr); | ||||
| 	boolean_t is_metadata; | ||||
| 
 | ||||
| 	arc_get_data_impl(hdr, size, tag); | ||||
| 
 | ||||
| 	if (type != ARC_BUFC_METADATA) { | ||||
| 		ASSERT(type == ARC_BUFC_DATA); | ||||
| 		is_metadata = B_FALSE; | ||||
| 	} else { | ||||
| 		is_metadata = B_TRUE; | ||||
| 	} | ||||
| 	return (abd_alloc(size, is_metadata)); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Flat-buffer counterpart of arc_get_data_abd(): run the ARC space | ||||
|  * accounting through arc_get_data_impl(), then allocate `size' bytes | ||||
|  * with zio_buf_alloc() when hdr is ARC_BUFC_METADATA or with | ||||
|  * zio_data_buf_alloc() when it is ARC_BUFC_DATA.  The buffer type is | ||||
|  * read before the accounting call. | ||||
|  */ | ||||
| static void * | ||||
| arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| { | ||||
| 	arc_buf_contents_t type = arc_buf_type(hdr); | ||||
| 	void *datap; | ||||
| 
 | ||||
| 	arc_get_data_impl(hdr, size, tag); | ||||
| 
 | ||||
| 	if (type != ARC_BUFC_METADATA) { | ||||
| 		ASSERT(type == ARC_BUFC_DATA); | ||||
| 		datap = zio_data_buf_alloc(size); | ||||
| 	} else { | ||||
| 		datap = zio_buf_alloc(size); | ||||
| 	} | ||||
| 	return (datap); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Allocate a block and return it to the caller. If we are hitting the | ||||
|  * hard limit for the cache size, we must sleep, waiting for the eviction | ||||
|  * thread to catch up. If we're past the target size but below the hard | ||||
|  * limit, we'll only signal the reclaim thread and continue on. | ||||
|  */ | ||||
| static void * | ||||
| arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| static void | ||||
| arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| { | ||||
| 	void *datap = NULL; | ||||
| 	arc_state_t		*state = hdr->b_l1hdr.b_state; | ||||
| 	arc_buf_contents_t	type = arc_buf_type(hdr); | ||||
| 	arc_state_t *state = hdr->b_l1hdr.b_state; | ||||
| 	arc_buf_contents_t type = arc_buf_type(hdr); | ||||
| 
 | ||||
| 	arc_adapt(size, state); | ||||
| 
 | ||||
| @ -4562,11 +4601,8 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| 
 | ||||
| 	VERIFY3U(hdr->b_type, ==, type); | ||||
| 	if (type == ARC_BUFC_METADATA) { | ||||
| 		datap = zio_buf_alloc(size); | ||||
| 		arc_space_consume(size, ARC_SPACE_META); | ||||
| 	} else { | ||||
| 		ASSERT(type == ARC_BUFC_DATA); | ||||
| 		datap = zio_data_buf_alloc(size); | ||||
| 		arc_space_consume(size, ARC_SPACE_DATA); | ||||
| 	} | ||||
| 
 | ||||
| @ -4602,14 +4638,34 @@ arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| 		    refcount_count(&arc_mru->arcs_size) > arc_p)) | ||||
| 			arc_p = MIN(arc_c, arc_p + size); | ||||
| 	} | ||||
| 	return (datap); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Release an ABD that was handed out for hdr: undo the ARC space | ||||
|  * accounting for `size' bytes via arc_free_data_impl(), and only then | ||||
|  * free the ABD itself with abd_free(). | ||||
|  */ | ||||
| static void | ||||
| arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size, void *tag) | ||||
| { | ||||
| 	/* Accounting first, while the hdr still describes this buffer. */ | ||||
| 	arc_free_data_impl(hdr, size, tag); | ||||
| 
 | ||||
| 	abd_free(abd); | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  * Flat-buffer counterpart of arc_free_data_abd(): undo the ARC space | ||||
|  * accounting for `size' bytes via arc_free_data_impl(), then return the | ||||
|  * buffer with zio_buf_free() when hdr is ARC_BUFC_METADATA or with | ||||
|  * zio_data_buf_free() when it is ARC_BUFC_DATA.  The buffer type is | ||||
|  * read before the accounting call. | ||||
|  */ | ||||
| static void | ||||
| arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, void *tag) | ||||
| { | ||||
| 	arc_buf_contents_t type = arc_buf_type(hdr); | ||||
| 
 | ||||
| 	arc_free_data_impl(hdr, size, tag); | ||||
| 
 | ||||
| 	if (type == ARC_BUFC_METADATA) { | ||||
| 		zio_buf_free(buf, size); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	ASSERT(type == ARC_BUFC_DATA); | ||||
| 	zio_data_buf_free(buf, size); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Free the arc data buffer. | ||||
|  */ | ||||
| static void | ||||
| arc_free_data_buf(arc_buf_hdr_t *hdr, void *data, uint64_t size, void *tag) | ||||
| arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag) | ||||
| { | ||||
| 	arc_state_t *state = hdr->b_l1hdr.b_state; | ||||
| 	arc_buf_contents_t type = arc_buf_type(hdr); | ||||
| @ -4626,11 +4682,9 @@ arc_free_data_buf(arc_buf_hdr_t *hdr, void *data, uint64_t size, void *tag) | ||||
| 
 | ||||
| 	VERIFY3U(hdr->b_type, ==, type); | ||||
| 	if (type == ARC_BUFC_METADATA) { | ||||
| 		zio_buf_free(data, size); | ||||
| 		arc_space_return(size, ARC_SPACE_META); | ||||
| 	} else { | ||||
| 		ASSERT(type == ARC_BUFC_DATA); | ||||
| 		zio_data_buf_free(data, size); | ||||
| 		arc_space_return(size, ARC_SPACE_DATA); | ||||
| 	} | ||||
| } | ||||
| @ -4912,7 +4966,7 @@ arc_read_done(zio_t *zio) | ||||
| 	if (callback_cnt == 0) { | ||||
| 		ASSERT(HDR_PREFETCH(hdr)); | ||||
| 		ASSERT0(hdr->b_l1hdr.b_bufcnt); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || | ||||
| @ -5009,7 +5063,7 @@ top: | ||||
| 		hdr = buf_hash_find(guid, bp, &hash_lock); | ||||
| 	} | ||||
| 
 | ||||
| 	if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 	if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) { | ||||
| 		arc_buf_t *buf = NULL; | ||||
| 		*arc_flags |= ARC_FLAG_CACHED; | ||||
| 
 | ||||
| @ -5161,7 +5215,7 @@ top: | ||||
| 				    hdr_full_cache); | ||||
| 			} | ||||
| 
 | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 			ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state)); | ||||
| 			ASSERT(!HDR_IO_IN_PROGRESS(hdr)); | ||||
| 			ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); | ||||
| @ -5179,9 +5233,9 @@ top: | ||||
| 			 * avoid hitting an assert in remove_reference(). | ||||
| 			 */ | ||||
| 			arc_access(hdr, hash_lock); | ||||
| 			arc_hdr_alloc_pdata(hdr); | ||||
| 			arc_hdr_alloc_pabd(hdr); | ||||
| 		} | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 		size = arc_hdr_size(hdr); | ||||
| 
 | ||||
| 		/*
 | ||||
| @ -5285,7 +5339,7 @@ top: | ||||
| 				ASSERT3U(HDR_GET_COMPRESS(hdr), !=, | ||||
| 				    ZIO_COMPRESS_EMPTY); | ||||
| 				rzio = zio_read_phys(pio, vd, addr, | ||||
| 				    size, hdr->b_l1hdr.b_pdata, | ||||
| 				    size, hdr->b_l1hdr.b_pabd, | ||||
| 				    ZIO_CHECKSUM_OFF, | ||||
| 				    l2arc_read_done, cb, priority, | ||||
| 				    zio_flags | ZIO_FLAG_DONT_CACHE | | ||||
| @ -5325,7 +5379,7 @@ top: | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pdata, size, | ||||
| 		rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size, | ||||
| 		    arc_read_done, hdr, priority, zio_flags, zb); | ||||
| 
 | ||||
| 		if (*arc_flags & ARC_FLAG_WAIT) { | ||||
| @ -5557,16 +5611,17 @@ arc_release(arc_buf_t *buf, void *tag) | ||||
| 			arc_unshare_buf(hdr, buf); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Now we need to recreate the hdr's b_pdata. Since we | ||||
| 			 * Now we need to recreate the hdr's b_pabd. Since we | ||||
| 			 * have lastbuf handy, we try to share with it, but if | ||||
| 			 * we can't then we allocate a new b_pdata and copy the | ||||
| 			 * we can't then we allocate a new b_pabd and copy the | ||||
| 			 * data from buf into it. | ||||
| 			 */ | ||||
| 			if (arc_can_share(hdr, lastbuf)) { | ||||
| 				arc_share_buf(hdr, lastbuf); | ||||
| 			} else { | ||||
| 				arc_hdr_alloc_pdata(hdr); | ||||
| 				bcopy(buf->b_data, hdr->b_l1hdr.b_pdata, psize); | ||||
| 				arc_hdr_alloc_pabd(hdr); | ||||
| 				abd_copy_from_buf(hdr->b_l1hdr.b_pabd, | ||||
| 				    buf->b_data, psize); | ||||
| 			} | ||||
| 			VERIFY3P(lastbuf->b_data, !=, NULL); | ||||
| 		} else if (HDR_SHARED_DATA(hdr)) { | ||||
| @ -5582,7 +5637,7 @@ arc_release(arc_buf_t *buf, void *tag) | ||||
| 			    HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF); | ||||
| 			ASSERT(!ARC_BUF_SHARED(buf)); | ||||
| 		} | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 		ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 		ASSERT3P(state, !=, arc_l2c_only); | ||||
| 
 | ||||
| 		(void) refcount_remove_many(&state->arcs_size, | ||||
| @ -5601,7 +5656,7 @@ arc_release(arc_buf_t *buf, void *tag) | ||||
| 		mutex_exit(hash_lock); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Allocate a new hdr. The new hdr will contain a b_pdata | ||||
| 		 * Allocate a new hdr. The new hdr will contain a b_pabd | ||||
| 		 * buffer which will be freed in arc_write(). | ||||
| 		 */ | ||||
| 		nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type); | ||||
| @ -5677,6 +5732,7 @@ arc_write_ready(zio_t *zio) | ||||
| 	arc_buf_hdr_t *hdr = buf->b_hdr; | ||||
| 	uint64_t psize = BP_IS_HOLE(zio->io_bp) ? 0 : BP_GET_PSIZE(zio->io_bp); | ||||
| 	enum zio_compress compress; | ||||
| 	fstrans_cookie_t cookie = spl_fstrans_mark(); | ||||
| 
 | ||||
| 	ASSERT(HDR_HAS_L1HDR(hdr)); | ||||
| 	ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt)); | ||||
| @ -5690,15 +5746,15 @@ arc_write_ready(zio_t *zio) | ||||
| 	if (zio->io_flags & ZIO_FLAG_REEXECUTED) { | ||||
| 		arc_cksum_free(hdr); | ||||
| 		arc_buf_unwatch(buf); | ||||
| 		if (hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 		if (hdr->b_l1hdr.b_pabd != NULL) { | ||||
| 			if (arc_buf_is_shared(buf)) { | ||||
| 				arc_unshare_buf(hdr, buf); | ||||
| 			} else { | ||||
| 				arc_hdr_free_pdata(hdr); | ||||
| 				arc_hdr_free_pabd(hdr); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 	ASSERT(!HDR_SHARED_DATA(hdr)); | ||||
| 	ASSERT(!arc_buf_is_shared(buf)); | ||||
| 
 | ||||
| @ -5720,33 +5776,47 @@ arc_write_ready(zio_t *zio) | ||||
| 	arc_hdr_set_compress(hdr, compress); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the hdr is compressed, then copy the compressed | ||||
| 	 * zio contents into arc_buf_hdr_t. Otherwise, copy the original | ||||
| 	 * data buf into the hdr. Ideally, we would like to always copy the | ||||
| 	 * io_data into b_pdata but the user may have disabled compressed | ||||
| 	 * arc thus the on-disk block may or may not match what we maintain | ||||
| 	 * in the hdr's b_pdata field. | ||||
| 	 * Fill the hdr with data. If the hdr is compressed, the data we want | ||||
| 	 * is available from the zio, otherwise we can take it from the buf. | ||||
| 	 * | ||||
| 	 * We might be able to share the buf's data with the hdr here. However, | ||||
| 	 * doing so would cause the ARC to be full of linear ABDs if we write a | ||||
| 	 * lot of shareable data. As a compromise, we check whether scattered | ||||
| 	 * ABDs are allowed, and assume that if they are then the user wants | ||||
| 	 * the ARC to be primarily filled with them regardless of the data being | ||||
| 	 * written. Therefore, if they're allowed then we allocate one and copy | ||||
| 	 * the data into it; otherwise, we share the data directly if we can. | ||||
| 	 */ | ||||
| 	if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF && | ||||
| 	    !ARC_BUF_COMPRESSED(buf)) { | ||||
| 		ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, ZIO_COMPRESS_OFF); | ||||
| 		ASSERT3U(psize, >, 0); | ||||
| 		arc_hdr_alloc_pdata(hdr); | ||||
| 		bcopy(zio->io_data, hdr->b_l1hdr.b_pdata, psize); | ||||
| 	if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) { | ||||
| 		arc_hdr_alloc_pabd(hdr); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Ideally, we would always copy the io_abd into b_pabd, but the | ||||
| 		 * user may have disabled compressed ARC, thus we must check the | ||||
| 		 * hdr's compression setting rather than the io_bp's. | ||||
| 		 */ | ||||
| 		if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) { | ||||
| 			ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=, | ||||
| 			    ZIO_COMPRESS_OFF); | ||||
| 			ASSERT3U(psize, >, 0); | ||||
| 
 | ||||
| 			abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize); | ||||
| 		} else { | ||||
| 			ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr)); | ||||
| 
 | ||||
| 			abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data, | ||||
| 			    arc_buf_size(buf)); | ||||
| 		} | ||||
| 	} else { | ||||
| 		ASSERT3P(buf->b_data, ==, zio->io_orig_data); | ||||
| 		ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd)); | ||||
| 		ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf)); | ||||
| 		ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * This hdr is not compressed so we're able to share | ||||
| 		 * the arc_buf_t data buffer with the hdr. | ||||
| 		 */ | ||||
| 		arc_share_buf(hdr, buf); | ||||
| 		ASSERT0(bcmp(zio->io_orig_data, hdr->b_l1hdr.b_pdata, | ||||
| 		    HDR_GET_LSIZE(hdr))); | ||||
| 	} | ||||
| 
 | ||||
| 	arc_hdr_verify(hdr, zio->io_bp); | ||||
| 	spl_fstrans_unmark(cookie); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| @ -5850,6 +5920,7 @@ arc_write_done(zio_t *zio) | ||||
| 	ASSERT(!refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); | ||||
| 	callback->awcb_done(zio, buf, callback->awcb_private); | ||||
| 
 | ||||
| 	abd_put(zio->io_abd); | ||||
| 	kmem_free(callback, sizeof (arc_write_callback_t)); | ||||
| } | ||||
| 
 | ||||
| @ -5886,10 +5957,10 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, | ||||
| 	callback->awcb_buf = buf; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The hdr's b_pdata is now stale, free it now. A new data block | ||||
| 	 * The hdr's b_pabd is now stale, free it now. A new data block | ||||
| 	 * will be allocated when the zio pipeline calls arc_write_ready(). | ||||
| 	 */ | ||||
| 	if (hdr->b_l1hdr.b_pdata != NULL) { | ||||
| 	if (hdr->b_l1hdr.b_pabd != NULL) { | ||||
| 		/*
 | ||||
| 		 * If the buf is currently sharing the data block with | ||||
| 		 * the hdr then we need to break that relationship here. | ||||
| @ -5899,15 +5970,16 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, | ||||
| 		if (arc_buf_is_shared(buf)) { | ||||
| 			arc_unshare_buf(hdr, buf); | ||||
| 		} else { | ||||
| 			arc_hdr_free_pdata(hdr); | ||||
| 			arc_hdr_free_pabd(hdr); | ||||
| 		} | ||||
| 		VERIFY3P(buf->b_data, !=, NULL); | ||||
| 		arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF); | ||||
| 	} | ||||
| 	ASSERT(!arc_buf_is_shared(buf)); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pdata, ==, NULL); | ||||
| 	ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL); | ||||
| 
 | ||||
| 	zio = zio_write(pio, spa, txg, bp, buf->b_data, | ||||
| 	zio = zio_write(pio, spa, txg, bp, | ||||
| 	    abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)), | ||||
| 	    HDR_GET_LSIZE(hdr), arc_buf_size(buf), zp, | ||||
| 	    arc_write_ready, | ||||
| 	    (children_ready != NULL) ? arc_write_children_ready : NULL, | ||||
| @ -6768,13 +6840,8 @@ l2arc_do_free_on_write(void) | ||||
| 
 | ||||
| 	for (df = list_tail(buflist); df; df = df_prev) { | ||||
| 		df_prev = list_prev(buflist, df); | ||||
| 		ASSERT3P(df->l2df_data, !=, NULL); | ||||
| 		if (df->l2df_type == ARC_BUFC_METADATA) { | ||||
| 			zio_buf_free(df->l2df_data, df->l2df_size); | ||||
| 		} else { | ||||
| 			ASSERT(df->l2df_type == ARC_BUFC_DATA); | ||||
| 			zio_data_buf_free(df->l2df_data, df->l2df_size); | ||||
| 		} | ||||
| 		ASSERT3P(df->l2df_abd, !=, NULL); | ||||
| 		abd_free(df->l2df_abd); | ||||
| 		list_remove(buflist, df); | ||||
| 		kmem_free(df, sizeof (l2arc_data_free_t)); | ||||
| 	} | ||||
| @ -6928,12 +6995,12 @@ l2arc_read_done(zio_t *zio) | ||||
| 	mutex_enter(hash_lock); | ||||
| 	ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); | ||||
| 
 | ||||
| 	ASSERT3P(zio->io_data, !=, NULL); | ||||
| 	ASSERT3P(zio->io_abd, !=, NULL); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Check this survived the L2ARC journey. | ||||
| 	 */ | ||||
| 	ASSERT3P(zio->io_data, ==, hdr->b_l1hdr.b_pdata); | ||||
| 	ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd); | ||||
| 	zio->io_bp_copy = cb->l2rcb_bp;	/* XXX fix in L2ARC 2.0	*/ | ||||
| 	zio->io_bp = &zio->io_bp_copy;	/* XXX fix in L2ARC 2.0	*/ | ||||
| 
 | ||||
| @ -6967,7 +7034,7 @@ l2arc_read_done(zio_t *zio) | ||||
| 			ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); | ||||
| 
 | ||||
| 			zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp, | ||||
| 			    hdr->b_l1hdr.b_pdata, zio->io_size, arc_read_done, | ||||
| 			    hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done, | ||||
| 			    hdr, zio->io_priority, cb->l2rcb_flags, | ||||
| 			    &cb->l2rcb_zb)); | ||||
| 		} | ||||
| @ -7191,7 +7258,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) | ||||
| 		for (; hdr; hdr = hdr_prev) { | ||||
| 			kmutex_t *hash_lock; | ||||
| 			uint64_t asize, size; | ||||
| 			void *to_write; | ||||
| 			abd_t *to_write; | ||||
| 
 | ||||
| 			if (arc_warm == B_FALSE) | ||||
| 				hdr_prev = multilist_sublist_next(mls, hdr); | ||||
| @ -7264,7 +7331,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) | ||||
| 			ASSERT(HDR_HAS_L1HDR(hdr)); | ||||
| 
 | ||||
| 			ASSERT3U(HDR_GET_PSIZE(hdr), >, 0); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pdata, !=, NULL); | ||||
| 			ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL); | ||||
| 			ASSERT3U(arc_hdr_size(hdr), >, 0); | ||||
| 			size = arc_hdr_size(hdr); | ||||
| 
 | ||||
| @ -7280,18 +7347,13 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz) | ||||
| 			 * add it to the l2arc_free_on_write queue. | ||||
| 			 */ | ||||
| 			if (!HDR_SHARED_DATA(hdr)) { | ||||
| 				to_write = hdr->b_l1hdr.b_pdata; | ||||
| 				to_write = hdr->b_l1hdr.b_pabd; | ||||
| 			} else { | ||||
| 				arc_buf_contents_t type = arc_buf_type(hdr); | ||||
| 				if (type == ARC_BUFC_METADATA) { | ||||
| 					to_write = zio_buf_alloc(size); | ||||
| 				} else { | ||||
| 					ASSERT3U(type, ==, ARC_BUFC_DATA); | ||||
| 					to_write = zio_data_buf_alloc(size); | ||||
| 				} | ||||
| 
 | ||||
| 				bcopy(hdr->b_l1hdr.b_pdata, to_write, size); | ||||
| 				l2arc_free_data_on_write(to_write, size, type); | ||||
| 				to_write = abd_alloc_for_io(size, | ||||
| 				    HDR_ISTYPE_METADATA(hdr)); | ||||
| 				abd_copy(to_write, hdr->b_l1hdr.b_pabd, size); | ||||
| 				l2arc_free_abd_on_write(to_write, size, | ||||
| 				    arc_buf_type(hdr)); | ||||
| 			} | ||||
| 			wzio = zio_write_phys(pio, dev->l2ad_vdev, | ||||
| 			    hdr->b_l2hdr.b_daddr, size, to_write, | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Copyright (c) 2013 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2013, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/zfs_context.h> | ||||
|  | ||||
| @ -46,6 +46,7 @@ | ||||
| #include <sys/range_tree.h> | ||||
| #include <sys/trace_dbuf.h> | ||||
| #include <sys/callb.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| struct dbuf_hold_impl_data { | ||||
| 	/* Function arguments */ | ||||
| @ -3709,6 +3710,9 @@ dbuf_write_override_done(zio_t *zio) | ||||
| 	mutex_exit(&db->db_mtx); | ||||
| 
 | ||||
| 	dbuf_write_done(zio, NULL, db); | ||||
| 
 | ||||
| 	if (zio->io_abd != NULL) | ||||
| 		abd_put(zio->io_abd); | ||||
| } | ||||
| 
 | ||||
| /* Issue I/O to commit a dirty buffer to disk. */ | ||||
| @ -3801,7 +3805,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) | ||||
| 		 * The BP for this block has been provided by open context | ||||
| 		 * (by dmu_sync() or dmu_buf_write_embedded()). | ||||
| 		 */ | ||||
| 		void *contents = (data != NULL) ? data->b_data : NULL; | ||||
| 		abd_t *contents = (data != NULL) ? | ||||
| 		    abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL; | ||||
| 
 | ||||
| 		dr->dr_zio = zio_write(zio, os->os_spa, txg, | ||||
| 		    &dr->dr_bp_copy, contents, db->db.db_size, db->db.db_size, | ||||
|  | ||||
| @ -21,7 +21,7 @@ | ||||
| 
 | ||||
| /*
 | ||||
|  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. | ||||
|  * Copyright (c) 2012, 2015 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2012, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/zfs_context.h> | ||||
| @ -36,6 +36,7 @@ | ||||
| #include <sys/zio_checksum.h> | ||||
| #include <sys/zio_compress.h> | ||||
| #include <sys/dsl_scan.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| static kmem_cache_t *ddt_cache; | ||||
| static kmem_cache_t *ddt_entry_cache; | ||||
| @ -706,9 +707,8 @@ ddt_free(ddt_entry_t *dde) | ||||
| 	for (p = 0; p < DDT_PHYS_TYPES; p++) | ||||
| 		ASSERT(dde->dde_lead_zio[p] == NULL); | ||||
| 
 | ||||
| 	if (dde->dde_repair_data != NULL) | ||||
| 		zio_buf_free(dde->dde_repair_data, | ||||
| 		    DDK_GET_PSIZE(&dde->dde_key)); | ||||
| 	if (dde->dde_repair_abd != NULL) | ||||
| 		abd_free(dde->dde_repair_abd); | ||||
| 
 | ||||
| 	cv_destroy(&dde->dde_cv); | ||||
| 	kmem_cache_free(ddt_entry_cache, dde); | ||||
| @ -1002,7 +1002,7 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde) | ||||
| 
 | ||||
| 	ddt_enter(ddt); | ||||
| 
 | ||||
| 	if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) && | ||||
| 	if (dde->dde_repair_abd != NULL && spa_writeable(ddt->ddt_spa) && | ||||
| 	    avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL) | ||||
| 		avl_insert(&ddt->ddt_repair_tree, dde, where); | ||||
| 	else | ||||
| @ -1040,7 +1040,7 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio) | ||||
| 			continue; | ||||
| 		ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); | ||||
| 		zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk, | ||||
| 		    rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL, | ||||
| 		    rdde->dde_repair_abd, DDK_GET_PSIZE(rddk), NULL, NULL, | ||||
| 		    ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL)); | ||||
| 	} | ||||
| 
 | ||||
|  | ||||
| @ -47,6 +47,7 @@ | ||||
| #include <sys/zio_compress.h> | ||||
| #include <sys/sa.h> | ||||
| #include <sys/zfeature.h> | ||||
| #include <sys/abd.h> | ||||
| #ifdef _KERNEL | ||||
| #include <sys/vmsystm.h> | ||||
| #include <sys/zfs_znode.h> | ||||
| @ -1513,6 +1514,7 @@ dmu_sync_late_arrival_done(zio_t *zio) | ||||
| 
 | ||||
| 	dsa->dsa_done(dsa->dsa_zgd, zio->io_error); | ||||
| 
 | ||||
| 	abd_put(zio->io_abd); | ||||
| 	kmem_free(dsa, sizeof (*dsa)); | ||||
| } | ||||
| 
 | ||||
| @ -1537,11 +1539,11 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, | ||||
| 	dsa->dsa_zgd = zgd; | ||||
| 	dsa->dsa_tx = tx; | ||||
| 
 | ||||
| 	zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), | ||||
| 	    zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size, | ||||
| 	    zgd->zgd_db->db_size, zp, dmu_sync_late_arrival_ready, NULL, | ||||
| 	    NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE, | ||||
| 	    ZIO_FLAG_CANFAIL, zb)); | ||||
| 	zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, | ||||
| 	    abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size), | ||||
| 	    zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp, | ||||
| 	    dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done, | ||||
| 	    dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| @ -2062,6 +2064,7 @@ byteswap_uint8_array(void *vbuf, size_t size) | ||||
| void | ||||
| dmu_init(void) | ||||
| { | ||||
| 	abd_init(); | ||||
| 	zfs_dbgmsg_init(); | ||||
| 	sa_cache_init(); | ||||
| 	xuio_stat_init(); | ||||
| @ -2087,6 +2090,7 @@ dmu_fini(void) | ||||
| 	xuio_stat_fini(); | ||||
| 	sa_cache_fini(); | ||||
| 	zfs_dbgmsg_fini(); | ||||
| 	abd_fini(); | ||||
| } | ||||
| 
 | ||||
| #if defined(_KERNEL) && defined(HAVE_SPL) | ||||
|  | ||||
| @ -166,7 +166,7 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) | ||||
| { | ||||
| 	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), | ||||
| 	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t)); | ||||
| 	fletcher_4_incremental_native(dsp->dsa_drr, | ||||
| 	(void) fletcher_4_incremental_native(dsp->dsa_drr, | ||||
| 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), | ||||
| 	    &dsp->dsa_zc); | ||||
| 	if (dsp->dsa_drr->drr_type == DRR_BEGIN) { | ||||
| @ -179,13 +179,13 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len) | ||||
| 	if (dsp->dsa_drr->drr_type == DRR_END) { | ||||
| 		dsp->dsa_sent_end = B_TRUE; | ||||
| 	} | ||||
| 	fletcher_4_incremental_native(&dsp->dsa_drr-> | ||||
| 	(void) fletcher_4_incremental_native(&dsp->dsa_drr-> | ||||
| 	    drr_u.drr_checksum.drr_checksum, | ||||
| 	    sizeof (zio_cksum_t), &dsp->dsa_zc); | ||||
| 	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0) | ||||
| 		return (SET_ERROR(EINTR)); | ||||
| 	if (payload_len != 0) { | ||||
| 		fletcher_4_incremental_native(payload, payload_len, | ||||
| 		(void) fletcher_4_incremental_native(payload, payload_len, | ||||
| 		    &dsp->dsa_zc); | ||||
| 		if (dump_bytes(dsp, payload, payload_len) != 0) | ||||
| 			return (SET_ERROR(EINTR)); | ||||
| @ -1786,11 +1786,11 @@ dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin, | ||||
| 
 | ||||
| 	if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { | ||||
| 		drc->drc_byteswap = B_TRUE; | ||||
| 		fletcher_4_incremental_byteswap(drr_begin, | ||||
| 		(void) fletcher_4_incremental_byteswap(drr_begin, | ||||
| 		    sizeof (dmu_replay_record_t), &drc->drc_cksum); | ||||
| 		byteswap_record(drr_begin); | ||||
| 	} else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) { | ||||
| 		fletcher_4_incremental_native(drr_begin, | ||||
| 		(void) fletcher_4_incremental_native(drr_begin, | ||||
| 		    sizeof (dmu_replay_record_t), &drc->drc_cksum); | ||||
| 	} else { | ||||
| 		return (SET_ERROR(EINVAL)); | ||||
| @ -2470,9 +2470,9 @@ static void | ||||
| receive_cksum(struct receive_arg *ra, int len, void *buf) | ||||
| { | ||||
| 	if (ra->byteswap) { | ||||
| 		fletcher_4_incremental_byteswap(buf, len, &ra->cksum); | ||||
| 		(void) fletcher_4_incremental_byteswap(buf, len, &ra->cksum); | ||||
| 	} else { | ||||
| 		fletcher_4_incremental_native(buf, len, &ra->cksum); | ||||
| 		(void) fletcher_4_incremental_native(buf, len, &ra->cksum); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -20,7 +20,7 @@ | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. | ||||
|  * Copyright (c) 2011, 2015 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2011, 2016 by Delphix. All rights reserved. | ||||
|  * Copyright 2016 Gary Mills | ||||
|  */ | ||||
| 
 | ||||
| @ -47,6 +47,7 @@ | ||||
| #include <sys/sa.h> | ||||
| #include <sys/sa_impl.h> | ||||
| #include <sys/zfeature.h> | ||||
| #include <sys/abd.h> | ||||
| #ifdef _KERNEL | ||||
| #include <sys/zfs_vfsops.h> | ||||
| #endif | ||||
| @ -1820,7 +1821,7 @@ dsl_scan_scrub_done(zio_t *zio) | ||||
| { | ||||
| 	spa_t *spa = zio->io_spa; | ||||
| 
 | ||||
| 	zio_data_buf_free(zio->io_data, zio->io_size); | ||||
| 	abd_free(zio->io_abd); | ||||
| 
 | ||||
| 	mutex_enter(&spa->spa_scrub_lock); | ||||
| 	spa->spa_scrub_inflight--; | ||||
| @ -1904,7 +1905,6 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, | ||||
| 	if (needs_io && !zfs_no_scrub_io) { | ||||
| 		vdev_t *rvd = spa->spa_root_vdev; | ||||
| 		uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight; | ||||
| 		void *data = zio_data_buf_alloc(size); | ||||
| 
 | ||||
| 		mutex_enter(&spa->spa_scrub_lock); | ||||
| 		while (spa->spa_scrub_inflight >= maxinflight) | ||||
| @ -1919,9 +1919,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp, | ||||
| 		if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle) | ||||
| 			delay(scan_delay); | ||||
| 
 | ||||
| 		zio_nowait(zio_read(NULL, spa, bp, data, size, | ||||
| 		    dsl_scan_scrub_done, NULL, ZIO_PRIORITY_SCRUB, | ||||
| 		    zio_flags, zb)); | ||||
| 		zio_nowait(zio_read(NULL, spa, bp, | ||||
| 		    abd_alloc_for_io(size, B_FALSE), size, dsl_scan_scrub_done, | ||||
| 		    NULL, ZIO_PRIORITY_SCRUB, zio_flags, zb)); | ||||
| 	} | ||||
| 
 | ||||
| 	/* do not relocate this block */ | ||||
|  | ||||
| @ -22,20 +22,32 @@ | ||||
|  * Copyright 2013 Saso Kiselkov.  All rights reserved. | ||||
|  * Use is subject to license terms. | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| #include <sys/zfs_context.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/edonr.h> | ||||
| #include <sys/zfs_context.h>	/* For CTASSERT() */ | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| #define	EDONR_MODE		512 | ||||
| #define	EDONR_BLOCK_SIZE	EdonR512_BLOCK_SIZE | ||||
| 
 | ||||
| /* | ||||
|  * Per-chunk callback passed to abd_iterate_func() by the Edon-R checksum | ||||
|  * code.  Folds one buffer segment into the EdonRState supplied through arg. | ||||
|  * The length handed to EdonRUpdate() is size * 8 (presumably a bit count; | ||||
|  * confirm against the Edon-R API).  Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| edonr_incremental(void *buf, size_t size, void *arg) | ||||
| { | ||||
| 	EdonRUpdate((EdonRState *)arg, buf, size * 8); | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Native zio_checksum interface for the Edon-R hash function. | ||||
|  */ | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| zio_checksum_edonr_native(const void *buf, uint64_t size, | ||||
| abd_checksum_edonr_native(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	uint8_t		digest[EDONR_MODE / 8]; | ||||
| @ -43,7 +55,7 @@ zio_checksum_edonr_native(const void *buf, uint64_t size, | ||||
| 
 | ||||
| 	ASSERT(ctx_template != NULL); | ||||
| 	bcopy(ctx_template, &ctx, sizeof (ctx)); | ||||
| 	EdonRUpdate(&ctx, buf, size * 8); | ||||
| 	(void) abd_iterate_func(abd, 0, size, edonr_incremental, &ctx); | ||||
| 	EdonRFinal(&ctx, digest); | ||||
| 	bcopy(digest, zcp->zc_word, sizeof (zcp->zc_word)); | ||||
| } | ||||
| @ -52,12 +64,12 @@ zio_checksum_edonr_native(const void *buf, uint64_t size, | ||||
|  * Byteswapped zio_checksum interface for the Edon-R hash function. | ||||
|  */ | ||||
| void | ||||
| zio_checksum_edonr_byteswap(const void *buf, uint64_t size, | ||||
| abd_checksum_edonr_byteswap(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	zio_cksum_t	tmp; | ||||
| 
 | ||||
| 	zio_checksum_edonr_native(buf, size, ctx_template, &tmp); | ||||
| 	abd_checksum_edonr_native(abd, size, ctx_template, &tmp); | ||||
| 	/* | ||||
| 	 * Byteswap the digest that was just computed into tmp.  zcp has not | ||||
| 	 * been written at this point, so it must not be used as the source. | ||||
| 	 */ | ||||
| 	zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); | ||||
| 	zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); | ||||
| 	zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); | ||||
| @ -65,7 +77,7 @@ zio_checksum_edonr_byteswap(const void *buf, uint64_t size, | ||||
| } | ||||
| 
 | ||||
| void * | ||||
| zio_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) | ||||
| abd_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) | ||||
| { | ||||
| 	EdonRState	*ctx; | ||||
| 	uint8_t		salt_block[EDONR_BLOCK_SIZE]; | ||||
| @ -94,7 +106,7 @@ zio_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt) | ||||
| } | ||||
| 
 | ||||
| void | ||||
| zio_checksum_edonr_tmpl_free(void *ctx_template) | ||||
| abd_checksum_edonr_tmpl_free(void *ctx_template) | ||||
| { | ||||
| 	EdonRState	*ctx = ctx_template; | ||||
| 
 | ||||
|  | ||||
| @ -24,30 +24,39 @@ | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright 2013 Saso Kiselkov. All rights reserved. | ||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| #include <sys/zfs_context.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/zio_checksum.h> | ||||
| #include <sys/sha2.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /* | ||||
|  * Per-chunk callback passed to abd_iterate_func() by the SHA-2 checksum | ||||
|  * functions.  Feeds one buffer segment of size bytes into the SHA2_CTX | ||||
|  * supplied through arg.  Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| sha_incremental(void *buf, size_t size, void *arg) | ||||
| { | ||||
| 	SHA2Update((SHA2_CTX *)arg, buf, size); | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| zio_checksum_SHA256(const void *buf, uint64_t size, | ||||
| abd_checksum_SHA256(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	SHA2_CTX ctx; | ||||
| 	zio_cksum_t tmp; | ||||
| 
 | ||||
| 	SHA2Init(SHA256, &ctx); | ||||
| 	SHA2Update(&ctx, buf, size); | ||||
| 	(void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx); | ||||
| 	SHA2Final(&tmp, &ctx); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * A prior implementation of this function had a | ||||
| 	 * private SHA256 implementation always wrote things out in | ||||
| 	 * Big Endian and there wasn't a byteswap variant of it. | ||||
| 	 * To preseve on disk compatibility we need to force that | ||||
| 	 * behaviour. | ||||
| 	 * To preserve on disk compatibility we need to force that | ||||
| 	 * behavior. | ||||
| 	 */ | ||||
| 	zcp->zc_word[0] = BE_64(tmp.zc_word[0]); | ||||
| 	zcp->zc_word[1] = BE_64(tmp.zc_word[1]); | ||||
| @ -57,24 +66,24 @@ zio_checksum_SHA256(const void *buf, uint64_t size, | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| zio_checksum_SHA512_native(const void *buf, uint64_t size, | ||||
| abd_checksum_SHA512_native(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	SHA2_CTX	ctx; | ||||
| 
 | ||||
| 	SHA2Init(SHA512_256, &ctx); | ||||
| 	SHA2Update(&ctx, buf, size); | ||||
| 	(void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx); | ||||
| 	SHA2Final(zcp, &ctx); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| zio_checksum_SHA512_byteswap(const void *buf, uint64_t size, | ||||
| abd_checksum_SHA512_byteswap(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	zio_cksum_t	tmp; | ||||
| 
 | ||||
| 	zio_checksum_SHA512_native(buf, size, ctx_template, &tmp); | ||||
| 	abd_checksum_SHA512_native(abd, size, ctx_template, &tmp); | ||||
| 	zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); | ||||
| 	zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); | ||||
| 	zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); | ||||
|  | ||||
| @ -20,42 +20,52 @@ | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright 2013 Saso Kiselkov.  All rights reserved. | ||||
|  * Copyright (c) 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| #include <sys/zfs_context.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/skein.h> | ||||
| 
 | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /* | ||||
|  * Per-chunk callback passed to abd_iterate_func() by the skein checksum | ||||
|  * code.  Feeds one buffer segment into the Skein_512_Ctxt_t supplied | ||||
|  * through arg; the Skein_512_Update() result is deliberately discarded. | ||||
|  * Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| skein_incremental(void *buf, size_t size, void *arg) | ||||
| { | ||||
| 	(void) Skein_512_Update((Skein_512_Ctxt_t *)arg, buf, size); | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * Computes a native 256-bit skein MAC checksum. Please note that this | ||||
|  * function requires the presence of a ctx_template that should be allocated | ||||
|  * using zio_checksum_skein_tmpl_init. | ||||
|  * using abd_checksum_skein_tmpl_init. | ||||
|  */ | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| zio_checksum_skein_native(const void *buf, uint64_t size, | ||||
| abd_checksum_skein_native(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	Skein_512_Ctxt_t	ctx; | ||||
| 
 | ||||
| 	ASSERT(ctx_template != NULL); | ||||
| 	bcopy(ctx_template, &ctx, sizeof (ctx)); | ||||
| 	(void) Skein_512_Update(&ctx, buf, size); | ||||
| 	(void) abd_iterate_func(abd, 0, size, skein_incremental, &ctx); | ||||
| 	(void) Skein_512_Final(&ctx, (uint8_t *)zcp); | ||||
| 	bzero(&ctx, sizeof (ctx)); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Byteswapped version of zio_checksum_skein_native. This just invokes | ||||
|  * Byteswapped version of abd_checksum_skein_native. This just invokes | ||||
|  * the native checksum function and byteswaps the resulting checksum (since | ||||
|  * skein is internally endian-insensitive). | ||||
|  */ | ||||
| void | ||||
| zio_checksum_skein_byteswap(const void *buf, uint64_t size, | ||||
| abd_checksum_skein_byteswap(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	zio_cksum_t	tmp; | ||||
| 
 | ||||
| 	zio_checksum_skein_native(buf, size, ctx_template, &tmp); | ||||
| 	abd_checksum_skein_native(abd, size, ctx_template, &tmp); | ||||
| 	zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]); | ||||
| 	zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]); | ||||
| 	zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]); | ||||
| @ -67,7 +77,7 @@ zio_checksum_skein_byteswap(const void *buf, uint64_t size, | ||||
|  * computations and returns a pointer to it. | ||||
|  */ | ||||
| void * | ||||
| zio_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) | ||||
| abd_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) | ||||
| { | ||||
| 	Skein_512_Ctxt_t	*ctx; | ||||
| 
 | ||||
| @ -82,7 +92,7 @@ zio_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt) | ||||
|  * abd_checksum_skein_tmpl_init. | ||||
|  */ | ||||
| void | ||||
| zio_checksum_skein_tmpl_free(void *ctx_template) | ||||
| abd_checksum_skein_tmpl_free(void *ctx_template) | ||||
| { | ||||
| 	Skein_512_Ctxt_t	*ctx = ctx_template; | ||||
| 
 | ||||
|  | ||||
| @ -1963,6 +1963,7 @@ spa_load_verify_done(zio_t *zio) | ||||
| 	int error = zio->io_error; | ||||
| 	spa_t *spa = zio->io_spa; | ||||
| 
 | ||||
| 	abd_free(zio->io_abd); | ||||
| 	if (error) { | ||||
| 		if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) && | ||||
| 		    type != DMU_OT_INTENT_LOG) | ||||
| @ -1970,7 +1971,6 @@ spa_load_verify_done(zio_t *zio) | ||||
| 		else | ||||
| 			atomic_inc_64(&sle->sle_data_count); | ||||
| 	} | ||||
| 	zio_data_buf_free(zio->io_data, zio->io_size); | ||||
| 
 | ||||
| 	mutex_enter(&spa->spa_scrub_lock); | ||||
| 	spa->spa_scrub_inflight--; | ||||
| @ -1993,7 +1993,6 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, | ||||
| { | ||||
| 	zio_t *rio; | ||||
| 	size_t size; | ||||
| 	void *data; | ||||
| 
 | ||||
| 	if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) | ||||
| 		return (0); | ||||
| @ -2004,12 +2003,11 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, | ||||
| 	 */ | ||||
| 	if (!spa_load_verify_metadata) | ||||
| 		return (0); | ||||
| 	if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data) | ||||
| 	if (!BP_IS_METADATA(bp) && !spa_load_verify_data) | ||||
| 		return (0); | ||||
| 
 | ||||
| 	rio = arg; | ||||
| 	size = BP_GET_PSIZE(bp); | ||||
| 	data = zio_data_buf_alloc(size); | ||||
| 
 | ||||
| 	mutex_enter(&spa->spa_scrub_lock); | ||||
| 	while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight) | ||||
| @ -2017,7 +2015,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, | ||||
| 	spa->spa_scrub_inflight++; | ||||
| 	mutex_exit(&spa->spa_scrub_lock); | ||||
| 
 | ||||
| 	zio_nowait(zio_read(rio, spa, bp, data, size, | ||||
| 	zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size, | ||||
| 	    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB, | ||||
| 	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL | | ||||
| 	    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb)); | ||||
|  | ||||
| @ -43,6 +43,7 @@ | ||||
| #include <sys/arc.h> | ||||
| #include <sys/zil.h> | ||||
| #include <sys/dsl_scan.h> | ||||
| #include <sys/abd.h> | ||||
| #include <sys/zvol.h> | ||||
| #include <sys/zfs_ratelimit.h> | ||||
| 
 | ||||
| @ -999,16 +1000,16 @@ vdev_probe_done(zio_t *zio) | ||||
| 			vps->vps_readable = 1; | ||||
| 		if (zio->io_error == 0 && spa_writeable(spa)) { | ||||
| 			zio_nowait(zio_write_phys(vd->vdev_probe_zio, vd, | ||||
| 			    zio->io_offset, zio->io_size, zio->io_data, | ||||
| 			    zio->io_offset, zio->io_size, zio->io_abd, | ||||
| 			    ZIO_CHECKSUM_OFF, vdev_probe_done, vps, | ||||
| 			    ZIO_PRIORITY_SYNC_WRITE, vps->vps_flags, B_TRUE)); | ||||
| 		} else { | ||||
| 			zio_buf_free(zio->io_data, zio->io_size); | ||||
| 			abd_free(zio->io_abd); | ||||
| 		} | ||||
| 	} else if (zio->io_type == ZIO_TYPE_WRITE) { | ||||
| 		if (zio->io_error == 0) | ||||
| 			vps->vps_writeable = 1; | ||||
| 		zio_buf_free(zio->io_data, zio->io_size); | ||||
| 		abd_free(zio->io_abd); | ||||
| 	} else if (zio->io_type == ZIO_TYPE_NULL) { | ||||
| 		zio_t *pio; | ||||
| 		zio_link_t *zl; | ||||
| @ -1126,8 +1127,8 @@ vdev_probe(vdev_t *vd, zio_t *zio) | ||||
| 	for (l = 1; l < VDEV_LABELS; l++) { | ||||
| 		zio_nowait(zio_read_phys(pio, vd, | ||||
| 		    vdev_label_offset(vd->vdev_psize, l, | ||||
| 		    offsetof(vdev_label_t, vl_pad2)), | ||||
| 		    VDEV_PAD_SIZE, zio_buf_alloc(VDEV_PAD_SIZE), | ||||
| 		    offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE, | ||||
| 		    abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE), | ||||
| 		    ZIO_CHECKSUM_OFF, vdev_probe_done, vps, | ||||
| 		    ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE)); | ||||
| 	} | ||||
|  | ||||
| @ -23,7 +23,7 @@ | ||||
|  * Use is subject to license terms. | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2013, 2015 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2013, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/zfs_context.h> | ||||
| @ -31,6 +31,7 @@ | ||||
| #include <sys/vdev_impl.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/kstat.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Virtual device read-ahead caching. | ||||
| @ -136,12 +137,12 @@ static void | ||||
| vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve) | ||||
| { | ||||
| 	ASSERT(MUTEX_HELD(&vc->vc_lock)); | ||||
| 	ASSERT(ve->ve_fill_io == NULL); | ||||
| 	ASSERT(ve->ve_data != NULL); | ||||
| 	ASSERT3P(ve->ve_fill_io, ==, NULL); | ||||
| 	ASSERT3P(ve->ve_abd, !=, NULL); | ||||
| 
 | ||||
| 	avl_remove(&vc->vc_lastused_tree, ve); | ||||
| 	avl_remove(&vc->vc_offset_tree, ve); | ||||
| 	zio_buf_free(ve->ve_data, VCBS); | ||||
| 	abd_free(ve->ve_abd); | ||||
| 	kmem_free(ve, sizeof (vdev_cache_entry_t)); | ||||
| } | ||||
| 
 | ||||
| @ -171,14 +172,14 @@ vdev_cache_allocate(zio_t *zio) | ||||
| 		ve = avl_first(&vc->vc_lastused_tree); | ||||
| 		if (ve->ve_fill_io != NULL) | ||||
| 			return (NULL); | ||||
| 		ASSERT(ve->ve_hits != 0); | ||||
| 		ASSERT3U(ve->ve_hits, !=, 0); | ||||
| 		vdev_cache_evict(vc, ve); | ||||
| 	} | ||||
| 
 | ||||
| 	ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP); | ||||
| 	ve->ve_offset = offset; | ||||
| 	ve->ve_lastused = ddi_get_lbolt(); | ||||
| 	ve->ve_data = zio_buf_alloc(VCBS); | ||||
| 	ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE); | ||||
| 
 | ||||
| 	avl_add(&vc->vc_offset_tree, ve); | ||||
| 	avl_add(&vc->vc_lastused_tree, ve); | ||||
| @ -192,7 +193,7 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) | ||||
| 	uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS); | ||||
| 
 | ||||
| 	ASSERT(MUTEX_HELD(&vc->vc_lock)); | ||||
| 	ASSERT(ve->ve_fill_io == NULL); | ||||
| 	ASSERT3P(ve->ve_fill_io, ==, NULL); | ||||
| 
 | ||||
| 	if (ve->ve_lastused != ddi_get_lbolt()) { | ||||
| 		avl_remove(&vc->vc_lastused_tree, ve); | ||||
| @ -201,7 +202,7 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio) | ||||
| 	} | ||||
| 
 | ||||
| 	ve->ve_hits++; | ||||
| 	bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size); | ||||
| 	abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -216,16 +217,16 @@ vdev_cache_fill(zio_t *fio) | ||||
| 	zio_t *pio; | ||||
| 	zio_link_t *zl; | ||||
| 
 | ||||
| 	ASSERT(fio->io_size == VCBS); | ||||
| 	ASSERT3U(fio->io_size, ==, VCBS); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Add data to the cache. | ||||
| 	 */ | ||||
| 	mutex_enter(&vc->vc_lock); | ||||
| 
 | ||||
| 	ASSERT(ve->ve_fill_io == fio); | ||||
| 	ASSERT(ve->ve_offset == fio->io_offset); | ||||
| 	ASSERT(ve->ve_data == fio->io_data); | ||||
| 	ASSERT3P(ve->ve_fill_io, ==, fio); | ||||
| 	ASSERT3U(ve->ve_offset, ==, fio->io_offset); | ||||
| 	ASSERT3P(ve->ve_abd, ==, fio->io_abd); | ||||
| 
 | ||||
| 	ve->ve_fill_io = NULL; | ||||
| 
 | ||||
| @ -256,7 +257,7 @@ vdev_cache_read(zio_t *zio) | ||||
| 	zio_t *fio; | ||||
| 	ASSERTV(uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS)); | ||||
| 
 | ||||
| 	ASSERT(zio->io_type == ZIO_TYPE_READ); | ||||
| 	ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); | ||||
| 
 | ||||
| 	if (zio->io_flags & ZIO_FLAG_DONT_CACHE) | ||||
| 		return (B_FALSE); | ||||
| @ -270,7 +271,7 @@ vdev_cache_read(zio_t *zio) | ||||
| 	if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS)) | ||||
| 		return (B_FALSE); | ||||
| 
 | ||||
| 	ASSERT(cache_phase + zio->io_size <= VCBS); | ||||
| 	ASSERT3U(cache_phase + zio->io_size, <=, VCBS); | ||||
| 
 | ||||
| 	mutex_enter(&vc->vc_lock); | ||||
| 
 | ||||
| @ -309,7 +310,7 @@ vdev_cache_read(zio_t *zio) | ||||
| 	} | ||||
| 
 | ||||
| 	fio = zio_vdev_delegated_io(zio->io_vd, cache_offset, | ||||
| 	    ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW, | ||||
| 	    ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW, | ||||
| 	    ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve); | ||||
| 
 | ||||
| 	ve->ve_fill_io = fio; | ||||
| @ -337,7 +338,7 @@ vdev_cache_write(zio_t *zio) | ||||
| 	uint64_t max_offset = P2ROUNDUP(io_end, VCBS); | ||||
| 	avl_index_t where; | ||||
| 
 | ||||
| 	ASSERT(zio->io_type == ZIO_TYPE_WRITE); | ||||
| 	ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); | ||||
| 
 | ||||
| 	mutex_enter(&vc->vc_lock); | ||||
| 
 | ||||
| @ -354,8 +355,8 @@ vdev_cache_write(zio_t *zio) | ||||
| 		if (ve->ve_fill_io != NULL) { | ||||
| 			ve->ve_missed_update = 1; | ||||
| 		} else { | ||||
| 			bcopy((char *)zio->io_data + start - io_start, | ||||
| 			    ve->ve_data + start - ve->ve_offset, end - start); | ||||
| 			/* | ||||
| 			 * Offsets follow the abd argument order (destination | ||||
| 			 * first, then source; cf. vdev_cache_hit()): the | ||||
| 			 * cache entry is written at start - ve_offset from | ||||
| 			 * the zio data at start - io_start. | ||||
| 			 */ | ||||
| 			abd_copy_off(ve->ve_abd, zio->io_abd, | ||||
| 			    start - ve->ve_offset, start - io_start, | ||||
| 			    end - start); | ||||
| 		} | ||||
| 		ve = AVL_NEXT(&vc->vc_offset_tree, ve); | ||||
| 	} | ||||
|  | ||||
| @ -30,6 +30,7 @@ | ||||
| #include <sys/spa.h> | ||||
| #include <sys/vdev_disk.h> | ||||
| #include <sys/vdev_impl.h> | ||||
| #include <sys/abd.h> | ||||
| #include <sys/fs/zfs.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/sunldi.h> | ||||
| @ -42,6 +43,7 @@ static void *zfs_vdev_holder = VDEV_HOLDER; | ||||
|  */ | ||||
| typedef struct dio_request { | ||||
| 	zio_t			*dr_zio;	/* Parent ZIO */ | ||||
| 	void			*dr_loanbuf;	/* borrowed abd buffer */ | ||||
| 	atomic_t		dr_ref;		/* References */ | ||||
| 	int			dr_error;	/* Bio error */ | ||||
| 	int			dr_bio_count;	/* Count of bio's */ | ||||
| @ -402,6 +404,7 @@ vdev_disk_dio_put(dio_request_t *dr) | ||||
| 	 */ | ||||
| 	if (rc == 0) { | ||||
| 		zio_t *zio = dr->dr_zio; | ||||
| 		void *loanbuf = dr->dr_loanbuf; | ||||
| 		int error = dr->dr_error; | ||||
| 
 | ||||
| 		vdev_disk_dio_free(dr); | ||||
| @ -411,6 +414,15 @@ vdev_disk_dio_put(dio_request_t *dr) | ||||
| 			ASSERT3S(zio->io_error, >=, 0); | ||||
| 			if (zio->io_error) | ||||
| 				vdev_disk_error(zio); | ||||
| 			/* ABD placeholder */ | ||||
| 			if (loanbuf != NULL) { | ||||
| 				if (zio->io_type == ZIO_TYPE_READ) { | ||||
| 					abd_copy_from_buf(zio->io_abd, loanbuf, | ||||
| 					    zio->io_size); | ||||
| 				} | ||||
| 				zio_buf_free(loanbuf, zio->io_size); | ||||
| 			} | ||||
| 
 | ||||
| 			zio_delay_interrupt(zio); | ||||
| 		} | ||||
| 	} | ||||
| @ -547,7 +559,30 @@ retry: | ||||
| 	 * their volume block size to match the maximum request size and | ||||
| 	 * the common case will be one bio per vdev IO request. | ||||
| 	 */ | ||||
| 	bio_ptr    = kbuf_ptr; | ||||
| 	if (zio != NULL) { | ||||
| 		abd_t *abd = zio->io_abd; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * ABD placeholder | ||||
| 		 * We can't use abd_borrow_buf routines here since our | ||||
| 		 * completion context is interrupt and abd refcounts | ||||
| 		 * take a mutex (in debug mode). | ||||
| 		 */ | ||||
| 		if (abd_is_linear(abd)) { | ||||
| 			bio_ptr = abd_to_buf(abd); | ||||
| 			dr->dr_loanbuf = NULL; | ||||
| 		} else { | ||||
| 			bio_ptr = zio_buf_alloc(zio->io_size); | ||||
| 			dr->dr_loanbuf = bio_ptr; | ||||
| 			if (zio->io_type != ZIO_TYPE_READ) | ||||
| 				abd_copy_to_buf(bio_ptr, abd, zio->io_size); | ||||
| 
 | ||||
| 		} | ||||
| 	} else { | ||||
| 		bio_ptr = kbuf_ptr; | ||||
| 		dr->dr_loanbuf = NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	bio_offset = kbuf_offset; | ||||
| 	bio_size   = kbuf_size; | ||||
| 	for (i = 0; i <= dr->dr_bio_count; i++) { | ||||
| @ -562,6 +597,8 @@ retry: | ||||
| 		 * are needed we allocate a larger dio and warn the user. | ||||
| 		 */ | ||||
| 		if (dr->dr_bio_count == i) { | ||||
| 			if (dr->dr_loanbuf) | ||||
| 				zio_buf_free(dr->dr_loanbuf, zio->io_size); | ||||
| 			vdev_disk_dio_free(dr); | ||||
| 			bio_count *= 2; | ||||
| 			goto retry; | ||||
| @ -571,6 +608,8 @@ retry: | ||||
| 		dr->dr_bio[i] = bio_alloc(GFP_NOIO, | ||||
| 		    MIN(bio_nr_pages(bio_ptr, bio_size), BIO_MAX_PAGES)); | ||||
| 		if (unlikely(dr->dr_bio[i] == NULL)) { | ||||
| 			if (dr->dr_loanbuf) | ||||
| 				zio_buf_free(dr->dr_loanbuf, zio->io_size); | ||||
| 			vdev_disk_dio_free(dr); | ||||
| 			return (ENOMEM); | ||||
| 		} | ||||
| @ -730,7 +769,7 @@ vdev_disk_io_start(zio_t *zio) | ||||
| 	} | ||||
| 
 | ||||
| 	zio->io_target_timestamp = zio_handle_io_delay(zio); | ||||
| 	error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data, | ||||
| 	error = __vdev_disk_physio(vd->vd_bdev, zio, NULL, | ||||
| 	    zio->io_size, zio->io_offset, rw, flags); | ||||
| 	if (error) { | ||||
| 		zio->io_error = error; | ||||
|  | ||||
| @ -31,6 +31,7 @@ | ||||
| #include <sys/zio.h> | ||||
| #include <sys/fs/zfs.h> | ||||
| #include <sys/fm/fs/zfs.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Virtual device vector for files. | ||||
| @ -150,11 +151,21 @@ vdev_file_io_strategy(void *arg) | ||||
| 	vdev_t *vd = zio->io_vd; | ||||
| 	vdev_file_t *vf = vd->vdev_tsd; | ||||
| 	ssize_t resid; | ||||
| 	void *buf; | ||||
| 
 | ||||
| 	if (zio->io_type == ZIO_TYPE_READ) | ||||
| 		buf = abd_borrow_buf(zio->io_abd, zio->io_size); | ||||
| 	else | ||||
| 		buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size); | ||||
| 
 | ||||
| 	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? | ||||
| 	    UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data, | ||||
| 	    zio->io_size, zio->io_offset, UIO_SYSSPACE, | ||||
| 	    0, RLIM64_INFINITY, kcred, &resid); | ||||
| 	    UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size, | ||||
| 	    zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); | ||||
| 
 | ||||
| 	if (zio->io_type == ZIO_TYPE_READ) | ||||
| 		abd_return_buf_copy(zio->io_abd, buf, zio->io_size); | ||||
| 	else | ||||
| 		abd_return_buf(zio->io_abd, buf, zio->io_size); | ||||
| 
 | ||||
| 	if (resid != 0 && zio->io_error == 0) | ||||
| 		zio->io_error = SET_ERROR(ENOSPC); | ||||
|  | ||||
| @ -145,6 +145,7 @@ | ||||
| #include <sys/metaslab.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/dsl_scan.h> | ||||
| #include <sys/abd.h> | ||||
| #include <sys/fs/zfs.h> | ||||
| 
 | ||||
| /*
 | ||||
| @ -178,7 +179,7 @@ vdev_label_number(uint64_t psize, uint64_t offset) | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, | ||||
| vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, | ||||
| 	uint64_t size, zio_done_func_t *done, void *private, int flags) | ||||
| { | ||||
| 	ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) == | ||||
| @ -192,7 +193,7 @@ vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset, | ||||
| vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset, | ||||
| 	uint64_t size, zio_done_func_t *done, void *private, int flags) | ||||
| { | ||||
| 	ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL || | ||||
| @ -587,6 +588,7 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg) | ||||
| 	spa_t *spa = vd->vdev_spa; | ||||
| 	nvlist_t *config = NULL; | ||||
| 	vdev_phys_t *vp; | ||||
| 	abd_t *vp_abd; | ||||
| 	zio_t *zio; | ||||
| 	uint64_t best_txg = 0; | ||||
| 	int error = 0; | ||||
| @ -599,7 +601,8 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg) | ||||
| 	if (!vdev_readable(vd)) | ||||
| 		return (NULL); | ||||
| 
 | ||||
| 	vp = zio_buf_alloc(sizeof (vdev_phys_t)); | ||||
| 	vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); | ||||
| 	vp = abd_to_buf(vp_abd); | ||||
| 
 | ||||
| retry: | ||||
| 	for (l = 0; l < VDEV_LABELS; l++) { | ||||
| @ -607,7 +610,7 @@ retry: | ||||
| 
 | ||||
| 		zio = zio_root(spa, NULL, NULL, flags); | ||||
| 
 | ||||
| 		vdev_label_read(zio, vd, l, vp, | ||||
| 		vdev_label_read(zio, vd, l, vp_abd, | ||||
| 		    offsetof(vdev_label_t, vl_vdev_phys), | ||||
| 		    sizeof (vdev_phys_t), NULL, NULL, flags); | ||||
| 
 | ||||
| @ -646,7 +649,7 @@ retry: | ||||
| 		goto retry; | ||||
| 	} | ||||
| 
 | ||||
| 	zio_buf_free(vp, sizeof (vdev_phys_t)); | ||||
| 	abd_free(vp_abd); | ||||
| 
 | ||||
| 	return (config); | ||||
| } | ||||
| @ -782,8 +785,10 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) | ||||
| 	spa_t *spa = vd->vdev_spa; | ||||
| 	nvlist_t *label; | ||||
| 	vdev_phys_t *vp; | ||||
| 	char *pad2; | ||||
| 	abd_t *vp_abd; | ||||
| 	abd_t *pad2; | ||||
| 	uberblock_t *ub; | ||||
| 	abd_t *ub_abd; | ||||
| 	zio_t *zio; | ||||
| 	char *buf; | ||||
| 	size_t buflen; | ||||
| @ -867,8 +872,9 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) | ||||
| 	/*
 | ||||
| 	 * Initialize its label. | ||||
| 	 */ | ||||
| 	vp = zio_buf_alloc(sizeof (vdev_phys_t)); | ||||
| 	bzero(vp, sizeof (vdev_phys_t)); | ||||
| 	vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); | ||||
| 	abd_zero(vp_abd, sizeof (vdev_phys_t)); | ||||
| 	vp = abd_to_buf(vp_abd); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Generate a label describing the pool and our top-level vdev. | ||||
| @ -928,7 +934,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) | ||||
| 	error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); | ||||
| 	if (error != 0) { | ||||
| 		nvlist_free(label); | ||||
| 		zio_buf_free(vp, sizeof (vdev_phys_t)); | ||||
| 		abd_free(vp_abd); | ||||
| 		/* EFAULT means nvlist_pack ran out of room */ | ||||
| 		return (error == EFAULT ? ENAMETOOLONG : EINVAL); | ||||
| 	} | ||||
| @ -936,14 +942,15 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) | ||||
| 	/*
 | ||||
| 	 * Initialize uberblock template. | ||||
| 	 */ | ||||
| 	ub = zio_buf_alloc(VDEV_UBERBLOCK_RING); | ||||
| 	bzero(ub, VDEV_UBERBLOCK_RING); | ||||
| 	*ub = spa->spa_uberblock; | ||||
| 	ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_RING, B_TRUE); | ||||
| 	abd_zero(ub_abd, VDEV_UBERBLOCK_RING); | ||||
| 	abd_copy_from_buf(ub_abd, &spa->spa_uberblock, sizeof (uberblock_t)); | ||||
| 	ub = abd_to_buf(ub_abd); | ||||
| 	ub->ub_txg = 0; | ||||
| 
 | ||||
| 	/* Initialize the 2nd padding area. */ | ||||
| 	pad2 = zio_buf_alloc(VDEV_PAD_SIZE); | ||||
| 	bzero(pad2, VDEV_PAD_SIZE); | ||||
| 	pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE); | ||||
| 	abd_zero(pad2, VDEV_PAD_SIZE); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Write everything in parallel. | ||||
| @ -953,7 +960,7 @@ retry: | ||||
| 
 | ||||
| 	for (l = 0; l < VDEV_LABELS; l++) { | ||||
| 
 | ||||
| 		vdev_label_write(zio, vd, l, vp, | ||||
| 		vdev_label_write(zio, vd, l, vp_abd, | ||||
| 		    offsetof(vdev_label_t, vl_vdev_phys), | ||||
| 		    sizeof (vdev_phys_t), NULL, NULL, flags); | ||||
| 
 | ||||
| @ -966,7 +973,7 @@ retry: | ||||
| 		    offsetof(vdev_label_t, vl_pad2), | ||||
| 		    VDEV_PAD_SIZE, NULL, NULL, flags); | ||||
| 
 | ||||
| 		vdev_label_write(zio, vd, l, ub, | ||||
| 		vdev_label_write(zio, vd, l, ub_abd, | ||||
| 		    offsetof(vdev_label_t, vl_uberblock), | ||||
| 		    VDEV_UBERBLOCK_RING, NULL, NULL, flags); | ||||
| 	} | ||||
| @ -979,9 +986,9 @@ retry: | ||||
| 	} | ||||
| 
 | ||||
| 	nvlist_free(label); | ||||
| 	zio_buf_free(pad2, VDEV_PAD_SIZE); | ||||
| 	zio_buf_free(ub, VDEV_UBERBLOCK_RING); | ||||
| 	zio_buf_free(vp, sizeof (vdev_phys_t)); | ||||
| 	abd_free(pad2); | ||||
| 	abd_free(ub_abd); | ||||
| 	abd_free(vp_abd); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If this vdev hasn't been previously identified as a spare, then we | ||||
| @ -1039,7 +1046,7 @@ vdev_uberblock_load_done(zio_t *zio) | ||||
| 	vdev_t *vd = zio->io_vd; | ||||
| 	spa_t *spa = zio->io_spa; | ||||
| 	zio_t *rio = zio->io_private; | ||||
| 	uberblock_t *ub = zio->io_data; | ||||
| 	uberblock_t *ub = abd_to_buf(zio->io_abd); | ||||
| 	struct ubl_cbdata *cbp = rio->io_private; | ||||
| 
 | ||||
| 	ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd)); | ||||
| @ -1060,7 +1067,7 @@ vdev_uberblock_load_done(zio_t *zio) | ||||
| 		mutex_exit(&rio->io_lock); | ||||
| 	} | ||||
| 
 | ||||
| 	zio_buf_free(zio->io_data, zio->io_size); | ||||
| 	abd_free(zio->io_abd); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| @ -1076,8 +1083,8 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags, | ||||
| 		for (l = 0; l < VDEV_LABELS; l++) { | ||||
| 			for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { | ||||
| 				vdev_label_read(zio, vd, l, | ||||
| 				    zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)), | ||||
| 				    VDEV_UBERBLOCK_OFFSET(vd, n), | ||||
| 				    abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd), | ||||
| 				    B_TRUE), VDEV_UBERBLOCK_OFFSET(vd, n), | ||||
| 				    VDEV_UBERBLOCK_SIZE(vd), | ||||
| 				    vdev_uberblock_load_done, zio, flags); | ||||
| 			} | ||||
| @ -1144,7 +1151,7 @@ vdev_uberblock_sync_done(zio_t *zio) | ||||
| static void | ||||
| vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) | ||||
| { | ||||
| 	uberblock_t *ubbuf; | ||||
| 	abd_t *ub_abd; | ||||
| 	int c, l, n; | ||||
| 
 | ||||
| 	for (c = 0; c < vd->vdev_children; c++) | ||||
| @ -1158,17 +1165,18 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) | ||||
| 
 | ||||
| 	n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); | ||||
| 
 | ||||
| 	ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)); | ||||
| 	bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); | ||||
| 	*ubbuf = *ub; | ||||
| 	/* Copy the uberblock_t into the ABD */ | ||||
| 	ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE); | ||||
| 	abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd)); | ||||
| 	abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t)); | ||||
| 
 | ||||
| 	for (l = 0; l < VDEV_LABELS; l++) | ||||
| 		vdev_label_write(zio, vd, l, ubbuf, | ||||
| 		vdev_label_write(zio, vd, l, ub_abd, | ||||
| 		    VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), | ||||
| 		    vdev_uberblock_sync_done, zio->io_private, | ||||
| 		    flags | ZIO_FLAG_DONT_PROPAGATE); | ||||
| 
 | ||||
| 	zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd)); | ||||
| 	abd_free(ub_abd); | ||||
| } | ||||
| 
 | ||||
| /* Sync the uberblocks to all vdevs in svd[] */ | ||||
| @ -1245,6 +1253,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) | ||||
| { | ||||
| 	nvlist_t *label; | ||||
| 	vdev_phys_t *vp; | ||||
| 	abd_t *vp_abd; | ||||
| 	char *buf; | ||||
| 	size_t buflen; | ||||
| 	int c; | ||||
| @ -1263,15 +1272,16 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) | ||||
| 	 */ | ||||
| 	label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE); | ||||
| 
 | ||||
| 	vp = zio_buf_alloc(sizeof (vdev_phys_t)); | ||||
| 	bzero(vp, sizeof (vdev_phys_t)); | ||||
| 	vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE); | ||||
| 	abd_zero(vp_abd, sizeof (vdev_phys_t)); | ||||
| 	vp = abd_to_buf(vp_abd); | ||||
| 
 | ||||
| 	buf = vp->vp_nvlist; | ||||
| 	buflen = sizeof (vp->vp_nvlist); | ||||
| 
 | ||||
| 	if (!nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP)) { | ||||
| 		for (; l < VDEV_LABELS; l += 2) { | ||||
| 			vdev_label_write(zio, vd, l, vp, | ||||
| 			vdev_label_write(zio, vd, l, vp_abd, | ||||
| 			    offsetof(vdev_label_t, vl_vdev_phys), | ||||
| 			    sizeof (vdev_phys_t), | ||||
| 			    vdev_label_sync_done, zio->io_private, | ||||
| @ -1279,7 +1289,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	zio_buf_free(vp, sizeof (vdev_phys_t)); | ||||
| 	abd_free(vp_abd); | ||||
| 	nvlist_free(label); | ||||
| } | ||||
| 
 | ||||
|  | ||||
| @ -31,6 +31,7 @@ | ||||
| #include <sys/spa.h> | ||||
| #include <sys/vdev_impl.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/abd.h> | ||||
| #include <sys/fs/zfs.h> | ||||
| 
 | ||||
| /*
 | ||||
| @ -272,13 +273,13 @@ vdev_mirror_scrub_done(zio_t *zio) | ||||
| 		while ((pio = zio_walk_parents(zio, &zl)) != NULL) { | ||||
| 			mutex_enter(&pio->io_lock); | ||||
| 			ASSERT3U(zio->io_size, >=, pio->io_size); | ||||
| 			bcopy(zio->io_data, pio->io_data, pio->io_size); | ||||
| 			abd_copy(pio->io_abd, zio->io_abd, pio->io_size); | ||||
| 			mutex_exit(&pio->io_lock); | ||||
| 		} | ||||
| 		mutex_exit(&zio->io_lock); | ||||
| 	} | ||||
| 
 | ||||
| 	zio_buf_free(zio->io_data, zio->io_size); | ||||
| 	abd_free(zio->io_abd); | ||||
| 
 | ||||
| 	mc->mc_error = zio->io_error; | ||||
| 	mc->mc_tried = 1; | ||||
| @ -433,7 +434,8 @@ vdev_mirror_io_start(zio_t *zio) | ||||
| 				mc = &mm->mm_child[c]; | ||||
| 				zio_nowait(zio_vdev_child_io(zio, zio->io_bp, | ||||
| 				    mc->mc_vd, mc->mc_offset, | ||||
| 				    zio_buf_alloc(zio->io_size), zio->io_size, | ||||
| 				    abd_alloc_sametype(zio->io_abd, | ||||
| 				    zio->io_size), zio->io_size, | ||||
| 				    zio->io_type, zio->io_priority, 0, | ||||
| 				    vdev_mirror_scrub_done, mc)); | ||||
| 			} | ||||
| @ -458,7 +460,7 @@ vdev_mirror_io_start(zio_t *zio) | ||||
| 	while (children--) { | ||||
| 		mc = &mm->mm_child[c]; | ||||
| 		zio_nowait(zio_vdev_child_io(zio, zio->io_bp, | ||||
| 		    mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, | ||||
| 		    mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size, | ||||
| 		    zio->io_type, zio->io_priority, 0, | ||||
| 		    vdev_mirror_child_done, mc)); | ||||
| 		c++; | ||||
| @ -543,7 +545,7 @@ vdev_mirror_io_done(zio_t *zio) | ||||
| 		mc = &mm->mm_child[c]; | ||||
| 		zio_vdev_io_redone(zio); | ||||
| 		zio_nowait(zio_vdev_child_io(zio, zio->io_bp, | ||||
| 		    mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size, | ||||
| 		    mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size, | ||||
| 		    ZIO_TYPE_READ, zio->io_priority, 0, | ||||
| 		    vdev_mirror_child_done, mc)); | ||||
| 		return; | ||||
| @ -584,7 +586,7 @@ vdev_mirror_io_done(zio_t *zio) | ||||
| 
 | ||||
| 			zio_nowait(zio_vdev_child_io(zio, zio->io_bp, | ||||
| 			    mc->mc_vd, mc->mc_offset, | ||||
| 			    zio->io_data, zio->io_size, | ||||
| 			    zio->io_abd, zio->io_size, | ||||
| 			    ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, | ||||
| 			    ZIO_FLAG_IO_REPAIR | (unexpected_errors ? | ||||
| 			    ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); | ||||
|  | ||||
| @ -37,6 +37,7 @@ | ||||
| #include <sys/spa.h> | ||||
| #include <sys/spa_impl.h> | ||||
| #include <sys/kstat.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * ZFS I/O Scheduler | ||||
| @ -496,12 +497,12 @@ vdev_queue_agg_io_done(zio_t *aio) | ||||
| 		zio_t *pio; | ||||
| 		zio_link_t *zl = NULL; | ||||
| 		while ((pio = zio_walk_parents(aio, &zl)) != NULL) { | ||||
| 			bcopy((char *)aio->io_data + (pio->io_offset - | ||||
| 			    aio->io_offset), pio->io_data, pio->io_size); | ||||
| 			abd_copy_off(pio->io_abd, aio->io_abd, | ||||
| 			    0, pio->io_offset - aio->io_offset, pio->io_size); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	zio_buf_free(aio->io_data, aio->io_size); | ||||
| 	abd_free(aio->io_abd); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -523,7 +524,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) | ||||
| 	boolean_t stretch = B_FALSE; | ||||
| 	avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type); | ||||
| 	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; | ||||
| 	void *buf; | ||||
| 	abd_t *abd; | ||||
| 
 | ||||
| 	limit = MAX(MIN(zfs_vdev_aggregation_limit, | ||||
| 	    spa_maxblocksize(vq->vq_vdev->vdev_spa)), 0); | ||||
| @ -626,12 +627,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) | ||||
| 	size = IO_SPAN(first, last); | ||||
| 	ASSERT3U(size, <=, limit); | ||||
| 
 | ||||
| 	buf = zio_buf_alloc_flags(size, KM_NOSLEEP); | ||||
| 	if (buf == NULL) | ||||
| 	abd = abd_alloc_for_io(size, B_TRUE); | ||||
| 	if (abd == NULL) | ||||
| 		return (NULL); | ||||
| 
 | ||||
| 	aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, | ||||
| 	    buf, size, first->io_type, zio->io_priority, | ||||
| 	    abd, size, first->io_type, zio->io_priority, | ||||
| 	    flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, | ||||
| 	    vdev_queue_agg_io_done, NULL); | ||||
| 	aio->io_timestamp = first->io_timestamp; | ||||
| @ -644,12 +645,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) | ||||
| 
 | ||||
| 		if (dio->io_flags & ZIO_FLAG_NODATA) { | ||||
| 			ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE); | ||||
| 			bzero((char *)aio->io_data + (dio->io_offset - | ||||
| 			    aio->io_offset), dio->io_size); | ||||
| 			abd_zero_off(aio->io_abd, | ||||
| 			    dio->io_offset - aio->io_offset, dio->io_size); | ||||
| 		} else if (dio->io_type == ZIO_TYPE_WRITE) { | ||||
| 			bcopy(dio->io_data, (char *)aio->io_data + | ||||
| 			    (dio->io_offset - aio->io_offset), | ||||
| 			    dio->io_size); | ||||
| 			abd_copy_off(aio->io_abd, dio->io_abd, | ||||
| 			    dio->io_offset - aio->io_offset, 0, dio->io_size); | ||||
| 		} | ||||
| 
 | ||||
| 		zio_add_child(dio, aio); | ||||
|  | ||||
| @ -30,6 +30,7 @@ | ||||
| #include <sys/vdev_impl.h> | ||||
| #include <sys/zio.h> | ||||
| #include <sys/zio_checksum.h> | ||||
| #include <sys/abd.h> | ||||
| #include <sys/fs/zfs.h> | ||||
| #include <sys/fm/fs/zfs.h> | ||||
| #include <sys/vdev_raidz.h> | ||||
| @ -136,7 +137,7 @@ vdev_raidz_map_free(raidz_map_t *rm) | ||||
| 	size_t size; | ||||
| 
 | ||||
| 	for (c = 0; c < rm->rm_firstdatacol; c++) { | ||||
| 		zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size); | ||||
| 		abd_free(rm->rm_col[c].rc_abd); | ||||
| 
 | ||||
| 		if (rm->rm_col[c].rc_gdata != NULL) | ||||
| 			zio_buf_free(rm->rm_col[c].rc_gdata, | ||||
| @ -144,11 +145,13 @@ vdev_raidz_map_free(raidz_map_t *rm) | ||||
| 	} | ||||
| 
 | ||||
| 	size = 0; | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		abd_put(rm->rm_col[c].rc_abd); | ||||
| 		size += rm->rm_col[c].rc_size; | ||||
| 	} | ||||
| 
 | ||||
| 	if (rm->rm_datacopy != NULL) | ||||
| 		zio_buf_free(rm->rm_datacopy, size); | ||||
| 	if (rm->rm_abd_copy != NULL) | ||||
| 		abd_free(rm->rm_abd_copy); | ||||
| 
 | ||||
| 	kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols])); | ||||
| } | ||||
| @ -185,7 +188,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) | ||||
| 	size_t x; | ||||
| 
 | ||||
| 	const char *good = NULL; | ||||
| 	const char *bad = rm->rm_col[c].rc_data; | ||||
| 	char *bad; | ||||
| 
 | ||||
| 	if (good_data == NULL) { | ||||
| 		zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE); | ||||
| @ -199,8 +202,9 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) | ||||
| 		 * data never changes for a given logical ZIO) | ||||
| 		 */ | ||||
| 		if (rm->rm_col[0].rc_gdata == NULL) { | ||||
| 			char *bad_parity[VDEV_RAIDZ_MAXPARITY]; | ||||
| 			abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY]; | ||||
| 			char *buf; | ||||
| 			int offset; | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Set up the rm_col[]s to generate the parity for | ||||
| @ -208,15 +212,20 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) | ||||
| 			 * replacing them with buffers to hold the result. | ||||
| 			 */ | ||||
| 			for (x = 0; x < rm->rm_firstdatacol; x++) { | ||||
| 				bad_parity[x] = rm->rm_col[x].rc_data; | ||||
| 				rm->rm_col[x].rc_data = rm->rm_col[x].rc_gdata = | ||||
| 				bad_parity[x] = rm->rm_col[x].rc_abd; | ||||
| 				rm->rm_col[x].rc_gdata = | ||||
| 				    zio_buf_alloc(rm->rm_col[x].rc_size); | ||||
| 				rm->rm_col[x].rc_abd = | ||||
| 				    abd_get_from_buf(rm->rm_col[x].rc_gdata, | ||||
| 				    rm->rm_col[x].rc_size); | ||||
| 			} | ||||
| 
 | ||||
| 			/* fill in the data columns from good_data */ | ||||
| 			buf = (char *)good_data; | ||||
| 			for (; x < rm->rm_cols; x++) { | ||||
| 				rm->rm_col[x].rc_data = buf; | ||||
| 				abd_put(rm->rm_col[x].rc_abd); | ||||
| 				rm->rm_col[x].rc_abd = abd_get_from_buf(buf, | ||||
| 				    rm->rm_col[x].rc_size); | ||||
| 				buf += rm->rm_col[x].rc_size; | ||||
| 			} | ||||
| 
 | ||||
| @ -226,13 +235,17 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) | ||||
| 			vdev_raidz_generate_parity(rm); | ||||
| 
 | ||||
| 			/* restore everything back to its original state */ | ||||
| 			for (x = 0; x < rm->rm_firstdatacol; x++) | ||||
| 				rm->rm_col[x].rc_data = bad_parity[x]; | ||||
| 			for (x = 0; x < rm->rm_firstdatacol; x++) { | ||||
| 				abd_put(rm->rm_col[x].rc_abd); | ||||
| 				rm->rm_col[x].rc_abd = bad_parity[x]; | ||||
| 			} | ||||
| 
 | ||||
| 			buf = rm->rm_datacopy; | ||||
| 			offset = 0; | ||||
| 			for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) { | ||||
| 				rm->rm_col[x].rc_data = buf; | ||||
| 				buf += rm->rm_col[x].rc_size; | ||||
| 				abd_put(rm->rm_col[x].rc_abd); | ||||
| 				rm->rm_col[x].rc_abd = abd_get_offset( | ||||
| 				    rm->rm_abd_copy, offset); | ||||
| 				offset += rm->rm_col[x].rc_size; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| @ -246,8 +259,10 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data) | ||||
| 			good += rm->rm_col[x].rc_size; | ||||
| 	} | ||||
| 
 | ||||
| 	bad = abd_borrow_buf_copy(rm->rm_col[c].rc_abd, rm->rm_col[c].rc_size); | ||||
| 	/* we drop the ereport if it ends up that the data was good */ | ||||
| 	zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE); | ||||
| 	abd_return_buf(rm->rm_col[c].rc_abd, bad, rm->rm_col[c].rc_size); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -260,7 +275,7 @@ static void | ||||
| vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) | ||||
| { | ||||
| 	size_t c = (size_t)(uintptr_t)arg; | ||||
| 	caddr_t buf; | ||||
| 	size_t offset; | ||||
| 
 | ||||
| 	raidz_map_t *rm = zio->io_vsd; | ||||
| 	size_t size; | ||||
| @ -274,7 +289,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) | ||||
| 	rm->rm_reports++; | ||||
| 	ASSERT3U(rm->rm_reports, >, 0); | ||||
| 
 | ||||
| 	if (rm->rm_datacopy != NULL) | ||||
| 	if (rm->rm_abd_copy != NULL) | ||||
| 		return; | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -290,17 +305,20 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg) | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) | ||||
| 		size += rm->rm_col[c].rc_size; | ||||
| 
 | ||||
| 	buf = rm->rm_datacopy = zio_buf_alloc(size); | ||||
| 	rm->rm_abd_copy = | ||||
| 	    abd_alloc_sametype(rm->rm_col[rm->rm_firstdatacol].rc_abd, size); | ||||
| 
 | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 	for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		raidz_col_t *col = &rm->rm_col[c]; | ||||
| 		abd_t *tmp = abd_get_offset(rm->rm_abd_copy, offset); | ||||
| 
 | ||||
| 		bcopy(col->rc_data, buf, col->rc_size); | ||||
| 		col->rc_data = buf; | ||||
| 		abd_copy(tmp, col->rc_abd, col->rc_size); | ||||
| 		abd_put(col->rc_abd); | ||||
| 		col->rc_abd = tmp; | ||||
| 
 | ||||
| 		buf += col->rc_size; | ||||
| 		offset += col->rc_size; | ||||
| 	} | ||||
| 	ASSERT3P(buf - (caddr_t)rm->rm_datacopy, ==, size); | ||||
| 	ASSERT3U(offset, ==, size); | ||||
| } | ||||
| 
 | ||||
| static const zio_vsd_ops_t vdev_raidz_vsd_ops = { | ||||
| @ -329,6 +347,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, | ||||
| 	/* The starting byte offset on each child vdev. */ | ||||
| 	uint64_t o = (b / dcols) << unit_shift; | ||||
| 	uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot; | ||||
| 	uint64_t off = 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * "Quotient": The number of data sectors for this stripe on all but | ||||
| @ -373,7 +392,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, | ||||
| 	rm->rm_missingdata = 0; | ||||
| 	rm->rm_missingparity = 0; | ||||
| 	rm->rm_firstdatacol = nparity; | ||||
| 	rm->rm_datacopy = NULL; | ||||
| 	rm->rm_abd_copy = NULL; | ||||
| 	rm->rm_reports = 0; | ||||
| 	rm->rm_freed = 0; | ||||
| 	rm->rm_ecksuminjected = 0; | ||||
| @ -389,7 +408,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, | ||||
| 		} | ||||
| 		rm->rm_col[c].rc_devidx = col; | ||||
| 		rm->rm_col[c].rc_offset = coff; | ||||
| 		rm->rm_col[c].rc_data = NULL; | ||||
| 		rm->rm_col[c].rc_abd = NULL; | ||||
| 		rm->rm_col[c].rc_gdata = NULL; | ||||
| 		rm->rm_col[c].rc_error = 0; | ||||
| 		rm->rm_col[c].rc_tried = 0; | ||||
| @ -412,13 +431,16 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, | ||||
| 	ASSERT3U(rm->rm_nskip, <=, nparity); | ||||
| 
 | ||||
| 	for (c = 0; c < rm->rm_firstdatacol; c++) | ||||
| 		rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size); | ||||
| 		rm->rm_col[c].rc_abd = | ||||
| 		    abd_alloc_linear(rm->rm_col[c].rc_size, B_TRUE); | ||||
| 
 | ||||
| 	rm->rm_col[c].rc_data = zio->io_data; | ||||
| 	rm->rm_col[c].rc_abd = abd_get_offset(zio->io_abd, 0); | ||||
| 	off = rm->rm_col[c].rc_size; | ||||
| 
 | ||||
| 	for (c = c + 1; c < acols; c++) | ||||
| 		rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data + | ||||
| 		    rm->rm_col[c - 1].rc_size; | ||||
| 	for (c = c + 1; c < acols; c++) { | ||||
| 		rm->rm_col[c].rc_abd = abd_get_offset(zio->io_abd, off); | ||||
| 		off += rm->rm_col[c].rc_size; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If all data stored spans all columns, there's a danger that parity | ||||
| @ -464,29 +486,84 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, | ||||
| 	return (rm); | ||||
| } | ||||
| /*
 | ||||
|  * Cursor state handed as the private argument to the parity callbacks | ||||
|  * vdev_raidz_p_func(), vdev_raidz_pq_func() and vdev_raidz_pqr_func(). | ||||
|  * Each non-NULL member points at the next 64-bit word of a parity | ||||
|  * buffer to update; members that a given callback does not use are | ||||
|  * left NULL.  The callbacks advance the pointers as they consume data. | ||||
|  */ | ||||
| struct pqr_struct { | ||||
| 	uint64_t *p;	/* next word of the P parity buffer */ | ||||
| 	uint64_t *q;	/* next word of the Q parity buffer, or NULL */ | ||||
| 	uint64_t *r;	/* next word of the R parity buffer, or NULL */ | ||||
| }; | ||||
| /*
 | ||||
|  * Parity callback for P only, invoked through abd_iterate_func() by | ||||
|  * vdev_raidz_generate_parity_p(): XOR one chunk of source data into | ||||
|  * the P buffer. | ||||
|  * | ||||
|  * buf and size describe the source chunk.  private is a struct | ||||
|  * pqr_struct; the ASSERT requires p to be set and q and r to be NULL. | ||||
|  * The chunk is handled as size / sizeof (uint64_t) whole words, so | ||||
|  * bytes beyond the last whole word are not processed.  pqr->p is | ||||
|  * advanced past every word written.  Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| vdev_raidz_p_func(void *buf, size_t size, void *private) | ||||
| { | ||||
| 	struct pqr_struct *pqr = private; | ||||
| 	const uint64_t *src = buf; | ||||
| 	int i, cnt = size / sizeof (src[0]); /* whole 64-bit words */ | ||||
| 
 | ||||
| 	ASSERT(pqr->p && !pqr->q && !pqr->r); | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++, src++, pqr->p++) | ||||
| 		*pqr->p ^= *src; | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * Parity callback for P and Q, invoked through abd_iterate_func() by | ||||
|  * vdev_raidz_generate_parity_pq(): fold one chunk of source data into | ||||
|  * both parity buffers. | ||||
|  * | ||||
|  * private is a struct pqr_struct; the ASSERT requires p and q to be | ||||
|  * set and r to be NULL.  For each whole 64-bit word of the chunk, P is | ||||
|  * XORed with the source word, and Q is first run through | ||||
|  * VDEV_RAIDZ_64MUL_2 and then XORed with the source word.  The macro | ||||
|  * is defined outside this view; presumably it multiplies the previous | ||||
|  * Q value by 2 in the RAID-Z Galois field, with mask as scratch space | ||||
|  * (TODO confirm against its definition).  Bytes beyond the last whole | ||||
|  * word are not processed.  pqr->p and pqr->q are advanced past every | ||||
|  * word written.  Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| vdev_raidz_pq_func(void *buf, size_t size, void *private) | ||||
| { | ||||
| 	struct pqr_struct *pqr = private; | ||||
| 	const uint64_t *src = buf; | ||||
| 	uint64_t mask; | ||||
| 	int i, cnt = size / sizeof (src[0]); /* whole 64-bit words */ | ||||
| 
 | ||||
| 	ASSERT(pqr->p && pqr->q && !pqr->r); | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++) { | ||||
| 		*pqr->p ^= *src; | ||||
| 		VDEV_RAIDZ_64MUL_2(*pqr->q, mask); | ||||
| 		*pqr->q ^= *src; | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * Parity callback for P, Q and R, invoked through abd_iterate_func() | ||||
|  * by vdev_raidz_generate_parity_pqr(): fold one chunk of source data | ||||
|  * into all three parity buffers. | ||||
|  * | ||||
|  * private is a struct pqr_struct; the ASSERT requires p, q and r to | ||||
|  * all be set.  For each whole 64-bit word of the chunk, P is XORed | ||||
|  * with the source word, Q is run through VDEV_RAIDZ_64MUL_2 and then | ||||
|  * XORed with the source word, and R is run through VDEV_RAIDZ_64MUL_4 | ||||
|  * and then XORed with the source word.  Both macros are defined | ||||
|  * outside this view; presumably they multiply the previous value by | ||||
|  * 2 and by 4 in the RAID-Z Galois field, with mask as scratch space | ||||
|  * (TODO confirm against their definitions).  Bytes beyond the last | ||||
|  * whole word are not processed.  All three cursors are advanced past | ||||
|  * every word written.  Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| vdev_raidz_pqr_func(void *buf, size_t size, void *private) | ||||
| { | ||||
| 	struct pqr_struct *pqr = private; | ||||
| 	const uint64_t *src = buf; | ||||
| 	uint64_t mask; | ||||
| 	int i, cnt = size / sizeof (src[0]); /* whole 64-bit words */ | ||||
| 
 | ||||
| 	ASSERT(pqr->p && pqr->q && pqr->r); | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++, pqr->r++) { | ||||
| 		*pqr->p ^= *src; | ||||
| 		VDEV_RAIDZ_64MUL_2(*pqr->q, mask); | ||||
| 		*pqr->q ^= *src; | ||||
| 		VDEV_RAIDZ_64MUL_4(*pqr->r, mask); | ||||
| 		*pqr->r ^= *src; | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| vdev_raidz_generate_parity_p(raidz_map_t *rm) | ||||
| { | ||||
| 	uint64_t *p, *src, pcount, ccount, i; | ||||
| 	uint64_t *p; | ||||
| 	int c; | ||||
| 
 | ||||
| 	pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); | ||||
| 	abd_t *src; | ||||
| 
 | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		src = rm->rm_col[c].rc_data; | ||||
| 		p = rm->rm_col[VDEV_RAIDZ_P].rc_data; | ||||
| 		ccount = rm->rm_col[c].rc_size / sizeof (src[0]); | ||||
| 		src = rm->rm_col[c].rc_abd; | ||||
| 		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); | ||||
| 
 | ||||
| 		if (c == rm->rm_firstdatacol) { | ||||
| 			ASSERT(ccount == pcount); | ||||
| 			for (i = 0; i < ccount; i++, src++, p++) { | ||||
| 				*p = *src; | ||||
| 			} | ||||
| 			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); | ||||
| 		} else { | ||||
| 			ASSERT(ccount <= pcount); | ||||
| 			for (i = 0; i < ccount; i++, src++, p++) { | ||||
| 				*p ^= *src; | ||||
| 			} | ||||
| 			struct pqr_struct pqr = { p, NULL, NULL }; | ||||
| 			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, | ||||
| 			    vdev_raidz_p_func, &pqr); | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| @ -494,50 +571,43 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm) | ||||
| static void | ||||
| vdev_raidz_generate_parity_pq(raidz_map_t *rm) | ||||
| { | ||||
| 	uint64_t *p, *q, *src, pcnt, ccnt, mask, i; | ||||
| 	uint64_t *p, *q, pcnt, ccnt, mask, i; | ||||
| 	int c; | ||||
| 	abd_t *src; | ||||
| 
 | ||||
| 	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); | ||||
| 	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); | ||||
| 	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == | ||||
| 	    rm->rm_col[VDEV_RAIDZ_Q].rc_size); | ||||
| 
 | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		src = rm->rm_col[c].rc_data; | ||||
| 		p = rm->rm_col[VDEV_RAIDZ_P].rc_data; | ||||
| 		q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; | ||||
| 		src = rm->rm_col[c].rc_abd; | ||||
| 		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); | ||||
| 		q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); | ||||
| 
 | ||||
| 		ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); | ||||
| 		ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); | ||||
| 
 | ||||
| 		if (c == rm->rm_firstdatacol) { | ||||
| 			ASSERT(ccnt == pcnt || ccnt == 0); | ||||
| 			for (i = 0; i < ccnt; i++, src++, p++, q++) { | ||||
| 				*p = *src; | ||||
| 				*q = *src; | ||||
| 			} | ||||
| 			for (; i < pcnt; i++, src++, p++, q++) { | ||||
| 				*p = 0; | ||||
| 				*q = 0; | ||||
| 			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); | ||||
| 			(void) memcpy(q, p, rm->rm_col[c].rc_size); | ||||
| 		} else { | ||||
| 			struct pqr_struct pqr = { p, q, NULL }; | ||||
| 			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, | ||||
| 			    vdev_raidz_pq_func, &pqr); | ||||
| 		} | ||||
| 
 | ||||
| 		if (c == rm->rm_firstdatacol) { | ||||
| 			for (i = ccnt; i < pcnt; i++) { | ||||
| 				p[i] = 0; | ||||
| 				q[i] = 0; | ||||
| 			} | ||||
| 		} else { | ||||
| 			ASSERT(ccnt <= pcnt); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Apply the algorithm described above by multiplying | ||||
| 			 * the previous result and adding in the new value. | ||||
| 			 */ | ||||
| 			for (i = 0; i < ccnt; i++, src++, p++, q++) { | ||||
| 				*p ^= *src; | ||||
| 
 | ||||
| 				VDEV_RAIDZ_64MUL_2(*q, mask); | ||||
| 				*q ^= *src; | ||||
| 			} | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Treat short columns as though they are full of 0s. | ||||
| 			 * Note that there's therefore nothing needed for P. | ||||
| 			 */ | ||||
| 			for (; i < pcnt; i++, q++) { | ||||
| 				VDEV_RAIDZ_64MUL_2(*q, mask); | ||||
| 			for (i = ccnt; i < pcnt; i++) { | ||||
| 				VDEV_RAIDZ_64MUL_2(q[i], mask); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| @ -546,59 +616,48 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm) | ||||
| static void | ||||
| vdev_raidz_generate_parity_pqr(raidz_map_t *rm) | ||||
| { | ||||
| 	uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i; | ||||
| 	uint64_t *p, *q, *r, pcnt, ccnt, mask, i; | ||||
| 	int c; | ||||
| 	abd_t *src; | ||||
| 
 | ||||
| 	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]); | ||||
| 	pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]); | ||||
| 	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == | ||||
| 	    rm->rm_col[VDEV_RAIDZ_Q].rc_size); | ||||
| 	ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size == | ||||
| 	    rm->rm_col[VDEV_RAIDZ_R].rc_size); | ||||
| 
 | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		src = rm->rm_col[c].rc_data; | ||||
| 		p = rm->rm_col[VDEV_RAIDZ_P].rc_data; | ||||
| 		q = rm->rm_col[VDEV_RAIDZ_Q].rc_data; | ||||
| 		r = rm->rm_col[VDEV_RAIDZ_R].rc_data; | ||||
| 		src = rm->rm_col[c].rc_abd; | ||||
| 		p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); | ||||
| 		q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); | ||||
| 		r = abd_to_buf(rm->rm_col[VDEV_RAIDZ_R].rc_abd); | ||||
| 
 | ||||
| 		ccnt = rm->rm_col[c].rc_size / sizeof (src[0]); | ||||
| 		ccnt = rm->rm_col[c].rc_size / sizeof (p[0]); | ||||
| 
 | ||||
| 		if (c == rm->rm_firstdatacol) { | ||||
| 			ASSERT(ccnt == pcnt || ccnt == 0); | ||||
| 			for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { | ||||
| 				*p = *src; | ||||
| 				*q = *src; | ||||
| 				*r = *src; | ||||
| 			} | ||||
| 			for (; i < pcnt; i++, src++, p++, q++, r++) { | ||||
| 				*p = 0; | ||||
| 				*q = 0; | ||||
| 				*r = 0; | ||||
| 			abd_copy_to_buf(p, src, rm->rm_col[c].rc_size); | ||||
| 			(void) memcpy(q, p, rm->rm_col[c].rc_size); | ||||
| 			(void) memcpy(r, p, rm->rm_col[c].rc_size); | ||||
| 		} else { | ||||
| 			struct pqr_struct pqr = { p, q, r }; | ||||
| 			(void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size, | ||||
| 			    vdev_raidz_pqr_func, &pqr); | ||||
| 		} | ||||
| 
 | ||||
| 		if (c == rm->rm_firstdatacol) { | ||||
| 			for (i = ccnt; i < pcnt; i++) { | ||||
| 				p[i] = 0; | ||||
| 				q[i] = 0; | ||||
| 				r[i] = 0; | ||||
| 			} | ||||
| 		} else { | ||||
| 			ASSERT(ccnt <= pcnt); | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Apply the algorithm described above by multiplying | ||||
| 			 * the previous result and adding in the new value. | ||||
| 			 */ | ||||
| 			for (i = 0; i < ccnt; i++, src++, p++, q++, r++) { | ||||
| 				*p ^= *src; | ||||
| 
 | ||||
| 				VDEV_RAIDZ_64MUL_2(*q, mask); | ||||
| 				*q ^= *src; | ||||
| 
 | ||||
| 				VDEV_RAIDZ_64MUL_4(*r, mask); | ||||
| 				*r ^= *src; | ||||
| 			} | ||||
| 
 | ||||
| 			/*
 | ||||
| 			 * Treat short columns as though they are full of 0s. | ||||
| 			 * Note that there's therefore nothing needed for P. | ||||
| 			 */ | ||||
| 			for (; i < pcnt; i++, q++, r++) { | ||||
| 				VDEV_RAIDZ_64MUL_2(*q, mask); | ||||
| 				VDEV_RAIDZ_64MUL_4(*r, mask); | ||||
| 			for (i = ccnt; i < pcnt; i++) { | ||||
| 				VDEV_RAIDZ_64MUL_2(q[i], mask); | ||||
| 				VDEV_RAIDZ_64MUL_4(r[i], mask); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| @ -630,40 +689,159 @@ vdev_raidz_generate_parity(raidz_map_t *rm) | ||||
| 	} | ||||
| } | ||||
| /*
 | ||||
|  * Two-buffer callback used by vdev_raidz_reconstruct_p() through | ||||
|  * abd_iterate_func2(): XOR the source chunk (sbuf) into the | ||||
|  * destination chunk (dbuf), one 64-bit word at a time. | ||||
|  * | ||||
|  * size / sizeof (uint64_t) whole words are processed; bytes beyond the | ||||
|  * last whole word are left alone.  The private argument is not used. | ||||
|  * Always returns 0. | ||||
|  */ | ||||
| /* ARGSUSED */ | ||||
| static int | ||||
| vdev_raidz_reconst_p_func(void *dbuf, void *sbuf, size_t size, void *private) | ||||
| { | ||||
| 	uint64_t *dst = dbuf; | ||||
| 	uint64_t *src = sbuf; | ||||
| 	int cnt = size / sizeof (src[0]); /* whole 64-bit words */ | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++) { | ||||
| 		dst[i] ^= src[i]; | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * Two-buffer callback for the first stage of Q reconstruction.  For | ||||
|  * each whole 64-bit word, the destination word is run through | ||||
|  * VDEV_RAIDZ_64MUL_2 and then XORed with the matching source word. | ||||
|  * The macro is defined outside this view; presumably it multiplies the | ||||
|  * word by 2 in the RAID-Z Galois field, using mask as scratch space | ||||
|  * (TODO confirm against its definition). | ||||
|  * | ||||
|  * size / sizeof (uint64_t) whole words are processed.  The private | ||||
|  * argument is not used.  Always returns 0. | ||||
|  */ | ||||
| /* ARGSUSED */ | ||||
| static int | ||||
| vdev_raidz_reconst_q_pre_func(void *dbuf, void *sbuf, size_t size, | ||||
|     void *private) | ||||
| { | ||||
| 	uint64_t *dst = dbuf; | ||||
| 	uint64_t *src = sbuf; | ||||
| 	uint64_t mask; | ||||
| 	int cnt = size / sizeof (dst[0]); /* whole 64-bit words */ | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++, dst++, src++) { | ||||
| 		VDEV_RAIDZ_64MUL_2(*dst, mask); | ||||
| 		*dst ^= *src; | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * Single-buffer companion to vdev_raidz_reconst_q_pre_func() for a | ||||
|  * destination range that has no matching source data: each whole | ||||
|  * 64-bit word of buf is run through VDEV_RAIDZ_64MUL_2 and nothing is | ||||
|  * XORed in.  Presumably this covers the part of the destination that | ||||
|  * extends past a shorter source column (TODO confirm against the | ||||
|  * caller, which is not visible here). | ||||
|  * | ||||
|  * size / sizeof (uint64_t) whole words are processed.  The private | ||||
|  * argument is not used.  Always returns 0. | ||||
|  */ | ||||
| /* ARGSUSED */ | ||||
| static int | ||||
| vdev_raidz_reconst_q_pre_tail_func(void *buf, size_t size, void *private) | ||||
| { | ||||
| 	uint64_t *dst = buf; | ||||
| 	uint64_t mask; | ||||
| 	int cnt = size / sizeof (dst[0]); /* whole 64-bit words */ | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++, dst++) { | ||||
| 		/* same operation as vdev_raidz_reconst_q_pre_func() on dst */ | ||||
| 		VDEV_RAIDZ_64MUL_2(*dst, mask); | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * State passed as the private argument to | ||||
|  * vdev_raidz_reconst_q_post_func(). | ||||
|  */ | ||||
| struct reconst_q_struct { | ||||
| 	uint64_t *q;	/* next word to XOR in; advanced by the callback */ | ||||
| 	int exp;	/* exponent handed to vdev_raidz_exp2() */ | ||||
| }; | ||||
| /*
 | ||||
|  * Single-buffer callback for the final stage of Q reconstruction. | ||||
|  * private is a struct reconst_q_struct.  For each whole 64-bit word of | ||||
|  * buf, the word is XORed with the word at rq->q, and then each of its | ||||
|  * 8 bytes is replaced by vdev_raidz_exp2(byte, rq->exp). | ||||
|  * vdev_raidz_exp2() is defined outside this view; presumably it | ||||
|  * multiplies the byte by 2^exp in the RAID-Z Galois field (TODO | ||||
|  * confirm against its definition). | ||||
|  * | ||||
|  * rq->q is advanced past every word consumed, and the advance is | ||||
|  * visible to the caller through private.  Always returns 0. | ||||
|  */ | ||||
| static int | ||||
| vdev_raidz_reconst_q_post_func(void *buf, size_t size, void *private) | ||||
| { | ||||
| 	struct reconst_q_struct *rq = private; | ||||
| 	uint64_t *dst = buf; | ||||
| 	int cnt = size / sizeof (dst[0]); /* whole 64-bit words */ | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < cnt; i++, dst++, rq->q++) { | ||||
| 		int j; | ||||
| 		uint8_t *b; | ||||
| 
 | ||||
| 		*dst ^= *rq->q; | ||||
| 		for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { | ||||
| 			*b = vdev_raidz_exp2(*b, rq->exp); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * State passed as the private argument to vdev_raidz_reconst_pq_func() | ||||
|  * and vdev_raidz_reconst_pq_tail_func().  The four byte pointers are | ||||
|  * read and advanced in lock step by those callbacks.  Presumably p and | ||||
|  * q are the parity as read and pxy and qxy the parity recomputed | ||||
|  * without the two missing columns (TODO confirm against the caller, | ||||
|  * which is not visible here). | ||||
|  */ | ||||
| struct reconst_pq_struct { | ||||
| 	uint8_t *p; | ||||
| 	uint8_t *q; | ||||
| 	uint8_t *pxy; | ||||
| 	uint8_t *qxy; | ||||
| 	int aexp;	/* exponent applied to (p ^ pxy) via vdev_raidz_exp2() */ | ||||
| 	int bexp;	/* exponent applied to (q ^ qxy) via vdev_raidz_exp2() */ | ||||
| }; | ||||
| /*
 | ||||
|  * Two-buffer callback for reconstructing two columns from P and Q. | ||||
|  * private is a struct reconst_pq_struct.  Working one byte at a time | ||||
|  * over size bytes, it stores into xbuf | ||||
|  *   vdev_raidz_exp2(p ^ pxy, aexp) ^ vdev_raidz_exp2(q ^ qxy, bexp) | ||||
|  * and into ybuf | ||||
|  *   p ^ pxy ^ (the byte just written to xbuf). | ||||
|  * | ||||
|  * All four cursors in the struct are advanced by size bytes, and the | ||||
|  * advance is visible to the caller through private.  Always returns 0. | ||||
|  * | ||||
|  * NOTE(review): i is an int compared against the size_t size. | ||||
|  */ | ||||
| static int | ||||
| vdev_raidz_reconst_pq_func(void *xbuf, void *ybuf, size_t size, void *private) | ||||
| { | ||||
| 	struct reconst_pq_struct *rpq = private; | ||||
| 	uint8_t *xd = xbuf; | ||||
| 	uint8_t *yd = ybuf; | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < size; | ||||
| 	    i++, rpq->p++, rpq->q++, rpq->pxy++, rpq->qxy++, xd++, yd++) { | ||||
| 		*xd = vdev_raidz_exp2(*rpq->p ^ *rpq->pxy, rpq->aexp) ^ | ||||
| 		    vdev_raidz_exp2(*rpq->q ^ *rpq->qxy, rpq->bexp); | ||||
| 		*yd = *rpq->p ^ *rpq->pxy ^ *xd; | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| /*
 | ||||
|  * Single-buffer companion to vdev_raidz_reconst_pq_func() that | ||||
|  * computes only the xbuf result: each of the size bytes of xbuf | ||||
|  * receives | ||||
|  *   vdev_raidz_exp2(p ^ pxy, aexp) ^ vdev_raidz_exp2(q ^ qxy, bexp). | ||||
|  * Presumably it handles the part of the first column that extends | ||||
|  * past a shorter second column (TODO confirm against the caller, which | ||||
|  * is not visible here). | ||||
|  * | ||||
|  * private is a struct reconst_pq_struct; all four of its cursors are | ||||
|  * advanced by size bytes.  Always returns 0. | ||||
|  * | ||||
|  * NOTE(review): i is an int compared against the size_t size. | ||||
|  */ | ||||
| static int | ||||
| vdev_raidz_reconst_pq_tail_func(void *xbuf, size_t size, void *private) | ||||
| { | ||||
| 	struct reconst_pq_struct *rpq = private; | ||||
| 	uint8_t *xd = xbuf; | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = 0; i < size; | ||||
| 	    i++, rpq->p++, rpq->q++, rpq->pxy++, rpq->qxy++, xd++) { | ||||
| 		/* same operation as vdev_raidz_reconst_pq_func() on xd */ | ||||
| 		*xd = vdev_raidz_exp2(*rpq->p ^ *rpq->pxy, rpq->aexp) ^ | ||||
| 		    vdev_raidz_exp2(*rpq->q ^ *rpq->qxy, rpq->bexp); | ||||
| 	} | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| { | ||||
| 	uint64_t *dst, *src, xcount, ccount, count, i; | ||||
| 	int x = tgts[0]; | ||||
| 	int c; | ||||
| 	abd_t *dst, *src; | ||||
| 
 | ||||
| 	ASSERT(ntgts == 1); | ||||
| 	ASSERT(x >= rm->rm_firstdatacol); | ||||
| 	ASSERT(x < rm->rm_cols); | ||||
| 
 | ||||
| 	xcount = rm->rm_col[x].rc_size / sizeof (src[0]); | ||||
| 	ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0])); | ||||
| 	ASSERT(xcount > 0); | ||||
| 	ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_P].rc_size); | ||||
| 	ASSERT(rm->rm_col[x].rc_size > 0); | ||||
| 
 | ||||
| 	src = rm->rm_col[VDEV_RAIDZ_P].rc_data; | ||||
| 	dst = rm->rm_col[x].rc_data; | ||||
| 	for (i = 0; i < xcount; i++, dst++, src++) { | ||||
| 		*dst = *src; | ||||
| 	} | ||||
| 	src = rm->rm_col[VDEV_RAIDZ_P].rc_abd; | ||||
| 	dst = rm->rm_col[x].rc_abd; | ||||
| 
 | ||||
| 	abd_copy_from_buf(dst, abd_to_buf(src), rm->rm_col[x].rc_size); | ||||
| 
 | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		src = rm->rm_col[c].rc_data; | ||||
| 		dst = rm->rm_col[x].rc_data; | ||||
| 		uint64_t size = MIN(rm->rm_col[x].rc_size, | ||||
| 		    rm->rm_col[c].rc_size); | ||||
| 
 | ||||
| 		src = rm->rm_col[c].rc_abd; | ||||
| 		dst = rm->rm_col[x].rc_abd; | ||||
| 
 | ||||
| 		if (c == x) | ||||
| 			continue; | ||||
| 
 | ||||
| 		ccount = rm->rm_col[c].rc_size / sizeof (src[0]); | ||||
| 		count = MIN(ccount, xcount); | ||||
| 
 | ||||
| 		for (i = 0; i < count; i++, dst++, src++) { | ||||
| 			*dst ^= *src; | ||||
| 		} | ||||
| 		(void) abd_iterate_func2(dst, src, 0, 0, size, | ||||
| 		    vdev_raidz_reconst_p_func, NULL); | ||||
| 	} | ||||
| 
 | ||||
| 	return (1 << VDEV_RAIDZ_P); | ||||
| @ -672,57 +850,46 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| static int | ||||
| vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| { | ||||
| 	uint64_t *dst, *src, xcount, ccount, count, mask, i; | ||||
| 	uint8_t *b; | ||||
| 	int x = tgts[0]; | ||||
| 	int c, j, exp; | ||||
| 	int c, exp; | ||||
| 	abd_t *dst, *src; | ||||
| 	struct reconst_q_struct rq; | ||||
| 
 | ||||
| 	ASSERT(ntgts == 1); | ||||
| 
 | ||||
| 	xcount = rm->rm_col[x].rc_size / sizeof (src[0]); | ||||
| 	ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0])); | ||||
| 	ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_Q].rc_size); | ||||
| 
 | ||||
| 	for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 		src = rm->rm_col[c].rc_data; | ||||
| 		dst = rm->rm_col[x].rc_data; | ||||
| 		uint64_t size = (c == x) ? 0 : MIN(rm->rm_col[x].rc_size, | ||||
| 		    rm->rm_col[c].rc_size); | ||||
| 
 | ||||
| 		if (c == x) | ||||
| 			ccount = 0; | ||||
| 		else | ||||
| 			ccount = rm->rm_col[c].rc_size / sizeof (src[0]); | ||||
| 
 | ||||
| 		count = MIN(ccount, xcount); | ||||
| 		src = rm->rm_col[c].rc_abd; | ||||
| 		dst = rm->rm_col[x].rc_abd; | ||||
| 
 | ||||
| 		if (c == rm->rm_firstdatacol) { | ||||
| 			for (i = 0; i < count; i++, dst++, src++) { | ||||
| 				*dst = *src; | ||||
| 			} | ||||
| 			for (; i < xcount; i++, dst++) { | ||||
| 				*dst = 0; | ||||
| 			} | ||||
| 			abd_copy(dst, src, size); | ||||
| 			if (rm->rm_col[x].rc_size > size) | ||||
| 				abd_zero_off(dst, size, | ||||
| 				    rm->rm_col[x].rc_size - size); | ||||
| 
 | ||||
| 		} else { | ||||
| 			for (i = 0; i < count; i++, dst++, src++) { | ||||
| 				VDEV_RAIDZ_64MUL_2(*dst, mask); | ||||
| 				*dst ^= *src; | ||||
| 			} | ||||
| 
 | ||||
| 			for (; i < xcount; i++, dst++) { | ||||
| 				VDEV_RAIDZ_64MUL_2(*dst, mask); | ||||
| 			} | ||||
| 			ASSERT3U(size, <=, rm->rm_col[x].rc_size); | ||||
| 			(void) abd_iterate_func2(dst, src, 0, 0, size, | ||||
| 			    vdev_raidz_reconst_q_pre_func, NULL); | ||||
| 			(void) abd_iterate_func(dst, | ||||
| 			    size, rm->rm_col[x].rc_size - size, | ||||
| 			    vdev_raidz_reconst_q_pre_tail_func, NULL); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	src = rm->rm_col[VDEV_RAIDZ_Q].rc_data; | ||||
| 	dst = rm->rm_col[x].rc_data; | ||||
| 	src = rm->rm_col[VDEV_RAIDZ_Q].rc_abd; | ||||
| 	dst = rm->rm_col[x].rc_abd; | ||||
| 	exp = 255 - (rm->rm_cols - 1 - x); | ||||
| 	rq.q = abd_to_buf(src); | ||||
| 	rq.exp = exp; | ||||
| 
 | ||||
| 	for (i = 0; i < xcount; i++, dst++, src++) { | ||||
| 		*dst ^= *src; | ||||
| 		for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) { | ||||
| 			*b = vdev_raidz_exp2(*b, exp); | ||||
| 		} | ||||
| 	} | ||||
| 	(void) abd_iterate_func(dst, 0, rm->rm_col[x].rc_size, | ||||
| 	    vdev_raidz_reconst_q_post_func, &rq); | ||||
| 
 | ||||
| 	return (1 << VDEV_RAIDZ_Q); | ||||
| } | ||||
| @ -730,11 +897,13 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| static int | ||||
| vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| { | ||||
| 	uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp; | ||||
| 	void *pdata, *qdata; | ||||
| 	uint64_t xsize, ysize, i; | ||||
| 	uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp; | ||||
| 	abd_t *pdata, *qdata; | ||||
| 	uint64_t xsize, ysize; | ||||
| 	int x = tgts[0]; | ||||
| 	int y = tgts[1]; | ||||
| 	abd_t *xd, *yd; | ||||
| 	struct reconst_pq_struct rpq; | ||||
| 
 | ||||
| 	ASSERT(ntgts == 2); | ||||
| 	ASSERT(x < y); | ||||
| @ -750,15 +919,15 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| 	 * parity so we make those columns appear to be full of zeros by | ||||
| 	 * setting their lengths to zero. | ||||
| 	 */ | ||||
| 	pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data; | ||||
| 	qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data; | ||||
| 	pdata = rm->rm_col[VDEV_RAIDZ_P].rc_abd; | ||||
| 	qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_abd; | ||||
| 	xsize = rm->rm_col[x].rc_size; | ||||
| 	ysize = rm->rm_col[y].rc_size; | ||||
| 
 | ||||
| 	rm->rm_col[VDEV_RAIDZ_P].rc_data = | ||||
| 	    zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size); | ||||
| 	rm->rm_col[VDEV_RAIDZ_Q].rc_data = | ||||
| 	    zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size); | ||||
| 	rm->rm_col[VDEV_RAIDZ_P].rc_abd = | ||||
| 	    abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_P].rc_size, B_TRUE); | ||||
| 	rm->rm_col[VDEV_RAIDZ_Q].rc_abd = | ||||
| 	    abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_Q].rc_size, B_TRUE); | ||||
| 	rm->rm_col[x].rc_size = 0; | ||||
| 	rm->rm_col[y].rc_size = 0; | ||||
| 
 | ||||
| @ -767,12 +936,12 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| 	rm->rm_col[x].rc_size = xsize; | ||||
| 	rm->rm_col[y].rc_size = ysize; | ||||
| 
 | ||||
| 	p = pdata; | ||||
| 	q = qdata; | ||||
| 	pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data; | ||||
| 	qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data; | ||||
| 	xd = rm->rm_col[x].rc_data; | ||||
| 	yd = rm->rm_col[y].rc_data; | ||||
| 	p = abd_to_buf(pdata); | ||||
| 	q = abd_to_buf(qdata); | ||||
| 	pxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd); | ||||
| 	qxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); | ||||
| 	xd = rm->rm_col[x].rc_abd; | ||||
| 	yd = rm->rm_col[y].rc_abd; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We now have: | ||||
| @ -796,24 +965,27 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| 	aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)]; | ||||
| 	bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)]; | ||||
| 
 | ||||
| 	for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) { | ||||
| 		*xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^ | ||||
| 		    vdev_raidz_exp2(*q ^ *qxy, bexp); | ||||
| 	ASSERT3U(xsize, >=, ysize); | ||||
| 	rpq.p = p; | ||||
| 	rpq.q = q; | ||||
| 	rpq.pxy = pxy; | ||||
| 	rpq.qxy = qxy; | ||||
| 	rpq.aexp = aexp; | ||||
| 	rpq.bexp = bexp; | ||||
| 
 | ||||
| 		if (i < ysize) | ||||
| 			*yd = *p ^ *pxy ^ *xd; | ||||
| 	} | ||||
| 	(void) abd_iterate_func2(xd, yd, 0, 0, ysize, | ||||
| 	    vdev_raidz_reconst_pq_func, &rpq); | ||||
| 	(void) abd_iterate_func(xd, ysize, xsize - ysize, | ||||
| 	    vdev_raidz_reconst_pq_tail_func, &rpq); | ||||
| 
 | ||||
| 	zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data, | ||||
| 	    rm->rm_col[VDEV_RAIDZ_P].rc_size); | ||||
| 	zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data, | ||||
| 	    rm->rm_col[VDEV_RAIDZ_Q].rc_size); | ||||
| 	abd_free(rm->rm_col[VDEV_RAIDZ_P].rc_abd); | ||||
| 	abd_free(rm->rm_col[VDEV_RAIDZ_Q].rc_abd); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Restore the saved parity data. | ||||
| 	 */ | ||||
| 	rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata; | ||||
| 	rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata; | ||||
| 	rm->rm_col[VDEV_RAIDZ_P].rc_abd = pdata; | ||||
| 	rm->rm_col[VDEV_RAIDZ_Q].rc_abd = qdata; | ||||
| 
 | ||||
| 	return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q)); | ||||
| } | ||||
| @ -1131,7 +1303,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, | ||||
| 		c = used[i]; | ||||
| 		ASSERT3U(c, <, rm->rm_cols); | ||||
| 
 | ||||
| 		src = rm->rm_col[c].rc_data; | ||||
| 		src = abd_to_buf(rm->rm_col[c].rc_abd); | ||||
| 		ccount = rm->rm_col[c].rc_size; | ||||
| 		for (j = 0; j < nmissing; j++) { | ||||
| 			cc = missing[j] + rm->rm_firstdatacol; | ||||
| @ -1139,7 +1311,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, | ||||
| 			ASSERT3U(cc, <, rm->rm_cols); | ||||
| 			ASSERT3U(cc, !=, c); | ||||
| 
 | ||||
| 			dst[j] = rm->rm_col[cc].rc_data; | ||||
| 			dst[j] = abd_to_buf(rm->rm_col[cc].rc_abd); | ||||
| 			dcount[j] = rm->rm_col[cc].rc_size; | ||||
| 		} | ||||
| 
 | ||||
| @ -1187,8 +1359,25 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| 	uint8_t *invrows[VDEV_RAIDZ_MAXPARITY]; | ||||
| 	uint8_t *used; | ||||
| 
 | ||||
| 	abd_t **bufs = NULL; | ||||
| 
 | ||||
| 	int code = 0; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Matrix reconstruction can't use scatter ABDs yet, so we allocate | ||||
| 	 * temporary linear ABDs. | ||||
| 	 */ | ||||
| 	if (!abd_is_linear(rm->rm_col[rm->rm_firstdatacol].rc_abd)) { | ||||
| 		bufs = kmem_alloc(rm->rm_cols * sizeof (abd_t *), KM_PUSHPAGE); | ||||
| 
 | ||||
| 		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 			raidz_col_t *col = &rm->rm_col[c]; | ||||
| 
 | ||||
| 			bufs[c] = col->rc_abd; | ||||
| 			col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE); | ||||
| 			abd_copy(col->rc_abd, bufs[c], col->rc_size); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	n = rm->rm_cols - rm->rm_firstdatacol; | ||||
| 
 | ||||
| @ -1275,6 +1464,20 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) | ||||
| 
 | ||||
| 	kmem_free(p, psize); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Copy back from the temporary linear ABDs and free them. | ||||
| 	 */ | ||||
| 	if (bufs) { | ||||
| 		for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) { | ||||
| 			raidz_col_t *col = &rm->rm_col[c]; | ||||
| 
 | ||||
| 			abd_copy(bufs[c], col->rc_abd, col->rc_size); | ||||
| 			abd_free(col->rc_abd); | ||||
| 			col->rc_abd = bufs[c]; | ||||
| 		} | ||||
| 		kmem_free(bufs, rm->rm_cols * sizeof (abd_t *)); | ||||
| 	} | ||||
| 
 | ||||
| 	return (code); | ||||
| } | ||||
| 
 | ||||
| @ -1321,7 +1524,6 @@ vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt) | ||||
| 
 | ||||
| 	dt = &tgts[nbadparity]; | ||||
| 
 | ||||
| 
 | ||||
| 	/* Reconstruct using the new math implementation */ | ||||
| 	ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata); | ||||
| 	if (ret != RAIDZ_ORIGINAL_IMPL) | ||||
| @ -1479,7 +1681,7 @@ vdev_raidz_io_start(zio_t *zio) | ||||
| 			rc = &rm->rm_col[c]; | ||||
| 			cvd = vd->vdev_child[rc->rc_devidx]; | ||||
| 			zio_nowait(zio_vdev_child_io(zio, NULL, cvd, | ||||
| 			    rc->rc_offset, rc->rc_data, rc->rc_size, | ||||
| 			    rc->rc_offset, rc->rc_abd, rc->rc_size, | ||||
| 			    zio->io_type, zio->io_priority, 0, | ||||
| 			    vdev_raidz_child_done, rc)); | ||||
| 		} | ||||
| @ -1536,7 +1738,7 @@ vdev_raidz_io_start(zio_t *zio) | ||||
| 		if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 || | ||||
| 		    (zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) { | ||||
| 			zio_nowait(zio_vdev_child_io(zio, NULL, cvd, | ||||
| 			    rc->rc_offset, rc->rc_data, rc->rc_size, | ||||
| 			    rc->rc_offset, rc->rc_abd, rc->rc_size, | ||||
| 			    zio->io_type, zio->io_priority, 0, | ||||
| 			    vdev_raidz_child_done, rc)); | ||||
| 		} | ||||
| @ -1552,6 +1754,7 @@ vdev_raidz_io_start(zio_t *zio) | ||||
| static void | ||||
| raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) | ||||
| { | ||||
| 	void *buf; | ||||
| 	vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx]; | ||||
| 
 | ||||
| 	if (!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { | ||||
| @ -1565,9 +1768,11 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data) | ||||
| 		zbc.zbc_has_cksum = 0; | ||||
| 		zbc.zbc_injected = rm->rm_ecksuminjected; | ||||
| 
 | ||||
| 		buf = abd_borrow_buf_copy(rc->rc_abd, rc->rc_size); | ||||
| 		zfs_ereport_post_checksum(zio->io_spa, vd, zio, | ||||
| 		    rc->rc_offset, rc->rc_size, rc->rc_data, bad_data, | ||||
| 		    rc->rc_offset, rc->rc_size, buf, bad_data, | ||||
| 		    &zbc); | ||||
| 		abd_return_buf(rc->rc_abd, buf, rc->rc_size); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @ -1616,7 +1821,7 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) | ||||
| 		if (!rc->rc_tried || rc->rc_error != 0) | ||||
| 			continue; | ||||
| 		orig[c] = zio_buf_alloc(rc->rc_size); | ||||
| 		bcopy(rc->rc_data, orig[c], rc->rc_size); | ||||
| 		abd_copy_to_buf(orig[c], rc->rc_abd, rc->rc_size); | ||||
| 	} | ||||
| 
 | ||||
| 	vdev_raidz_generate_parity(rm); | ||||
| @ -1625,7 +1830,7 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm) | ||||
| 		rc = &rm->rm_col[c]; | ||||
| 		if (!rc->rc_tried || rc->rc_error != 0) | ||||
| 			continue; | ||||
| 		if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) { | ||||
| 		if (bcmp(orig[c], abd_to_buf(rc->rc_abd), rc->rc_size) != 0) { | ||||
| 			raidz_checksum_error(zio, rc, orig[c]); | ||||
| 			rc->rc_error = SET_ERROR(ECKSUM); | ||||
| 			ret++; | ||||
| @ -1728,7 +1933,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) | ||||
| 				ASSERT3S(c, >=, 0); | ||||
| 				ASSERT3S(c, <, rm->rm_cols); | ||||
| 				rc = &rm->rm_col[c]; | ||||
| 				bcopy(rc->rc_data, orig[i], rc->rc_size); | ||||
| 				abd_copy_to_buf(orig[i], rc->rc_abd, | ||||
| 				    rc->rc_size); | ||||
| 			} | ||||
| 
 | ||||
| 			/*
 | ||||
| @ -1758,7 +1964,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors) | ||||
| 			for (i = 0; i < n; i++) { | ||||
| 				c = tgts[i]; | ||||
| 				rc = &rm->rm_col[c]; | ||||
| 				bcopy(orig[i], rc->rc_data, rc->rc_size); | ||||
| 				abd_copy_from_buf(rc->rc_abd, orig[i], | ||||
| 				    rc->rc_size); | ||||
| 			} | ||||
| 
 | ||||
| 			do { | ||||
| @ -1997,7 +2204,7 @@ vdev_raidz_io_done(zio_t *zio) | ||||
| 				continue; | ||||
| 			zio_nowait(zio_vdev_child_io(zio, NULL, | ||||
| 			    vd->vdev_child[rc->rc_devidx], | ||||
| 			    rc->rc_offset, rc->rc_data, rc->rc_size, | ||||
| 			    rc->rc_offset, rc->rc_abd, rc->rc_size, | ||||
| 			    zio->io_type, zio->io_priority, 0, | ||||
| 			    vdev_raidz_child_done, rc)); | ||||
| 		} while (++c < rm->rm_cols); | ||||
| @ -2077,7 +2284,7 @@ done: | ||||
| 				continue; | ||||
| 
 | ||||
| 			zio_nowait(zio_vdev_child_io(zio, NULL, cvd, | ||||
| 			    rc->rc_offset, rc->rc_data, rc->rc_size, | ||||
| 			    rc->rc_offset, rc->rc_abd, rc->rc_size, | ||||
| 			    ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE, | ||||
| 			    ZIO_FLAG_IO_REPAIR | (unexpected_errors ? | ||||
| 			    ZIO_FLAG_SELF_HEAL : 0), NULL, NULL)); | ||||
|  | ||||
| @ -44,6 +44,16 @@ static raidz_impl_ops_t vdev_raidz_fastest_impl = { | ||||
| 	.name = "fastest" | ||||
| }; | ||||
| 
 | ||||
| /* ABD BRINGUP -- not ready yet */ | ||||
| #if 1 | ||||
| #ifdef HAVE_SSSE3 | ||||
| #undef HAVE_SSSE3 | ||||
| #endif | ||||
| #ifdef HAVE_AVX2 | ||||
| #undef HAVE_AVX2 | ||||
| #endif | ||||
| #endif | ||||
| 
 | ||||
| /* All compiled in implementations */ | ||||
| const raidz_impl_ops_t *raidz_all_maths[] = { | ||||
| 	&vdev_raidz_original_impl, | ||||
| @ -149,6 +159,8 @@ vdev_raidz_math_generate(raidz_map_t *rm) | ||||
| { | ||||
| 	raidz_gen_f gen_parity = NULL; | ||||
| 
 | ||||
| /* ABD Bringup -- vector code not ready */ | ||||
| #if 0 | ||||
| 	switch (raidz_parity(rm)) { | ||||
| 		case 1: | ||||
| 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P]; | ||||
| @ -165,6 +177,7 @@ vdev_raidz_math_generate(raidz_map_t *rm) | ||||
| 				raidz_parity(rm)); | ||||
| 			break; | ||||
| 	} | ||||
| #endif | ||||
| 
 | ||||
| 	/* if method is NULL execute the original implementation */ | ||||
| 	if (gen_parity == NULL) | ||||
| @ -175,6 +188,8 @@ vdev_raidz_math_generate(raidz_map_t *rm) | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| /* ABD Bringup -- vector code not ready */ | ||||
| #if 0 | ||||
| static raidz_rec_f | ||||
| reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid, | ||||
| 	const int nbaddata) | ||||
| @ -229,6 +244,7 @@ reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid, | ||||
| 	} | ||||
| 	return ((raidz_rec_f) NULL); | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * Select data reconstruction method for raidz_map | ||||
| @ -242,6 +258,8 @@ vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid, | ||||
| { | ||||
| 	raidz_rec_f rec_data = NULL; | ||||
| 
 | ||||
| /* ABD Bringup -- vector code not ready */ | ||||
| #if 0 | ||||
| 	switch (raidz_parity(rm)) { | ||||
| 	case PARITY_P: | ||||
| 		rec_data = reconstruct_fun_p_sel(rm, parity_valid, nbaddata); | ||||
| @ -257,6 +275,7 @@ vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid, | ||||
| 		    raidz_parity(rm)); | ||||
| 		break; | ||||
| 	} | ||||
| #endif | ||||
| 
 | ||||
| 	if (rec_data == NULL) | ||||
| 		return (RAIDZ_ORIGINAL_IMPL); | ||||
| @ -471,13 +490,12 @@ vdev_raidz_math_init(void) | ||||
| 	return; | ||||
| #endif | ||||
| 
 | ||||
| 	/* Fake a zio and run the benchmark on it */ | ||||
| 	/* Fake a zio and run the benchmark on a warmed-up buffer */ | ||||
| 	bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP); | ||||
| 	bench_zio->io_offset = 0; | ||||
| 	bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */ | ||||
| 	bench_zio->io_data = zio_data_buf_alloc(BENCH_ZIO_SIZE); | ||||
| 	VERIFY(bench_zio->io_data); | ||||
| 	memset(bench_zio->io_data, 0xAA, BENCH_ZIO_SIZE); /* warm up */ | ||||
| 	bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE); | ||||
| 	memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE); | ||||
| 
 | ||||
| 	/* Benchmark parity generation methods */ | ||||
| 	for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) { | ||||
| @ -501,7 +519,7 @@ vdev_raidz_math_init(void) | ||||
| 	vdev_raidz_map_free(bench_rm); | ||||
| 
 | ||||
| 	/* cleanup the bench zio */ | ||||
| 	zio_data_buf_free(bench_zio->io_data, BENCH_ZIO_SIZE); | ||||
| 	abd_free(bench_zio->io_abd); | ||||
| 	kmem_free(bench_zio, sizeof (zio_t)); | ||||
| 
 | ||||
| 	/* install kstats for all impl */ | ||||
|  | ||||
| @ -21,7 +21,6 @@ | ||||
| /*
 | ||||
|  * Copyright (C) 2016 Gvozden Nešković. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/isa_defs.h> | ||||
| 
 | ||||
| #if defined(__x86_64) && defined(HAVE_AVX2) | ||||
| @ -401,7 +400,12 @@ DEFINE_REC_METHODS(avx2); | ||||
| static boolean_t | ||||
| raidz_will_avx2_work(void) | ||||
| { | ||||
| /* ABD Bringup -- vector code not ready */ | ||||
| #if 1 | ||||
| 	return (B_FALSE); | ||||
| #else | ||||
| 	return (zfs_avx_available() && zfs_avx2_available()); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| const raidz_impl_ops_t vdev_raidz_avx2_impl = { | ||||
|  | ||||
| @ -33,7 +33,8 @@ | ||||
| #endif | ||||
| 
 | ||||
| /* Calculate data offset in raidz column, offset is in bytes */ | ||||
| #define	COL_OFF(col, off)	((v_t *)(((char *)(col)->rc_data) + (off))) | ||||
| /* ABD BRINGUP -- needs to be refactored for ABD */ | ||||
| #define	COL_OFF(col, off)	((v_t *)(((char *)(col)->rc_abd) + (off))) | ||||
| 
 | ||||
| /*
 | ||||
|  * PARITY CALCULATION | ||||
| @ -83,6 +84,8 @@ raidz_generate_p_impl(raidz_map_t * const rm) | ||||
| 	const size_t psize = raidz_big_size(rm); | ||||
| 	const size_t short_size = raidz_short_size(rm); | ||||
| 
 | ||||
| 	panic("not ABD ready"); | ||||
| 
 | ||||
| 	raidz_math_begin(); | ||||
| 
 | ||||
| 	/* short_size */ | ||||
| @ -141,6 +144,8 @@ raidz_generate_pq_impl(raidz_map_t * const rm) | ||||
| 	const size_t psize = raidz_big_size(rm); | ||||
| 	const size_t short_size = raidz_short_size(rm); | ||||
| 
 | ||||
| 	panic("not ABD ready"); | ||||
| 
 | ||||
| 	raidz_math_begin(); | ||||
| 
 | ||||
| 	/* short_size */ | ||||
| @ -208,6 +213,8 @@ raidz_generate_pqr_impl(raidz_map_t * const rm) | ||||
| 	const size_t psize = raidz_big_size(rm); | ||||
| 	const size_t short_size = raidz_short_size(rm); | ||||
| 
 | ||||
| 	panic("not ABD ready"); | ||||
| 
 | ||||
| 	raidz_math_begin(); | ||||
| 
 | ||||
| 	/* short_size */ | ||||
|  | ||||
| @ -24,7 +24,6 @@ | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/vdev_raidz_impl.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Provide native CPU scalar routines. | ||||
|  * Support 32bit and 64bit CPUs. | ||||
|  | ||||
| @ -403,8 +403,13 @@ DEFINE_REC_METHODS(ssse3); | ||||
| static boolean_t | ||||
| raidz_will_ssse3_work(void) | ||||
| { | ||||
| /* ABD Bringup -- vector code not ready */ | ||||
| #if 1 | ||||
| 	return (B_FALSE); | ||||
| #else | ||||
| 	return (zfs_sse_available() && zfs_sse2_available() && | ||||
| 	    zfs_ssse3_available()); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| const raidz_impl_ops_t vdev_raidz_ssse3_impl = { | ||||
|  | ||||
| @ -40,6 +40,7 @@ | ||||
| #include <sys/dsl_pool.h> | ||||
| #include <sys/metaslab.h> | ||||
| #include <sys/trace_zil.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * The zfs intent log (ZIL) saves transaction records of system calls | ||||
| @ -878,6 +879,7 @@ zil_lwb_write_done(zio_t *zio) | ||||
| 	 * one in zil_commit_writer(). zil_sync() will only remove | ||||
| 	 * the lwb if lwb_buf is null. | ||||
| 	 */ | ||||
| 	abd_put(zio->io_abd); | ||||
| 	zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); | ||||
| 	mutex_enter(&zilog->zl_lock); | ||||
| 	lwb->lwb_zio = NULL; | ||||
| @ -914,12 +916,14 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb) | ||||
| 	/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */ | ||||
| 	mutex_enter(&zilog->zl_lock); | ||||
| 	if (lwb->lwb_zio == NULL) { | ||||
| 		abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, | ||||
| 		    BP_GET_LSIZE(&lwb->lwb_blk)); | ||||
| 		if (!lwb->lwb_fastwrite) { | ||||
| 			metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk); | ||||
| 			lwb->lwb_fastwrite = 1; | ||||
| 		} | ||||
| 		lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa, | ||||
| 		    0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk), | ||||
| 		    0, &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk), | ||||
| 		    zil_lwb_write_done, lwb, ZIO_PRIORITY_SYNC_WRITE, | ||||
| 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | | ||||
| 		    ZIO_FLAG_FASTWRITE, &zb); | ||||
|  | ||||
							
								
								
									
										293
									
								
								module/zfs/zio.c
									
									
									
									
									
								
							
							
						
						
									
										293
									
								
								module/zfs/zio.c
									
									
									
									
									
								
							| @ -42,6 +42,7 @@ | ||||
| #include <sys/metaslab_impl.h> | ||||
| #include <sys/time.h> | ||||
| #include <sys/trace_zio.h> | ||||
| #include <sys/abd.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * ========================================================================== | ||||
| @ -67,6 +68,11 @@ kmem_cache_t *zio_cache; | ||||
| kmem_cache_t *zio_link_cache; | ||||
| kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; | ||||
| kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; | ||||
| #if defined(ZFS_DEBUG) && !defined(_KERNEL) | ||||
| uint64_t zio_buf_cache_allocs[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; | ||||
| uint64_t zio_buf_cache_frees[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; | ||||
| #endif | ||||
| 
 | ||||
| int zio_delay_max = ZIO_DELAY_MAX; | ||||
| 
 | ||||
| #define	ZIO_PIPELINE_CONTINUE		0x100 | ||||
| @ -211,6 +217,13 @@ zio_fini(void) | ||||
| 		 */ | ||||
| 		if (((c + 1) << SPA_MINBLOCKSHIFT) > zfs_max_recordsize) | ||||
| 			break; | ||||
| #endif | ||||
| #if defined(ZFS_DEBUG) && !defined(_KERNEL) | ||||
| 		if (zio_buf_cache_allocs[c] != zio_buf_cache_frees[c]) | ||||
| 			(void) printf("zio_fini: [%d] %llu != %llu\n", | ||||
| 			    (int)((c + 1) << SPA_MINBLOCKSHIFT), | ||||
| 			    (long long unsigned)zio_buf_cache_allocs[c], | ||||
| 			    (long long unsigned)zio_buf_cache_frees[c]); | ||||
| #endif | ||||
| 		if (zio_buf_cache[c] != last_cache) { | ||||
| 			last_cache = zio_buf_cache[c]; | ||||
| @ -251,6 +264,9 @@ zio_buf_alloc(size_t size) | ||||
| 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; | ||||
| 
 | ||||
| 	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); | ||||
| #if defined(ZFS_DEBUG) && !defined(_KERNEL) | ||||
| 	atomic_add_64(&zio_buf_cache_allocs[c], 1); | ||||
| #endif | ||||
| 
 | ||||
| 	return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); | ||||
| } | ||||
| @ -271,26 +287,15 @@ zio_data_buf_alloc(size_t size) | ||||
| 	return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE)); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Use zio_buf_alloc_flags when specific allocation flags are needed.  e.g. | ||||
|  * passing KM_NOSLEEP when it is acceptable for an allocation to fail. | ||||
|  */ | ||||
| void * | ||||
| zio_buf_alloc_flags(size_t size, int flags) | ||||
| { | ||||
| 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; | ||||
| 
 | ||||
| 	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); | ||||
| 
 | ||||
| 	return (kmem_cache_alloc(zio_buf_cache[c], flags)); | ||||
| } | ||||
| 
 | ||||
| void | ||||
| zio_buf_free(void *buf, size_t size) | ||||
| { | ||||
| 	size_t c = (size - 1) >> SPA_MINBLOCKSHIFT; | ||||
| 
 | ||||
| 	VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); | ||||
| #if defined(ZFS_DEBUG) && !defined(_KERNEL) | ||||
| 	atomic_add_64(&zio_buf_cache_frees[c], 1); | ||||
| #endif | ||||
| 
 | ||||
| 	kmem_cache_free(zio_buf_cache[c], buf); | ||||
| } | ||||
| @ -311,12 +316,18 @@ zio_data_buf_free(void *buf, size_t size) | ||||
|  * ========================================================================== | ||||
|  */ | ||||
| void | ||||
| zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, | ||||
| zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize, | ||||
| 	zio_transform_func_t *transform) | ||||
| { | ||||
| 	zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); | ||||
| 
 | ||||
| 	zt->zt_orig_data = zio->io_data; | ||||
| 	/*
 | ||||
| 	 * Ensure that anyone expecting this zio to contain a linear ABD isn't | ||||
| 	 * going to get a nasty surprise when they try to access the data. | ||||
| 	 */ | ||||
| 	IMPLY(abd_is_linear(zio->io_abd), abd_is_linear(data)); | ||||
| 
 | ||||
| 	zt->zt_orig_abd = zio->io_abd; | ||||
| 	zt->zt_orig_size = zio->io_size; | ||||
| 	zt->zt_bufsize = bufsize; | ||||
| 	zt->zt_transform = transform; | ||||
| @ -324,7 +335,7 @@ zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, | ||||
| 	zt->zt_next = zio->io_transform_stack; | ||||
| 	zio->io_transform_stack = zt; | ||||
| 
 | ||||
| 	zio->io_data = data; | ||||
| 	zio->io_abd = data; | ||||
| 	zio->io_size = size; | ||||
| } | ||||
| 
 | ||||
| @ -336,12 +347,12 @@ zio_pop_transforms(zio_t *zio) | ||||
| 	while ((zt = zio->io_transform_stack) != NULL) { | ||||
| 		if (zt->zt_transform != NULL) | ||||
| 			zt->zt_transform(zio, | ||||
| 			    zt->zt_orig_data, zt->zt_orig_size); | ||||
| 			    zt->zt_orig_abd, zt->zt_orig_size); | ||||
| 
 | ||||
| 		if (zt->zt_bufsize != 0) | ||||
| 			zio_buf_free(zio->io_data, zt->zt_bufsize); | ||||
| 			abd_free(zio->io_abd); | ||||
| 
 | ||||
| 		zio->io_data = zt->zt_orig_data; | ||||
| 		zio->io_abd = zt->zt_orig_abd; | ||||
| 		zio->io_size = zt->zt_orig_size; | ||||
| 		zio->io_transform_stack = zt->zt_next; | ||||
| 
 | ||||
| @ -355,21 +366,26 @@ zio_pop_transforms(zio_t *zio) | ||||
|  * ========================================================================== | ||||
|  */ | ||||
| static void | ||||
| zio_subblock(zio_t *zio, void *data, uint64_t size) | ||||
| zio_subblock(zio_t *zio, abd_t *data, uint64_t size) | ||||
| { | ||||
| 	ASSERT(zio->io_size > size); | ||||
| 
 | ||||
| 	if (zio->io_type == ZIO_TYPE_READ) | ||||
| 		bcopy(zio->io_data, data, size); | ||||
| 		abd_copy(data, zio->io_abd, size); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| zio_decompress(zio_t *zio, void *data, uint64_t size) | ||||
| zio_decompress(zio_t *zio, abd_t *data, uint64_t size) | ||||
| { | ||||
| 	if (zio->io_error == 0 && | ||||
| 	    zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), | ||||
| 	    zio->io_data, data, zio->io_size, size) != 0) | ||||
| 		zio->io_error = SET_ERROR(EIO); | ||||
| 	if (zio->io_error == 0) { | ||||
| 		void *tmp = abd_borrow_buf(data, size); | ||||
| 		int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp), | ||||
| 		    zio->io_abd, tmp, zio->io_size, size); | ||||
| 		abd_return_buf_copy(data, tmp, size); | ||||
| 
 | ||||
| 		if (ret != 0) | ||||
| 			zio->io_error = SET_ERROR(EIO); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| @ -552,7 +568,7 @@ zio_timestamp_compare(const void *x1, const void *x2) | ||||
|  */ | ||||
| static zio_t * | ||||
| zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, | ||||
|     void *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done, | ||||
|     abd_t *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done, | ||||
|     void *private, zio_type_t type, zio_priority_t priority, | ||||
|     enum zio_flag flags, vdev_t *vd, uint64_t offset, | ||||
|     const zbookmark_phys_t *zb, enum zio_stage stage, | ||||
| @ -611,7 +627,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, | ||||
| 	zio->io_priority = priority; | ||||
| 	zio->io_vd = vd; | ||||
| 	zio->io_offset = offset; | ||||
| 	zio->io_orig_data = zio->io_data = data; | ||||
| 	zio->io_orig_abd = zio->io_abd = data; | ||||
| 	zio->io_orig_size = zio->io_size = psize; | ||||
| 	zio->io_lsize = lsize; | ||||
| 	zio->io_orig_flags = zio->io_flags = flags; | ||||
| @ -755,7 +771,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp) | ||||
| 
 | ||||
| zio_t * | ||||
| zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, | ||||
|     void *data, uint64_t size, zio_done_func_t *done, void *private, | ||||
|     abd_t *data, uint64_t size, zio_done_func_t *done, void *private, | ||||
|     zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb) | ||||
| { | ||||
| 	zio_t *zio; | ||||
| @ -773,7 +789,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, | ||||
| 
 | ||||
| zio_t * | ||||
| zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, | ||||
|     void *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp, | ||||
|     abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp, | ||||
|     zio_done_func_t *ready, zio_done_func_t *children_ready, | ||||
|     zio_done_func_t *physdone, zio_done_func_t *done, | ||||
|     void *private, zio_priority_t priority, enum zio_flag flags, | ||||
| @ -814,7 +830,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, | ||||
| } | ||||
| 
 | ||||
| zio_t * | ||||
| zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data, | ||||
| zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data, | ||||
|     uint64_t size, zio_done_func_t *done, void *private, | ||||
|     zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb) | ||||
| { | ||||
| @ -967,7 +983,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd, | ||||
| 
 | ||||
| zio_t * | ||||
| zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, | ||||
|     void *data, int checksum, zio_done_func_t *done, void *private, | ||||
|     abd_t *data, int checksum, zio_done_func_t *done, void *private, | ||||
|     zio_priority_t priority, enum zio_flag flags, boolean_t labels) | ||||
| { | ||||
| 	zio_t *zio; | ||||
| @ -988,7 +1004,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, | ||||
| 
 | ||||
| zio_t * | ||||
| zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, | ||||
|     void *data, int checksum, zio_done_func_t *done, void *private, | ||||
|     abd_t *data, int checksum, zio_done_func_t *done, void *private, | ||||
|     zio_priority_t priority, enum zio_flag flags, boolean_t labels) | ||||
| { | ||||
| 	zio_t *zio; | ||||
| @ -1011,8 +1027,9 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, | ||||
| 		 * Therefore, we must make a local copy in case the data is | ||||
| 		 * being written to multiple places in parallel. | ||||
| 		 */ | ||||
| 		void *wbuf = zio_buf_alloc(size); | ||||
| 		bcopy(data, wbuf, size); | ||||
| 		abd_t *wbuf = abd_alloc_sametype(data, size); | ||||
| 		abd_copy(wbuf, data, size); | ||||
| 
 | ||||
| 		zio_push_transform(zio, wbuf, size, size, NULL); | ||||
| 	} | ||||
| 
 | ||||
| @ -1024,7 +1041,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size, | ||||
|  */ | ||||
| zio_t * | ||||
| zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, | ||||
| 	void *data, uint64_t size, int type, zio_priority_t priority, | ||||
| 	abd_t *data, uint64_t size, int type, zio_priority_t priority, | ||||
| 	enum zio_flag flags, zio_done_func_t *done, void *private) | ||||
| { | ||||
| 	enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE; | ||||
| @ -1090,7 +1107,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, | ||||
| } | ||||
| 
 | ||||
| zio_t * | ||||
| zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size, | ||||
| zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size, | ||||
| 	int type, zio_priority_t priority, enum zio_flag flags, | ||||
| 	zio_done_func_t *done, void *private) | ||||
| { | ||||
| @ -1151,14 +1168,17 @@ zio_read_bp_init(zio_t *zio) | ||||
| 	    !(zio->io_flags & ZIO_FLAG_RAW)) { | ||||
| 		uint64_t psize = | ||||
| 		    BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp); | ||||
| 		void *cbuf = zio_buf_alloc(psize); | ||||
| 
 | ||||
| 		zio_push_transform(zio, cbuf, psize, psize, zio_decompress); | ||||
| 		zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize), | ||||
| 		    psize, psize, zio_decompress); | ||||
| 	} | ||||
| 
 | ||||
| 	if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) { | ||||
| 		int psize = BPE_GET_PSIZE(bp); | ||||
| 		void *data = abd_borrow_buf(zio->io_abd, psize); | ||||
| 
 | ||||
| 		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE; | ||||
| 		decode_embedded_bp_compressed(bp, zio->io_data); | ||||
| 		decode_embedded_bp_compressed(bp, data); | ||||
| 		abd_return_buf_copy(zio->io_abd, data, psize); | ||||
| 	} else { | ||||
| 		ASSERT(!BP_IS_EMBEDDED(bp)); | ||||
| 	} | ||||
| @ -1299,7 +1319,7 @@ zio_write_compress(zio_t *zio) | ||||
| 	/* If it's a compressed write that is not raw, compress the buffer. */ | ||||
| 	if (compress != ZIO_COMPRESS_OFF && psize == lsize) { | ||||
| 		void *cbuf = zio_buf_alloc(lsize); | ||||
| 		psize = zio_compress_data(compress, zio->io_data, cbuf, lsize); | ||||
| 		psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize); | ||||
| 		if (psize == 0 || psize == lsize) { | ||||
| 			compress = ZIO_COMPRESS_OFF; | ||||
| 			zio_buf_free(cbuf, lsize); | ||||
| @ -1337,9 +1357,11 @@ zio_write_compress(zio_t *zio) | ||||
| 				zio_buf_free(cbuf, lsize); | ||||
| 				psize = lsize; | ||||
| 			} else { | ||||
| 				bzero((char *)cbuf + psize, rounded - psize); | ||||
| 				abd_t *cdata = abd_get_from_buf(cbuf, lsize); | ||||
| 				abd_take_ownership_of_buf(cdata, B_TRUE); | ||||
| 				abd_zero_off(cdata, psize, rounded - psize); | ||||
| 				psize = rounded; | ||||
| 				zio_push_transform(zio, cbuf, | ||||
| 				zio_push_transform(zio, cdata, | ||||
| 				    psize, lsize, NULL); | ||||
| 			} | ||||
| 		} | ||||
| @ -1942,26 +1964,38 @@ zio_resume_wait(spa_t *spa) | ||||
|  * ========================================================================== | ||||
|  */ | ||||
| 
 | ||||
| static void | ||||
| zio_gang_issue_func_done(zio_t *zio) | ||||
| { | ||||
| 	abd_put(zio->io_abd); | ||||
| } | ||||
| 
 | ||||
| static zio_t * | ||||
| zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, | ||||
|     uint64_t offset) | ||||
| { | ||||
| 	if (gn != NULL) | ||||
| 		return (pio); | ||||
| 
 | ||||
| 	return (zio_read(pio, pio->io_spa, bp, data, BP_GET_PSIZE(bp), | ||||
| 	    NULL, NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), | ||||
| 	return (zio_read(pio, pio->io_spa, bp, abd_get_offset(data, offset), | ||||
| 	    BP_GET_PSIZE(bp), zio_gang_issue_func_done, | ||||
| 	    NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), | ||||
| 	    &pio->io_bookmark)); | ||||
| } | ||||
| 
 | ||||
| zio_t * | ||||
| zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| static zio_t * | ||||
| zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, | ||||
|     uint64_t offset) | ||||
| { | ||||
| 	zio_t *zio; | ||||
| 
 | ||||
| 	if (gn != NULL) { | ||||
| 		abd_t *gbh_abd = | ||||
| 		    abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE); | ||||
| 		zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, | ||||
| 		    gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, pio->io_priority, | ||||
| 		    ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); | ||||
| 		    gbh_abd, SPA_GANGBLOCKSIZE, zio_gang_issue_func_done, NULL, | ||||
| 		    pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), | ||||
| 		    &pio->io_bookmark); | ||||
| 		/*
 | ||||
| 		 * As we rewrite each gang header, the pipeline will compute | ||||
| 		 * a new gang block header checksum for it; but no one will | ||||
| @ -1972,8 +2006,12 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| 		 * this is just good hygiene.) | ||||
| 		 */ | ||||
| 		if (gn != pio->io_gang_leader->io_gang_tree) { | ||||
| 			abd_t *buf = abd_get_offset(data, offset); | ||||
| 
 | ||||
| 			zio_checksum_compute(zio, BP_GET_CHECKSUM(bp), | ||||
| 			    data, BP_GET_PSIZE(bp)); | ||||
| 			    buf, BP_GET_PSIZE(bp)); | ||||
| 
 | ||||
| 			abd_put(buf); | ||||
| 		} | ||||
| 		/*
 | ||||
| 		 * If we are here to damage data for testing purposes, | ||||
| @ -1983,7 +2021,8 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| 			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; | ||||
| 	} else { | ||||
| 		zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp, | ||||
| 		    data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority, | ||||
| 		    abd_get_offset(data, offset), BP_GET_PSIZE(bp), | ||||
| 		    zio_gang_issue_func_done, NULL, pio->io_priority, | ||||
| 		    ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); | ||||
| 	} | ||||
| 
 | ||||
| @ -1991,16 +2030,18 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| } | ||||
| 
 | ||||
| /* ARGSUSED */ | ||||
| zio_t * | ||||
| zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| static zio_t * | ||||
| zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, | ||||
|     uint64_t offset) | ||||
| { | ||||
| 	return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp, | ||||
| 	    ZIO_GANG_CHILD_FLAGS(pio))); | ||||
| } | ||||
| 
 | ||||
| /* ARGSUSED */ | ||||
| zio_t * | ||||
| zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data) | ||||
| static zio_t * | ||||
| zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data, | ||||
|     uint64_t offset) | ||||
| { | ||||
| 	return (zio_claim(pio, pio->io_spa, pio->io_txg, bp, | ||||
| 	    NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio))); | ||||
| @ -2064,13 +2105,14 @@ static void | ||||
| zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp) | ||||
| { | ||||
| 	zio_gang_node_t *gn = zio_gang_node_alloc(gnpp); | ||||
| 	abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE); | ||||
| 
 | ||||
| 	ASSERT(gio->io_gang_leader == gio); | ||||
| 	ASSERT(BP_IS_GANG(bp)); | ||||
| 
 | ||||
| 	zio_nowait(zio_read(gio, gio->io_spa, bp, gn->gn_gbh, | ||||
| 	    SPA_GANGBLOCKSIZE, zio_gang_tree_assemble_done, gn, | ||||
| 	    gio->io_priority, ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); | ||||
| 	zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, SPA_GANGBLOCKSIZE, | ||||
| 	    zio_gang_tree_assemble_done, gn, gio->io_priority, | ||||
| 	    ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark)); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| @ -2087,13 +2129,16 @@ zio_gang_tree_assemble_done(zio_t *zio) | ||||
| 	if (zio->io_error) | ||||
| 		return; | ||||
| 
 | ||||
| 	/* this ABD was created from a linear buf in zio_gang_tree_assemble */ | ||||
| 	if (BP_SHOULD_BYTESWAP(bp)) | ||||
| 		byteswap_uint64_array(zio->io_data, zio->io_size); | ||||
| 		byteswap_uint64_array(abd_to_buf(zio->io_abd), zio->io_size); | ||||
| 
 | ||||
| 	ASSERT(zio->io_data == gn->gn_gbh); | ||||
| 	ASSERT3P(abd_to_buf(zio->io_abd), ==, gn->gn_gbh); | ||||
| 	ASSERT(zio->io_size == SPA_GANGBLOCKSIZE); | ||||
| 	ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); | ||||
| 
 | ||||
| 	abd_put(zio->io_abd); | ||||
| 
 | ||||
| 	for (g = 0; g < SPA_GBH_NBLKPTRS; g++) { | ||||
| 		blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; | ||||
| 		if (!BP_IS_GANG(gbp)) | ||||
| @ -2103,7 +2148,8 @@ zio_gang_tree_assemble_done(zio_t *zio) | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) | ||||
| zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data, | ||||
|     uint64_t offset) | ||||
| { | ||||
| 	zio_t *gio = pio->io_gang_leader; | ||||
| 	zio_t *zio; | ||||
| @ -2117,7 +2163,7 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) | ||||
| 	 * If you're a gang header, your data is in gn->gn_gbh. | ||||
| 	 * If you're a gang member, your data is in 'data' and gn == NULL. | ||||
| 	 */ | ||||
| 	zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data); | ||||
| 	zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data, offset); | ||||
| 
 | ||||
| 	if (gn != NULL) { | ||||
| 		ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC); | ||||
| @ -2126,13 +2172,14 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data) | ||||
| 			blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g]; | ||||
| 			if (BP_IS_HOLE(gbp)) | ||||
| 				continue; | ||||
| 			zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data); | ||||
| 			data = (char *)data + BP_GET_PSIZE(gbp); | ||||
| 			zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data, | ||||
| 			    offset); | ||||
| 			offset += BP_GET_PSIZE(gbp); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (gn == gio->io_gang_tree) | ||||
| 		ASSERT3P((char *)gio->io_data + gio->io_size, ==, data); | ||||
| 		ASSERT3U(gio->io_size, ==, offset); | ||||
| 
 | ||||
| 	if (zio != pio) | ||||
| 		zio_nowait(zio); | ||||
| @ -2165,7 +2212,8 @@ zio_gang_issue(zio_t *zio) | ||||
| 	ASSERT(zio->io_child_type > ZIO_CHILD_GANG); | ||||
| 
 | ||||
| 	if (zio->io_child_error[ZIO_CHILD_GANG] == 0) | ||||
| 		zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_data); | ||||
| 		zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_abd, | ||||
| 		    0); | ||||
| 	else | ||||
| 		zio_gang_tree_free(&zio->io_gang_tree); | ||||
| 
 | ||||
| @ -2205,6 +2253,12 @@ zio_write_gang_member_ready(zio_t *zio) | ||||
| 	mutex_exit(&pio->io_lock); | ||||
| } | ||||
| 
 | ||||
| static void | ||||
| zio_write_gang_done(zio_t *zio) | ||||
| { | ||||
| 	abd_put(zio->io_abd); | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| zio_write_gang_block(zio_t *pio) | ||||
| { | ||||
| @ -2215,6 +2269,7 @@ zio_write_gang_block(zio_t *pio) | ||||
| 	zio_t *zio; | ||||
| 	zio_gang_node_t *gn, **gnpp; | ||||
| 	zio_gbh_phys_t *gbh; | ||||
| 	abd_t *gbh_abd; | ||||
| 	uint64_t txg = pio->io_txg; | ||||
| 	uint64_t resid = pio->io_size; | ||||
| 	uint64_t lsize; | ||||
| @ -2275,12 +2330,14 @@ zio_write_gang_block(zio_t *pio) | ||||
| 	gn = zio_gang_node_alloc(gnpp); | ||||
| 	gbh = gn->gn_gbh; | ||||
| 	bzero(gbh, SPA_GANGBLOCKSIZE); | ||||
| 	gbh_abd = abd_get_from_buf(gbh, SPA_GANGBLOCKSIZE); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Create the gang header. | ||||
| 	 */ | ||||
| 	zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL, | ||||
| 	    pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); | ||||
| 	zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, SPA_GANGBLOCKSIZE, | ||||
| 	    zio_write_gang_done, NULL, pio->io_priority, | ||||
| 	    ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Create and nowait the gang children. | ||||
| @ -2302,9 +2359,9 @@ zio_write_gang_block(zio_t *pio) | ||||
| 		zp.zp_nopwrite = B_FALSE; | ||||
| 
 | ||||
| 		cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g], | ||||
| 		    (char *)pio->io_data + (pio->io_size - resid), lsize, | ||||
| 		    lsize, &zp, zio_write_gang_member_ready, NULL, NULL, NULL, | ||||
| 		    &gn->gn_child[g], pio->io_priority, | ||||
| 		    abd_get_offset(pio->io_abd, pio->io_size - resid), lsize, | ||||
| 		    lsize, &zp, zio_write_gang_member_ready, NULL, NULL, | ||||
| 		    zio_write_gang_done, &gn->gn_child[g], pio->io_priority, | ||||
| 		    ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark); | ||||
| 
 | ||||
| 		if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { | ||||
| @ -2320,7 +2377,6 @@ zio_write_gang_block(zio_t *pio) | ||||
| 			    zp.zp_copies, cio, flags)); | ||||
| 		} | ||||
| 		zio_nowait(cio); | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| @ -2423,10 +2479,11 @@ zio_ddt_child_read_done(zio_t *zio) | ||||
| 	ddp = ddt_phys_select(dde, bp); | ||||
| 	if (zio->io_error == 0) | ||||
| 		ddt_phys_clear(ddp);	/* this ddp doesn't need repair */ | ||||
| 	if (zio->io_error == 0 && dde->dde_repair_data == NULL) | ||||
| 		dde->dde_repair_data = zio->io_data; | ||||
| 
 | ||||
| 	if (zio->io_error == 0 && dde->dde_repair_abd == NULL) | ||||
| 		dde->dde_repair_abd = zio->io_abd; | ||||
| 	else | ||||
| 		zio_buf_free(zio->io_data, zio->io_size); | ||||
| 		abd_free(zio->io_abd); | ||||
| 	mutex_exit(&pio->io_lock); | ||||
| } | ||||
| 
 | ||||
| @ -2459,16 +2516,16 @@ zio_ddt_read_start(zio_t *zio) | ||||
| 			ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp, | ||||
| 			    &blk); | ||||
| 			zio_nowait(zio_read(zio, zio->io_spa, &blk, | ||||
| 			    zio_buf_alloc(zio->io_size), zio->io_size, | ||||
| 			    zio_ddt_child_read_done, dde, zio->io_priority, | ||||
| 			    ZIO_DDT_CHILD_FLAGS(zio) | ZIO_FLAG_DONT_PROPAGATE, | ||||
| 			    &zio->io_bookmark)); | ||||
| 			    abd_alloc_for_io(zio->io_size, B_TRUE), | ||||
| 			    zio->io_size, zio_ddt_child_read_done, dde, | ||||
| 			    zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio) | | ||||
| 			    ZIO_FLAG_DONT_PROPAGATE, &zio->io_bookmark)); | ||||
| 		} | ||||
| 		return (ZIO_PIPELINE_CONTINUE); | ||||
| 	} | ||||
| 
 | ||||
| 	zio_nowait(zio_read(zio, zio->io_spa, bp, | ||||
| 	    zio->io_data, zio->io_size, NULL, NULL, zio->io_priority, | ||||
| 	    zio->io_abd, zio->io_size, NULL, NULL, zio->io_priority, | ||||
| 	    ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark)); | ||||
| 
 | ||||
| 	return (ZIO_PIPELINE_CONTINUE); | ||||
| @ -2498,8 +2555,9 @@ zio_ddt_read_done(zio_t *zio) | ||||
| 			zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE); | ||||
| 			return (ZIO_PIPELINE_STOP); | ||||
| 		} | ||||
| 		if (dde->dde_repair_data != NULL) { | ||||
| 			bcopy(dde->dde_repair_data, zio->io_data, zio->io_size); | ||||
| 		if (dde->dde_repair_abd != NULL) { | ||||
| 			abd_copy(zio->io_abd, dde->dde_repair_abd, | ||||
| 			    zio->io_size); | ||||
| 			zio->io_child_error[ZIO_CHILD_DDT] = 0; | ||||
| 		} | ||||
| 		ddt_repair_done(ddt, dde); | ||||
| @ -2537,12 +2595,10 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) | ||||
| 
 | ||||
| 		if (lio != NULL && do_raw) { | ||||
| 			return (lio->io_size != zio->io_size || | ||||
| 			    bcmp(zio->io_data, lio->io_data, | ||||
| 			    zio->io_size) != 0); | ||||
| 			    abd_cmp(zio->io_abd, lio->io_abd) != 0); | ||||
| 		} else if (lio != NULL) { | ||||
| 			return (lio->io_orig_size != zio->io_orig_size || | ||||
| 			    bcmp(zio->io_orig_data, lio->io_orig_data, | ||||
| 			    zio->io_orig_size) != 0); | ||||
| 			    abd_cmp(zio->io_orig_abd, lio->io_orig_abd) != 0); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| @ -2552,7 +2608,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) | ||||
| 		if (ddp->ddp_phys_birth != 0 && do_raw) { | ||||
| 			blkptr_t blk = *zio->io_bp; | ||||
| 			uint64_t psize; | ||||
| 			void *tmpbuf; | ||||
| 			abd_t *tmpabd; | ||||
| 			int error; | ||||
| 
 | ||||
| 			ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth); | ||||
| @ -2563,19 +2619,19 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) | ||||
| 
 | ||||
| 			ddt_exit(ddt); | ||||
| 
 | ||||
| 			tmpbuf = zio_buf_alloc(psize); | ||||
| 			tmpabd = abd_alloc_for_io(psize, B_TRUE); | ||||
| 
 | ||||
| 			error = zio_wait(zio_read(NULL, spa, &blk, tmpbuf, | ||||
| 			error = zio_wait(zio_read(NULL, spa, &blk, tmpabd, | ||||
| 			    psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ, | ||||
| 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | | ||||
| 			    ZIO_FLAG_RAW, &zio->io_bookmark)); | ||||
| 
 | ||||
| 			if (error == 0) { | ||||
| 				if (bcmp(tmpbuf, zio->io_data, psize) != 0) | ||||
| 				if (abd_cmp(tmpabd, zio->io_abd) != 0) | ||||
| 					error = SET_ERROR(ENOENT); | ||||
| 			} | ||||
| 
 | ||||
| 			zio_buf_free(tmpbuf, psize); | ||||
| 			abd_free(tmpabd); | ||||
| 			ddt_enter(ddt); | ||||
| 			return (error != 0); | ||||
| 		} else if (ddp->ddp_phys_birth != 0) { | ||||
| @ -2597,7 +2653,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde) | ||||
| 			    &aflags, &zio->io_bookmark); | ||||
| 
 | ||||
| 			if (error == 0) { | ||||
| 				if (bcmp(abuf->b_data, zio->io_orig_data, | ||||
| 				if (abd_cmp_buf(zio->io_orig_abd, abuf->b_data, | ||||
| 				    zio->io_orig_size) != 0) | ||||
| 					error = SET_ERROR(ENOENT); | ||||
| 				arc_buf_destroy(abuf, &abuf); | ||||
| @ -2762,12 +2818,12 @@ zio_ddt_write(zio_t *zio) | ||||
| 			return (ZIO_PIPELINE_CONTINUE); | ||||
| 		} | ||||
| 
 | ||||
| 		dio = zio_write(zio, spa, txg, bp, zio->io_orig_data, | ||||
| 		dio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, | ||||
| 		    zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL, | ||||
| 		    NULL, zio_ddt_ditto_write_done, dde, zio->io_priority, | ||||
| 		    ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); | ||||
| 
 | ||||
| 		zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL); | ||||
| 		zio_push_transform(dio, zio->io_abd, zio->io_size, 0, NULL); | ||||
| 		dde->dde_lead_zio[DDT_PHYS_DITTO] = dio; | ||||
| 	} | ||||
| 
 | ||||
| @ -2784,13 +2840,13 @@ zio_ddt_write(zio_t *zio) | ||||
| 		ddt_phys_fill(ddp, bp); | ||||
| 		ddt_phys_addref(ddp); | ||||
| 	} else { | ||||
| 		cio = zio_write(zio, spa, txg, bp, zio->io_orig_data, | ||||
| 		cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd, | ||||
| 		    zio->io_orig_size, zio->io_orig_size, zp, | ||||
| 		    zio_ddt_child_write_ready, NULL, NULL, | ||||
| 		    zio_ddt_child_write_done, dde, zio->io_priority, | ||||
| 		    ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark); | ||||
| 
 | ||||
| 		zio_push_transform(cio, zio->io_data, zio->io_size, 0, NULL); | ||||
| 		zio_push_transform(cio, zio->io_abd, zio->io_size, 0, NULL); | ||||
| 		dde->dde_lead_zio[p] = cio; | ||||
| 	} | ||||
| 
 | ||||
| @ -3130,11 +3186,11 @@ zio_vdev_io_start(zio_t *zio) | ||||
| 	    P2PHASE(zio->io_size, align) != 0) { | ||||
| 		/* Transform logical writes to be a full physical block size. */ | ||||
| 		uint64_t asize = P2ROUNDUP(zio->io_size, align); | ||||
| 		char *abuf = zio_buf_alloc(asize); | ||||
| 		abd_t *abuf = abd_alloc_sametype(zio->io_abd, asize); | ||||
| 		ASSERT(vd == vd->vdev_top); | ||||
| 		if (zio->io_type == ZIO_TYPE_WRITE) { | ||||
| 			bcopy(zio->io_data, abuf, zio->io_size); | ||||
| 			bzero(abuf + zio->io_size, asize - zio->io_size); | ||||
| 			abd_copy(abuf, zio->io_abd, zio->io_size); | ||||
| 			abd_zero_off(abuf, zio->io_size, asize - zio->io_size); | ||||
| 		} | ||||
| 		zio_push_transform(zio, abuf, asize, asize, zio_subblock); | ||||
| 	} | ||||
| @ -3264,7 +3320,7 @@ zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored) | ||||
| { | ||||
| 	void *buf = zio_buf_alloc(zio->io_size); | ||||
| 
 | ||||
| 	bcopy(zio->io_data, buf, zio->io_size); | ||||
| 	abd_copy_to_buf(buf, zio->io_abd, zio->io_size); | ||||
| 
 | ||||
| 	zcr->zcr_cbinfo = zio->io_size; | ||||
| 	zcr->zcr_cbdata = buf; | ||||
| @ -3398,7 +3454,7 @@ zio_checksum_generate(zio_t *zio) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	zio_checksum_compute(zio, checksum, zio->io_data, zio->io_size); | ||||
| 	zio_checksum_compute(zio, checksum, zio->io_abd, zio->io_size); | ||||
| 
 | ||||
| 	return (ZIO_PIPELINE_CONTINUE); | ||||
| } | ||||
| @ -3537,7 +3593,7 @@ zio_ready(zio_t *zio) | ||||
| 		if (BP_IS_GANG(bp)) { | ||||
| 			zio->io_flags &= ~ZIO_FLAG_NODATA; | ||||
| 		} else { | ||||
| 			ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE); | ||||
| 			ASSERT((uintptr_t)zio->io_abd < SPA_MAXBLOCKSIZE); | ||||
| 			zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; | ||||
| 		} | ||||
| 	} | ||||
| @ -3616,6 +3672,7 @@ zio_done(zio_t *zio) | ||||
| 	 * Always attempt to keep stack usage minimal here since | ||||
| 	 * we can be called recursively up to 19 levels deep. | ||||
| 	 */ | ||||
| 	uint64_t psize = zio->io_size; | ||||
| 	zio_t *pio, *pio_next; | ||||
| 	int c, w; | ||||
| 	zio_link_t *zl = NULL; | ||||
| @ -3696,28 +3753,35 @@ zio_done(zio_t *zio) | ||||
| 		while (zio->io_cksum_report != NULL) { | ||||
| 			zio_cksum_report_t *zcr = zio->io_cksum_report; | ||||
| 			uint64_t align = zcr->zcr_align; | ||||
| 			uint64_t asize = P2ROUNDUP(zio->io_size, align); | ||||
| 			char *abuf = zio->io_data; | ||||
| 			uint64_t asize = P2ROUNDUP(psize, align); | ||||
| 			char *abuf = NULL; | ||||
| 			abd_t *adata = zio->io_abd; | ||||
| 
 | ||||
| 			if (asize != zio->io_size) { | ||||
| 				abuf = zio_buf_alloc(asize); | ||||
| 				bcopy(zio->io_data, abuf, zio->io_size); | ||||
| 				bzero(abuf+zio->io_size, asize-zio->io_size); | ||||
| 			if (asize != psize) { | ||||
| 				adata = abd_alloc_linear(asize, B_TRUE); | ||||
| 				abd_copy(adata, zio->io_abd, psize); | ||||
| 				abd_zero_off(adata, psize, asize - psize); | ||||
| 			} | ||||
| 
 | ||||
| 			if (adata != NULL) | ||||
| 				abuf = abd_borrow_buf_copy(adata, asize); | ||||
| 
 | ||||
| 			zio->io_cksum_report = zcr->zcr_next; | ||||
| 			zcr->zcr_next = NULL; | ||||
| 			zcr->zcr_finish(zcr, abuf); | ||||
| 			zfs_ereport_free_checksum(zcr); | ||||
| 
 | ||||
| 			if (asize != zio->io_size) | ||||
| 				zio_buf_free(abuf, asize); | ||||
| 			if (adata != NULL) | ||||
| 				abd_return_buf(adata, abuf, asize); | ||||
| 
 | ||||
| 			if (asize != psize) | ||||
| 				abd_free(adata); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	zio_pop_transforms(zio);	/* note: may set zio->io_error */ | ||||
| 
 | ||||
| 	vdev_stat_update(zio, zio->io_size); | ||||
| 	vdev_stat_update(zio, psize); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If this I/O is attached to a particular vdev and is slow, exceeding | ||||
| @ -4098,7 +4162,6 @@ zbookmark_subtree_completed(const dnode_phys_t *dnp, | ||||
| EXPORT_SYMBOL(zio_type_name); | ||||
| EXPORT_SYMBOL(zio_buf_alloc); | ||||
| EXPORT_SYMBOL(zio_data_buf_alloc); | ||||
| EXPORT_SYMBOL(zio_buf_alloc_flags); | ||||
| EXPORT_SYMBOL(zio_buf_free); | ||||
| EXPORT_SYMBOL(zio_data_buf_free); | ||||
| 
 | ||||
|  | ||||
| @ -20,8 +20,8 @@ | ||||
|  */ | ||||
| /*
 | ||||
|  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | ||||
|  * Copyright (c) 2013 by Delphix. All rights reserved. | ||||
|  * Copyright 2013 Saso Kiselkov. All rights reserved. | ||||
|  * Copyright (c) 2013, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/zfs_context.h> | ||||
| @ -30,6 +30,7 @@ | ||||
| #include <sys/zio.h> | ||||
| #include <sys/zio_checksum.h> | ||||
| #include <sys/zil.h> | ||||
| #include <sys/abd.h> | ||||
| #include <zfs_fletcher.h> | ||||
| 
 | ||||
| /*
 | ||||
| @ -92,45 +93,85 @@ | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| static void | ||||
| zio_checksum_off(const void *buf, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| abd_checksum_off(abd_t *abd, uint64_t size, | ||||
| 	const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| abd_fletcher_2_native(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	fletcher_init(zcp); | ||||
| 	(void) abd_iterate_func(abd, 0, size, | ||||
| 	    fletcher_2_incremental_native, zcp); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| abd_fletcher_2_byteswap(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	fletcher_init(zcp); | ||||
| 	(void) abd_iterate_func(abd, 0, size, | ||||
| 	    fletcher_2_incremental_byteswap, zcp); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| abd_fletcher_4_native(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	fletcher_init(zcp); | ||||
| 	(void) abd_iterate_func(abd, 0, size, | ||||
| 	    fletcher_4_incremental_native, zcp); | ||||
| } | ||||
| 
 | ||||
| /*ARGSUSED*/ | ||||
| void | ||||
| abd_fletcher_4_byteswap(abd_t *abd, uint64_t size, | ||||
|     const void *ctx_template, zio_cksum_t *zcp) | ||||
| { | ||||
| 	fletcher_init(zcp); | ||||
| 	(void) abd_iterate_func(abd, 0, size, | ||||
| 	    fletcher_4_incremental_byteswap, zcp); | ||||
| } | ||||
| 
 | ||||
| zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { | ||||
| 	{{NULL, NULL}, NULL, NULL, 0, "inherit"}, | ||||
| 	{{NULL, NULL}, NULL, NULL, 0, "on"}, | ||||
| 	{{zio_checksum_off,		zio_checksum_off}, | ||||
| 	{{abd_checksum_off,		abd_checksum_off}, | ||||
| 	    NULL, NULL, 0, "off"}, | ||||
| 	{{zio_checksum_SHA256,		zio_checksum_SHA256}, | ||||
| 	{{abd_checksum_SHA256,		abd_checksum_SHA256}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, | ||||
| 	    "label"}, | ||||
| 	{{zio_checksum_SHA256,		zio_checksum_SHA256}, | ||||
| 	{{abd_checksum_SHA256,		abd_checksum_SHA256}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED, | ||||
| 	    "gang_header"}, | ||||
| 	{{fletcher_2_native,		fletcher_2_byteswap}, | ||||
| 	{{abd_fletcher_2_native,	abd_fletcher_2_byteswap}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"}, | ||||
| 	{{fletcher_2_native,		fletcher_2_byteswap}, | ||||
| 	{{abd_fletcher_2_native,	abd_fletcher_2_byteswap}, | ||||
| 	    NULL, NULL, 0, "fletcher2"}, | ||||
| 	{{fletcher_4_native,		fletcher_4_byteswap}, | ||||
| 	{{abd_fletcher_4_native,	abd_fletcher_4_byteswap}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"}, | ||||
| 	{{zio_checksum_SHA256,		zio_checksum_SHA256}, | ||||
| 	{{abd_checksum_SHA256,		abd_checksum_SHA256}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | | ||||
| 	    ZCHECKSUM_FLAG_NOPWRITE, "sha256"}, | ||||
| 	{{fletcher_4_native,		fletcher_4_byteswap}, | ||||
| 	{{abd_fletcher_4_native,	abd_fletcher_4_byteswap}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"}, | ||||
| 	{{zio_checksum_off,		zio_checksum_off}, | ||||
| 	{{abd_checksum_off,		abd_checksum_off}, | ||||
| 	    NULL, NULL, 0, "noparity"}, | ||||
| 	{{zio_checksum_SHA512_native,	zio_checksum_SHA512_byteswap}, | ||||
| 	{{abd_checksum_SHA512_native,	abd_checksum_SHA512_byteswap}, | ||||
| 	    NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | | ||||
| 	    ZCHECKSUM_FLAG_NOPWRITE, "sha512"}, | ||||
| 	{{zio_checksum_skein_native,	zio_checksum_skein_byteswap}, | ||||
| 	    zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free, | ||||
| 	{{abd_checksum_skein_native,	abd_checksum_skein_byteswap}, | ||||
| 	    abd_checksum_skein_tmpl_init, abd_checksum_skein_tmpl_free, | ||||
| 	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP | | ||||
| 	    ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"}, | ||||
| 	{{zio_checksum_edonr_native,	zio_checksum_edonr_byteswap}, | ||||
| 	    zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free, | ||||
| 	{{abd_checksum_edonr_native,	abd_checksum_edonr_byteswap}, | ||||
| 	    abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free, | ||||
| 	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED | | ||||
| 	    ZCHECKSUM_FLAG_NOPWRITE, "edonr"}, | ||||
| }; | ||||
| @ -251,7 +292,7 @@ zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa) | ||||
|  */ | ||||
| void | ||||
| zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, | ||||
| 	void *data, uint64_t size) | ||||
|     abd_t *abd, uint64_t size) | ||||
| { | ||||
| 	blkptr_t *bp = zio->io_bp; | ||||
| 	uint64_t offset = zio->io_offset; | ||||
| @ -266,6 +307,7 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, | ||||
| 
 | ||||
| 	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { | ||||
| 		zio_eck_t *eck; | ||||
| 		void *data = abd_to_buf(abd); | ||||
| 
 | ||||
| 		if (checksum == ZIO_CHECKSUM_ZILOG2) { | ||||
| 			zil_chain_t *zilc = data; | ||||
| @ -283,18 +325,18 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, | ||||
| 		else | ||||
| 			bp->blk_cksum = eck->zec_cksum; | ||||
| 		eck->zec_magic = ZEC_MAGIC; | ||||
| 		ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], | ||||
| 		ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], | ||||
| 		    &cksum); | ||||
| 		eck->zec_cksum = cksum; | ||||
| 	} else { | ||||
| 		ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum], | ||||
| 		ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum], | ||||
| 		    &bp->blk_cksum); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| int | ||||
| zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, | ||||
|     void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) | ||||
|     abd_t *abd, uint64_t size, uint64_t offset, zio_bad_cksum_t *info) | ||||
| { | ||||
| 	zio_checksum_info_t *ci = &zio_checksum_table[checksum]; | ||||
| 	int byteswap; | ||||
| @ -308,25 +350,32 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, | ||||
| 	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) { | ||||
| 		zio_eck_t *eck; | ||||
| 		zio_cksum_t verifier; | ||||
| 		size_t eck_offset; | ||||
| 		uint64_t data_size = size; | ||||
| 		void *data = abd_borrow_buf_copy(abd, data_size); | ||||
| 
 | ||||
| 		if (checksum == ZIO_CHECKSUM_ZILOG2) { | ||||
| 			zil_chain_t *zilc = data; | ||||
| 			uint64_t nused; | ||||
| 
 | ||||
| 			eck = &zilc->zc_eck; | ||||
| 			if (eck->zec_magic == ZEC_MAGIC) | ||||
| 			if (eck->zec_magic == ZEC_MAGIC) { | ||||
| 				nused = zilc->zc_nused; | ||||
| 			else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) | ||||
| 			} else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) { | ||||
| 				nused = BSWAP_64(zilc->zc_nused); | ||||
| 			else | ||||
| 			} else { | ||||
| 				abd_return_buf(abd, data, data_size); | ||||
| 				return (SET_ERROR(ECKSUM)); | ||||
| 			} | ||||
| 
 | ||||
| 			if (nused > size) | ||||
| 			if (nused > data_size) { | ||||
| 				abd_return_buf(abd, data, data_size); | ||||
| 				return (SET_ERROR(ECKSUM)); | ||||
| 			} | ||||
| 
 | ||||
| 			size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); | ||||
| 		} else { | ||||
| 			eck = (zio_eck_t *)((char *)data + size) - 1; | ||||
| 			eck = (zio_eck_t *)((char *)data + data_size) - 1; | ||||
| 		} | ||||
| 
 | ||||
| 		if (checksum == ZIO_CHECKSUM_GANG_HEADER) | ||||
| @ -341,11 +390,15 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, | ||||
| 		if (byteswap) | ||||
| 			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); | ||||
| 
 | ||||
| 		eck_offset = (size_t)(&eck->zec_cksum) - (size_t)data; | ||||
| 		expected_cksum = eck->zec_cksum; | ||||
| 		eck->zec_cksum = verifier; | ||||
| 		ci->ci_func[byteswap](data, size, | ||||
| 		abd_return_buf_copy(abd, data, data_size); | ||||
| 
 | ||||
| 		ci->ci_func[byteswap](abd, size, | ||||
| 		    spa->spa_cksum_tmpls[checksum], &actual_cksum); | ||||
| 		eck->zec_cksum = expected_cksum; | ||||
| 		abd_copy_from_buf_off(abd, &expected_cksum, | ||||
| 		    eck_offset, sizeof (zio_cksum_t)); | ||||
| 
 | ||||
| 		if (byteswap) { | ||||
| 			byteswap_uint64_array(&expected_cksum, | ||||
| @ -354,7 +407,7 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum, | ||||
| 	} else { | ||||
| 		byteswap = BP_SHOULD_BYTESWAP(bp); | ||||
| 		expected_cksum = bp->blk_cksum; | ||||
| 		ci->ci_func[byteswap](data, size, | ||||
| 		ci->ci_func[byteswap](abd, size, | ||||
| 		    spa->spa_cksum_tmpls[checksum], &actual_cksum); | ||||
| 	} | ||||
| 
 | ||||
| @ -383,7 +436,7 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) | ||||
| 	uint64_t size = (bp == NULL ? zio->io_size : | ||||
| 	    (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); | ||||
| 	uint64_t offset = zio->io_offset; | ||||
| 	void *data = zio->io_data; | ||||
| 	abd_t *data = zio->io_abd; | ||||
| 	spa_t *spa = zio->io_spa; | ||||
| 
 | ||||
| 	error = zio_checksum_error_impl(spa, bp, checksum, data, size, | ||||
|  | ||||
| @ -28,7 +28,7 @@ | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Copyright (c) 2013 by Delphix. All rights reserved. | ||||
|  * Copyright (c) 2013, 2016 by Delphix. All rights reserved. | ||||
|  */ | ||||
| 
 | ||||
| #include <sys/zfs_context.h> | ||||
| @ -41,24 +41,23 @@ | ||||
| /*
 | ||||
|  * Compression vectors. | ||||
|  */ | ||||
| 
 | ||||
| zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = { | ||||
| 	{NULL,			NULL,			0,	"inherit"}, | ||||
| 	{NULL,			NULL,			0,	"on"}, | ||||
| 	{NULL,			NULL,			0,	"uncompressed"}, | ||||
| 	{lzjb_compress,		lzjb_decompress,	0,	"lzjb"}, | ||||
| 	{NULL,			NULL,			0,	"empty"}, | ||||
| 	{gzip_compress,		gzip_decompress,	1,	"gzip-1"}, | ||||
| 	{gzip_compress,		gzip_decompress,	2,	"gzip-2"}, | ||||
| 	{gzip_compress,		gzip_decompress,	3,	"gzip-3"}, | ||||
| 	{gzip_compress,		gzip_decompress,	4,	"gzip-4"}, | ||||
| 	{gzip_compress,		gzip_decompress,	5,	"gzip-5"}, | ||||
| 	{gzip_compress,		gzip_decompress,	6,	"gzip-6"}, | ||||
| 	{gzip_compress,		gzip_decompress,	7,	"gzip-7"}, | ||||
| 	{gzip_compress,		gzip_decompress,	8,	"gzip-8"}, | ||||
| 	{gzip_compress,		gzip_decompress,	9,	"gzip-9"}, | ||||
| 	{zle_compress,		zle_decompress,		64,	"zle"}, | ||||
| 	{lz4_compress_zfs,	lz4_decompress_zfs,	0,	"lz4"}, | ||||
| 	{"inherit",		0,	NULL,		NULL}, | ||||
| 	{"on",			0,	NULL,		NULL}, | ||||
| 	{"uncompressed",	0,	NULL,		NULL}, | ||||
| 	{"lzjb",		0,	lzjb_compress,	lzjb_decompress}, | ||||
| 	{"empty",		0,	NULL,		NULL}, | ||||
| 	{"gzip-1",		1,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-2",		2,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-3",		3,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-4",		4,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-5",		5,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-6",		6,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-7",		7,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-8",		8,	gzip_compress,	gzip_decompress}, | ||||
| 	{"gzip-9",		9,	gzip_compress,	gzip_decompress}, | ||||
| 	{"zle",			64,	zle_compress,	zle_decompress}, | ||||
| 	{"lz4",			0,	lz4_compress_zfs, lz4_decompress_zfs} | ||||
| }; | ||||
| 
 | ||||
| enum zio_compress | ||||
| @ -85,12 +84,26 @@ zio_compress_select(spa_t *spa, enum zio_compress child, | ||||
| 	return (result); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) | ||||
| /*ARGSUSED*/ | ||||
| static int | ||||
| zio_compress_zeroed_cb(void *data, size_t len, void *private) | ||||
| { | ||||
| 	uint64_t *end = (uint64_t *)((char *)data + len); | ||||
| 	uint64_t *word; | ||||
| 
 | ||||
| 	for (word = data; word < end; word++) | ||||
| 		if (*word != 0) | ||||
| 			return (1); | ||||
| 
 | ||||
| 	return (0); | ||||
| } | ||||
| 
 | ||||
| size_t | ||||
| zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len) | ||||
| { | ||||
| 	uint64_t *word, *word_end; | ||||
| 	size_t c_len, d_len; | ||||
| 	zio_compress_info_t *ci = &zio_compress_table[c]; | ||||
| 	void *tmp; | ||||
| 
 | ||||
| 	ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS); | ||||
| 	ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL); | ||||
| @ -99,12 +112,7 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) | ||||
| 	 * If the data is all zeroes, we don't even need to allocate | ||||
| 	 * a block for it.  We indicate this by returning zero size. | ||||
| 	 */ | ||||
| 	word_end = (uint64_t *)((char *)src + s_len); | ||||
| 	for (word = src; word < word_end; word++) | ||||
| 		if (*word != 0) | ||||
| 			break; | ||||
| 
 | ||||
| 	if (word == word_end) | ||||
| 	if (abd_iterate_func(src, 0, s_len, zio_compress_zeroed_cb, NULL) == 0) | ||||
| 		return (0); | ||||
| 
 | ||||
| 	if (c == ZIO_COMPRESS_EMPTY) | ||||
| @ -112,7 +120,11 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) | ||||
| 
 | ||||
| 	/* Compress at least 12.5% */ | ||||
| 	d_len = s_len - (s_len >> 3); | ||||
| 	c_len = ci->ci_compress(src, dst, s_len, d_len, ci->ci_level); | ||||
| 
 | ||||
| 	/* No compression algorithms can read from ABDs directly */ | ||||
| 	tmp = abd_borrow_buf_copy(src, s_len); | ||||
| 	c_len = ci->ci_compress(tmp, dst, s_len, d_len, ci->ci_level); | ||||
| 	abd_return_buf(src, tmp, s_len); | ||||
| 
 | ||||
| 	if (c_len > d_len) | ||||
| 		return (s_len); | ||||
| @ -122,13 +134,23 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len) | ||||
| } | ||||
| 
 | ||||
| int | ||||
| zio_decompress_data(enum zio_compress c, void *src, void *dst, | ||||
| zio_decompress_data_buf(enum zio_compress c, void *src, void *dst, | ||||
|     size_t s_len, size_t d_len) | ||||
| { | ||||
| 	zio_compress_info_t *ci = &zio_compress_table[c]; | ||||
| 
 | ||||
| 	if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL) | ||||
| 		return (SET_ERROR(EINVAL)); | ||||
| 
 | ||||
| 	return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level)); | ||||
| } | ||||
| 
 | ||||
| int | ||||
| zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, | ||||
|     size_t s_len, size_t d_len) | ||||
| { | ||||
| 	void *tmp = abd_borrow_buf_copy(src, s_len); | ||||
| 	int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len); | ||||
| 	abd_return_buf(src, tmp, s_len); | ||||
| 
 | ||||
| 	return (ret); | ||||
| } | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 David Quigley
						David Quigley