mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	 62e7d3c89e
			
		
	
	
		62e7d3c89e
		
			
		
	
	
	
	
		
			
			This change adds a new `zpool prefetch -t ddt $pool` command which causes a pool's DDT to be loaded into the ARC. The primary goal is to remove the need to "warm" a pool's cache before deduplication stops slowing write performance. It may also provide a way to reload portions of a DDT if they have been flushed due to inactivity. Sponsored-by: iXsystems, Inc. Sponsored-by: Catalogics, Inc. Sponsored-by: Klara, Inc. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Will Andrews <will.andrews@klarasystems.com> Signed-off-by: Fred Weigel <fred.weigel@klarasystems.com> Signed-off-by: Rob Norris <rob.norris@klarasystems.com> Signed-off-by: Don Brady <don.brady@klarasystems.com> Co-authored-by: Will Andrews <will.andrews@klarasystems.com> Co-authored-by: Don Brady <don.brady@klarasystems.com> Closes #15890
		
			
				
	
	
		
			280 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			280 lines
		
	
	
		
			6.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * CDDL HEADER START
 | |
|  *
 | |
|  * The contents of this file are subject to the terms of the
 | |
|  * Common Development and Distribution License (the "License").
 | |
|  * You may not use this file except in compliance with the License.
 | |
|  *
 | |
|  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | |
|  * or https://opensource.org/licenses/CDDL-1.0.
 | |
|  * See the License for the specific language governing permissions
 | |
|  * and limitations under the License.
 | |
|  *
 | |
|  * When distributing Covered Code, include this CDDL HEADER in each
 | |
|  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | |
|  * If applicable, add the following below this CDDL HEADER, with the
 | |
|  * fields enclosed by brackets "[]" replaced with your own identifying
 | |
|  * information: Portions Copyright [yyyy] [name of copyright owner]
 | |
|  *
 | |
|  * CDDL HEADER END
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 | |
|  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 | |
|  * Copyright (c) 2022 by Pawel Jakub Dawidek
 | |
|  * Copyright (c) 2023, Klara Inc.
 | |
|  */
 | |
| 
 | |
| #include <sys/zfs_context.h>
 | |
| #include <sys/spa.h>
 | |
| #include <sys/spa_impl.h>
 | |
| #include <sys/ddt.h>
 | |
| #include <sys/ddt_impl.h>
 | |
| 
 | |
| static void
 | |
| ddt_stat_generate(ddt_t *ddt, ddt_entry_t *dde, ddt_stat_t *dds)
 | |
| {
 | |
| 	spa_t *spa = ddt->ddt_spa;
 | |
| 	ddt_phys_t *ddp = dde->dde_phys;
 | |
| 	ddt_key_t *ddk = &dde->dde_key;
 | |
| 	uint64_t lsize = DDK_GET_LSIZE(ddk);
 | |
| 	uint64_t psize = DDK_GET_PSIZE(ddk);
 | |
| 
 | |
| 	memset(dds, 0, sizeof (*dds));
 | |
| 
 | |
| 	for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
 | |
| 		uint64_t dsize = 0;
 | |
| 		uint64_t refcnt = ddp->ddp_refcnt;
 | |
| 
 | |
| 		if (ddp->ddp_phys_birth == 0)
 | |
| 			continue;
 | |
| 
 | |
| 		int ndvas = DDK_GET_CRYPT(&dde->dde_key) ?
 | |
| 		    SPA_DVAS_PER_BP - 1 : SPA_DVAS_PER_BP;
 | |
| 		for (int d = 0; d < ndvas; d++)
 | |
| 			dsize += dva_get_dsize_sync(spa, &ddp->ddp_dva[d]);
 | |
| 
 | |
| 		dds->dds_blocks += 1;
 | |
| 		dds->dds_lsize += lsize;
 | |
| 		dds->dds_psize += psize;
 | |
| 		dds->dds_dsize += dsize;
 | |
| 
 | |
| 		dds->dds_ref_blocks += refcnt;
 | |
| 		dds->dds_ref_lsize += lsize * refcnt;
 | |
| 		dds->dds_ref_psize += psize * refcnt;
 | |
| 		dds->dds_ref_dsize += dsize * refcnt;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_stat_add(ddt_stat_t *dst, const ddt_stat_t *src, uint64_t neg)
 | |
| {
 | |
| 	const uint64_t *s = (const uint64_t *)src;
 | |
| 	uint64_t *d = (uint64_t *)dst;
 | |
| 	uint64_t *d_end = (uint64_t *)(dst + 1);
 | |
| 
 | |
| 	ASSERT(neg == 0 || neg == -1ULL);	/* add or subtract */
 | |
| 
 | |
| 	for (int i = 0; i < d_end - d; i++)
 | |
| 		d[i] += (s[i] ^ neg) - neg;
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
 | |
| {
 | |
| 	ddt_stat_t dds;
 | |
| 	ddt_histogram_t *ddh;
 | |
| 	int bucket;
 | |
| 
 | |
| 	ddt_stat_generate(ddt, dde, &dds);
 | |
| 
 | |
| 	bucket = highbit64(dds.dds_ref_blocks) - 1;
 | |
| 	ASSERT3U(bucket, >=, 0);
 | |
| 
 | |
| 	ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];
 | |
| 
 | |
| 	ddt_stat_add(&ddh->ddh_stat[bucket], &dds, neg);
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_histogram_add(ddt_histogram_t *dst, const ddt_histogram_t *src)
 | |
| {
 | |
| 	for (int h = 0; h < 64; h++)
 | |
| 		ddt_stat_add(&dst->ddh_stat[h], &src->ddh_stat[h], 0);
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_histogram_stat(ddt_stat_t *dds, const ddt_histogram_t *ddh)
 | |
| {
 | |
| 	memset(dds, 0, sizeof (*dds));
 | |
| 
 | |
| 	for (int h = 0; h < 64; h++)
 | |
| 		ddt_stat_add(dds, &ddh->ddh_stat[h], 0);
 | |
| }
 | |
| 
 | |
| boolean_t
 | |
| ddt_histogram_empty(const ddt_histogram_t *ddh)
 | |
| {
 | |
| 	const uint64_t *s = (const uint64_t *)ddh;
 | |
| 	const uint64_t *s_end = (const uint64_t *)(ddh + 1);
 | |
| 
 | |
| 	while (s < s_end)
 | |
| 		if (*s++ != 0)
 | |
| 			return (B_FALSE);
 | |
| 
 | |
| 	return (B_TRUE);
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_get_dedup_object_stats(spa_t *spa, ddt_object_t *ddo_total)
 | |
| {
 | |
| 	memset(ddo_total, 0, sizeof (*ddo_total));
 | |
| 
 | |
| 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 | |
| 		ddt_t *ddt = spa->spa_ddt[c];
 | |
| 		if (!ddt)
 | |
| 			continue;
 | |
| 
 | |
| 		for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
 | |
| 			for (ddt_class_t class = 0; class < DDT_CLASSES;
 | |
| 			    class++) {
 | |
| 				dmu_object_info_t doi;
 | |
| 				uint64_t cnt;
 | |
| 				int err;
 | |
| 
 | |
| 				/*
 | |
| 				 * These stats were originally calculated
 | |
| 				 * during ddt_object_load().
 | |
| 				 */
 | |
| 
 | |
| 				err = ddt_object_info(ddt, type, class, &doi);
 | |
| 				if (err != 0)
 | |
| 					continue;
 | |
| 
 | |
| 				err = ddt_object_count(ddt, type, class, &cnt);
 | |
| 				if (err != 0)
 | |
| 					continue;
 | |
| 
 | |
| 				ddt_object_t *ddo =
 | |
| 				    &ddt->ddt_object_stats[type][class];
 | |
| 
 | |
| 				ddo->ddo_count = cnt;
 | |
| 				ddo->ddo_dspace =
 | |
| 				    doi.doi_physical_blocks_512 << 9;
 | |
| 				ddo->ddo_mspace = doi.doi_fill_count *
 | |
| 				    doi.doi_data_block_size;
 | |
| 
 | |
| 				ddo_total->ddo_count += ddo->ddo_count;
 | |
| 				ddo_total->ddo_dspace += ddo->ddo_dspace;
 | |
| 				ddo_total->ddo_mspace += ddo->ddo_mspace;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/*
 | |
| 	 * This returns raw counts (not averages). One of the consumers,
 | |
| 	 * print_dedup_stats(), historically has expected raw counts.
 | |
| 	 */
 | |
| 
 | |
| 	spa->spa_dedup_dsize = ddo_total->ddo_dspace;
 | |
| }
 | |
| 
 | |
| uint64_t
 | |
| ddt_get_ddt_dsize(spa_t *spa)
 | |
| {
 | |
| 	ddt_object_t ddo_total;
 | |
| 
 | |
| 	/* recalculate after each txg sync */
 | |
| 	if (spa->spa_dedup_dsize == ~0ULL)
 | |
| 		ddt_get_dedup_object_stats(spa, &ddo_total);
 | |
| 
 | |
| 	return (spa->spa_dedup_dsize);
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_get_dedup_histogram(spa_t *spa, ddt_histogram_t *ddh)
 | |
| {
 | |
| 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 | |
| 		ddt_t *ddt = spa->spa_ddt[c];
 | |
| 		if (!ddt)
 | |
| 			continue;
 | |
| 
 | |
| 		for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
 | |
| 			for (ddt_class_t class = 0; class < DDT_CLASSES;
 | |
| 			    class++) {
 | |
| 				ddt_histogram_add(ddh,
 | |
| 				    &ddt->ddt_histogram_cache[type][class]);
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| void
 | |
| ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
 | |
| {
 | |
| 	ddt_histogram_t *ddh_total;
 | |
| 
 | |
| 	ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
 | |
| 	ddt_get_dedup_histogram(spa, ddh_total);
 | |
| 	ddt_histogram_stat(dds_total, ddh_total);
 | |
| 	kmem_free(ddh_total, sizeof (ddt_histogram_t));
 | |
| }
 | |
| 
 | |
| uint64_t
 | |
| ddt_get_dedup_dspace(spa_t *spa)
 | |
| {
 | |
| 	ddt_stat_t dds_total;
 | |
| 
 | |
| 	if (spa->spa_dedup_dspace != ~0ULL)
 | |
| 		return (spa->spa_dedup_dspace);
 | |
| 
 | |
| 	memset(&dds_total, 0, sizeof (ddt_stat_t));
 | |
| 
 | |
| 	/* Calculate and cache the stats */
 | |
| 	ddt_get_dedup_stats(spa, &dds_total);
 | |
| 	spa->spa_dedup_dspace = dds_total.dds_ref_dsize - dds_total.dds_dsize;
 | |
| 	return (spa->spa_dedup_dspace);
 | |
| }
 | |
| 
 | |
| uint64_t
 | |
| ddt_get_pool_dedup_ratio(spa_t *spa)
 | |
| {
 | |
| 	ddt_stat_t dds_total = { 0 };
 | |
| 
 | |
| 	ddt_get_dedup_stats(spa, &dds_total);
 | |
| 	if (dds_total.dds_dsize == 0)
 | |
| 		return (100);
 | |
| 
 | |
| 	return (dds_total.dds_ref_dsize * 100 / dds_total.dds_dsize);
 | |
| }
 | |
| 
 | |
| int
 | |
| ddt_get_pool_dedup_cached(spa_t *spa, uint64_t *psize)
 | |
| {
 | |
| 	uint64_t l1sz, l1tot, l2sz, l2tot;
 | |
| 	int err = 0;
 | |
| 
 | |
| 	l1tot = l2tot = 0;
 | |
| 	*psize = 0;
 | |
| 	for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
 | |
| 		ddt_t *ddt = spa->spa_ddt[c];
 | |
| 		if (ddt == NULL)
 | |
| 			continue;
 | |
| 		for (ddt_type_t type = 0; type < DDT_TYPES; type++) {
 | |
| 			for (ddt_class_t class = 0; class < DDT_CLASSES;
 | |
| 			    class++) {
 | |
| 				err = dmu_object_cached_size(ddt->ddt_os,
 | |
| 				    ddt->ddt_object[type][class], &l1sz, &l2sz);
 | |
| 				if (err != 0)
 | |
| 					return (err);
 | |
| 				l1tot += l1sz;
 | |
| 				l2tot += l2sz;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	*psize = l1tot + l2tot;
 | |
| 	return (err);
 | |
| }
 |