Pool allocation classes

Allocation Classes add the ability to have allocation classes in a
pool that are dedicated to serving specific block categories, such
as DDT data, metadata, and small file blocks. A pool can opt-in to
this feature by adding a 'special' or 'dedup' top-level VDEV.

Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Richard Laager <rlaager@wiktel.com>
Reviewed-by: Alek Pinchuk <apinchuk@datto.com>
Reviewed-by: Håkan Johansson <f96hajo@chalmers.se>
Reviewed-by: Andreas Dilger <andreas.dilger@chamcloud.com>
Reviewed-by: DHE <git@dehacked.net>
Reviewed-by: Richard Elling <Richard.Elling@RichardElling.com>
Reviewed-by: Gregor Kopka <gregor@kopka.net>
Reviewed-by: Kash Pande <kash@tripleback.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Don Brady <don.brady@delphix.com>
Closes #5182
This commit is contained in:
Don Brady
2018-09-05 19:33:36 -06:00
committed by Brian Behlendorf
parent cfa37548eb
commit cc99f275a2
57 changed files with 2326 additions and 282 deletions
+13 -2
View File
@@ -20,12 +20,13 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -136,6 +137,16 @@ typedef enum dmu_object_byteswap {
((ot) & DMU_OT_METADATA) : \
DMU_OT_IS_METADATA_IMPL(ot))
#define DMU_OT_IS_DDT(ot) \
((ot) == DMU_OT_DDT_ZAP)
#define DMU_OT_IS_ZIL(ot) \
((ot) == DMU_OT_INTENT_LOG)
/* Note: ztest uses DMU_OT_UINT64_OTHER as a proxy for file blocks */
#define DMU_OT_IS_FILE(ot) \
((ot) == DMU_OT_PLAIN_FILE_CONTENTS || (ot) == DMU_OT_UINT64_OTHER)
#define DMU_OT_IS_ENCRYPTED(ot) (((ot) & DMU_OT_NEWTYPE) ? \
((ot) & DMU_OT_ENCRYPTED) : \
DMU_OT_IS_ENCRYPTED_IMPL(ot))
@@ -226,7 +237,7 @@ typedef enum dmu_object_type {
* values.
*
* The DMU_OTN_* types do not have entries in the dmu_ot table,
* use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead
* use the DMU_OT_IS_METADATA() and DMU_OT_BYTESWAP() macros instead
* of indexing into dmu_ot directly (this works for both DMU_OT_* types
* and DMU_OTN_* types).
*/
+6 -1
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -137,6 +137,11 @@ struct objset {
uint64_t os_normalization;
uint64_t os_utf8only;
uint64_t os_casesensitivity;
/*
* The largest zpl file block allowed in special class.
* cached here instead of zfsvfs for easier access.
*/
int os_zpl_special_smallblock;
/*
* Pointer is constant; the blkptr it points to is protected by
+11
View File
@@ -26,6 +26,7 @@
* Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -181,6 +182,7 @@ typedef enum {
ZFS_PROP_KEY_GUID,
ZFS_PROP_KEYSTATUS,
ZFS_PROP_REMAPTXG, /* not exposed to the user */
ZFS_PROP_SPECIAL_SMALL_BLOCKS,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -713,6 +715,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
#define ZPOOL_CONFIG_ALLOCATION_BIAS "alloc_bias" /* not stored on disk */
/*
* The persistent vdev state is stored as separate values rather than a single
@@ -759,6 +762,14 @@ typedef struct zpool_load_policy {
#define VDEV_TOP_ZAP_POOL_CHECKPOINT_SM \
"com.delphix:pool_checkpoint_sm"
#define VDEV_TOP_ZAP_ALLOCATION_BIAS \
"org.zfsonlinux:allocation_bias"
/* vdev metaslab allocation bias */
#define VDEV_ALLOC_BIAS_LOG "log"
#define VDEV_ALLOC_BIAS_SPECIAL "special"
#define VDEV_ALLOC_BIAS_DEDUP "dedup"
/*
* This is needed in userland to report the minimum necessary device size.
*/
+6 -3
View File
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_METASLAB_H
@@ -57,13 +58,17 @@ void metaslab_sync_done(metaslab_t *, uint64_t);
void metaslab_sync_reassess(metaslab_group_t *);
uint64_t metaslab_block_maxsize(metaslab_t *);
/*
* metaslab alloc flags
*/
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1
#define METASLAB_GANG_HEADER 0x2
#define METASLAB_GANG_CHILD 0x4
#define METASLAB_ASYNC_ALLOC 0x8
#define METASLAB_DONT_THROTTLE 0x10
#define METASLAB_FASTWRITE 0x20
#define METASLAB_MUST_RESERVE 0x20
#define METASLAB_FASTWRITE 0x40
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
blkptr_t *, int, uint64_t, blkptr_t *, int, zio_alloc_list_t *, zio_t *,
@@ -96,8 +101,6 @@ boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int, int,
zio_t *, int);
void metaslab_class_throttle_unreserve(metaslab_class_t *, int, int, zio_t *);
void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
int64_t, int64_t);
uint64_t metaslab_class_get_alloc(metaslab_class_t *);
uint64_t metaslab_class_get_space(metaslab_class_t *);
uint64_t metaslab_class_get_dspace(metaslab_class_t *);
+8 -1
View File
@@ -20,13 +20,14 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2017 Joyent, Inc.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_SPA_H
@@ -976,6 +977,11 @@ extern uint64_t spa_version(spa_t *spa);
extern boolean_t spa_deflate(spa_t *spa);
extern metaslab_class_t *spa_normal_class(spa_t *spa);
extern metaslab_class_t *spa_log_class(spa_t *spa);
extern metaslab_class_t *spa_special_class(spa_t *spa);
extern metaslab_class_t *spa_dedup_class(spa_t *spa);
extern metaslab_class_t *spa_preferred_class(spa_t *spa, uint64_t size,
dmu_object_type_t objtype, uint_t level, uint_t special_smallblk);
extern void spa_evicting_os_register(spa_t *, objset_t *os);
extern void spa_evicting_os_deregister(spa_t *, objset_t *os);
extern void spa_evicting_os_wait(spa_t *spa);
@@ -1040,6 +1046,7 @@ extern void spa_set_missing_tvds(spa_t *spa, uint64_t missing);
extern boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa);
extern boolean_t spa_multihost(spa_t *spa);
extern unsigned long spa_get_hostid(void);
extern void spa_activate_allocation_classes(spa_t *, dmu_tx_t *);
extern int spa_mode(spa_t *spa);
extern uint64_t zfs_strtonum(const char *str, char **nptr);
+4 -1
View File
@@ -20,12 +20,13 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -220,6 +221,8 @@ struct spa {
boolean_t spa_is_initializing; /* true while opening pool */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_special_class; /* special allocation class */
metaslab_class_t *spa_dedup_class; /* dedup allocation class */
uint64_t spa_first_txg; /* first txg after spa_open() */
uint64_t spa_final_txg; /* txg of export/destroy */
uint64_t spa_freeze_txg; /* freeze pool at this txg */
+3
View File
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_VDEV_H
@@ -108,6 +109,8 @@ extern boolean_t vdev_children_are_offline(vdev_t *vd);
extern void vdev_space_update(vdev_t *vd,
int64_t alloc_delta, int64_t defer_delta, int64_t space_delta);
extern int64_t vdev_deflated_space(vdev_t *vd, int64_t space);
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
+10
View File
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_VDEV_IMPL_H
@@ -140,6 +141,14 @@ struct vdev_queue {
kmutex_t vq_lock;
};
typedef enum vdev_alloc_bias {
VDEV_BIAS_NONE,
VDEV_BIAS_LOG, /* dedicated to ZIL data (SLOG) */
VDEV_BIAS_SPECIAL, /* dedicated to ddt, metadata, and small blks */
VDEV_BIAS_DEDUP /* dedicated to dedup metadata */
} vdev_alloc_bias_t;
/*
* On-disk indirect vdev state.
*
@@ -236,6 +245,7 @@ struct vdev {
boolean_t vdev_ishole; /* is a hole in the namespace */
kmutex_t vdev_queue_lock; /* protects vdev_queue_depth */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
/* pool checkpoint related */
space_map_t *vdev_checkpoint_sm; /* contains reserved blocks */
+2
View File
@@ -353,6 +353,7 @@ typedef struct zio_prop {
uint8_t zp_salt[ZIO_DATA_SALT_LEN];
uint8_t zp_iv[ZIO_DATA_IV_LEN];
uint8_t zp_mac[ZIO_DATA_MAC_LEN];
uint32_t zp_zpl_smallblk;
} zio_prop_t;
typedef struct zio_cksum_report zio_cksum_report_t;
@@ -473,6 +474,7 @@ struct zio {
vdev_t *io_vd;
void *io_vsd;
const zio_vsd_ops_t *io_vsd_ops;
metaslab_class_t *io_metaslab_class; /* dva throttle class */
uint64_t io_offset;
hrtime_t io_timestamp; /* submitted at */