mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-23 02:44:41 +03:00
RAID-Z expansion feature
This feature allows disks to be added one at a time to a RAID-Z group, expanding its capacity incrementally. This feature is especially useful for small pools (typically with only one RAID-Z group), where there isn't sufficient hardware to add capacity by adding a whole new RAID-Z group (typically doubling the number of disks).

== Initiating expansion ==

A new device (disk) can be attached to an existing RAIDZ vdev, by running `zpool attach POOL raidzP-N NEW_DEVICE`, e.g. `zpool attach tank raidz2-0 sda`. The new device will become part of the RAIDZ group. A "raidz expansion" will be initiated, and the new device will contribute additional space to the RAIDZ group once the expansion completes.

The `feature@raidz_expansion` on-disk feature flag must be `enabled` to initiate an expansion, and it remains `active` for the life of the pool. In other words, pools with expanded RAIDZ vdevs cannot be imported by older releases of the ZFS software.

== During expansion ==

The expansion entails reading all allocated space from existing disks in the RAIDZ group, and rewriting it to the new disks in the RAIDZ group (including the newly added device).

The expansion progress can be monitored with `zpool status`.

Data redundancy is maintained during (and after) the expansion. If a disk fails while the expansion is in progress, the expansion pauses until the health of the RAIDZ vdev is restored (e.g. by replacing the failed disk and waiting for reconstruction to complete).

The pool remains accessible during expansion. Following a reboot or export/import, the expansion resumes where it left off.

== After expansion ==

When the expansion completes, the additional space is available for use, and is reflected in the `available` zfs property (as seen in `zfs list`, `df`, etc).

Expansion does not change the number of failures that can be tolerated without data loss (e.g. a RAIDZ2 is still a RAIDZ2 even after expansion).

A RAIDZ vdev can be expanded multiple times.
After the expansion completes, old blocks remain with their old data-to-parity ratio (e.g. a 5-wide RAIDZ2 has 3 data to 2 parity), but distributed among the larger set of disks. New blocks will be written with the new data-to-parity ratio (e.g. a 5-wide RAIDZ2 which has been expanded once to 6-wide has 4 data to 2 parity). However, the RAIDZ vdev's "assumed parity ratio" does not change, so slightly less space than is expected may be reported for newly-written blocks, according to `zfs list`, `df`, `ls -s`, and similar tools.

Sponsored-by: The FreeBSD Foundation
Sponsored-by: iXsystems, Inc.
Sponsored-by: vStack
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Authored-by: Matthew Ahrens <mahrens@delphix.com>
Contributions-by: Fedor Uporov <fuporov.vstack@gmail.com>
Contributions-by: Stuart Maybee <stuart.maybee@comcast.net>
Contributions-by: Thorsten Behrens <tbehrens@outlook.com>
Contributions-by: Fmstrat <nospam@nowsci.com>
Contributions-by: Don Brady <dev.fs.zfs@gmail.com>
Signed-off-by: Don Brady <dev.fs.zfs@gmail.com>
Closes #15022
This commit is contained in:
@@ -365,6 +365,7 @@ typedef enum {
|
||||
VDEV_PROP_CHECKSUM_T,
|
||||
VDEV_PROP_IO_N,
|
||||
VDEV_PROP_IO_T,
|
||||
VDEV_PROP_RAIDZ_EXPANDING,
|
||||
VDEV_NUM_PROPS
|
||||
} vdev_prop_t;
|
||||
|
||||
@@ -724,6 +725,7 @@ typedef struct zpool_load_policy {
|
||||
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_REMOVAL_STATS "removal_stats" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_CHECKPOINT_STATS "checkpoint_stats" /* not on disk */
|
||||
#define ZPOOL_CONFIG_RAIDZ_EXPAND_STATS "raidz_expand_stats" /* not on disk */
|
||||
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_INDIRECT_SIZE "indirect_size" /* not stored on disk */
|
||||
|
||||
@@ -789,6 +791,8 @@ typedef struct zpool_load_policy {
|
||||
#define ZPOOL_CONFIG_SPARES "spares"
|
||||
#define ZPOOL_CONFIG_IS_SPARE "is_spare"
|
||||
#define ZPOOL_CONFIG_NPARITY "nparity"
|
||||
#define ZPOOL_CONFIG_RAIDZ_EXPANDING "raidz_expanding"
|
||||
#define ZPOOL_CONFIG_RAIDZ_EXPAND_TXGS "raidz_expand_txgs"
|
||||
#define ZPOOL_CONFIG_HOSTID "hostid"
|
||||
#define ZPOOL_CONFIG_HOSTNAME "hostname"
|
||||
#define ZPOOL_CONFIG_LOADED_TIME "initial_load_time"
|
||||
@@ -907,6 +911,15 @@ typedef struct zpool_load_policy {
|
||||
/*
 * Names of entries kept in a top-level vdev's ZAP object.  Each name is a
 * string literal placed on a continuation line after its macro name.
 */
#define	VDEV_TOP_ZAP_ALLOCATION_BIAS \
	"org.zfsonlinux:allocation_bias"

/*
 * RAID-Z expansion entries: state, start/end time and bytes copied.
 * NOTE(review): these presumably back the vre_state, vre_start_time,
 * vre_end_time and vre_bytes_copied members of vdev_raidz_expand_t -- confirm
 * against the code that reads and writes them.
 */
#define	VDEV_TOP_ZAP_RAIDZ_EXPAND_STATE \
	"org.openzfs:raidz_expand_state"
#define	VDEV_TOP_ZAP_RAIDZ_EXPAND_START_TIME \
	"org.openzfs:raidz_expand_start_time"
#define	VDEV_TOP_ZAP_RAIDZ_EXPAND_END_TIME \
	"org.openzfs:raidz_expand_end_time"
#define	VDEV_TOP_ZAP_RAIDZ_EXPAND_BYTES_COPIED \
	"org.openzfs:raidz_expand_bytes_copied"
|
||||
|
||||
/* vdev metaslab allocation bias */
|
||||
#define VDEV_ALLOC_BIAS_LOG "log"
|
||||
#define VDEV_ALLOC_BIAS_SPECIAL "special"
|
||||
@@ -1138,6 +1151,16 @@ typedef struct pool_removal_stat {
|
||||
uint64_t prs_mapping_memory;
|
||||
} pool_removal_stat_t;
|
||||
|
||||
/*
 * Status and progress counters for a RAID-Z expansion.  Every member is a
 * plain uint64_t, including the state, which carries a dsl_scan_state_t
 * value.
 */
typedef struct pool_raidz_expand_stat {
	uint64_t pres_state;		/* dsl_scan_state_t */
	/* NOTE(review): presumably the id of the vdev being expanded */
	uint64_t pres_expanding_vdev;
	uint64_t pres_start_time;
	uint64_t pres_end_time;
	uint64_t pres_to_reflow;	/* bytes that need to be moved */
	uint64_t pres_reflowed;		/* bytes moved so far */
	/* NOTE(review): presumably a boolean stored as uint64_t -- confirm */
	uint64_t pres_waiting_for_resilver;
} pool_raidz_expand_stat_t;
|
||||
|
||||
typedef enum dsl_scan_state {
|
||||
DSS_NONE,
|
||||
DSS_SCANNING,
|
||||
@@ -1577,6 +1600,7 @@ typedef enum {
|
||||
ZFS_ERR_NOT_USER_NAMESPACE,
|
||||
ZFS_ERR_RESUME_EXISTS,
|
||||
ZFS_ERR_CRYPTO_NOTSUP,
|
||||
ZFS_ERR_RAIDZ_EXPAND_IN_PROGRESS,
|
||||
} zfs_errno_t;
|
||||
|
||||
/*
|
||||
@@ -1601,6 +1625,7 @@ typedef enum {
|
||||
ZPOOL_WAIT_RESILVER,
|
||||
ZPOOL_WAIT_SCRUB,
|
||||
ZPOOL_WAIT_TRIM,
|
||||
ZPOOL_WAIT_RAIDZ_EXPAND,
|
||||
ZPOOL_WAIT_NUM_ACTIVITIES
|
||||
} zpool_wait_activity_t;
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
#include <sys/vdev.h>
|
||||
#include <sys/vdev_rebuild.h>
|
||||
#include <sys/vdev_removal.h>
|
||||
#include <sys/vdev_raidz.h>
|
||||
#include <sys/metaslab.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
@@ -333,6 +334,9 @@ struct spa {
|
||||
spa_condensing_indirect_t *spa_condensing_indirect;
|
||||
zthr_t *spa_condense_zthr; /* zthr doing condense. */
|
||||
|
||||
vdev_raidz_expand_t *spa_raidz_expand;
|
||||
zthr_t *spa_raidz_expand_zthr;
|
||||
|
||||
uint64_t spa_checkpoint_txg; /* the txg of the checkpoint */
|
||||
spa_checkpoint_info_t spa_checkpoint_info; /* checkpoint accounting */
|
||||
zthr_t *spa_checkpoint_discard_zthr;
|
||||
|
||||
@@ -75,6 +75,39 @@ extern "C" {
|
||||
#define MMP_FAIL_INT_SET(fail) \
|
||||
(((uint64_t)(fail & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
|
||||
|
||||
/*
 * RAIDZ expansion reflow information.
 *
 * 64      56      48      40      32      24      16      8       0
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 * |Scratch |                        Reflow                        |
 * | State  |                        Offset                        |
 * +-------+-------+-------+-------+-------+-------+-------+-------+
 *
 * The word is kept in the uberblock's ub_raidz_reflow_info member and is
 * read and written through the RRSS_* accessor macros, which use position
 * 55 / length 9 for the scratch state and position 0 / length 55 (shifted
 * by SPA_MINBLOCKSHIFT) for the reflow offset.
 */
typedef enum raidz_reflow_scratch_state {
	/* Explicitly 0; the remaining states number upward from here. */
	RRSS_SCRATCH_NOT_IN_USE = 0,
	RRSS_SCRATCH_VALID,
	RRSS_SCRATCH_INVALID_SYNCED,
	RRSS_SCRATCH_INVALID_SYNCED_ON_IMPORT,
	RRSS_SCRATCH_INVALID_SYNCED_REFLOW
} raidz_reflow_scratch_state_t;
|
||||
|
||||
/*
 * Accessors for the packed ub_raidz_reflow_info word.  `ub` is a pointer
 * to a structure with a ub_raidz_reflow_info member (struct uberblock).
 *
 * The offset accessors pass (0, 55, SPA_MINBLOCKSHIFT, 0) to the
 * BF64_*_SB helpers; the state accessors pass (55, 9) to BF64_GET/SET.
 */
#define	RRSS_GET_OFFSET(ub) \
	BF64_GET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0)
#define	RRSS_SET_OFFSET(ub, x) \
	BF64_SET_SB((ub)->ub_raidz_reflow_info, 0, 55, SPA_MINBLOCKSHIFT, 0, x)

#define	RRSS_GET_STATE(ub) \
	BF64_GET((ub)->ub_raidz_reflow_info, 55, 9)
#define	RRSS_SET_STATE(ub, x) \
	BF64_SET((ub)->ub_raidz_reflow_info, 55, 9, x)

/*
 * Clear the whole word, then store the offset and the state.  `ub` is
 * expanded three times, so pass an expression without side effects.
 */
#define	RAIDZ_REFLOW_SET(ub, state, offset) do { \
	(ub)->ub_raidz_reflow_info = 0; \
	RRSS_SET_OFFSET(ub, offset); \
	RRSS_SET_STATE(ub, state); \
} while (0)
|
||||
|
||||
struct uberblock {
|
||||
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
||||
uint64_t ub_version; /* SPA_VERSION */
|
||||
@@ -136,6 +169,8 @@ struct uberblock {
|
||||
* the ZIL block is not allocated [see uses of spa_min_claim_txg()].
|
||||
*/
|
||||
uint64_t ub_checkpoint_txg;
|
||||
|
||||
uint64_t ub_raidz_reflow_info;
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
+8
-2
@@ -132,15 +132,19 @@ extern void vdev_space_update(vdev_t *vd,
|
||||
|
||||
extern int64_t vdev_deflated_space(vdev_t *vd, int64_t space);
|
||||
|
||||
extern uint64_t vdev_psize_to_asize_txg(vdev_t *vd, uint64_t psize,
|
||||
uint64_t txg);
|
||||
extern uint64_t vdev_psize_to_asize(vdev_t *vd, uint64_t psize);
|
||||
|
||||
/*
|
||||
* Return the amount of space allocated for a gang block header.
|
||||
* Return the amount of space allocated for a gang block header. Note that
|
||||
* since the physical birth txg is not provided, this must be constant for
|
||||
* a given vdev. (e.g. raidz expansion can't change this)
|
||||
*/
|
||||
static inline uint64_t
|
||||
vdev_gang_header_asize(vdev_t *vd)
|
||||
{
|
||||
return (vdev_psize_to_asize(vd, SPA_GANGBLOCKSIZE));
|
||||
return (vdev_psize_to_asize_txg(vd, SPA_GANGBLOCKSIZE, 0));
|
||||
}
|
||||
|
||||
extern int vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux);
|
||||
@@ -204,6 +208,8 @@ extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
|
||||
offset, uint64_t size, zio_done_func_t *done, void *priv, int flags);
|
||||
extern int vdev_label_read_bootenv(vdev_t *, nvlist_t *);
|
||||
extern int vdev_label_write_bootenv(vdev_t *, nvlist_t *);
|
||||
extern int vdev_uberblock_sync_list(vdev_t **, int, struct uberblock *, int);
|
||||
extern int vdev_check_boot_reserve(spa_t *, vdev_t *);
|
||||
|
||||
typedef enum {
|
||||
VDEV_LABEL_CREATE, /* create/add a new device */
|
||||
|
||||
@@ -72,7 +72,7 @@ typedef void vdev_fini_func_t(vdev_t *vd);
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
|
||||
uint64_t *ashift, uint64_t *pshift);
|
||||
typedef void vdev_close_func_t(vdev_t *vd);
|
||||
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
|
||||
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize, uint64_t txg);
|
||||
typedef uint64_t vdev_min_asize_func_t(vdev_t *vd);
|
||||
typedef uint64_t vdev_min_alloc_func_t(vdev_t *vd);
|
||||
typedef void vdev_io_start_func_t(zio_t *zio);
|
||||
@@ -281,6 +281,7 @@ struct vdev {
|
||||
uint64_t vdev_noalloc; /* device is passivated? */
|
||||
uint64_t vdev_removing; /* device is being removed? */
|
||||
uint64_t vdev_failfast; /* device failfast setting */
|
||||
boolean_t vdev_rz_expanding; /* raidz is being expanded? */
|
||||
boolean_t vdev_ishole; /* is a hole in the namespace */
|
||||
uint64_t vdev_top_zap;
|
||||
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
|
||||
@@ -536,6 +537,7 @@ typedef struct vdev_label {
|
||||
/*
|
||||
* Size of embedded boot loader region on each label.
|
||||
* The total size of the first two labels plus the boot area is 4MB.
|
||||
* On RAIDZ, this space is overwritten during RAIDZ expansion.
|
||||
*/
|
||||
#define VDEV_BOOT_SIZE (7ULL << 19) /* 3.5M */
|
||||
|
||||
@@ -608,7 +610,7 @@ extern vdev_ops_t vdev_indirect_ops;
|
||||
*/
|
||||
/*
 * Default vdev operation implementations and min-asize helpers.
 * vdev_default_asize takes a txg as its third argument, matching the
 * three-argument vdev_asize_func_t typedef.
 */
extern void vdev_default_xlate(vdev_t *vd, const range_seg64_t *logical_rs,
    range_seg64_t *physical_rs, range_seg64_t *remain_rs);
extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize, uint64_t txg);
extern uint64_t vdev_default_min_asize(vdev_t *vd);
extern uint64_t vdev_get_min_asize(vdev_t *vd);
extern void vdev_set_min_asize(vdev_t *vd);
|
||||
|
||||
+100
-1
@@ -26,6 +26,7 @@
|
||||
#define _SYS_VDEV_RAIDZ_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/zfs_rlock.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -35,6 +36,8 @@ struct zio;
|
||||
struct raidz_col;
|
||||
struct raidz_row;
|
||||
struct raidz_map;
|
||||
struct vdev_raidz;
|
||||
struct uberblock;
|
||||
#if !defined(_KERNEL)
|
||||
struct kernel_param {};
|
||||
#endif
|
||||
@@ -44,13 +47,19 @@ struct kernel_param {};
|
||||
*/
|
||||
struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
|
||||
uint64_t);
|
||||
struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *,
|
||||
uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t);
|
||||
void vdev_raidz_map_free(struct raidz_map *);
|
||||
void vdev_raidz_free(struct vdev_raidz *);
|
||||
void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
|
||||
void vdev_raidz_generate_parity(struct raidz_map *);
|
||||
void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
|
||||
void vdev_raidz_child_done(zio_t *);
|
||||
void vdev_raidz_io_done(zio_t *);
|
||||
void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
|
||||
struct raidz_row *vdev_raidz_row_alloc(int);
|
||||
void vdev_raidz_reflow_copy_scratch(spa_t *);
|
||||
void raidz_dtl_reassessed(vdev_t *);
|
||||
|
||||
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
|
||||
|
||||
@@ -65,11 +74,101 @@ int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
|
||||
const int *, const int *, const int);
|
||||
int vdev_raidz_impl_set(const char *);
|
||||
|
||||
typedef struct vdev_raidz_expand {
|
||||
uint64_t vre_vdev_id;
|
||||
|
||||
kmutex_t vre_lock;
|
||||
kcondvar_t vre_cv;
|
||||
|
||||
/*
|
||||
* How much i/o is outstanding (issued and not completed).
|
||||
*/
|
||||
uint64_t vre_outstanding_bytes;
|
||||
|
||||
/*
|
||||
* Next offset to issue i/o for.
|
||||
*/
|
||||
uint64_t vre_offset;
|
||||
|
||||
/*
|
||||
* Lowest offset of a failed expansion i/o. The expansion will retry
|
||||
* from here. Once the expansion thread notices the failure and exits,
|
||||
* vre_failed_offset is reset back to UINT64_MAX, and
|
||||
* vre_waiting_for_resilver will be set.
|
||||
*/
|
||||
uint64_t vre_failed_offset;
|
||||
boolean_t vre_waiting_for_resilver;
|
||||
|
||||
/*
|
||||
* Offset that is completing each txg
|
||||
*/
|
||||
uint64_t vre_offset_pertxg[TXG_SIZE];
|
||||
|
||||
/*
|
||||
* Bytes copied in each txg.
|
||||
*/
|
||||
uint64_t vre_bytes_copied_pertxg[TXG_SIZE];
|
||||
|
||||
/*
|
||||
* The rangelock prevents normal read/write zio's from happening while
|
||||
* there are expansion (reflow) i/os in progress to the same offsets.
|
||||
*/
|
||||
zfs_rangelock_t vre_rangelock;
|
||||
|
||||
/*
|
||||
* These fields are stored on-disk in the vdev_top_zap:
|
||||
*/
|
||||
dsl_scan_state_t vre_state;
|
||||
uint64_t vre_start_time;
|
||||
uint64_t vre_end_time;
|
||||
uint64_t vre_bytes_copied;
|
||||
} vdev_raidz_expand_t;
|
||||
|
||||
typedef struct vdev_raidz {
|
||||
int vd_logical_width;
|
||||
/*
|
||||
* Number of child vdevs when this raidz vdev was created (i.e. before
|
||||
* any raidz expansions).
|
||||
*/
|
||||
int vd_original_width;
|
||||
|
||||
/*
|
||||
* The current number of child vdevs, which may be more than the
|
||||
* original width if an expansion is in progress or has completed.
|
||||
*/
|
||||
int vd_physical_width;
|
||||
|
||||
int vd_nparity;
|
||||
|
||||
/*
|
||||
* Tree of reflow_node_t's. The lock protects the avl tree only.
|
||||
* The reflow_node_t's describe completed expansions, and are used
|
||||
* to determine the logical width given a block's birth time.
|
||||
*/
|
||||
avl_tree_t vd_expand_txgs;
|
||||
kmutex_t vd_expand_lock;
|
||||
|
||||
/*
|
||||
* If this vdev is being expanded, spa_raidz_expand is set to this
|
||||
*/
|
||||
vdev_raidz_expand_t vn_vre;
|
||||
} vdev_raidz_t;
|
||||
|
||||
extern int vdev_raidz_attach_check(vdev_t *);
|
||||
extern void vdev_raidz_attach_sync(void *, dmu_tx_t *);
|
||||
extern void spa_start_raidz_expansion_thread(spa_t *);
|
||||
extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *);
|
||||
extern int vdev_raidz_load(vdev_t *);
|
||||
|
||||
/*
 * RAIDZ scratch area pause points (for testing).  The values are
 * consecutive integers starting at 0, with 0 meaning "no pause point".
 */
#define	RAIDZ_EXPAND_PAUSE_NONE	0
#define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1
#define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2
#define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
#define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -30,6 +30,8 @@
|
||||
#include <sys/kstat.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/abd_impl.h>
|
||||
#include <sys/zfs_rlock.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -102,28 +104,32 @@ typedef struct raidz_impl_ops {
|
||||
char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */
|
||||
} raidz_impl_ops_t;
|
||||
|
||||
|
||||
typedef struct raidz_col {
|
||||
uint64_t rc_devidx; /* child device index for I/O */
|
||||
int rc_devidx; /* child device index for I/O */
|
||||
uint32_t rc_size; /* I/O size */
|
||||
uint64_t rc_offset; /* device offset */
|
||||
uint64_t rc_size; /* I/O size */
|
||||
abd_t rc_abdstruct; /* rc_abd probably points here */
|
||||
abd_t *rc_abd; /* I/O data */
|
||||
abd_t *rc_orig_data; /* pre-reconstruction */
|
||||
int rc_error; /* I/O error for this device */
|
||||
uint8_t rc_tried; /* Did we attempt this I/O column? */
|
||||
uint8_t rc_skipped; /* Did we skip this I/O column? */
|
||||
uint8_t rc_need_orig_restore; /* need to restore from orig_data? */
|
||||
uint8_t rc_force_repair; /* Write good data to this column */
|
||||
uint8_t rc_allow_repair; /* Allow repair I/O to this column */
|
||||
uint8_t rc_tried:1; /* Did we attempt this I/O column? */
|
||||
uint8_t rc_skipped:1; /* Did we skip this I/O column? */
|
||||
uint8_t rc_need_orig_restore:1; /* need to restore from orig_data? */
|
||||
uint8_t rc_force_repair:1; /* Write good data to this column */
|
||||
uint8_t rc_allow_repair:1; /* Allow repair I/O to this column */
|
||||
int rc_shadow_devidx; /* for double write during expansion */
|
||||
int rc_shadow_error; /* for double write during expansion */
|
||||
uint64_t rc_shadow_offset; /* for double write during expansion */
|
||||
} raidz_col_t;
|
||||
|
||||
typedef struct raidz_row {
|
||||
uint64_t rr_cols; /* Regular column count */
|
||||
uint64_t rr_scols; /* Count including skipped columns */
|
||||
uint64_t rr_bigcols; /* Remainder data column count */
|
||||
uint64_t rr_missingdata; /* Count of missing data devices */
|
||||
uint64_t rr_missingparity; /* Count of missing parity devices */
|
||||
uint64_t rr_firstdatacol; /* First data column/parity count */
|
||||
int rr_cols; /* Regular column count */
|
||||
int rr_scols; /* Count including skipped columns */
|
||||
int rr_bigcols; /* Remainder data column count */
|
||||
int rr_missingdata; /* Count of missing data devices */
|
||||
int rr_missingparity; /* Count of missing parity devices */
|
||||
int rr_firstdatacol; /* First data column/parity count */
|
||||
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
|
||||
int rr_nempty; /* empty sectors included in parity */
|
||||
#ifdef ZFS_DEBUG
|
||||
@@ -138,10 +144,25 @@ typedef struct raidz_map {
|
||||
int rm_nrows; /* Regular row count */
|
||||
int rm_nskip; /* RAIDZ sectors skipped for padding */
|
||||
int rm_skipstart; /* Column index of padding start */
|
||||
int rm_original_width; /* pre-expansion width of raidz vdev */
|
||||
int rm_nphys_cols; /* num entries in rm_phys_col[] */
|
||||
zfs_locked_range_t *rm_lr;
|
||||
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
|
||||
raidz_col_t *rm_phys_col; /* if non-NULL, read i/o aggregation */
|
||||
raidz_row_t *rm_row[0]; /* flexible array of rows */
|
||||
} raidz_map_t;
|
||||
|
||||
/*
|
||||
* Nodes in vdev_raidz_t:vd_expand_txgs.
|
||||
* Blocks with physical birth time of re_txg or later have the specified
|
||||
* logical width (until the next node).
|
||||
*/
|
||||
typedef struct reflow_node {
|
||||
uint64_t re_txg;
|
||||
uint64_t re_logical_width;
|
||||
avl_node_t re_link;
|
||||
} reflow_node_t;
|
||||
|
||||
|
||||
#define RAIDZ_ORIGINAL_IMPL (INT_MAX)
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@ extern int zfs_dbgmsg_enable;
|
||||
#define ZFS_DEBUG_LOG_SPACEMAP (1 << 12)
|
||||
#define ZFS_DEBUG_METASLAB_ALLOC (1 << 13)
|
||||
#define ZFS_DEBUG_BRT (1 << 14)
|
||||
#define ZFS_DEBUG_RAIDZ_RECONSTRUCT (1 << 15)
|
||||
|
||||
extern void __set_error(const char *file, const char *func, int line, int err);
|
||||
extern void __zfs_dbgmsg(char *buf);
|
||||
|
||||
Reference in New Issue
Block a user