mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-24 11:18:52 +03:00
Illumos #3741
3741 zfs needs better comments

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Approved by: Christopher Siden <christopher.siden@delphix.com>

References:
  https://www.illumos.org/issues/3741
  illumos/illumos-gate@3e30c24aee

Ported-by: Richard Yao <ryao@gentoo.org>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #1775
This commit is contained in:
committed by
Brian Behlendorf
parent
b1118acbb1
commit
e49f1e20a0
@@ -431,23 +431,50 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
|
||||
vdev_raidz_cksum_report
|
||||
};
|
||||
|
||||
/*
|
||||
* Divides the IO evenly across all child vdevs; usually, dcols is
|
||||
* the number of children in the target vdev.
|
||||
*/
|
||||
static raidz_map_t *
|
||||
vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
|
||||
uint64_t nparity)
|
||||
{
|
||||
raidz_map_t *rm;
|
||||
/* The starting RAIDZ (parent) vdev sector of the block. */
|
||||
uint64_t b = zio->io_offset >> unit_shift;
|
||||
/* The zio's size in units of the vdev's minimum sector size. */
|
||||
uint64_t s = zio->io_size >> unit_shift;
|
||||
/* The first column for this stripe. */
|
||||
uint64_t f = b % dcols;
|
||||
/* The starting byte offset on each child vdev. */
|
||||
uint64_t o = (b / dcols) << unit_shift;
|
||||
uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
|
||||
|
||||
/*
|
||||
* "Quotient": The number of data sectors for this stripe on all but
|
||||
* the "big column" child vdevs that also contain "remainder" data.
|
||||
*/
|
||||
q = s / (dcols - nparity);
|
||||
|
||||
/*
|
||||
* "Remainder": The number of partial stripe data sectors in this I/O.
|
||||
* This will add a sector to some, but not all, child vdevs.
|
||||
*/
|
||||
r = s - q * (dcols - nparity);
|
||||
|
||||
/* The number of "big columns" - those which contain remainder data. */
|
||||
bc = (r == 0 ? 0 : r + nparity);
|
||||
|
||||
/*
|
||||
* The total number of data and parity sectors associated with
|
||||
* this I/O.
|
||||
*/
|
||||
tot = s + nparity * (q + (r == 0 ? 0 : 1));
|
||||
|
||||
/* acols: The columns that will be accessed. */
|
||||
/* scols: The columns that will be accessed or skipped. */
|
||||
if (q == 0) {
|
||||
/* Our I/O request doesn't span all child vdevs. */
|
||||
acols = bc;
|
||||
scols = MIN(dcols, roundup(bc, nparity + 1));
|
||||
} else {
|
||||
@@ -1521,6 +1548,23 @@ vdev_raidz_child_done(zio_t *zio)
|
||||
rc->rc_skipped = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Start an IO operation on a RAIDZ VDev
|
||||
*
|
||||
* Outline:
|
||||
* - For write operations:
|
||||
* 1. Generate the parity data
|
||||
* 2. Create child zio write operations to each column's vdev, for both
|
||||
* data and parity.
|
||||
* 3. If the column skips any sectors for padding, create optional dummy
|
||||
* write zio children for those areas to improve aggregation continuity.
|
||||
* - For read operations:
|
||||
* 1. Create child zio read operations to each data column's vdev to read
|
||||
* the range of data required for zio.
|
||||
* 2. If this is a scrub or resilver operation, or if any of the data
|
||||
* vdevs have had errors, then create zio read operations to the parity
|
||||
* columns' VDevs as well.
|
||||
*/
|
||||
static int
|
||||
vdev_raidz_io_start(zio_t *zio)
|
||||
{
|
||||
@@ -1864,6 +1908,27 @@ done:
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete an IO operation on a RAIDZ VDev
|
||||
*
|
||||
* Outline:
|
||||
* - For write operations:
|
||||
* 1. Check for errors on the child IOs.
|
||||
* 2. Return, setting an error code if too few child VDevs were written
|
||||
* to reconstruct the data later. Note that partial writes are
|
||||
* considered successful if they can be reconstructed at all.
|
||||
* - For read operations:
|
||||
* 1. Check for errors on the child IOs.
|
||||
* 2. If data errors occurred:
|
||||
* a. Try to reassemble the data from the parity available.
|
||||
* b. If we haven't yet read the parity drives, read them now.
|
||||
* c. If all parity drives have been read but the data still doesn't
|
||||
* reassemble with a correct checksum, then try combinatorial
|
||||
* reconstruction.
|
||||
* d. If that doesn't work, return an error.
|
||||
* 3. If there were unexpected errors or this is a resilver operation,
|
||||
* rewrite the vdevs that had errors.
|
||||
*/
|
||||
static void
|
||||
vdev_raidz_io_done(zio_t *zio)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user