DLPX-44812 integrate EP-220 large memory scalability

This commit is contained in:
David Quigley
2016-07-22 11:52:49 -04:00
committed by Brian Behlendorf
parent 616fa7c02b
commit a6255b7fce
49 changed files with 2625 additions and 798 deletions
+1
View File
@@ -1,6 +1,7 @@
SUBDIRS = fm fs crypto sysevent
COMMON_H = \
$(top_srcdir)/include/sys/abd.h \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
+160
View File
@@ -0,0 +1,160 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 by Chunwei Chen. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
#ifndef _ABD_H
#define _ABD_H
#include <sys/isa_defs.h>
#include <sys/int_types.h>
#include <sys/debug.h>
#include <sys/refcount.h>
#ifdef _KERNEL
#include <linux/mm.h>
#include <sys/uio.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef enum abd_flags {
ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */
ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */
ABD_FLAG_META = 1 << 2 /* does this represent FS metadata? */
} abd_flags_t;
typedef struct abd {
abd_flags_t abd_flags;
uint_t abd_size; /* excludes scattered abd_offset */
struct abd *abd_parent;
refcount_t abd_children;
union {
struct abd_scatter {
uint_t abd_offset;
uint_t abd_chunk_size;
struct page *abd_chunks[];
} abd_scatter;
struct abd_linear {
void *abd_buf;
} abd_linear;
} abd_u;
} abd_t;
typedef int abd_iter_func_t(void *buf, size_t len, void *private);
typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);
extern int zfs_abd_scatter_enabled;
static inline boolean_t
abd_is_linear(abd_t *abd)
{
return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0);
}
/*
* Allocations and deallocations
*/
abd_t *abd_alloc(size_t, boolean_t);
abd_t *abd_alloc_linear(size_t, boolean_t);
abd_t *abd_alloc_for_io(size_t, boolean_t);
abd_t *abd_alloc_sametype(abd_t *, size_t);
void abd_free(abd_t *);
abd_t *abd_get_offset(abd_t *, size_t);
abd_t *abd_get_from_buf(void *, size_t);
void abd_put(abd_t *);
/*
* Conversion to and from a normal buffer
*/
void *abd_to_buf(abd_t *);
void *abd_borrow_buf(abd_t *, size_t);
void *abd_borrow_buf_copy(abd_t *, size_t);
void abd_return_buf(abd_t *, void *, size_t);
void abd_return_buf_copy(abd_t *, void *, size_t);
void abd_take_ownership_of_buf(abd_t *, boolean_t);
void abd_release_ownership_of_buf(abd_t *);
/*
* ABD operations
*/
int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
abd_iter_func2_t *, void *);
void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
int abd_cmp(abd_t *, abd_t *);
int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
void abd_zero_off(abd_t *, size_t, size_t);
/*
* Wrappers for calls with offsets of 0
*/
static inline void
abd_copy(abd_t *dabd, abd_t *sabd, size_t size)
{
abd_copy_off(dabd, sabd, 0, 0, size);
}
static inline void
abd_copy_from_buf(abd_t *abd, void *buf, size_t size)
{
abd_copy_from_buf_off(abd, buf, 0, size);
}
static inline void
abd_copy_to_buf(void* buf, abd_t *abd, size_t size)
{
abd_copy_to_buf_off(buf, abd, 0, size);
}
static inline int
abd_cmp_buf(abd_t *abd, void *buf, size_t size)
{
return (abd_cmp_buf_off(abd, buf, 0, size));
}
static inline void
abd_zero(abd_t *abd, size_t size)
{
abd_zero_off(abd, 0, size);
}
/*
* Module lifecycle
*/
void abd_init(void);
void abd_fini(void);
#ifdef __cplusplus
}
#endif
#endif /* _ABD_H */
+1 -1
View File
@@ -166,7 +166,7 @@ typedef struct l1arc_buf_hdr {
refcount_t b_refcnt;
arc_callback_t *b_acb;
void *b_pdata;
abd_t *b_pabd;
} l1arc_buf_hdr_t;
typedef struct l2arc_dev {
+4 -1
View File
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_DDT_H
@@ -35,6 +36,8 @@
extern "C" {
#endif
struct abd;
/*
* On-disk DDT formats, in the desired search order (newest version first).
*/
@@ -108,7 +111,7 @@ struct ddt_entry {
ddt_key_t dde_key;
ddt_phys_t dde_phys[DDT_PHYS_TYPES];
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
void *dde_repair_data;
struct abd *dde_repair_abd;
enum ddt_type dde_type;
enum ddt_class dde_class;
uint8_t dde_loading;
+6 -5
View File
@@ -416,15 +416,17 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
#define BP_IS_METADATA(bp) \
(BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
#define BP_GET_ASIZE(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_GET_UCSIZE(bp) \
((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_UCSIZE(bp) \
(BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
@@ -569,8 +571,7 @@ _NOTE(CONSTCOND) } while (0)
}
#define BP_GET_BUFC_TYPE(bp) \
(((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
ARC_BUFC_METADATA : ARC_BUFC_DATA)
(BP_IS_METADATA(bp) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
+2 -1
View File
@@ -53,6 +53,7 @@ extern "C" {
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
struct abd;
extern int zfs_vdev_queue_depth_pct;
extern uint32_t zfs_vdev_async_write_max_active;
@@ -87,7 +88,7 @@ typedef const struct vdev_ops {
* Virtual device properties
*/
struct vdev_cache_entry {
char *ve_data;
struct abd *ve_abd;
uint64_t ve_offset;
clock_t ve_lastused;
avl_node_t ve_offset_node;
+3 -2
View File
@@ -28,6 +28,7 @@
#include <sys/types.h>
#include <sys/debug.h>
#include <sys/kstat.h>
#include <sys/abd.h>
#ifdef __cplusplus
extern "C" {
@@ -104,7 +105,7 @@ typedef struct raidz_col {
size_t rc_devidx; /* child device index for I/O */
size_t rc_offset; /* device offset */
size_t rc_size; /* I/O size */
void *rc_data; /* I/O data */
abd_t *rc_abd; /* I/O data */
void *rc_gdata; /* used to store the "good" version */
int rc_error; /* I/O error for this device */
unsigned int rc_tried; /* Did we attempt this I/O column? */
@@ -121,7 +122,7 @@ typedef struct raidz_map {
size_t rm_firstdatacol; /* First data column/parity count */
size_t rm_nskip; /* Skipped sectors for padding */
size_t rm_skipstart; /* Column index of padding start */
void *rm_datacopy; /* rm_asize-buffer of copied data */
abd_t *rm_abd_copy; /* rm_asize-buffer of copied data */
size_t rm_reports; /* # of referencing checksum reports */
unsigned int rm_freed; /* map no longer has referencing ZIO */
unsigned int rm_ecksuminjected; /* checksum error was injected */
+15 -15
View File
@@ -301,6 +301,7 @@ typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
struct dnode_phys;
struct abd;
struct zio_cksum_report {
struct zio_cksum_report *zcr_next;
@@ -333,12 +334,12 @@ typedef struct zio_gang_node {
} zio_gang_node_t;
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
zio_gang_node_t *gn, void *data);
zio_gang_node_t *gn, struct abd *data, uint64_t offset);
typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
typedef void zio_transform_func_t(zio_t *zio, struct abd *data, uint64_t size);
typedef struct zio_transform {
void *zt_orig_data;
struct abd *zt_orig_abd;
uint64_t zt_orig_size;
uint64_t zt_bufsize;
zio_transform_func_t *zt_transform;
@@ -396,8 +397,8 @@ struct zio {
uint64_t io_lsize;
/* Data represented by this I/O */
void *io_data;
void *io_orig_data;
struct abd *io_abd;
struct abd *io_orig_abd;
uint64_t io_size;
uint64_t io_orig_size;
@@ -455,19 +456,19 @@ extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t lsize, zio_done_func_t *done, void *private,
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
zio_done_func_t *physdone, zio_done_func_t *done,
void *private, zio_priority_t priority, enum zio_flag flags,
const zbookmark_phys_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
struct abd *data, uint64_t size, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
@@ -483,12 +484,12 @@ extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
enum zio_flag flags, boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
enum zio_flag flags, boolean_t labels);
@@ -517,21 +518,20 @@ extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
extern void *zio_buf_alloc_flags(size_t size, int flags);
extern void zio_push_transform(zio_t *zio, void *data, uint64_t size,
extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size,
uint64_t bufsize, zio_transform_func_t *transform);
extern void zio_pop_transforms(zio_t *zio);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
uint64_t offset, void *data, uint64_t size, int type,
uint64_t offset, struct abd *data, uint64_t size, int type,
zio_priority_t priority, enum zio_flag flags,
zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
void *data, uint64_t size, int type, zio_priority_t priority,
struct abd *data, uint64_t size, int type, zio_priority_t priority,
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
+18 -18
View File
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
* Copyright Saso Kiselkov 2013, All rights reserved.
*/
@@ -34,12 +34,12 @@
extern "C" {
#endif
struct abd;
/*
* Signature for checksum functions.
*/
typedef void zio_checksum_func_t(const void *, uint64_t, const void *,
zio_cksum_t *);
typedef void zio_checksum_t(const void *data, uint64_t size,
typedef void zio_checksum_t(struct abd *abd, uint64_t size,
const void *ctx_template, zio_cksum_t *zcp);
typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt);
typedef void zio_checksum_tmpl_free_t(void *ctx_template);
@@ -83,28 +83,28 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
* Checksum routines.
*/
extern zio_checksum_t zio_checksum_SHA256;
extern zio_checksum_t zio_checksum_SHA512_native;
extern zio_checksum_t zio_checksum_SHA512_byteswap;
extern zio_checksum_t abd_checksum_SHA256;
extern zio_checksum_t abd_checksum_SHA512_native;
extern zio_checksum_t abd_checksum_SHA512_byteswap;
/* Skein */
extern zio_checksum_t zio_checksum_skein_native;
extern zio_checksum_t zio_checksum_skein_byteswap;
extern zio_checksum_tmpl_init_t zio_checksum_skein_tmpl_init;
extern zio_checksum_tmpl_free_t zio_checksum_skein_tmpl_free;
extern zio_checksum_t abd_checksum_skein_native;
extern zio_checksum_t abd_checksum_skein_byteswap;
extern zio_checksum_tmpl_init_t abd_checksum_skein_tmpl_init;
extern zio_checksum_tmpl_free_t abd_checksum_skein_tmpl_free;
/* Edon-R */
extern zio_checksum_t zio_checksum_edonr_native;
extern zio_checksum_t zio_checksum_edonr_byteswap;
extern zio_checksum_tmpl_init_t zio_checksum_edonr_tmpl_init;
extern zio_checksum_tmpl_free_t zio_checksum_edonr_tmpl_free;
extern zio_checksum_t abd_checksum_edonr_native;
extern zio_checksum_t abd_checksum_edonr_byteswap;
extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init;
extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free;
extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
void *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
void *data, uint64_t size);
extern void zio_checksum_compute(zio_t *, enum zio_checksum,
struct abd *, uint64_t);
extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum,
void *, uint64_t, uint64_t, zio_bad_cksum_t *);
struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
extern void zio_checksum_templates_free(spa_t *spa);
+20 -8
View File
@@ -22,12 +22,14 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2015 by Delphix. All rights reserved.
* Copyright (c) 2015, 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZIO_COMPRESS_H
#define _SYS_ZIO_COMPRESS_H
#include <sys/abd.h>
#ifdef __cplusplus
extern "C" {
#endif
@@ -59,14 +61,21 @@ typedef size_t zio_compress_func_t(void *src, void *dst,
typedef int zio_decompress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
/*
* Common signature for all zio decompress functions using an ABD as input.
* This is helpful if you have both compressed ARC and scatter ABDs enabled,
* but is not a requirement for all compression algorithms.
*/
typedef int zio_decompress_abd_func_t(abd_t *src, void *dst,
size_t s_len, size_t d_len, int);
/*
* Information about each compression function.
*/
typedef const struct zio_compress_info {
zio_compress_func_t *ci_compress; /* compression function */
zio_decompress_func_t *ci_decompress; /* decompression function */
int ci_level; /* level parameter */
char *ci_name; /* algorithm name */
char *ci_name;
int ci_level;
zio_compress_func_t *ci_compress;
zio_decompress_func_t *ci_decompress;
} zio_compress_info_t;
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
@@ -96,13 +105,16 @@ extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int lz4_decompress_abd(abd_t *src, void *dst, size_t s_len, size_t d_len,
int level);
/*
* Compress and decompress data if necessary.
*/
extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void *dst,
size_t s_len);
extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
size_t s_len, size_t d_len);
extern int zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
size_t s_len, size_t d_len);
#ifdef __cplusplus
+5 -4
View File
@@ -48,15 +48,16 @@ extern "C" {
* checksum method is added. This method will ignore last (size % 4) bytes of
* the data buffer.
*/
void fletcher_init(zio_cksum_t *);
void fletcher_2_native(const void *, uint64_t, const void *, zio_cksum_t *);
void fletcher_2_byteswap(const void *, uint64_t, const void *, zio_cksum_t *);
void fletcher_4_native(const void *, uint64_t, const void *, zio_cksum_t *);
int fletcher_2_incremental_native(void *, size_t, void *);
int fletcher_2_incremental_byteswap(void *, size_t, void *);
void fletcher_4_native_varsize(const void *, uint64_t, zio_cksum_t *);
void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *);
void fletcher_4_incremental_native(const void *, uint64_t,
zio_cksum_t *);
void fletcher_4_incremental_byteswap(const void *, uint64_t,
zio_cksum_t *);
int fletcher_4_incremental_native(void *, size_t, void *);
int fletcher_4_incremental_byteswap(void *, size_t, void *);
int fletcher_4_impl_set(const char *selector);
void fletcher_4_init(void);
void fletcher_4_fini(void);