mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 02:27:36 +03:00
Illumos 5027 - zfs large block support
5027 zfs large block support Reviewed by: Alek Pinchuk <pinchuk.alek@gmail.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com> Reviewed by: Richard Elling <richard.elling@richardelling.com> Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/5027 https://github.com/illumos/illumos-gate/commit/b515258 Porting Notes: * Included in this patch is a tiny ISP2() cleanup in zio_init() from Illumos 5255. * Unlike the upstream Illumos commit this patch does not impose an arbitrary 128K block size limit on volumes. Volumes, like filesystems, are limited by the zfs_max_recordsize=1M module option. * By default the maximum record size is limited to 1M by the module option zfs_max_recordsize. This value may be safely increased up to 16M which is the largest block size supported by the on-disk format. At the moment, 1M blocks clearly offer a significant performance improvement but the benefits of going beyond this for the majority of workloads are less clear. * The illumos version of this patch increased DMU_MAX_ACCESS to 32M. This was determined not to be large enough when using 16M blocks because the zfs_make_xattrdir() function will fail (EFBIG) when assigning a TX. This was immediately observed under Linux because all newly created files must have a security xattr created and that was failing. Therefore, we've set DMU_MAX_ACCESS to 64M. * On 32-bit platforms a hard limit of 1M is set for blocks due to the limited virtual address space. We should be able to relax this once the ABD patches are merged. Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #354
This commit is contained in:
committed by
Brian Behlendorf
parent
3df293404a
commit
f1512ee61e
+18
-12
@@ -1055,21 +1055,28 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
|
||||
break;
|
||||
}
|
||||
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
case ZFS_PROP_VOLBLOCKSIZE:
|
||||
/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
{
|
||||
int maxbs = SPA_MAXBLOCKSIZE;
|
||||
if (zhp != NULL) {
|
||||
maxbs = zpool_get_prop_int(zhp->zpool_hdl,
|
||||
ZPOOL_PROP_MAXBLOCKSIZE, NULL);
|
||||
}
|
||||
/*
|
||||
* The value must be a power of two between
|
||||
* SPA_MINBLOCKSIZE and maxbs.
|
||||
*/
|
||||
if (intval < SPA_MINBLOCKSIZE ||
|
||||
intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
|
||||
intval > maxbs || !ISP2(intval)) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"'%s' must be power of 2 from %u "
|
||||
"to %uk"), propname,
|
||||
(uint_t)SPA_MINBLOCKSIZE,
|
||||
(uint_t)SPA_MAXBLOCKSIZE >> 10);
|
||||
"'%s' must be power of 2 from 512B "
|
||||
"to %uKB"), propname, maxbs >> 10);
|
||||
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
|
||||
goto error;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
case ZFS_PROP_MLSLABEL:
|
||||
{
|
||||
#ifdef HAVE_MLSLABEL
|
||||
@@ -1446,7 +1453,8 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
|
||||
break;
|
||||
|
||||
case ERANGE:
|
||||
if (prop == ZFS_PROP_COMPRESSION) {
|
||||
if (prop == ZFS_PROP_COMPRESSION ||
|
||||
prop == ZFS_PROP_RECORDSIZE) {
|
||||
(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"property setting is not allowed on "
|
||||
"bootable datasets"));
|
||||
@@ -3212,9 +3220,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
|
||||
case EDOM:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"volume block size must be power of 2 from "
|
||||
"%u to %uk"),
|
||||
(uint_t)SPA_MINBLOCKSIZE,
|
||||
(uint_t)SPA_MAXBLOCKSIZE >> 10);
|
||||
"512B to %uKB"), zfs_max_recordsize >> 10);
|
||||
|
||||
return (zfs_error(hdl, EZFS_BADPROP, errbuf));
|
||||
|
||||
|
||||
@@ -214,7 +214,7 @@ static void *
|
||||
cksummer(void *arg)
|
||||
{
|
||||
dedup_arg_t *dda = arg;
|
||||
char *buf = malloc(1<<20);
|
||||
char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
|
||||
dmu_replay_record_t thedrr;
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
|
||||
@@ -279,9 +279,9 @@ cksummer(void *arg)
|
||||
DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
|
||||
int sz = drr->drr_payloadlen;
|
||||
|
||||
if (sz > 1<<20) {
|
||||
free(buf);
|
||||
buf = malloc(sz);
|
||||
if (sz > SPA_MAXBLOCKSIZE) {
|
||||
buf = zfs_realloc(dda->dedup_hdl, buf,
|
||||
SPA_MAXBLOCKSIZE, sz);
|
||||
}
|
||||
(void) ssread(buf, sz, ofp);
|
||||
if (ferror(stdin))
|
||||
@@ -834,7 +834,7 @@ typedef struct send_dump_data {
|
||||
char prevsnap[ZFS_MAXNAMELEN];
|
||||
uint64_t prevsnap_obj;
|
||||
boolean_t seenfrom, seento, replicate, doall, fromorigin;
|
||||
boolean_t verbose, dryrun, parsable, progress, embed_data;
|
||||
boolean_t verbose, dryrun, parsable, progress, embed_data, large_block;
|
||||
int outfd;
|
||||
boolean_t err;
|
||||
nvlist_t *fss;
|
||||
@@ -1181,6 +1181,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||
}
|
||||
|
||||
enum lzc_send_flags flags = 0;
|
||||
if (sdd->large_block)
|
||||
flags |= LZC_SEND_FLAG_LARGE_BLOCK;
|
||||
if (sdd->embed_data)
|
||||
flags |= LZC_SEND_FLAG_EMBED_DATA;
|
||||
|
||||
@@ -1529,6 +1531,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
sdd.parsable = flags->parsable;
|
||||
sdd.progress = flags->progress;
|
||||
sdd.dryrun = flags->dryrun;
|
||||
sdd.large_block = flags->largeblock;
|
||||
sdd.embed_data = flags->embed_data;
|
||||
sdd.filter_cb = filter_func;
|
||||
sdd.filter_cb_arg = cb_arg;
|
||||
@@ -2564,7 +2567,7 @@ static int
|
||||
recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
|
||||
{
|
||||
dmu_replay_record_t *drr;
|
||||
void *buf = malloc(1<<20);
|
||||
void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
|
||||
char errbuf[1024];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
|
||||
@@ -455,6 +455,10 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp)
|
||||
*
|
||||
* "fd" is the file descriptor to write the send stream to.
|
||||
*
|
||||
* If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
|
||||
* to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
|
||||
* records with drr_blksz > 128K.
|
||||
*
|
||||
* If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
|
||||
* to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
|
||||
* which the receiving system must support (as indicated by support
|
||||
@@ -471,6 +475,8 @@ lzc_send(const char *snapname, const char *from, int fd,
|
||||
fnvlist_add_int32(args, "fd", fd);
|
||||
if (from != NULL)
|
||||
fnvlist_add_string(args, "fromsnap", from);
|
||||
if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
|
||||
fnvlist_add_boolean(args, "largeblockok");
|
||||
if (flags & LZC_SEND_FLAG_EMBED_DATA)
|
||||
fnvlist_add_boolean(args, "embedok");
|
||||
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
|
||||
|
||||
Reference in New Issue
Block a user