mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-26 04:07:45 +03:00
Illumos 5027 - zfs large block support
5027 zfs large block support Reviewed by: Alek Pinchuk <pinchuk.alek@gmail.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com> Reviewed by: Richard Elling <richard.elling@richardelling.com> Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Approved by: Dan McDonald <danmcd@omniti.com> References: https://www.illumos.org/issues/5027 https://github.com/illumos/illumos-gate/commit/b515258 Porting Notes: * Included in this patch is a tiny ISP2() cleanup in zio_init() from Illumos 5255. * Unlike the upstream Illumos commit, this patch does not impose an arbitrary 128K block size limit on volumes. Volumes, like filesystems, are limited by the zfs_max_recordsize=1M module option. * By default the maximum record size is limited to 1M by the module option zfs_max_recordsize. This value may be safely increased up to 16M, which is the largest block size supported by the on-disk format. At the moment, 1M blocks clearly offer a significant performance improvement but the benefits of going beyond this for the majority of workloads are less clear. * The illumos version of this patch increased DMU_MAX_ACCESS to 32M. This was determined not to be large enough when using 16M blocks because the zfs_make_xattrdir() function will fail (EFBIG) when assigning a TX. This was immediately observed under Linux because all newly created files must have a security xattr created and that was failing. Therefore, we've set DMU_MAX_ACCESS to 64M. * On 32-bit platforms a hard limit of 1M is set for blocks due to the limited virtual address space. We should be able to relax this once the ABD patches are merged. Ported-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #354
This commit is contained in:
committed by
Brian Behlendorf
parent
3df293404a
commit
f1512ee61e
+48
-6
@@ -2392,7 +2392,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
|
||||
const char *propname = nvpair_name(pair);
|
||||
zfs_prop_t prop = zfs_name_to_prop(propname);
|
||||
uint64_t intval;
|
||||
int err;
|
||||
int err = -1;
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (zfs_prop_userquota(propname))
|
||||
@@ -3790,8 +3790,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
* the SPA supports it. We ignore any errors here since
|
||||
* we'll catch them later.
|
||||
*/
|
||||
if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
|
||||
nvpair_value_uint64(pair, &intval) == 0) {
|
||||
if (nvpair_value_uint64(pair, &intval) == 0) {
|
||||
if (intval >= ZIO_COMPRESS_GZIP_1 &&
|
||||
intval <= ZIO_COMPRESS_GZIP_9 &&
|
||||
zfs_earlier_version(dsname,
|
||||
@@ -3842,6 +3841,42 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
break;
|
||||
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
/* Record sizes above 128k need the feature to be enabled */
|
||||
if (nvpair_value_uint64(pair, &intval) == 0 &&
|
||||
intval > SPA_OLD_MAXBLOCKSIZE) {
|
||||
spa_t *spa;
|
||||
|
||||
/*
|
||||
* If this is a bootable dataset then
|
||||
* we don't allow large (>128K) blocks,
|
||||
* because GRUB doesn't support them.
|
||||
*/
|
||||
if (zfs_is_bootfs(dsname) &&
|
||||
intval > SPA_OLD_MAXBLOCKSIZE) {
|
||||
return (SET_ERROR(EDOM));
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't allow setting the property above 1MB,
|
||||
* unless the tunable has been changed.
|
||||
*/
|
||||
if (intval > zfs_max_recordsize ||
|
||||
intval > SPA_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(EDOM));
|
||||
|
||||
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
|
||||
return (err);
|
||||
|
||||
if (!spa_feature_is_enabled(spa,
|
||||
SPA_FEATURE_LARGE_BLOCKS)) {
|
||||
spa_close(spa, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
break;
|
||||
|
||||
case ZFS_PROP_SHARESMB:
|
||||
if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
@@ -4221,7 +4256,7 @@ out:
|
||||
* zc_fromobj objsetid of incremental fromsnap (may be zero)
|
||||
* zc_guid if set, estimate size of stream only. zc_cookie is ignored.
|
||||
* output size in zc_objset_type.
|
||||
* zc_flags if =1, WRITE_EMBEDDED records are permitted
|
||||
* zc_flags lzc_send_flags
|
||||
*
|
||||
* outputs:
|
||||
* zc_objset_type estimated size, if zc_guid is set
|
||||
@@ -4233,6 +4268,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
offset_t off;
|
||||
boolean_t estimate = (zc->zc_guid != 0);
|
||||
boolean_t embedok = (zc->zc_flags & 0x1);
|
||||
boolean_t large_block_ok = (zc->zc_flags & 0x2);
|
||||
|
||||
if (zc->zc_obj != 0) {
|
||||
dsl_pool_t *dp;
|
||||
@@ -4294,7 +4330,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
|
||||
zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off);
|
||||
zc->zc_fromobj, embedok, large_block_ok,
|
||||
zc->zc_cookie, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
@@ -5160,6 +5197,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
* innvl: {
|
||||
* "fd" -> file descriptor to write stream to (int32)
|
||||
* (optional) "fromsnap" -> full snap name to send an incremental from
|
||||
* (optional) "largeblockok" -> (value ignored)
|
||||
* presence indicates that blocks > 128KB are permitted
|
||||
* (optional) "embedok" -> (value ignored)
|
||||
* presence indicates DRR_WRITE_EMBEDDED records are permitted
|
||||
* }
|
||||
@@ -5175,6 +5214,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
char *fromname = NULL;
|
||||
int fd;
|
||||
file_t *fp;
|
||||
boolean_t largeblockok;
|
||||
boolean_t embedok;
|
||||
|
||||
error = nvlist_lookup_int32(innvl, "fd", &fd);
|
||||
@@ -5183,13 +5223,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
|
||||
|
||||
largeblockok = nvlist_exists(innvl, "largeblockok");
|
||||
embedok = nvlist_exists(innvl, "embedok");
|
||||
|
||||
if ((fp = getf(fd)) == NULL)
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off);
|
||||
error = dmu_send(snapname, fromname, embedok, largeblockok,
|
||||
fd, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
|
||||
Reference in New Issue
Block a user