Add "ashift" property to zpool create

Some disks with internal sectors larger than 512 bytes (e.g., 4k) can
suffer from bad write performance when ashift is not configured
correctly.  This is caused by the disk reporting a sector size of 512
bytes instead of its actual sector size.  The drive may behave this way
for compatibility reasons.  For example, the WDC WD20EARS disks are
known to exhibit this behavior.

When creating a zpool, ZFS takes that wrong sector size and sets the
"ashift" property accordingly (to 9: 1<<9=512), whereas it should be
set to 12 for 4k sectors (1<<12=4096).

This patch allows an administrator to manually specify the known correct
ashift size at 'zpool create' time.  This can significantly improve
performance in certain cases.  However, it will have an impact on your
total pool capacity.  See the updated ashift property description
in the zpool.8 man page for additional details.

Valid values for the ashift property range from 9 to 17 (512B-128KB).
Additionally, you may set the ashift to 0 if you wish to auto-detect
the sector size based on what the disk reports; this is the default
behavior.  The most common ashift values are 9 and 12.

  Example:
  zpool create -o ashift=12 tank raidz2 sda sdb sdc sdd

Closes #280

Original-patch-by: Richard Laager <rlaager@wiktel.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
This commit is contained in:
Christian Kohlschütter
2011-06-16 21:56:38 +02:00
committed by Brian Behlendorf
parent 96801d2906
commit df30f56639
8 changed files with 69 additions and 12 deletions
+3 -3
View File
@@ -488,7 +488,7 @@ zpool_do_add(int argc, char **argv)
}
/* pass off to get_vdev_spec for processing */
nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun,
nvroot = make_root_vdev(zhp, NULL, force, !force, B_FALSE, dryrun,
argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
@@ -688,7 +688,7 @@ zpool_do_create(int argc, char **argv)
}
/* pass off to get_vdev_spec for bulk processing */
nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun,
nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun,
argc - 1, argv + 1);
if (nvroot == NULL)
goto errout;
@@ -2683,7 +2683,7 @@ zpool_do_attach_or_replace(int argc, char **argv, int replacing)
return (1);
}
nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE,
nvroot = make_root_vdev(zhp, NULL, force, B_FALSE, replacing, B_FALSE,
argc, argv);
if (nvroot == NULL) {
zpool_close(zhp);
+2 -2
View File
@@ -43,8 +43,8 @@ uint_t num_logs(nvlist_t *nv);
* Virtual device functions
*/
nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
boolean_t replacing, boolean_t dryrun, int argc, char **argv);
nvlist_t *make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force,
int check_rep, boolean_t replacing, boolean_t dryrun, int argc, char **argv);
nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
nvlist_t *props, splitflags_t flags, int argc, char **argv);
+20 -7
View File
@@ -407,7 +407,7 @@ is_shorthand_path(const char *arg, char *path,
* xxx Shorthand for /dev/disk/yyy/xxx
*/
static nvlist_t *
make_leaf_vdev(const char *arg, uint64_t is_log)
make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
{
char path[MAXPATHLEN];
struct stat64 statbuf;
@@ -499,6 +499,19 @@ make_leaf_vdev(const char *arg, uint64_t is_log)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
(uint64_t)wholedisk) == 0);
if (props != NULL) {
uint64_t ashift = 0;
char *value = NULL;
if (nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0)
zfs_nicestrtonum(NULL, value, &ashift);
if (ashift > 0)
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT,
ashift) == 0);
}
return (vdev);
}
@@ -1195,7 +1208,7 @@ is_grouping(const char *type, int *mindev, int *maxdev)
* because the program is just going to exit anyway.
*/
nvlist_t *
construct_spec(int argc, char **argv)
construct_spec(nvlist_t *props, int argc, char **argv)
{
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
@@ -1284,7 +1297,7 @@ construct_spec(int argc, char **argv)
children * sizeof (nvlist_t *));
if (child == NULL)
zpool_no_memory();
if ((nv = make_leaf_vdev(argv[c], B_FALSE))
if ((nv = make_leaf_vdev(props, argv[c], B_FALSE))
== NULL)
return (NULL);
child[children - 1] = nv;
@@ -1340,7 +1353,7 @@ construct_spec(int argc, char **argv)
* We have a device. Pass off to make_leaf_vdev() to
* construct the appropriate nvlist describing the vdev.
*/
if ((nv = make_leaf_vdev(argv[0], is_log)) == NULL)
if ((nv = make_leaf_vdev(props, argv[0], is_log)) == NULL)
return (NULL);
if (is_log)
nlogs++;
@@ -1406,7 +1419,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
uint_t c, children;
if (argc > 0) {
if ((newroot = construct_spec(argc, argv)) == NULL) {
if ((newroot = construct_spec(props, argc, argv)) == NULL) {
(void) fprintf(stderr, gettext("Unable to build a "
"pool from the specified devices\n"));
return (NULL);
@@ -1456,7 +1469,7 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
* added, even if they appear in use.
*/
nvlist_t *
make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
boolean_t replacing, boolean_t dryrun, int argc, char **argv)
{
nvlist_t *newroot;
@@ -1468,7 +1481,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
* that we have a valid specification, and that all devices can be
* opened.
*/
if ((newroot = construct_spec(argc, argv)) == NULL)
if ((newroot = construct_spec(props, argc, argv)) == NULL)
return (NULL);
if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL))