mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2025-01-13 19:50:25 +03:00
Fix 4K sector support
Yesterday I ran across a 3TB drive which exposed 4K sectors to Linux. While I thought I had gotten this support correct it turns out there were 2 subtle bugs which prevented it from working. sudo ./cmd/zpool/zpool create -f large-sector /dev/sda cannot create 'large-sector': one or more devices is currently unavailable 1) The first issue was that it was possible that bdev_capacity() would return the number of 512 byte sectors rather than the number of 4096 sectors. Internally, certain Linux functions only operate with 512 byte sectors so you need to be careful. To avoid any confusion in the future I've updated bdev_capacity() to simply return the device (or partition) capacity in bytes. The higher levels of ZFS want the value in bytes anyway so this is cleaner. 2) When creating a bio the ->bi_sector count must always be expressed in 512 byte sectors. The existing code would scale the byte offset by the logical sector size. Until now this was always 512 so it never caused problems. Trying a 4K sector drive clearly exposed the issue. The problem has been fixed by hard coding the 512 byte sector which is exactly what the bio code does internally. With these changes I'm now able to create ZFS pools using 4K sector drives. No issues were observed during fairly extensive testing. This is also a low risk change if your using 512b sectors devices because none of the logic changes. Closes #256
This commit is contained in:
parent
2b8cad6159
commit
f74fae8b30
@ -87,10 +87,10 @@ bdev_capacity(struct block_device *bdev)
|
|||||||
|
|
||||||
/* The partition capacity referenced by the block device */
|
/* The partition capacity referenced by the block device */
|
||||||
if (part)
|
if (part)
|
||||||
return part->nr_sects;
|
return (part->nr_sects << 9);
|
||||||
|
|
||||||
/* Otherwise assume the full device capacity */
|
/* Otherwise assume the full device capacity */
|
||||||
return get_capacity(bdev->bd_disk);
|
return (get_capacity(bdev->bd_disk) << 9);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -218,7 +218,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *ashift)
|
|||||||
v->vdev_nowritecache = B_FALSE;
|
v->vdev_nowritecache = B_FALSE;
|
||||||
|
|
||||||
/* Physical volume size in bytes */
|
/* Physical volume size in bytes */
|
||||||
*psize = bdev_capacity(bdev) * block_size;
|
*psize = bdev_capacity(bdev);
|
||||||
|
|
||||||
/* Based on the minimum sector size set the block size */
|
/* Based on the minimum sector size set the block size */
|
||||||
*ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
|
*ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
|
||||||
@ -417,7 +417,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
|
|||||||
caddr_t bio_ptr;
|
caddr_t bio_ptr;
|
||||||
uint64_t bio_offset;
|
uint64_t bio_offset;
|
||||||
int bio_size, bio_count = 16;
|
int bio_size, bio_count = 16;
|
||||||
int i = 0, error = 0, block_size;
|
int i = 0, error = 0;
|
||||||
|
|
||||||
ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size);
|
ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size);
|
||||||
|
|
||||||
@ -431,7 +431,6 @@ retry:
|
|||||||
|
|
||||||
dr->dr_zio = zio;
|
dr->dr_zio = zio;
|
||||||
dr->dr_rw = flags;
|
dr->dr_rw = flags;
|
||||||
block_size = vdev_bdev_block_size(bdev);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When the IO size exceeds the maximum bio size for the request
|
* When the IO size exceeds the maximum bio size for the request
|
||||||
@ -472,7 +471,7 @@ retry:
|
|||||||
vdev_disk_dio_get(dr);
|
vdev_disk_dio_get(dr);
|
||||||
|
|
||||||
dr->dr_bio[i]->bi_bdev = bdev;
|
dr->dr_bio[i]->bi_bdev = bdev;
|
||||||
dr->dr_bio[i]->bi_sector = bio_offset / block_size;
|
dr->dr_bio[i]->bi_sector = bio_offset >> 9;
|
||||||
dr->dr_bio[i]->bi_rw = dr->dr_rw;
|
dr->dr_bio[i]->bi_rw = dr->dr_rw;
|
||||||
dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
|
dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
|
||||||
dr->dr_bio[i]->bi_private = dr;
|
dr->dr_bio[i]->bi_private = dr;
|
||||||
@ -715,7 +714,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
|
|||||||
if (IS_ERR(bdev))
|
if (IS_ERR(bdev))
|
||||||
return -PTR_ERR(bdev);
|
return -PTR_ERR(bdev);
|
||||||
|
|
||||||
s = bdev_capacity(bdev) * vdev_bdev_block_size(bdev);
|
s = bdev_capacity(bdev);
|
||||||
if (s == 0) {
|
if (s == 0) {
|
||||||
vdev_bdev_close(bdev, vdev_bdev_mode(FREAD));
|
vdev_bdev_close(bdev, vdev_bdev_mode(FREAD));
|
||||||
return EIO;
|
return EIO;
|
||||||
|
Loading…
Reference in New Issue
Block a user