mirror of
https://git.proxmox.com/git/mirror_zfs.git
synced 2026-05-22 10:37:35 +03:00
Allow for lock-free reading zfsdev_state_list.
Restructure the zfsdev_state_list to allow for lock-free reading by converting it to a simple singly-linked list from which items are never deleted and over which only forward iterations are performed. It depends on, among other things, the atomicity of accessing the zs_minor integer and zs_next pointer. This fixes a lock inversion in which the zfsdev_state_lock is used by both the sync task (txg_sync) and indirectly by any user program which uses /dev/zfs; the zfsdev_release method uses the same lock and then blocks on the sync task. The most typical failure scenario occurs when the sync task is cleaning up a user hold while various concurrent "zfs" commands are in progress. Neither Illumos nor Solaris is affected by this issue because they use the DDI interface, which provides lock-free reading of device state via the ddi_get_soft_state() function. Signed-off-by: Tim Chase <tim@chase2k.com> Signed-off-by: Chunwei Chen <tuxoko@gmail.com> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Closes #2301
This commit is contained in:
committed by
Brian Behlendorf
parent
1cbae971c5
commit
3937ab20f3
@@ -371,8 +371,15 @@ enum zfsdev_state_type {
|
||||
ZST_ALL,
|
||||
};
|
||||
|
||||
/*
|
||||
* The zfsdev_state_t structure is managed as a singly-linked list
|
||||
* from which items are never deleted. This allows for lock-free
|
||||
* reading of the list so long as assignments to zs_next and
|
||||
* reads from zs_minor are performed atomically. Empty items are
|
||||
* indicated by storing -1 into zs_minor.
|
||||
*/
|
||||
typedef struct zfsdev_state {
|
||||
list_node_t zs_next; /* next zfsdev_state_t link */
|
||||
struct zfsdev_state *zs_next; /* next zfsdev_state_t link */
|
||||
struct file *zs_file; /* associated file struct */
|
||||
minor_t zs_minor; /* made up minor number */
|
||||
void *zs_onexit; /* onexit data */
|
||||
|
||||
Reference in New Issue
Block a user