mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Always wait for txg sync when umounting dataset
Currently, when unmounting a filesystem, ZFS will only wait for a txg sync if the dataset is dirty and not readonly. However, this can be problematic in cases where a dataset is remounted readonly immediately before being unmounted, which often happens when the system is being shut down. Since encrypted datasets require that all I/O is completed before the dataset is disowned, this issue causes problems when write I/Os leak into the txgs after the dataset is disowned, which can happen when sync=disabled. While looking into fixes for this issue, it was discovered that dsl_dataset_is_dirty() does not return B_TRUE when the dataset has been removed from the txg dirty datasets list, but has not actually been processed yet. Furthermore, the implementation is comletely different from dmu_objset_is_dirty(), adding to the confusion. Rather than relying on this function, this patch forces the umount code path (and the remount readonly code path) to always perform a txg sync on read-write datasets and removes the function altogether. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tom Caputi <tcaputi@datto.com> Closes #7753 Closes #7795
This commit is contained in:
		
							parent
							
								
									8c4fb36a24
								
							
						
					
					
						commit
						47ab01a18f
					
				| @ -395,7 +395,6 @@ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, | |||||||
|     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); |     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); | ||||||
| int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, | int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, | ||||||
|     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); |     uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); | ||||||
| boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds); |  | ||||||
| 
 | 
 | ||||||
| int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); | int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -794,15 +794,6 @@ dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag) | |||||||
| 	    (flags & DS_HOLD_FLAG_DECRYPT)) { | 	    (flags & DS_HOLD_FLAG_DECRYPT)) { | ||||||
| 		(void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa, | 		(void) spa_keystore_remove_mapping(ds->ds_dir->dd_pool->dp_spa, | ||||||
| 		    ds->ds_object, ds); | 		    ds->ds_object, ds); | ||||||
| 
 |  | ||||||
| 		/*
 |  | ||||||
| 		 * Encrypted datasets require that users only release their |  | ||||||
| 		 * decrypting reference after the dirty data has actually |  | ||||||
| 		 * been written out. This ensures that the mapping exists |  | ||||||
| 		 * when it is needed to write out dirty data. |  | ||||||
| 		 */ |  | ||||||
| 		ASSERT(dmu_buf_user_refcount(ds->ds_dbuf) != 0 || |  | ||||||
| 		    !dsl_dataset_is_dirty(ds)); |  | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	dmu_buf_rele(ds->ds_dbuf, tag); | 	dmu_buf_rele(ds->ds_dbuf, tag); | ||||||
| @ -1168,17 +1159,6 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| boolean_t |  | ||||||
| dsl_dataset_is_dirty(dsl_dataset_t *ds) |  | ||||||
| { |  | ||||||
| 	for (int t = 0; t < TXG_SIZE; t++) { |  | ||||||
| 		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, |  | ||||||
| 		    ds, t)) |  | ||||||
| 			return (B_TRUE); |  | ||||||
| 	} |  | ||||||
| 	return (B_FALSE); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static int | static int | ||||||
| dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) | dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) | ||||||
| { | { | ||||||
|  | |||||||
| @ -1745,10 +1745,10 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) | |||||||
| 	zfs_unregister_callbacks(zfsvfs); | 	zfs_unregister_callbacks(zfsvfs); | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Evict cached data | 	 * Evict cached data. We must write out any dirty data before | ||||||
|  | 	 * disowning the dataset. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) && | 	if (!zfs_is_readonly(zfsvfs)) | ||||||
| 	    !zfs_is_readonly(zfsvfs)) |  | ||||||
| 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); | 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); | ||||||
| 	dmu_objset_evict_dbufs(zfsvfs->z_os); | 	dmu_objset_evict_dbufs(zfsvfs->z_os); | ||||||
| 
 | 
 | ||||||
| @ -1970,6 +1970,9 @@ zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm) | |||||||
| 	if (error) | 	if (error) | ||||||
| 		return (error); | 		return (error); | ||||||
| 
 | 
 | ||||||
|  | 	if (!zfs_is_readonly(zfsvfs) && (*flags & MS_RDONLY)) | ||||||
|  | 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); | ||||||
|  | 
 | ||||||
| 	zfs_unregister_callbacks(zfsvfs); | 	zfs_unregister_callbacks(zfsvfs); | ||||||
| 	zfsvfs_vfs_free(zfsvfs->z_vfs); | 	zfsvfs_vfs_free(zfsvfs->z_vfs); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1193,10 +1193,10 @@ zvol_shutdown_zv(zvol_state_t *zv) | |||||||
| 	zv->zv_dn = NULL; | 	zv->zv_dn = NULL; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Evict cached data | 	 * Evict cached data. We must write out any dirty data before | ||||||
|  | 	 * disowning the dataset. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && | 	if (!(zv->zv_flags & ZVOL_RDONLY)) | ||||||
| 	    !(zv->zv_flags & ZVOL_RDONLY)) |  | ||||||
| 		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); | 		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); | ||||||
| 	(void) dmu_objset_evict_dbufs(zv->zv_objset); | 	(void) dmu_objset_evict_dbufs(zv->zv_objset); | ||||||
| } | } | ||||||
| @ -1489,8 +1489,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode, | |||||||
| 		invalidate_bdev(bdev); | 		invalidate_bdev(bdev); | ||||||
| 		rw_enter(&zv->zv_suspend_lock, RW_READER); | 		rw_enter(&zv->zv_suspend_lock, RW_READER); | ||||||
| 
 | 
 | ||||||
| 		if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) && | 		if (!(zv->zv_flags & ZVOL_RDONLY)) | ||||||
| 		    !(zv->zv_flags & ZVOL_RDONLY)) |  | ||||||
| 			txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); | 			txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0); | ||||||
| 
 | 
 | ||||||
| 		rw_exit(&zv->zv_suspend_lock); | 		rw_exit(&zv->zv_suspend_lock); | ||||||
|  | |||||||
| @ -36,8 +36,10 @@ | |||||||
| # 2. Verify we can (re)mount the dataset readonly/read-write | # 2. Verify we can (re)mount the dataset readonly/read-write | ||||||
| # 3. Verify we can mount the snapshot and it's mounted readonly | # 3. Verify we can mount the snapshot and it's mounted readonly | ||||||
| # 4. Verify we can't remount it read-write | # 4. Verify we can't remount it read-write | ||||||
| # 5. Re-import the pool readonly | # 5. Verify we can remount a dataset readonly and unmount it with | ||||||
| # 6. Verify we can't remount its filesystem read-write | #    encryption=on and sync=disabled (issue #7753) | ||||||
|  | # 6. Re-import the pool readonly | ||||||
|  | # 7. Verify we can't remount its filesystem read-write | ||||||
| # | # | ||||||
| 
 | 
 | ||||||
| verify_runnable "both" | verify_runnable "both" | ||||||
| @ -130,11 +132,21 @@ readonlyfs $MNTPSNAP | |||||||
| checkmount $TESTSNAP 'ro' | checkmount $TESTSNAP 'ro' | ||||||
| log_must umount $MNTPSNAP | log_must umount $MNTPSNAP | ||||||
| 
 | 
 | ||||||
| # 5. Re-import the pool readonly | # 5. Verify we can remount a dataset readonly and unmount it with | ||||||
|  | #    encryption=on and sync=disabled (issue #7753) | ||||||
|  | log_must eval "echo 'password' | zfs create -o sync=disabled \ | ||||||
|  |     -o encryption=on -o keyformat=passphrase $TESTFS/crypt" | ||||||
|  | CRYPT_MNTPFS="$(get_prop mountpoint $TESTFS/crypt)" | ||||||
|  | log_must touch $CRYPT_MNTPFS/file.dat | ||||||
|  | log_must mount -o remount,ro $TESTFS/crypt $CRYPT_MNTPFS | ||||||
|  | log_must umount -f $CRYPT_MNTPFS | ||||||
|  | zpool sync $TESTPOOL | ||||||
|  | 
 | ||||||
|  | # 6. Re-import the pool readonly | ||||||
| log_must zpool export $TESTPOOL | log_must zpool export $TESTPOOL | ||||||
| log_must zpool import -o readonly=on $TESTPOOL | log_must zpool import -o readonly=on $TESTPOOL | ||||||
| 
 | 
 | ||||||
| # 6. Verify we can't remount its filesystem read-write | # 7. Verify we can't remount its filesystem read-write | ||||||
| readonlyfs $MNTPFS | readonlyfs $MNTPFS | ||||||
| checkmount $TESTFS 'ro' | checkmount $TESTFS 'ro' | ||||||
| log_mustnot mount -o remount,rw $MNTPFS | log_mustnot mount -o remount,rw $MNTPFS | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Tom Caputi
						Tom Caputi