OpenZFS 3821 - Race in rollback, zil close, and zil flush

Authored by: George Wilson <george.wilson@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Andriy Gapon <avg@FreeBSD.org>
Approved by: Richard Lowe <richlowe@richlowe.net>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: George Melikov <mail@gmelikov.ru>
OpenZFS-issue: https://www.illumos.org/issues/3821
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/43297f9
Closes #5905
commit 55922e73b4
parent 56a6054d55
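The bug class addressed here is a check-then-act race: dsl_pool_sync_done() used to pull each zilog off the dp_dirty_zilogs list (txg_list_remove()) before zil_clean() had run, so a concurrent zil_close() could observe zilog_is_dirty() == B_FALSE while itxs for the txg were still pending, tripping its assertion. A minimal, self-contained sketch of why the ordering matters, using hypothetical atomic flags in place of the real list and itxg state:

/*
 * Hypothetical sketch only -- the real state is dp_dirty_zilogs
 * membership and the per-txg itxg lists.  Build: cc -pthread race.c
 */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool on_dirty_list = true;	/* txg_list membership   */
static atomic_bool cleaned = false;		/* zil_clean() completed */

/* Old dsl_pool_sync_done() order: remove from the list, then clean. */
static void *
sync_thread_old(void *arg)
{
	atomic_store(&on_dirty_list, false);	/* txg_list_remove() */
	/* window: "not dirty" is visible, yet nothing is clean */
	atomic_store(&cleaned, true);		/* zil_clean()       */
	return (NULL);
}

/* zil_close() side: "off the dirty list" must imply "cleaned". */
static void *
close_thread(void *arg)
{
	while (atomic_load(&on_dirty_list))
		;				/* zilog_is_dirty() poll */
	/* Can fire with the old order; cannot once clean happens first. */
	assert(atomic_load(&cleaned));
	return (NULL);
}

int
main(void)
{
	pthread_t a, b;

	(void) pthread_create(&b, NULL, close_thread, NULL);
	(void) pthread_create(&a, NULL, sync_thread_old, NULL);
	(void) pthread_join(a, NULL);
	(void) pthread_join(b, NULL);
	return (0);
}

The assert may or may not fire on any given run; the point is that the window exists at all. The first hunk below inverts the order, which is exactly the clean-before-remove change in dsl_pool_sync_done().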
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -615,9 +615,16 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
 {
 	zilog_t *zilog;
 
-	while ((zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg))) {
+	while ((zilog = txg_list_head(&dp->dp_dirty_zilogs, txg))) {
 		dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
+		/*
+		 * We don't remove the zilog from the dp_dirty_zilogs
+		 * list until after we've cleaned it. This ensures that
+		 * callers of zilog_is_dirty() receive an accurate
+		 * answer when they are racing with the spa sync thread.
+		 */
 		zil_clean(zilog, txg);
+		(void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg);
 		ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
 		dmu_buf_rele(ds->ds_dbuf, zilog);
 	}
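The comment in this hunk is load-bearing because zilog_is_dirty() answers purely by list membership: the moment a zilog leaves dp_dirty_zilogs it reports clean, whether or not zil_clean() has run. For reference (unchanged by this commit), the predicate is built on the same txg_list_member() primitive and looks roughly like:

boolean_t
zilog_is_dirty(zilog_t *zilog)
{
	dsl_pool_t *dp = zilog->zl_dmu_pool;
	int t;

	/* Dirty iff the zilog is on the dirty list for any open txg. */
	for (t = 0; t < TXG_SIZE; t++) {
		if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t))
			return (B_TRUE);
	}
	return (B_FALSE);
}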
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -6749,8 +6749,6 @@ spa_sync(spa_t *spa, uint64_t txg)
 		spa->spa_config_syncing = NULL;
 	}
 
-	spa->spa_ubsync = spa->spa_uberblock;
-
 	dsl_pool_sync_done(dp, txg);
 
 	mutex_enter(&spa->spa_alloc_lock);
@@ -6775,6 +6773,13 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 	spa->spa_sync_pass = 0;
 
+	/*
+	 * Update the last synced uberblock here. We want to do this at
+	 * the end of spa_sync() so that consumers of spa_last_synced_txg()
+	 * will be guaranteed that all the processing associated with
+	 * that txg has been completed.
+	 */
+	spa->spa_ubsync = spa->spa_uberblock;
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 
 	spa_handle_ignored_writes(spa);
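The guarantee promised by the new comment matters because spa_last_synced_txg() is nothing more than a read of the saved uberblock; moving the spa_ubsync update to the end of spa_sync() means a returned txg now implies its ZIL cleanup has finished too. Paraphrasing the accessor (from spa_misc.c):

uint64_t
spa_last_synced_txg(spa_t *spa)
{
	/* spa_ubsync is now only updated at the very end of spa_sync(). */
	return (spa->spa_ubsync.ub_txg);
}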
--- a/module/zfs/zil.c
+++ b/module/zfs/zil.c
@@ -20,7 +20,8 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -504,6 +505,27 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
 	}
 }
 
+/*
+ * Determine if the zil is dirty in the specified txg. Callers wanting to
+ * ensure that the dirty state does not change must hold the itxg_lock for
+ * the specified txg. Holding the lock will ensure that the zil cannot be
+ * dirtied (zil_itx_assign) or cleaned (zil_clean) while we check its current
+ * state.
+ */
+boolean_t
+zilog_is_dirty_in_txg(zilog_t *zilog, uint64_t txg)
+{
+	dsl_pool_t *dp = zilog->zl_dmu_pool;
+
+	if (txg_list_member(&dp->dp_dirty_zilogs, zilog, txg & TXG_MASK))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * Determine if the zil is dirty. The zil is considered dirty if it has
+ * any pending itx records that have not been cleaned by zil_clean().
+ */
 boolean_t
 zilog_is_dirty(zilog_t *zilog)
 {
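The lock discipline spelled out in the new function's comment is the whole point: a stable answer requires holding the itxg_lock for that txg's slot. A sketch of the intended calling pattern (condensed from the zil_get_commit_list() hunk further down):

	itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];

	mutex_enter(&itxg->itxg_lock);
	/*
	 * While itxg_lock is held, zil_itx_assign() cannot dirty this
	 * txg and zil_clean() cannot clean it, so the result is stable.
	 */
	if (zilog_is_dirty_in_txg(zilog, txg)) {
		/* ... itxs for this txg are guaranteed still pending ... */
	}
	mutex_exit(&itxg->itxg_lock);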
@@ -1091,8 +1113,6 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
 		return (NULL);
 
 	ASSERT(lwb->lwb_buf != NULL);
-	ASSERT(zilog_is_dirty(zilog) ||
-	    spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
 
 	if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
 		dlen = P2ROUNDUP_TYPED(
@@ -1339,6 +1359,8 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
 			 * this itxg. Save the itxs for release below.
 			 * This should be rare.
 			 */
+			zfs_dbgmsg("zil_itx_assign: missed itx cleanup for "
+			    "txg %llu", itxg->itxg_txg);
 			atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod);
 			itxg->itxg_sod = 0;
 			clean = itxg->itxg_itxs;
@@ -1439,6 +1461,11 @@ zil_get_commit_list(zilog_t *zilog)
 	else
 		otxg = spa_last_synced_txg(zilog->zl_spa) + 1;
 
+	/*
+	 * This is inherently racy, since there is nothing to prevent
+	 * the last synced txg from changing. That's okay since we'll
+	 * only commit things in the future.
+	 */
 	for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) {
 		itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];
 
@@ -1448,6 +1475,16 @@ zil_get_commit_list(zilog_t *zilog)
 			continue;
 		}
 
+		/*
+		 * If we're adding itx records to the zl_itx_commit_list,
+		 * then the zil better be dirty in this "txg". We can assert
+		 * that here since we're holding the itxg_lock which will
+		 * prevent spa_sync from cleaning it. Once we add the itxs
+		 * to the zl_itx_commit_list we must commit it to disk even
+		 * if it's unnecessary (i.e. the txg was synced).
+		 */
+		ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
+		    spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
 		list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
 		push_sod += itxg->itxg_sod;
 		itxg->itxg_sod = 0;
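Two things are worth noting in this pair of hunks. First, the assertion deleted from zil_lwb_commit() above reappears here in a sharper form: it now asks about the specific txg whose itxs are being moved, and it asks under itxg_lock, where the answer cannot change. Second, the slot arithmetic both "inherently racy" loops rely on is small enough to work by hand (constants from the txg headers, where TXG_CONCURRENT_STATES is 3 and TXG_MASK is TXG_SIZE - 1 = 3):

/*
 * Worked example:
 *   otxg = spa_last_synced_txg() + 1 = 101
 *   txgs scanned:  101, 102, 103          (otxg .. otxg + 2)
 *   slots touched: 101 & 3 = 1, 102 & 3 = 2, 103 & 3 = 3
 *
 * If the last synced txg advances to 101 mid-loop, slot 1 can be
 * cleaned underneath us -- hence "inherently racy" -- but the
 * itxg_lock taken per slot keeps each check-and-move atomic.
 */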
@@ -1473,6 +1510,10 @@ zil_async_to_sync(zilog_t *zilog, uint64_t foid)
 	else
 		otxg = spa_last_synced_txg(zilog->zl_spa) + 1;
 
+	/*
+	 * This is inherently racy, since there is nothing to prevent
+	 * the last synced txg from changing.
+	 */
 	for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) {
 		itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];
 
@@ -1545,8 +1586,14 @@ zil_commit_writer(zilog_t *zilog)
 	for (itx = list_head(&zilog->zl_itx_commit_list); itx != NULL;
 	    itx = list_next(&zilog->zl_itx_commit_list, itx)) {
 		txg = itx->itx_lr.lrc_txg;
-		ASSERT(txg);
+		ASSERT3U(txg, !=, 0);
 
+		/*
+		 * This is inherently racy and may result in us writing
+		 * out a log block for a txg that was just synced. This is
+		 * ok since we'll end up cleaning up that log block the next
+		 * time we call zil_sync().
+		 */
 		if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa))
 			lwb = zil_lwb_commit(zilog, itx, lwb);
 	}
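The "harmless extra write" case the new comment allows plays out on a timeline like this (hypothetical walkthrough, not code from the tree):

/*
 * t0: zil_commit_writer() reads spa_last_synced_txg() == 100
 * t1: the sync thread finishes txg 101; its data is now durable
 *     in the main pool
 * t2: an itx with lrc_txg == 101 still passes "txg > 100", so an
 *     lwb is written for records that are already safe on disk
 * t3: the next zil_sync() notices the log block is unneeded and
 *     frees it -- wasted I/O, but no correctness problem
 */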
@@ -1907,8 +1954,11 @@ zil_close(zilog_t *zilog)
 	mutex_exit(&zilog->zl_lock);
 	if (txg)
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
+
+	if (zilog_is_dirty(zilog))
+		zfs_dbgmsg("zil (%p) is dirty, txg %llu", zilog, txg);
 	if (txg < spa_freeze_txg(zilog->zl_spa))
-		ASSERT(!zilog_is_dirty(zilog));
+		VERIFY(!zilog_is_dirty(zilog));
 
 	taskq_destroy(zilog->zl_clean_taskq);
 	zilog->zl_clean_taskq = NULL;
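Upgrading the ASSERT to VERIFY changes more than tone: VERIFY is checked in production builds, while ASSERT compiles away outside debug builds, so a dirty zil at close now panics everywhere instead of only on debug kernels; the zfs_dbgmsg() added just above leaves a breadcrumb for the cases the freeze-txg guard exempts. Paraphrased macro semantics (simplified sketch, not the verbatim SPL debug headers):

/* Simplified sketch of the SPL debug macros. */
#define	VERIFY(cond)							\
	((void)((cond) || spl_panic(__FILE__, __FUNCTION__, __LINE__,	\
	    "VERIFY(" #cond ") failed\n")))

#ifdef DEBUG
#define	ASSERT(cond)	VERIFY(cond)	/* debug builds: same as VERIFY */
#else
#define	ASSERT(cond)	((void)0)	/* production: compiled out */
#endif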