mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Fixed data integrity issue when the underlying disk returns an error
Errors in zil_lwb_write_done() are not propagated to zil_lwb_flush_vdevs_done(), which can result in zil_commit_impl() not returning an error to applications even when ZFS was not able to write data to the disk.

Remove the ZIO_FLAG_DONT_PROPAGATE flag from zio_rewrite() to allow errors to propagate, and consolidate the error handling for flush and write errors in a single location (rather than having error handling split between the "write done" and "flush done" handlers).

Reviewed-by: George Wilson <gwilson@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Signed-off-by: Arun KV <arun.kv@datacore.com>
Closes #12391
Closes #12443
This commit is contained in:
		
							parent
							
								
									695d4ae815
								
							
						
					
					
						commit
						f82f0279ed
					
				| @ -1178,6 +1178,20 @@ zil_lwb_flush_vdevs_done(zio_t *zio) | ||||
| 
 | ||||
| 		ASSERT3P(zcw->zcw_lwb, ==, lwb); | ||||
| 		zcw->zcw_lwb = NULL; | ||||
| 		/*
 | ||||
| 		 * We expect any ZIO errors from child ZIOs to have been | ||||
| 		 * propagated "up" to this specific LWB's root ZIO, in | ||||
| 		 * order for this error handling to work correctly. This | ||||
| 		 * includes ZIO errors from either this LWB's write or | ||||
| 		 * flush, as well as any errors from other dependent LWBs | ||||
| 		 * (e.g. a root LWB ZIO that might be a child of this LWB). | ||||
| 		 * | ||||
| 		 * With that said, it's important to note that LWB flush | ||||
| 		 * errors are not propagated up to the LWB root ZIO. | ||||
| 		 * This is incorrect behavior, and results in VDEV flush | ||||
| 		 * errors not being handled correctly here. See the | ||||
| 		 * comment above the call to "zio_flush" for details. | ||||
| 		 */ | ||||
| 
 | ||||
| 		zcw->zcw_zio_error = zio->io_error; | ||||
| 
 | ||||
| @ -1251,6 +1265,12 @@ zil_lwb_write_done(zio_t *zio) | ||||
| 	 * nodes. We avoid calling zio_flush() since there isn't any | ||||
| 	 * good reason for doing so, after the lwb block failed to be | ||||
| 	 * written out. | ||||
| 	 * | ||||
| 	 * Additionally, we don't perform any further error handling at | ||||
| 	 * this point (e.g. setting "zcw_zio_error" appropriately), as | ||||
| 	 * we expect that to occur in "zil_lwb_flush_vdevs_done" (thus, | ||||
| 	 * we expect any error seen here, to have been propagated to | ||||
| 	 * that function). | ||||
| 	 */ | ||||
| 	if (zio->io_error != 0) { | ||||
| 		while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) | ||||
| @ -1281,8 +1301,17 @@ zil_lwb_write_done(zio_t *zio) | ||||
| 
 | ||||
| 	while ((zv = avl_destroy_nodes(t, &cookie)) != NULL) { | ||||
| 		vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); | ||||
| 		if (vd != NULL) | ||||
| 		if (vd != NULL) { | ||||
| 			/*
 | ||||
| 			 * The "ZIO_FLAG_DONT_PROPAGATE" is currently | ||||
| 			 * always used within "zio_flush". This means, | ||||
| 			 * any errors when flushing the vdev(s), will | ||||
| 			 * (unfortunately) not be handled correctly, | ||||
| 			 * since these "zio_flush" errors will not be | ||||
| 			 * propagated up to "zil_lwb_flush_vdevs_done". | ||||
| 			 */ | ||||
| 			zio_flush(lwb->lwb_root_zio, vd); | ||||
| 		} | ||||
| 		kmem_free(zv, sizeof (*zv)); | ||||
| 	} | ||||
| } | ||||
| @ -1399,8 +1428,7 @@ zil_lwb_write_open(zilog_t *zilog, lwb_t *lwb) | ||||
| 		lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, | ||||
| 		    zilog->zl_spa, 0, &lwb->lwb_blk, lwb_abd, | ||||
| 		    BP_GET_LSIZE(&lwb->lwb_blk), zil_lwb_write_done, lwb, | ||||
| 		    prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | | ||||
| 		    ZIO_FLAG_FASTWRITE, &zb); | ||||
| 		    prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_FASTWRITE, &zb); | ||||
| 		ASSERT3P(lwb->lwb_write_zio, !=, NULL); | ||||
| 
 | ||||
| 		lwb->lwb_state = LWB_STATE_OPENED; | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Arun KV
						Arun KV