mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	OpenZFS 9329 - panic in zap_leaf_lookup() due to concurrent zapification
For the null pointer issue shown below, the solution is to initialize the
contents of the object before changing its type, so that concurrent accessors
will see it as non-zapified until it is ready for access via the ZAP.
    BAD TRAP: type=e (#pf Page fault) rp=ffffff00ff520440 addr=20 occurred
    in module "zfs" due to a NULL pointer dereference
    ffffff00ff520320 unix:die+df ()
    ffffff00ff520430 unix:trap+dc0 ()
    ffffff00ff520440 unix:cmntrap+e6 ()
    ffffff00ff520590 zfs:zap_leaf_lookup+46 ()
    ffffff00ff520640 zfs:fzap_lookup+a9 ()
    ffffff00ff5206e0 zfs:zap_lookup_norm+111 ()
    ffffff00ff520730 zfs:zap_contains+42 ()
    ffffff00ff520760 zfs:dsl_dataset_has_resume_receive_state+47 ()
    ffffff00ff520900 zfs:get_receive_resume_stats+3e ()
    ffffff00ff520a90 zfs:dsl_dataset_stats+262 ()
    ffffff00ff520ac0 zfs:dmu_objset_stats+2b ()
    ffffff00ff520b10 zfs:zfs_ioc_objset_stats_impl+64 ()
    ffffff00ff520b60 zfs:zfs_ioc_objset_stats+33 ()
    ffffff00ff520bd0 zfs:zfs_ioc_dataset_list_next+140 ()
    ffffff00ff520c80 zfs:zfsdev_ioctl+4d7 ()
    ffffff00ff520cc0 genunix:cdev_ioctl+39 ()
    ffffff00ff520d10 specfs:spec_ioctl+60 ()
    ffffff00ff520da0 genunix:fop_ioctl+55 ()
    ffffff00ff520ec0 genunix:ioctl+9b ()
    ffffff00ff520f10 unix:brand_sys_sysenter+1c9 ()
Porting Notes:
* DMU_OT_BYTESWAP conditional in zap_lockdir_impl() kept.
Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Approved by: Dan McDonald <danmcd@joyent.com>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
OpenZFS-issue: https://illumos.org/issues/9329
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/e8e0f97
Closes #7578
			
			
This commit is contained in:
		
							parent
							
								
									d2a12f9e2a
								
							
						
					
					
						commit
						1a5b96b8ee
					
				| @ -20,7 +20,7 @@ | |||||||
|  */ |  */ | ||||||
| /*
 | /*
 | ||||||
|  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | ||||||
|  * Copyright (c) 2013, 2015 by Delphix. All rights reserved. |  * Copyright (c) 2013, 2017 by Delphix. All rights reserved. | ||||||
|  * Copyright 2014 HybridCluster. All rights reserved. |  * Copyright 2014 HybridCluster. All rights reserved. | ||||||
|  */ |  */ | ||||||
| 
 | 
 | ||||||
| @ -381,12 +381,19 @@ dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type, | |||||||
| 	} | 	} | ||||||
| 	ASSERT3U(dn->dn_type, ==, old_type); | 	ASSERT3U(dn->dn_type, ==, old_type); | ||||||
| 	ASSERT0(dn->dn_maxblkid); | 	ASSERT0(dn->dn_maxblkid); | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * We must initialize the ZAP data before changing the type, | ||||||
|  | 	 * so that concurrent calls to *_is_zapified() can determine if | ||||||
|  | 	 * the object has been completely zapified by checking the type. | ||||||
|  | 	 */ | ||||||
|  | 	mzap_create_impl(mos, object, 0, 0, tx); | ||||||
|  | 
 | ||||||
| 	dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type = | 	dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type = | ||||||
| 	    DMU_OTN_ZAP_METADATA; | 	    DMU_OTN_ZAP_METADATA; | ||||||
| 	dnode_setdirty(dn, tx); | 	dnode_setdirty(dn, tx); | ||||||
| 	dnode_rele(dn, FTAG); | 	dnode_rele(dn, FTAG); | ||||||
| 
 | 
 | ||||||
| 	mzap_create_impl(mos, object, 0, 0, tx); |  | ||||||
| 
 | 
 | ||||||
| 	spa_feature_incr(dmu_objset_spa(mos), | 	spa_feature_incr(dmu_objset_spa(mos), | ||||||
| 	    SPA_FEATURE_EXTENSIBLE_DATASET, tx); | 	    SPA_FEATURE_EXTENSIBLE_DATASET, tx); | ||||||
|  | |||||||
| @ -21,7 +21,7 @@ | |||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | ||||||
|  * Copyright (c) 2011, 2016 by Delphix. All rights reserved. |  * Copyright (c) 2011, 2017 by Delphix. All rights reserved. | ||||||
|  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. |  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. | ||||||
|  * Copyright 2017 Nexenta Systems, Inc. |  * Copyright 2017 Nexenta Systems, Inc. | ||||||
|  */ |  */ | ||||||
| @ -501,6 +501,10 @@ handle_winner: | |||||||
| 	return (winner); | 	return (winner); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * This routine "consumes" the caller's hold on the dbuf, which must | ||||||
|  |  * have the specified tag. | ||||||
|  |  */ | ||||||
| static int | static int | ||||||
| zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx, | zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx, | ||||||
|     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) |     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) | ||||||
| @ -586,6 +590,14 @@ zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx, | |||||||
| 	if (err != 0) { | 	if (err != 0) { | ||||||
| 		return (err); | 		return (err); | ||||||
| 	} | 	} | ||||||
|  | #ifdef ZFS_DEBUG | ||||||
|  | 	{ | ||||||
|  | 		dmu_object_info_t doi; | ||||||
|  | 		dmu_object_info_from_db(db, &doi); | ||||||
|  | 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); | ||||||
|  | 	} | ||||||
|  | #endif | ||||||
|  | 
 | ||||||
| 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); | 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); | ||||||
| 	if (err != 0) { | 	if (err != 0) { | ||||||
| 		dmu_buf_rele(db, tag); | 		dmu_buf_rele(db, tag); | ||||||
| @ -602,6 +614,13 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, | |||||||
| 	int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); | 	int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); | ||||||
| 	if (err != 0) | 	if (err != 0) | ||||||
| 		return (err); | 		return (err); | ||||||
|  | #ifdef ZFS_DEBUG | ||||||
|  | 	{ | ||||||
|  | 		dmu_object_info_t doi; | ||||||
|  | 		dmu_object_info_from_db(db, &doi); | ||||||
|  | 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); | ||||||
|  | 	} | ||||||
|  | #endif | ||||||
| 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); | 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); | ||||||
| 	if (err != 0) | 	if (err != 0) | ||||||
| 		dmu_buf_rele(db, tag); | 		dmu_buf_rele(db, tag); | ||||||
| @ -687,28 +706,21 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, | |||||||
| 
 | 
 | ||||||
| 	VERIFY0(dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); | 	VERIFY0(dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); | ||||||
| 
 | 
 | ||||||
| #ifdef ZFS_DEBUG |  | ||||||
| 	{ |  | ||||||
| 		dmu_object_info_t doi; |  | ||||||
| 		dmu_object_info_from_db(db, &doi); |  | ||||||
| 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); |  | ||||||
| 	} |  | ||||||
| #endif |  | ||||||
| 
 |  | ||||||
| 	dmu_buf_will_dirty(db, tx); | 	dmu_buf_will_dirty(db, tx); | ||||||
| 	mzap_phys_t *zp = db->db_data; | 	mzap_phys_t *zp = db->db_data; | ||||||
| 	zp->mz_block_type = ZBT_MICRO; | 	zp->mz_block_type = ZBT_MICRO; | ||||||
| 	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; | 	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; | ||||||
| 	zp->mz_normflags = normflags; | 	zp->mz_normflags = normflags; | ||||||
| 	dmu_buf_rele(db, FTAG); |  | ||||||
| 
 | 
 | ||||||
| 	if (flags != 0) { | 	if (flags != 0) { | ||||||
| 		zap_t *zap; | 		zap_t *zap; | ||||||
| 		/* Only fat zap supports flags; upgrade immediately. */ | 		/* Only fat zap supports flags; upgrade immediately. */ | ||||||
| 		VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, | 		VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER, | ||||||
| 		    B_FALSE, B_FALSE, FTAG, &zap)); | 		    B_FALSE, B_FALSE, &zap)); | ||||||
| 		VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags)); | 		VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags)); | ||||||
| 		zap_unlockdir(zap, FTAG); | 		zap_unlockdir(zap, FTAG); | ||||||
|  | 	} else { | ||||||
|  | 		dmu_buf_rele(db, FTAG); | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -742,6 +754,7 @@ zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags, | |||||||
|     dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, |     dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, | ||||||
|     int dnodesize, dmu_tx_t *tx) |     int dnodesize, dmu_tx_t *tx) | ||||||
| { | { | ||||||
|  | 	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); | ||||||
| 	int err = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, | 	int err = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, | ||||||
| 	    dnodesize, tx); | 	    dnodesize, tx); | ||||||
| 	if (err != 0) | 	if (err != 0) | ||||||
| @ -777,6 +790,7 @@ uint64_t | |||||||
| zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot, | zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot, | ||||||
|     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) |     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) | ||||||
| { | { | ||||||
|  | 	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); | ||||||
| 	uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, | 	uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, | ||||||
| 	    dnodesize, tx); | 	    dnodesize, tx); | ||||||
| 
 | 
 | ||||||
| @ -798,6 +812,7 @@ zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags, | |||||||
|     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, |     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | ||||||
|     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) |     dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) | ||||||
| { | { | ||||||
|  | 	ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); | ||||||
| 	uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, | 	uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen, | ||||||
| 	    dnodesize, tx); | 	    dnodesize, tx); | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Matthew Ahrens
						Matthew Ahrens