From dfbc86309fd8ebb70a55cafa876320dc1ea8e833 Mon Sep 17 00:00:00 2001 From: Chris Dunlop Date: Fri, 15 Jul 2016 00:44:38 +1000 Subject: [PATCH] Use native inode->i_nlink instead of znode->z_links A mostly mechanical change, taking into account that i_nlink is 32 bits wide whereas ZFS's on-disk link count is 64 bits. We revert "xattr dir doesn't get purged during iput" (ddae16a) as this is a more Linux-integrated fix for the same issue. In addition, the way the initial link count is set on a new node has changed: previously zfs_mknode() set one less than required and zfs_link_create() then incremented it to the correct count (which was somewhat bizarre in the first place); now zfs_mknode() sets the correct count and zfs_link_create() does not increment it. This means both that we no longer set the link count in sa_bulk_update() twice (once for the initial incorrect count, then again for the correct count) and that we adhere to the Linux requirement of not incrementing a zero link count without I_LINKABLE (see Linux commit f4e0c30c). 
Signed-off-by: Chris Dunlop Signed-off-by: Brian Behlendorf Signed-off-by: Chunwei Chen Closes #4838 Issue #227 --- include/sys/trace_acl.h | 6 ++--- include/sys/zfs_znode.h | 1 - module/zfs/zfs_ctldir.c | 1 - module/zfs/zfs_dir.c | 55 ++++++++++++++++++++++++++--------------- module/zfs/zfs_sa.c | 4 ++- module/zfs/zfs_vnops.c | 12 +++++---- module/zfs/zfs_znode.c | 17 ++++++++----- 7 files changed, 58 insertions(+), 38 deletions(-) diff --git a/include/sys/trace_acl.h b/include/sys/trace_acl.h index f1d8eb78c..db4334951 100644 --- a/include/sys/trace_acl.h +++ b/include/sys/trace_acl.h @@ -55,7 +55,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __field(uint_t, z_seq) __field(uint64_t, z_mapcnt) __field(uint64_t, z_size) - __field(uint64_t, z_links) __field(uint64_t, z_pflags) __field(uint64_t, z_uid) __field(uint64_t, z_gid) @@ -91,7 +90,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_seq = zn->z_seq; __entry->z_mapcnt = zn->z_mapcnt; __entry->z_size = zn->z_size; - __entry->z_links = zn->z_links; __entry->z_pflags = zn->z_pflags; __entry->z_uid = zn->z_uid; __entry->z_gid = zn->z_gid; @@ -119,7 +117,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class, ), TP_printk("zn { id %llu unlinked %u atime_dirty %u " "zn_prefetch %u moved %u blksz %u seq %u " - "mapcnt %llu size %llu links %llu pflags %llu " + "mapcnt %llu size %llu pflags %llu " "uid %llu gid %llu sync_cnt %u mode 0x%x is_sa %d " "is_mapped %d is_ctldir %d is_stale %d inode { " "ino %lu nlink %u version %llu size %lli blkbits %u " @@ -128,7 +126,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty, __entry->z_zn_prefetch, __entry->z_moved, __entry->z_blksz, __entry->z_seq, __entry->z_mapcnt, __entry->z_size, - __entry->z_links, __entry->z_pflags, __entry->z_uid, + __entry->z_pflags, __entry->z_uid, __entry->z_gid, __entry->z_sync_cnt, __entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped, __entry->z_is_ctldir, __entry->z_is_stale, __entry->i_ino, diff --git 
a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index 661628698..bf03c1530 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -187,7 +187,6 @@ typedef struct znode { uint64_t z_mapcnt; /* number of pages mapped to file */ uint64_t z_dnodesize; /* dnode size */ uint64_t z_size; /* file size (cached) */ - uint64_t z_links; /* file links (cached) */ uint64_t z_pflags; /* pflags (cached) */ uint64_t z_uid; /* uid fuid (cached) */ uint64_t z_gid; /* gid fuid (cached) */ diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c index d279d1828..5930e62d9 100644 --- a/module/zfs/zfs_ctldir.c +++ b/module/zfs/zfs_ctldir.c @@ -478,7 +478,6 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, zp->z_seq = 0; zp->z_mapcnt = 0; zp->z_size = 0; - zp->z_links = 0; zp->z_pflags = 0; zp->z_uid = 0; zp->z_gid = 0; diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c index 50fa7e248..564f71596 100644 --- a/module/zfs/zfs_dir.c +++ b/module/zfs/zfs_dir.c @@ -478,7 +478,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) zfs_sb_t *zsb = ZTOZSB(zp); ASSERT(zp->z_unlinked); - ASSERT(zp->z_links == 0); + ASSERT(ZTOI(zp)->i_nlink == 0); VERIFY3U(0, ==, zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx)); @@ -594,7 +594,7 @@ zfs_purgedir(znode_t *dzp) if (error) skipped += 1; dmu_tx_commit(tx); - set_nlink(ZTOI(xzp), xzp->z_links); + zfs_iput_async(ZTOI(xzp)); } zap_cursor_fini(&zc); @@ -612,9 +612,10 @@ zfs_rmnode(znode_t *zp) dmu_tx_t *tx; uint64_t acl_obj; uint64_t xattr_obj; + uint64_t links; int error; - ASSERT(zp->z_links == 0); + ASSERT(ZTOI(zp)->i_nlink == 0); ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0); /* @@ -694,10 +695,10 @@ zfs_rmnode(znode_t *zp) ASSERT(error == 0); mutex_enter(&xzp->z_lock); xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ - xzp->z_links = 0; /* no more links to it */ - set_nlink(ZTOI(xzp), 0); /* this will let iput purge us */ + clear_nlink(ZTOI(xzp)); /* no more links to it */ + links = 0; VERIFY(0 == 
sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb), - &xzp->z_links, sizeof (xzp->z_links), tx)); + &links, sizeof (links), tx)); mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); } @@ -736,6 +737,7 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode); sa_bulk_attr_t bulk[5]; uint64_t mtime[2], ctime[2]; + uint64_t links; int count = 0; int error; @@ -747,10 +749,16 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) mutex_exit(&zp->z_lock); return (SET_ERROR(ENOENT)); } - zp->z_links++; - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, - &zp->z_links, sizeof (zp->z_links)); - + if (!(flag & ZNEW)) { + /* + * ZNEW nodes come from zfs_mknode() where the link + * count has already been initialised + */ + inc_nlink(ZTOI(zp)); + links = ZTOI(zp)->i_nlink; + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, + &links, sizeof (links)); + } } SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &dzp->z_id, sizeof (dzp->z_id)); @@ -770,12 +778,14 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) mutex_enter(&dzp->z_lock); dzp->z_size++; - dzp->z_links += zp_is_dir; + if (zp_is_dir) + inc_nlink(ZTOI(dzp)); + links = ZTOI(dzp)->i_nlink; count = 0; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &dzp->z_size, sizeof (dzp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, - &dzp->z_links, sizeof (dzp->z_links)); + &links, sizeof (links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, sizeof (mtime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, @@ -836,6 +846,7 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, boolean_t unlinked = B_FALSE; sa_bulk_attr_t bulk[5]; uint64_t mtime[2], ctime[2]; + uint64_t links; int count = 0; int error; @@ -862,15 +873,16 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, return (error); } - if (zp->z_links <= zp_is_dir) { + if 
(ZTOI(zp)->i_nlink <= zp_is_dir) { zfs_panic_recover("zfs: link count on %lu is %u, " "should be at least %u", zp->z_id, - (int)zp->z_links, zp_is_dir + 1); - zp->z_links = zp_is_dir + 1; + (int)ZTOI(zp)->i_nlink, zp_is_dir + 1); + set_nlink(ZTOI(zp), zp_is_dir + 1); } - if (--zp->z_links == zp_is_dir) { + drop_nlink(ZTOI(zp)); + if (ZTOI(zp)->i_nlink == zp_is_dir) { zp->z_unlinked = B_TRUE; - zp->z_links = 0; + clear_nlink(ZTOI(zp)); unlinked = B_TRUE; } else { SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), @@ -880,8 +892,9 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime); } + links = ZTOI(zp)->i_nlink; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), - NULL, &zp->z_links, sizeof (zp->z_links)); + NULL, &links, sizeof (links)); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); count = 0; ASSERT(error == 0); @@ -894,9 +907,11 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, mutex_enter(&dzp->z_lock); dzp->z_size--; /* one dirent removed */ - dzp->z_links -= zp_is_dir; /* ".." link from zp */ + if (zp_is_dir) + drop_nlink(ZTOI(dzp)); /* ".." 
link from zp */ + links = ZTOI(dzp)->i_nlink; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), - NULL, &dzp->z_links, sizeof (dzp->z_links)); + NULL, &links, sizeof (links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &dzp->z_size, sizeof (dzp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index f3eac51f8..33b767808 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -277,6 +277,7 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) zfs_acl_locator_cb_t locate = { 0 }; uint64_t uid, gid, mode, rdev, xattr, parent, tmp_gen; uint64_t crtime[2], mtime[2], ctime[2], atime[2]; + uint64_t links; zfs_acl_phys_t znode_acl; char scanstamp[AV_SCANSTAMP_SZ]; boolean_t drop_lock = B_FALSE; @@ -351,8 +352,9 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx) &ctime, 16); SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16); + links = ZTOI(zp)->i_nlink; SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zsb), NULL, - &zp->z_links, 8); + &links, 8); if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode)) SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8); diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 48a72e302..bc3dd2166 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -1524,6 +1524,7 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags) uint64_t acl_obj, xattr_obj; uint64_t xattr_obj_unlinked = 0; uint64_t obj = 0; + uint64_t links; zfs_dirlock_t *dl; dmu_tx_t *tx; boolean_t may_delete_now, delete_now = FALSE; @@ -1672,12 +1673,13 @@ top: if (delete_now) { if (xattr_obj_unlinked) { - ASSERT3U(xzp->z_links, ==, 2); + ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2); mutex_enter(&xzp->z_lock); xzp->z_unlinked = 1; - xzp->z_links = 0; + clear_nlink(ZTOI(xzp)); + links = 0; error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb), - &xzp->z_links, sizeof (xzp->z_links), tx); + &links, sizeof (links), tx); ASSERT3U(error, ==, 0); 
mutex_exit(&xzp->z_lock); zfs_unlinked_add(xzp, tx); @@ -2297,9 +2299,9 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr) vap->va_fsid = ZTOI(zp)->i_sb->s_dev; vap->va_nodeid = zp->z_id; if ((zp->z_id == zsb->z_root) && zfs_show_ctldir(zp)) - links = zp->z_links + 1; + links = ZTOI(zp)->i_nlink + 1; else - links = zp->z_links; + links = ZTOI(zp)->i_nlink; vap->va_nlink = MIN(links, ZFS_LINK_MAX); vap->va_size = i_size_read(ip); vap->va_rdev = ip->i_rdev; diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 992ac7323..ac49c95a5 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -535,7 +535,6 @@ zfs_inode_update_impl(znode_t *zp, boolean_t new) spin_lock(&ip->i_lock); ip->i_uid = SUID_TO_KUID(zp->z_uid); ip->i_gid = SGID_TO_KGID(zp->z_gid); - set_nlink(ip, zp->z_links); ip->i_mode = zp->z_mode; zfs_set_inode_flags(zp, ip); ip->i_blkbits = SPA_MINBLOCKSHIFT; @@ -582,6 +581,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, uint64_t mode; uint64_t parent; uint64_t tmp_gen; + uint64_t links; sa_bulk_attr_t bulk[8]; int count = 0; @@ -616,7 +616,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &tmp_gen, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8); - SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8); + SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &links, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, @@ -635,6 +635,7 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz, zp->z_mode = mode; ip->i_generation = (uint32_t)tmp_gen; + set_nlink(ip, (uint32_t)links); ip->i_ino = obj; zfs_inode_update_new(zp); @@ -798,9 +799,10 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, if (S_ISDIR(vap->va_mode)) { size = 2; /* 
contents ("." and "..") */ - links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; + links = 2; } else { - size = links = 0; + size = 0; + links = 1; } if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) @@ -1152,6 +1154,7 @@ zfs_rezget(znode_t *zp) dmu_buf_t *db; uint64_t obj_num = zp->z_id; uint64_t mode; + uint64_t links; sa_bulk_attr_t bulk[7]; int err; int count = 0; @@ -1209,7 +1212,7 @@ zfs_rezget(znode_t *zp) SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, sizeof (zp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, - &zp->z_links, sizeof (zp->z_links)); + &links, sizeof (links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, @@ -1233,7 +1236,9 @@ zfs_rezget(znode_t *zp) return (SET_ERROR(EIO)); } - zp->z_unlinked = (zp->z_links == 0); + zp->z_unlinked = (ZTOI(zp)->i_nlink == 0); + set_nlink(ZTOI(zp), (uint32_t)links); + zp->z_blksz = doi.doi_data_block_size; zp->z_atime_dirty = 0; zfs_inode_update_new(zp);