diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index b7fdae926..918938d62 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -315,6 +315,62 @@ out: return (error); } +static void +zfs_clear_setid_bits_if_necessary(zfsvfs_t *zfsvfs, znode_t *zp, cred_t *cr, + uint64_t *clear_setid_bits_txgp, dmu_tx_t *tx) +{ + zilog_t *zilog = zfsvfs->z_log; + const uint64_t uid = KUID_TO_SUID(ZTOUID(zp)); + + ASSERT(clear_setid_bits_txgp != NULL); + ASSERT(tx != NULL); + + /* + * Clear Set-UID/Set-GID bits on successful write if not + * privileged and at least one of the execute bits is set. + * + * It would be nice to do this after all writes have + * been done, but that would still expose the ISUID/ISGID + * to another app after the partial write is committed. + * + * Note: we don't call zfs_fuid_map_id() here because + * user 0 is not an ephemeral uid. + */ + mutex_enter(&zp->z_acl_lock); + if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 && + (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && + secpolicy_vnode_setid_retain(zp, cr, + ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { + uint64_t newmode; + + zp->z_mode &= ~(S_ISUID | S_ISGID); + newmode = zp->z_mode; + (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), + (void *)&newmode, sizeof (uint64_t), tx); + + mutex_exit(&zp->z_acl_lock); + + /* + * Make sure SUID/SGID bits will be removed when we replay the + * log. If the setid bits are keep coming back, don't log more + * than one TX_SETATTR per transaction group. + */ + if (*clear_setid_bits_txgp != dmu_tx_get_txg(tx)) { + vattr_t va; + + bzero(&va, sizeof (va)); + va.va_mask = AT_MODE; + va.va_nodeid = zp->z_id; + va.va_mode = newmode; + zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va, AT_MODE, + NULL); + *clear_setid_bits_txgp = dmu_tx_get_txg(tx); + } + } else { + mutex_exit(&zp->z_acl_lock); + } +} + /* * Write the bytes to a file. * @@ -340,6 +396,7 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) { int error = 0, error1; ssize_t start_resid = zfs_uio_resid(uio); + uint64_t clear_setid_bits_txg = 0; /* * Fasttrack empty write @@ -518,6 +575,11 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) break; } + /* + * NB: We must call zfs_clear_setid_bits_if_necessary before + * committing the transaction! + */ + /* * If rangelock_enter() over-locked we grow the blocksize * and then reduce the lock range. This will only happen @@ -559,6 +621,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_uio_fault_disable(uio, B_FALSE); #ifdef __linux__ if (error == EFAULT) { + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, + cr, &clear_setid_bits_txg, tx); dmu_tx_commit(tx); /* * Account for partial writes before @@ -581,6 +645,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) * VFS, which will handle faulting and will retry. */ if (error != 0 && error != EFAULT) { + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, + cr, &clear_setid_bits_txg, tx); dmu_tx_commit(tx); break; } @@ -605,6 +671,13 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = dmu_assign_arcbuf_by_dbuf( sa_get_db(zp->z_sa_hdl), woff, abuf, tx); if (error != 0) { + /* + * XXX This might not be necessary if + * dmu_assign_arcbuf_by_dbuf is guaranteed + * to be atomic. + */ + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, + cr, &clear_setid_bits_txg, tx); dmu_return_arcbuf(abuf); dmu_tx_commit(tx); break; @@ -630,30 +703,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) break; } - /* - * Clear Set-UID/Set-GID bits on successful write if not - * privileged and at least one of the execute bits is set. - * - * It would be nice to do this after all writes have - * been done, but that would still expose the ISUID/ISGID - * to another app after the partial write is committed. - * - * Note: we don't call zfs_fuid_map_id() here because - * user 0 is not an ephemeral uid. - */ - mutex_enter(&zp->z_acl_lock); - if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | - (S_IXUSR >> 6))) != 0 && - (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && - secpolicy_vnode_setid_retain(zp, cr, - ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) { - uint64_t newmode; - zp->z_mode &= ~(S_ISUID | S_ISGID); - newmode = zp->z_mode; - (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), - (void *)&newmode, sizeof (uint64_t), tx); - } - mutex_exit(&zp->z_acl_lock); + zfs_clear_setid_bits_if_necessary(zfsvfs, zp, cr, + &clear_setid_bits_txg, tx); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); @@ -679,8 +730,14 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) /* Avoid clobbering EFAULT. */ error = error1; + /* + * NB: During replay, the TX_SETATTR record logged by + * zfs_clear_setid_bits_if_necessary must precede any of + * the TX_WRITE records logged here. + */ zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag, NULL, NULL); + dmu_tx_commit(tx); if (error != 0) diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index d1c0b8fe9..2ebe50df8 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -881,7 +881,7 @@ tags = ['functional', 'stat'] [tests/functional/suid] tests = ['suid_write_to_suid', 'suid_write_to_sgid', 'suid_write_to_suid_sgid', - 'suid_write_to_none'] + 'suid_write_to_none', 'suid_write_zil_replay'] tags = ['functional', 'suid'] [tests/functional/threadsappend] diff --git a/tests/zfs-tests/tests/functional/suid/Makefile.am b/tests/zfs-tests/tests/functional/suid/Makefile.am index 594d2b77c..0145c1205 100644 --- a/tests/zfs-tests/tests/functional/suid/Makefile.am +++ b/tests/zfs-tests/tests/functional/suid/Makefile.am @@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \ suid_write_to_sgid.ksh \ suid_write_to_suid_sgid.ksh \ suid_write_to_none.ksh \ + suid_write_zil_replay.ksh \ cleanup.ksh \ setup.ksh diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c index 571dc553b..f3febb903 100644 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_file.c @@ -29,86 +29,16 @@ #include #include #include - -static void -test_stat_mode(mode_t extra) -{ - struct stat st; - int i, fd; - char fpath[1024]; - char *penv[] = {"TESTDIR", "TESTFILE0"}; - char buf[] = "test"; - mode_t res; - mode_t mode = 0777 | extra; - - /* - * Get the environment variable values. - */ - for (i = 0; i < sizeof (penv) / sizeof (char *); i++) { - if ((penv[i] = getenv(penv[i])) == NULL) { - fprintf(stderr, "getenv(penv[%d])\n", i); - exit(1); - } - } - - umask(0); - if (stat(penv[0], &st) == -1 && mkdir(penv[0], mode) == -1) { - perror("mkdir"); - exit(2); - } - - snprintf(fpath, sizeof (fpath), "%s/%s", penv[0], penv[1]); - unlink(fpath); - if (stat(fpath, &st) == 0) { - fprintf(stderr, "%s exists\n", fpath); - exit(3); - } - - fd = creat(fpath, mode); - if (fd == -1) { - perror("creat"); - exit(4); - } - close(fd); - - if (setuid(65534) == -1) { - perror("setuid"); - exit(5); - } - - fd = open(fpath, O_RDWR); - if (fd == -1) { - perror("open"); - exit(6); - } - - if (write(fd, buf, sizeof (buf)) == -1) { - perror("write"); - exit(7); - } - close(fd); - - if (stat(fpath, &st) == -1) { - perror("stat"); - exit(8); - } - unlink(fpath); - - /* Verify SUID/SGID are dropped */ - res = st.st_mode & (0777 | S_ISUID | S_ISGID); - if (res != (mode & 0777)) { - fprintf(stderr, "stat(2) %o\n", res); - exit(9); - } -} +#include int main(int argc, char *argv[]) { - const char *name; + const char *name, *phase; mode_t extra; + struct stat st; - if (argc < 2) { + if (argc < 3) { fprintf(stderr, "Invalid argc\n"); exit(1); } @@ -127,7 +57,77 @@ main(int argc, char *argv[]) exit(1); } - test_stat_mode(extra); + const char *testdir = getenv("TESTDIR"); + if (!testdir) { + fprintf(stderr, "getenv(TESTDIR)\n"); + exit(1); + } + + umask(0); + if (stat(testdir, &st) == -1 && mkdir(testdir, 0777) == -1) { + perror("mkdir"); + exit(2); + } + + char fpath[1024]; + snprintf(fpath, sizeof (fpath), "%s/%s", testdir, name); + + + phase = argv[2]; + if (strcmp(phase, "PRECRASH") == 0) { + + /* clean up last run */ + unlink(fpath); + if (stat(fpath, &st) == 0) { + fprintf(stderr, "%s exists\n", fpath); + exit(3); + } + + int fd; + + fd = creat(fpath, 0777 | extra); + if (fd == -1) { + perror("creat"); + exit(4); + } + close(fd); + + if (setuid(65534) == -1) { + perror("setuid"); + exit(5); + } + + fd = open(fpath, O_RDWR); + if (fd == -1) { + perror("open"); + exit(6); + } + + const char buf[] = "test"; + if (write(fd, buf, sizeof (buf)) == -1) { + perror("write"); + exit(7); + } + close(fd); + + } else if (strcmp(phase, "REPLAY") == 0) { + /* created in PRECRASH run */ + } else { + fprintf(stderr, "Invalid phase %s\n", phase); + exit(1); + } + + if (stat(fpath, &st) == -1) { + perror("stat"); + exit(8); + } + + /* Verify SUID/SGID are dropped */ + mode_t res = st.st_mode & (0777 | S_ISUID | S_ISGID); + if (res != 0777) { + fprintf(stderr, "stat(2) %o\n", res); + exit(9); + } return (0); } diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh index dd0197861..470350f96 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_none.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to regular file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "PRECRASH" log_pass "Verify write(2) to regular file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh index 49ae2bd1b..3c95a4026 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_sgid.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to SGID file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "PRECRASH" log_pass "Verify write(2) to SGID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh index 3983aad2e..4183cbeef 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to SUID file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "PRECRASH" log_pass "Verify write(2) to SUID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh index a058c7e7d..f7a08a55f 100755 --- a/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh +++ b/tests/zfs-tests/tests/functional/suid/suid_write_to_suid_sgid.ksh @@ -47,6 +47,6 @@ function cleanup log_onexit cleanup log_note "Verify write(2) to SUID/SGID file by non-owner" -log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH" log_pass "Verify write(2) to SUID/SGID file by non-owner passed" diff --git a/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh b/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh new file mode 100755 index 000000000..81f431f6b --- /dev/null +++ b/tests/zfs-tests/tests/functional/suid/suid_write_zil_replay.ksh @@ -0,0 +1,99 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +. $STF_SUITE/tests/functional/slog/slog.kshlib + +verify_runnable "global" + +function cleanup_fs +{ + cleanup +} + +log_assert "Verify ZIL replay results in correct SUID/SGID bits for unprivileged write to SUID/SGID files" +log_onexit cleanup_fs +log_must setup + +# +# 1. Create a file system (TESTFS) +# +log_must zpool destroy "$TESTPOOL" +log_must zpool create $TESTPOOL $VDEV log mirror $LDEV +log_must zfs set compression=on $TESTPOOL +log_must zfs create -o mountpoint="$TESTDIR" $TESTPOOL/$TESTFS + +# Make all the writes from suid_write_to_file.c sync +log_must zfs set sync=always "$TESTPOOL/$TESTFS" + +# +# This dd command works around an issue where ZIL records aren't created +# after freezing the pool unless a ZIL header already exists. Create a file +# synchronously to force ZFS to write one out. +# +log_must dd if=/dev/zero of=$TESTDIR/sync \ + conv=fdatasync,fsync bs=1 count=1 + +# +# 2. Freeze TESTFS +# +log_must zpool freeze $TESTPOOL + +# +# 3. Unprivileged write to a setuid file +# +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "PRECRASH" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "PRECRASH" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "PRECRASH" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "PRECRASH" + +# +# 4. Unmount filesystem and export the pool +# +# At this stage TESTFS is empty again and frozen, the intent log contains +# a complete set of deltas to replay. +# +log_must zfs unmount $TESTPOOL/$TESTFS + +log_note "List transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 5. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "NONE" "REPLAY" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID" "REPLAY" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SGID" "REPLAY" +log_must $STF_SUITE/tests/functional/suid/suid_write_to_file "SUID_SGID" "REPLAY" + +log_pass