From 92119cc259ee2f9ebde14145f549d6313f557759 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Sun, 13 Jul 2014 14:35:19 -0400 Subject: [PATCH 1/7] Mark IO pipeline with PF_FSTRANS In order to avoid deadlocking in the IO pipeline it is critical that pageout be avoided during direct memory reclaim. This ensures that the pipeline threads can always make forward progress and never end up blocking on a DMU transaction. For this very reason Linux now provides the PF_FSTRANS flag which may be set in the process context. Signed-off-by: Brian Behlendorf --- include/sys/zfs_context.h | 7 ++++++- lib/libzpool/kernel.c | 17 ++++++++++++++++ module/zfs/txg.c | 10 +--------- module/zfs/vdev_file.c | 24 +++++++++++++++++++++++ module/zfs/zio.c | 4 ++++ module/zfs/zpl_file.c | 11 +++-------- module/zfs/zvol.c | 41 +++++++++++++-------------------------- 7 files changed, 69 insertions(+), 45 deletions(-) diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index d4c6fb810..1487a99f4 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -733,6 +733,11 @@ void ksiddomain_rele(ksiddomain_t *); (void) nanosleep(&ts, NULL); \ } while (0) -#endif /* _KERNEL */ +typedef int fstrans_cookie_t; +extern fstrans_cookie_t spl_fstrans_mark(void); +extern void spl_fstrans_unmark(fstrans_cookie_t); +extern int spl_fstrans_check(void); + +#endif /* _KERNEL */ #endif /* _SYS_ZFS_CONTEXT_H */ diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index 995f61d05..db50352c5 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -1275,3 +1275,20 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) { return (0); } + +fstrans_cookie_t +spl_fstrans_mark(void) +{ + return ((fstrans_cookie_t) 0); +} + +void +spl_fstrans_unmark(fstrans_cookie_t cookie) +{ +} + +int +spl_fstrans_check(void) +{ + return (0); +} diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 4693762b8..81afeb373 100644 --- a/module/zfs/txg.c +++ 
b/module/zfs/txg.c @@ -483,15 +483,7 @@ txg_sync_thread(dsl_pool_t *dp) vdev_stat_t *vs1, *vs2; clock_t start, delta; -#ifdef _KERNEL - /* - * Annotate this process with a flag that indicates that it is - * unsafe to use KM_SLEEP during memory allocations due to the - * potential for a deadlock. KM_PUSHPAGE should be used instead. - */ - current->flags |= PF_NOFS; -#endif /* _KERNEL */ - + (void) spl_fstrans_mark(); txg_thread_enter(tx, &cpr); vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE); diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index 8059cdea4..8573a3a66 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -161,6 +161,17 @@ vdev_file_io_strategy(void *arg) zio_interrupt(zio); } +static void +vdev_file_io_fsync(void *arg) +{ + zio_t *zio = (zio_t *)arg; + vdev_file_t *vf = zio->io_vd->vdev_tsd; + + zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL); + + zio_interrupt(zio); +} + static int vdev_file_io_start(zio_t *zio) { @@ -180,6 +191,19 @@ vdev_file_io_start(zio_t *zio) if (zfs_nocacheflush) break; + /* + * We cannot safely call vfs_fsync() when PF_FSTRANS + * is set in the current context. Filesystems like + * XFS include sanity checks to verify it is not + * already set, see xfs_vm_writepage(). Therefore + * the sync must be dispatched to a different context. 
+ */ + if (spl_fstrans_check()) { + VERIFY3U(taskq_dispatch(vdev_file_taskq, + vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0); + return (ZIO_PIPELINE_STOP); + } + zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC, kcred, NULL); break; diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 9d70b3e59..7c0e6bf7e 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1361,7 +1361,11 @@ static zio_pipe_stage_t *zio_pipeline[]; void zio_execute(zio_t *zio) { + fstrans_cookie_t cookie; + + cookie = spl_fstrans_mark(); __zio_execute(zio); + spl_fstrans_unmark(cookie); } __attribute__((always_inline)) diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c index cabe9bf15..61005dcd4 100644 --- a/module/zfs/zpl_file.c +++ b/module/zfs/zpl_file.c @@ -481,19 +481,14 @@ int zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) { struct address_space *mapping = data; + fstrans_cookie_t cookie; ASSERT(PageLocked(pp)); ASSERT(!PageWriteback(pp)); - ASSERT(!(current->flags & PF_NOFS)); - /* - * Annotate this call path with a flag that indicates that it is - * unsafe to use KM_SLEEP during memory allocations due to the - * potential for a deadlock. KM_PUSHPAGE should be used instead. 
- */ - current->flags |= PF_NOFS; + cookie = spl_fstrans_mark(); (void) zfs_putpage(mapping->host, pp, wbc); - current->flags &= ~PF_NOFS; + spl_fstrans_unmark(cookie); return (0); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index fa5c7eb4e..ddaf520a2 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -577,20 +577,13 @@ zvol_write(void *arg) struct request *req = (struct request *)arg; struct request_queue *q = req->q; zvol_state_t *zv = q->queuedata; + fstrans_cookie_t cookie = spl_fstrans_mark(); uint64_t offset = blk_rq_pos(req) << 9; uint64_t size = blk_rq_bytes(req); int error = 0; dmu_tx_t *tx; rl_t *rl; - /* - * Annotate this call path with a flag that indicates that it is - * unsafe to use KM_SLEEP during memory allocations due to the - * potential for a deadlock. KM_PUSHPAGE should be used instead. - */ - ASSERT(!(current->flags & PF_NOFS)); - current->flags |= PF_NOFS; - if (req->cmd_flags & VDEV_REQ_FLUSH) zil_commit(zv->zv_zilog, ZVOL_OBJ); @@ -598,7 +591,7 @@ zvol_write(void *arg) * Some requests are just for flush and nothing else. 
*/ if (size == 0) { - blk_end_request(req, 0, size); + error = 0; goto out; } @@ -612,7 +605,6 @@ zvol_write(void *arg) if (error) { dmu_tx_abort(tx); zfs_range_unlock(rl); - blk_end_request(req, -error, size); goto out; } @@ -628,9 +620,9 @@ zvol_write(void *arg) zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zv->zv_zilog, ZVOL_OBJ); - blk_end_request(req, -error, size); out: - current->flags &= ~PF_NOFS; + blk_end_request(req, -error, size); + spl_fstrans_unmark(cookie); } #ifdef HAVE_BLK_QUEUE_DISCARD @@ -640,21 +632,14 @@ zvol_discard(void *arg) struct request *req = (struct request *)arg; struct request_queue *q = req->q; zvol_state_t *zv = q->queuedata; + fstrans_cookie_t cookie = spl_fstrans_mark(); uint64_t start = blk_rq_pos(req) << 9; uint64_t end = start + blk_rq_bytes(req); int error; rl_t *rl; - /* - * Annotate this call path with a flag that indicates that it is - * unsafe to use KM_SLEEP during memory allocations due to the - * potential for a deadlock. KM_PUSHPAGE should be used instead. 
- */ - ASSERT(!(current->flags & PF_NOFS)); - current->flags |= PF_NOFS; - if (end > zv->zv_volsize) { - blk_end_request(req, -EIO, blk_rq_bytes(req)); + error = EIO; goto out; } @@ -668,7 +653,7 @@ zvol_discard(void *arg) end = P2ALIGN(end, zv->zv_volblocksize); if (start >= end) { - blk_end_request(req, 0, blk_rq_bytes(req)); + error = 0; goto out; } @@ -681,10 +666,9 @@ zvol_discard(void *arg) */ zfs_range_unlock(rl); - - blk_end_request(req, -error, blk_rq_bytes(req)); out: - current->flags &= ~PF_NOFS; + blk_end_request(req, -error, blk_rq_bytes(req)); + spl_fstrans_unmark(cookie); } #endif /* HAVE_BLK_QUEUE_DISCARD */ @@ -700,14 +684,15 @@ zvol_read(void *arg) struct request *req = (struct request *)arg; struct request_queue *q = req->q; zvol_state_t *zv = q->queuedata; + fstrans_cookie_t cookie = spl_fstrans_mark(); uint64_t offset = blk_rq_pos(req) << 9; uint64_t size = blk_rq_bytes(req); int error; rl_t *rl; if (size == 0) { - blk_end_request(req, 0, size); - return; + error = 0; + goto out; } rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); @@ -720,7 +705,9 @@ zvol_read(void *arg) if (error == ECKSUM) error = SET_ERROR(EIO); +out: blk_end_request(req, -error, size); + spl_fstrans_unmark(cookie); } /* From 71f8548ea443718a5afb1598f1f3a27a369ccd8b Mon Sep 17 00:00:00 2001 From: Richard Yao Date: Mon, 3 Nov 2014 09:42:44 -0500 Subject: [PATCH 2/7] Use is_vmalloc_addr() in vdev_disk.c The initial port of ZFS to Linux required a way to identify virtual memory to make IO to virtual memory backed slabs work, so kmem_virt() was created. Linux 2.6.25 introduced is_vmalloc_addr(), which is logically equivalent to kmem_virt(). Support for kernels before 2.6.26 was later dropped and more recently, support for kernels before Linux 2.6.32 has been dropped. We retire kmem_virt() in favor of is_vmalloc_addr() to clean up the code. 
Signed-off-by: Brian Behlendorf --- module/zfs/vdev_disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index ec93884ed..f290fce52 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -479,7 +479,7 @@ bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size) if (size > bio_size) size = bio_size; - if (kmem_virt(bio_ptr)) + if (is_vmalloc_addr(bio_ptr)) page = vmalloc_to_page(bio_ptr); else page = virt_to_page(bio_ptr); From efcd79a883caddea4a20bfc771da31ecc6ce4ca2 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Wed, 3 Dec 2014 14:56:32 -0500 Subject: [PATCH 3/7] Retire KM_NODEBUG Calls to kmem_alloc() which passed the KM_NODEBUG flag to suppress the large allocation warning have been replaced by vmem_alloc() as appropriate. The updated vmem_alloc() call will not print a warning regardless of the size of the allocation. A careful reader will notice that not all callers have been changed to vmem_alloc(). Some have only had the KM_NODEBUG flag removed. This was possible because the default warning threshold has been increased to 32k. This is desirable because it minimizes the need for Linux-specific code changes. 
Signed-off-by: Brian Behlendorf --- include/sys/spa.h | 1 + include/sys/zfs_context.h | 2 +- module/nvpair/nvpair_alloc_spl.c | 4 ++-- module/zfs/ddt.c | 2 +- module/zfs/dmu.c | 3 +-- module/zfs/dnode.c | 3 +-- module/zfs/dsl_scan.c | 2 +- module/zfs/spa.c | 2 +- module/zfs/spa_config.c | 4 ++-- module/zfs/spa_misc.c | 2 +- module/zfs/vdev.c | 2 +- module/zfs/zfs_ioctl.c | 11 +++++------ module/zfs/zfs_vfsops.c | 2 +- module/zfs/zfs_znode.c | 2 +- module/zfs/zil.c | 5 ++--- module/zfs/zio.c | 5 ++--- module/zfs/zpl_file.c | 4 ++-- 17 files changed, 26 insertions(+), 30 deletions(-) diff --git a/include/sys/spa.h b/include/sys/spa.h index 1faf0420f..c989c0b35 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 1487a99f4..dd02e6f1b 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -391,7 +392,6 @@ extern void kstat_set_raw_ops(kstat_t *ksp, #define KM_SLEEP UMEM_NOFAIL #define KM_PUSHPAGE KM_SLEEP #define KM_NOSLEEP UMEM_DEFAULT -#define KM_NODEBUG 0x0 #define KMC_NODEBUG UMC_NODEBUG #define KMC_KMEM 0x0 #define KMC_VMEM 0x0 diff --git a/module/nvpair/nvpair_alloc_spl.c b/module/nvpair/nvpair_alloc_spl.c index a75b4a6c7..f9055b94f 100644 --- a/module/nvpair/nvpair_alloc_spl.c +++ b/module/nvpair/nvpair_alloc_spl.c @@ -30,13 +30,13 @@ static void * nv_alloc_sleep_spl(nv_alloc_t *nva, size_t size) { - return (kmem_alloc(size, KM_SLEEP | KM_NODEBUG)); + return (kmem_alloc(size, KM_SLEEP)); } static void * nv_alloc_pushpage_spl(nv_alloc_t *nva, size_t size) { - return (kmem_alloc(size, KM_PUSHPAGE | KM_NODEBUG)); + return (kmem_alloc(size, KM_PUSHPAGE)); } static void * diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 0ce8ca1a9..3ecb53909 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -834,7 +834,7 @@ 
ddt_table_alloc(spa_t *spa, enum zio_checksum c) { ddt_t *ddt; - ddt = kmem_cache_alloc(ddt_cache, KM_PUSHPAGE | KM_NODEBUG); + ddt = kmem_cache_alloc(ddt_cache, KM_PUSHPAGE); bzero(ddt, sizeof (ddt_t)); mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 98defa3bb..607da887b 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -417,8 +417,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, } nblks = 1; } - dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, - KM_PUSHPAGE | KM_NODEBUG); + dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_PUSHPAGE); zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); blkid = dbuf_whichblock(dn, offset); diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 0a87aa9b1..e5c212cc0 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -1070,8 +1070,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int i; dnode_children_t *winner; children_dnodes = kmem_alloc(sizeof (dnode_children_t) + - (epb - 1) * sizeof (dnode_handle_t), - KM_PUSHPAGE | KM_NODEBUG); + (epb - 1) * sizeof (dnode_handle_t), KM_PUSHPAGE); children_dnodes->dnc_count = epb; dnh = &children_dnodes->dnc_children[0]; for (i = 0; i < epb; i++) { diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 0e16002b3..ebb6305eb 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -262,7 +262,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) if (dp->dp_blkstats == NULL) { dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), - KM_PUSHPAGE | KM_NODEBUG); + KM_PUSHPAGE); } bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 776beda11..88641603a 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -1586,7 +1586,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) nvsize = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); - packed = kmem_alloc(nvsize, KM_PUSHPAGE | 
KM_NODEBUG); + packed = kmem_alloc(nvsize, KM_PUSHPAGE); error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, DMU_READ_PREFETCH); if (error == 0) diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index a08456d56..818a4308f 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -102,7 +102,7 @@ spa_config_load(void) if (kobj_get_filesize(file, &fsize) != 0) goto out; - buf = kmem_alloc(fsize, KM_PUSHPAGE | KM_NODEBUG); + buf = kmem_alloc(fsize, KM_PUSHPAGE); /* * Read the nvlist from the file. @@ -165,7 +165,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) */ VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); - buf = kmem_alloc(buflen, KM_PUSHPAGE | KM_NODEBUG); + buf = kmem_alloc(buflen, KM_PUSHPAGE); temp = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 2a2e72d5a..363f982fe 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -520,7 +520,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa = kmem_zalloc(sizeof (spa_t), KM_PUSHPAGE | KM_NODEBUG); + spa = kmem_zalloc(sizeof (spa_t), KM_PUSHPAGE); mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 37ce49297..18a246c33 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -856,7 +856,7 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) ASSERT(oldc <= newc); - mspp = kmem_zalloc(newc * sizeof (*mspp), KM_PUSHPAGE | KM_NODEBUG); + mspp = kmem_zalloc(newc * sizeof (*mspp), KM_PUSHPAGE); if (oldc != 0) { bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp)); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 5fb407900..7d443ecf1 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -311,7 +311,7 @@ history_str_get(zfs_cmd_t *zc) if 
(zc->zc_history == 0) return (NULL); - buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG); + buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); if (copyinstr((void *)(uintptr_t)zc->zc_history, buf, HIS_MAX_RECORD_LEN, NULL) != 0) { history_str_free(buf); @@ -1328,7 +1328,7 @@ get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) if (size == 0) return (SET_ERROR(EINVAL)); - packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); + packed = kmem_alloc(size, KM_SLEEP); if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag)) != 0) { @@ -2443,8 +2443,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { zfs_cmd_t *zc; - zc = kmem_zalloc(sizeof (zfs_cmd_t), - KM_SLEEP | KM_NODEBUG); + zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strcpy(zc->zc_name, dsname); (void) zfs_ioc_userspace_upgrade(zc); kmem_free(zc, sizeof (zfs_cmd_t)); @@ -3875,7 +3874,7 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); + zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strcpy(zc->zc_name, dataset); pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { @@ -5748,7 +5747,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL) return (-SET_ERROR(EINVAL)); - zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); + zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); if (error != 0) { diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index d67f11eca..d29fc6560 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -653,7 +653,7 @@ zfs_sb_create(const char *osname, zfs_sb_t **zsbp) int i, error; uint64_t sa_obj; - zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP | KM_NODEBUG); + 
zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP); /* * We claim to always be readonly so we can open snapshots; diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index f02c02184..b51d78037 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -1702,7 +1702,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) rootzp->z_atime_dirty = 0; rootzp->z_is_sa = USE_SA(version, os); - zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE | KM_NODEBUG); + zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE); zsb->z_os = os; zsb->z_parent = zsb; zsb->z_version = version; diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 6ee6c9868..1d8e7cb4d 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -1192,8 +1192,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize) lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); - itx = kmem_alloc(offsetof(itx_t, itx_lr) + lrsize, - KM_PUSHPAGE | KM_NODEBUG); + itx = vmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_PUSHPAGE); itx->itx_lr.lrc_txtype = txtype; itx->itx_lr.lrc_reclen = lrsize; itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */ @@ -1208,7 +1207,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize) void zil_itx_destroy(itx_t *itx) { - kmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); + vmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen); } /* diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 7c0e6bf7e..d7c8458f4 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -246,7 +246,7 @@ zio_buf_alloc(size_t size) ASSERT3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE | KM_NODEBUG)); + return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE)); } /* @@ -262,8 +262,7 @@ zio_data_buf_alloc(size_t size) ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT); - return (kmem_cache_alloc(zio_data_buf_cache[c], - KM_PUSHPAGE | KM_NODEBUG)); + return (kmem_cache_alloc(zio_data_buf_cache[c], 
KM_PUSHPAGE)); } void diff --git a/module/zfs/zpl_file.c b/module/zfs/zpl_file.c index 61005dcd4..1f4f219ed 100644 --- a/module/zfs/zpl_file.c +++ b/module/zfs/zpl_file.c @@ -248,7 +248,7 @@ zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp, size_t count = kiocb->ki_nbytes; ssize_t read; size_t alloc_size = sizeof (struct iovec) * nr_segs; - struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP | KM_NODEBUG); + struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP); bcopy(iovp, iov_tmp, alloc_size); ASSERT(iovp); @@ -325,7 +325,7 @@ zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp, size_t count = kiocb->ki_nbytes; ssize_t wrote; size_t alloc_size = sizeof (struct iovec) * nr_segs; - struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP | KM_NODEBUG); + struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP); bcopy(iovp, iov_tmp, alloc_size); ASSERT(iovp); From 79c76d5b65b19a602d4c7a340da7bf90d4a0c4f8 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Thu, 20 Nov 2014 19:09:39 -0500 Subject: [PATCH 4/7] Change KM_PUSHPAGE -> KM_SLEEP By marking DMU transaction processing contexts with PF_FSTRANS we can revert the earlier KM_SLEEP -> KM_PUSHPAGE changes. This brings us back in line with upstream. In some cases this means simply swapping the flags back. For others fnvlist_alloc() was replaced by nvlist_alloc(..., KM_PUSHPAGE) and must be reverted back to fnvlist_alloc() which assumes KM_SLEEP. The one place KM_PUSHPAGE is kept is when allocating ARC buffers which allows us to dip into reserved memory. This is again the same as upstream. 
Signed-off-by: Brian Behlendorf --- include/sys/dbuf.h | 12 +++--- include/sys/dsl_dataset.h | 2 +- include/sys/dsl_dir.h | 2 +- include/sys/spa.h | 12 +++--- include/sys/zfs_context.h | 1 - module/zcommon/zprop_common.c | 2 +- module/zfs/arc.c | 16 ++++---- module/zfs/bplist.c | 2 +- module/zfs/bptree.c | 2 +- module/zfs/dbuf.c | 10 ++--- module/zfs/ddt.c | 8 ++-- module/zfs/ddt_zap.c | 2 +- module/zfs/dmu.c | 12 +++--- module/zfs/dmu_objset.c | 6 +-- module/zfs/dmu_traverse.c | 8 ++-- module/zfs/dmu_tx.c | 6 +-- module/zfs/dmu_zfetch.c | 2 +- module/zfs/dnode.c | 4 +- module/zfs/dsl_dataset.c | 8 ++-- module/zfs/dsl_deadlist.c | 4 +- module/zfs/dsl_destroy.c | 13 +++--- module/zfs/dsl_dir.c | 10 ++--- module/zfs/dsl_pool.c | 2 +- module/zfs/dsl_prop.c | 8 ++-- module/zfs/dsl_scan.c | 12 +++--- module/zfs/dsl_userhold.c | 20 +++++---- module/zfs/fm.c | 6 +-- module/zfs/lz4.c | 2 +- module/zfs/lzjb.c | 2 +- module/zfs/metaslab.c | 12 +++--- module/zfs/range_tree.c | 6 +-- module/zfs/refcount.c | 4 +- module/zfs/rrwlock.c | 2 +- module/zfs/sa.c | 26 ++++++------ module/zfs/spa.c | 76 +++++++++++++++++------------------ module/zfs/spa_config.c | 24 +++++------ module/zfs/spa_history.c | 20 ++++----- module/zfs/spa_misc.c | 14 +++---- module/zfs/spa_stats.c | 4 +- module/zfs/space_map.c | 2 +- module/zfs/space_reftree.c | 2 +- module/zfs/txg.c | 6 +-- module/zfs/unique.c | 2 +- module/zfs/vdev.c | 10 ++--- module/zfs/vdev_cache.c | 4 +- module/zfs/vdev_disk.c | 6 +-- module/zfs/vdev_file.c | 4 +- module/zfs/vdev_label.c | 16 ++++---- module/zfs/vdev_mirror.c | 4 +- module/zfs/vdev_raidz.c | 6 +-- module/zfs/zap.c | 8 ++-- module/zfs/zap_leaf.c | 4 +- module/zfs/zap_micro.c | 8 ++-- module/zfs/zfs_acl.c | 6 +-- module/zfs/zfs_debug.c | 2 +- module/zfs/zfs_fm.c | 10 ++--- module/zfs/zfs_ioctl.c | 4 +- module/zfs/zfs_onexit.c | 2 +- module/zfs/zfs_rlock.c | 8 ++-- module/zfs/zfs_vnops.c | 4 +- module/zfs/zfs_znode.c | 12 +++--- module/zfs/zil.c | 16 ++++---- 
module/zfs/zio.c | 8 ++-- module/zfs/zpl_xattr.c | 2 +- module/zfs/zvol.c | 10 ++--- 65 files changed, 269 insertions(+), 281 deletions(-) diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index af509ca38..1eabfd7da 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -342,13 +342,13 @@ boolean_t dbuf_is_metadata(dmu_buf_impl_t *db); } \ _NOTE(CONSTCOND) } while (0) -#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \ - if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ - char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \ +#define dprintf_dbuf_bp(db, bp, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \ snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp); \ - dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \ - kmem_free(__blkbuf, BP_SPRINTF_LEN); \ - } \ + dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf); \ + kmem_free(__blkbuf, BP_SPRINTF_LEN); \ + } \ _NOTE(CONSTCOND) } while (0) #define DBUF_VERIFY(db) dbuf_verify(db) diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 4979ae615..4ef70adc2 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -285,7 +285,7 @@ int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result); #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ - char *__ds_name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); \ + char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \ dsl_dataset_name(ds, __ds_name); \ dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \ kmem_free(__ds_name, MAXNAMELEN); \ diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h index 3aa775232..a0a3ef1de 100644 --- a/include/sys/dsl_dir.h +++ b/include/sys/dsl_dir.h @@ -150,7 +150,7 @@ void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, #define dprintf_dd(dd, fmt, ...) 
do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \ - KM_PUSHPAGE); \ + KM_SLEEP); \ dsl_dir_name(dd, __ds_name); \ dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \ kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \ diff --git a/include/sys/spa.h b/include/sys/spa.h index c989c0b35..2f73793fe 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -877,12 +877,12 @@ extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t); extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name); #ifdef ZFS_DEBUG -#define dprintf_bp(bp, fmt, ...) do { \ - if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ - char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \ - snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \ - dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \ - kmem_free(__blkbuf, BP_SPRINTF_LEN); \ +#define dprintf_bp(bp, fmt, ...) do { \ + if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ + char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP); \ + snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, (bp)); \ + dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \ + kmem_free(__blkbuf, BP_SPRINTF_LEN); \ } \ _NOTE(CONSTCOND) } while (0) #else diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index dd02e6f1b..2fde3b5fc 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -447,7 +447,6 @@ typedef struct taskq_ent { #define TQ_SLEEP KM_SLEEP /* Can block for memory */ #define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ -#define TQ_PUSHPAGE KM_PUSHPAGE /* Cannot perform I/O */ #define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ #define TQ_FRONT 0x08 /* Queue in front */ diff --git a/module/zcommon/zprop_common.c b/module/zcommon/zprop_common.c index 035f3378d..b32c22657 100644 --- a/module/zcommon/zprop_common.c +++ b/module/zcommon/zprop_common.c @@ -175,7 +175,7 @@ zprop_iter_common(zprop_func func, void *cb, boolean_t show_all, size = num_props * 
sizeof (zprop_desc_t *); #if defined(_KERNEL) - order = kmem_alloc(size, KM_PUSHPAGE); + order = kmem_alloc(size, KM_SLEEP); #else if ((order = malloc(size)) == NULL) return (ZPROP_CONT); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index 48b8942d2..a0b74a473 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -978,7 +978,7 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force) return; } buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), - KM_PUSHPAGE); + KM_SLEEP); fletcher_2_native(buf->b_data, buf->b_hdr->b_size, buf->b_hdr->b_freeze_cksum); mutex_exit(&buf->b_hdr->b_freeze_lock); @@ -1477,7 +1477,7 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t)) if (HDR_L2_WRITING(hdr)) { l2arc_data_free_t *df; - df = kmem_alloc(sizeof (l2arc_data_free_t), KM_PUSHPAGE); + df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP); df->l2df_data = buf->b_data; df->l2df_size = hdr->b_size; df->l2df_func = free_func; @@ -3142,7 +3142,7 @@ top: arc_callback_t *acb = NULL; acb = kmem_zalloc(sizeof (arc_callback_t), - KM_PUSHPAGE); + KM_SLEEP); acb->acb_done = done; acb->acb_private = private; if (pio != NULL) @@ -3284,7 +3284,7 @@ top: ASSERT(!GHOST_STATE(hdr->b_state)); - acb = kmem_zalloc(sizeof (arc_callback_t), KM_PUSHPAGE); + acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); acb->acb_done = done; acb->acb_private = private; @@ -3341,7 +3341,7 @@ top: atomic_inc_32(&hdr->b_l2hdr->b_hits); cb = kmem_zalloc(sizeof (l2arc_read_callback_t), - KM_PUSHPAGE); + KM_SLEEP); cb->l2rcb_buf = buf; cb->l2rcb_spa = spa; cb->l2rcb_bp = *bp; @@ -3871,7 +3871,7 @@ arc_write(zio_t *pio, spa_t *spa, uint64_t txg, hdr->b_flags |= ARC_L2CACHE; if (l2arc_compress) hdr->b_flags |= ARC_L2COMPRESS; - callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_PUSHPAGE); + callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_physdone = physdone; callback->awcb_done = done; @@ -4979,7 +4979,7 @@ 
l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, list_insert_head(dev->l2ad_buflist, head); cb = kmem_alloc(sizeof (l2arc_write_callback_t), - KM_PUSHPAGE); + KM_SLEEP); cb->l2wcb_dev = dev; cb->l2wcb_head = head; pio = zio_root(spa, l2arc_write_done, cb, @@ -4989,7 +4989,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, /* * Create and add a new L2ARC header. */ - l2hdr = kmem_cache_alloc(l2arc_hdr_cache, KM_PUSHPAGE); + l2hdr = kmem_cache_alloc(l2arc_hdr_cache, KM_SLEEP); l2hdr->b_dev = dev; l2hdr->b_daddr = 0; arc_space_consume(L2HDR_SIZE, ARC_SPACE_L2HDRS); diff --git a/module/zfs/bplist.c b/module/zfs/bplist.c index c3927e74a..c81151e08 100644 --- a/module/zfs/bplist.c +++ b/module/zfs/bplist.c @@ -45,7 +45,7 @@ bplist_destroy(bplist_t *bpl) void bplist_append(bplist_t *bpl, const blkptr_t *bp) { - bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_PUSHPAGE); + bplist_entry_t *bpe = kmem_alloc(sizeof (*bpe), KM_SLEEP); mutex_enter(&bpl->bpl_lock); bpe->bpe_blk = *bp; diff --git a/module/zfs/bptree.c b/module/zfs/bptree.c index 0eafa4d7d..d6ea9d7c6 100644 --- a/module/zfs/bptree.c +++ b/module/zfs/bptree.c @@ -134,7 +134,7 @@ bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); bt = db->db_data; - bte = kmem_zalloc(sizeof (*bte), KM_PUSHPAGE); + bte = kmem_zalloc(sizeof (*bte), KM_SLEEP); bte->be_birth_txg = birth_txg; bte->be_bp = *bp; dmu_write(os, obj, bt->bt_end * sizeof (*bte), sizeof (*bte), bte, tx); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 3484527da..9be69b5ae 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -315,7 +315,7 @@ retry: * Large allocations which do not require contiguous pages * should be using vmem_alloc() in the linux kernel */ - h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_PUSHPAGE); + h->hash_table = vmem_zalloc(hsize * sizeof (void *), KM_SLEEP); #else h->hash_table = 
kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); #endif @@ -1121,7 +1121,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); ASSERT(dn->dn_dirtyctx_firstset == NULL); - dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_PUSHPAGE); + dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); } mutex_exit(&dn->dn_mtx); @@ -1198,7 +1198,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * to make a copy of it so that the changes we make in this * transaction group won't leak out when we sync the older txg. */ - dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_PUSHPAGE); + dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); list_link_init(&dr->dr_dirty_node); if (db->db_level == 0) { void *data_old = db->db_buf; @@ -1764,7 +1764,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); ASSERT(dn->dn_type != DMU_OT_NONE); - db = kmem_cache_alloc(dbuf_cache, KM_PUSHPAGE); + db = kmem_cache_alloc(dbuf_cache, KM_SLEEP); db->db_objset = os; db->db.db_object = dn->dn_object; @@ -2059,7 +2059,7 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, int error; dh = kmem_zalloc(sizeof (struct dbuf_hold_impl_data) * - DBUF_HOLD_IMPL_MAX_DEPTH, KM_PUSHPAGE); + DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP); __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0); error = __dbuf_hold_impl(dh); diff --git a/module/zfs/ddt.c b/module/zfs/ddt.c index 3ecb53909..18557ffb5 100644 --- a/module/zfs/ddt.c +++ b/module/zfs/ddt.c @@ -517,7 +517,7 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total) { ddt_histogram_t *ddh_total; - ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE); + ddh_total = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddt_get_dedup_histogram(spa, ddh_total); ddt_histogram_stat(dds_total, ddh_total); kmem_free(ddh_total, sizeof (ddt_histogram_t)); @@ -685,7 +685,7 @@ ddt_alloc(const ddt_key_t *ddk) { ddt_entry_t 
*dde; - dde = kmem_cache_alloc(ddt_entry_cache, KM_PUSHPAGE); + dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP); bzero(dde, sizeof (ddt_entry_t)); cv_init(&dde->dde_cv, NULL, CV_DEFAULT, NULL); @@ -834,7 +834,7 @@ ddt_table_alloc(spa_t *spa, enum zio_checksum c) { ddt_t *ddt; - ddt = kmem_cache_alloc(ddt_cache, KM_PUSHPAGE); + ddt = kmem_cache_alloc(ddt_cache, KM_SLEEP); bzero(ddt, sizeof (ddt_t)); mutex_init(&ddt->ddt_lock, NULL, MUTEX_DEFAULT, NULL); @@ -937,7 +937,7 @@ ddt_class_contains(spa_t *spa, enum ddt_class max_class, const blkptr_t *bp) return (B_TRUE); ddt = spa->spa_ddt[BP_GET_CHECKSUM(bp)]; - dde = kmem_cache_alloc(ddt_entry_cache, KM_PUSHPAGE); + dde = kmem_cache_alloc(ddt_entry_cache, KM_SLEEP); ddt_key_fill(&(dde->dde_key), bp); diff --git a/module/zfs/ddt_zap.c b/module/zfs/ddt_zap.c index a21ed4542..fc9cfec04 100644 --- a/module/zfs/ddt_zap.c +++ b/module/zfs/ddt_zap.c @@ -62,7 +62,7 @@ ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde) uint64_t one, csize; int error; - cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_PUSHPAGE); + cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_SLEEP); error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key, DDT_KEY_WORDS, &one, &csize); diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 607da887b..a158738a5 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -417,7 +417,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, } nblks = 1; } - dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_PUSHPAGE); + dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); blkid = dbuf_whichblock(dn, offset); @@ -924,11 +924,11 @@ dmu_xuio_init(xuio_t *xuio, int nblk) uio_t *uio = &xuio->xu_uio; uio->uio_iovcnt = nblk; - uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_PUSHPAGE); + uio->uio_iov = kmem_zalloc(nblk * sizeof (iovec_t), KM_SLEEP); - priv = kmem_zalloc(sizeof (dmu_xuio_t), 
KM_PUSHPAGE); + priv = kmem_zalloc(sizeof (dmu_xuio_t), KM_SLEEP); priv->cnt = nblk; - priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_PUSHPAGE); + priv->bufs = kmem_zalloc(nblk * sizeof (arc_buf_t *), KM_SLEEP); priv->iovp = uio->uio_iov; XUIO_XUZC_PRIV(xuio) = priv; @@ -1530,7 +1530,7 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd, return (SET_ERROR(EIO)); } - dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_PUSHPAGE); + dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); dsa->dsa_dr = NULL; dsa->dsa_done = done; dsa->dsa_zgd = zgd; @@ -1670,7 +1670,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd) dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; mutex_exit(&db->db_mtx); - dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_PUSHPAGE); + dsa = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); dsa->dsa_dr = dr; dsa->dsa_done = done; dsa->dsa_zgd = zgd; diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c index 7876d7590..f438ca62a 100644 --- a/module/zfs/dmu_objset.c +++ b/module/zfs/dmu_objset.c @@ -279,7 +279,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); - os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE); + os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; @@ -1573,7 +1573,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj, } thisobj = dd->dd_phys->dd_head_dataset_obj; - attr = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* * Iterate over all children. @@ -1685,7 +1685,7 @@ dmu_objset_find_impl(spa_t *spa, const char *name, } thisobj = dd->dd_phys->dd_head_dataset_obj; - attr = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* * Iterate over all children. 
diff --git a/module/zfs/dmu_traverse.c b/module/zfs/dmu_traverse.c index 42dde7903..b090bc2fc 100644 --- a/module/zfs/dmu_traverse.c +++ b/module/zfs/dmu_traverse.c @@ -288,7 +288,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, if (err != 0) goto post; - czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_PUSHPAGE); + czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP); for (i = 0; i < epb; i++) { SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object, @@ -517,9 +517,9 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, */ ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA)); - td = kmem_alloc(sizeof (traverse_data_t), KM_PUSHPAGE); - pd = kmem_zalloc(sizeof (prefetch_data_t), KM_PUSHPAGE); - czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_PUSHPAGE); + td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP); + pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP); + czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP); td->td_spa = spa; td->td_objset = objset; diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index ae39aebaa..890aecc1d 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -62,7 +62,7 @@ static kstat_t *dmu_tx_ksp; dmu_tx_t * dmu_tx_create_dd(dsl_dir_t *dd) { - dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_PUSHPAGE); + dmu_tx_t *tx = kmem_zalloc(sizeof (dmu_tx_t), KM_SLEEP); tx->tx_dir = dd; if (dd != NULL) tx->tx_pool = dd->dd_pool; @@ -141,7 +141,7 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object, } } - txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_PUSHPAGE); + txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP); txh->txh_tx = tx; txh->txh_dnode = dn; #ifdef DEBUG_DMU_TX @@ -1467,7 +1467,7 @@ dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data) { dmu_tx_callback_t *dcb; - dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_PUSHPAGE); + dcb = kmem_alloc(sizeof (dmu_tx_callback_t), KM_SLEEP); dcb->dcb_func = func; dcb->dcb_data = data; diff --git 
a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c index 9bc919184..8ff2f0509 100644 --- a/module/zfs/dmu_zfetch.c +++ b/module/zfs/dmu_zfetch.c @@ -706,7 +706,7 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) return; } newstream = - kmem_zalloc(sizeof (zstream_t), KM_PUSHPAGE); + kmem_zalloc(sizeof (zstream_t), KM_SLEEP); } newstream->zst_offset = zst.zst_offset; diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index e5c212cc0..dc082ff3e 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -366,7 +366,7 @@ static dnode_t * dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db, uint64_t object, dnode_handle_t *dnh) { - dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_PUSHPAGE); + dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP); ASSERT(!POINTER_IS_VALID(dn->dn_objset)); dn->dn_moved = 0; @@ -1070,7 +1070,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int i; dnode_children_t *winner; children_dnodes = kmem_alloc(sizeof (dnode_children_t) + - (epb - 1) * sizeof (dnode_handle_t), KM_PUSHPAGE); + (epb - 1) * sizeof (dnode_handle_t), KM_SLEEP); children_dnodes->dnc_count = epb; dnh = &children_dnodes->dnc_children[0]; for (i = 0; i < epb; i++) { diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 3d5551cc4..79cb6a3a2 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -365,7 +365,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, if (ds == NULL) { dsl_dataset_t *winner = NULL; - ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_PUSHPAGE); + ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); ds->ds_dbuf = dbuf; ds->ds_object = dsobj; ds->ds_phys = dbuf->db_data; @@ -1772,9 +1772,9 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) } /* must not have any bookmarks after the most recent snapshot */ - VERIFY0(nvlist_alloc(&proprequest, NV_UNIQUE_NAME, KM_PUSHPAGE)); + proprequest = fnvlist_alloc(); fnvlist_add_boolean(proprequest, 
zfs_prop_to_name(ZFS_PROP_CREATETXG)); - VERIFY0(nvlist_alloc(&bookmarks, NV_UNIQUE_NAME, KM_PUSHPAGE)); + bookmarks = fnvlist_alloc(); error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); fnvlist_free(proprequest); if (error != 0) @@ -2262,7 +2262,7 @@ snaplist_make(dsl_pool_t *dp, if (first_obj == 0) first_obj = ds->ds_dir->dd_phys->dd_origin_obj; - snap = kmem_alloc(sizeof (*snap), KM_PUSHPAGE); + snap = kmem_alloc(sizeof (*snap), KM_SLEEP); snap->ds = ds; list_insert_tail(l, snap); obj = ds->ds_phys->ds_prev_snap_obj; diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c index 909b5f8fc..8a4362ff9 100644 --- a/module/zfs/dsl_deadlist.c +++ b/module/zfs/dsl_deadlist.c @@ -82,7 +82,7 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl) zap_cursor_advance(&zc)) { dsl_deadlist_entry_t *dle; - dle = kmem_alloc(sizeof (*dle), KM_PUSHPAGE); + dle = kmem_alloc(sizeof (*dle), KM_SLEEP); dle->dle_mintxg = strtonum(za.za_name, NULL); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, za.za_first_integer)); @@ -254,7 +254,7 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx) dsl_deadlist_load_tree(dl); - dle = kmem_alloc(sizeof (*dle), KM_PUSHPAGE); + dle = kmem_alloc(sizeof (*dle), KM_SLEEP); dle->dle_mintxg = mintxg; obj = bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx); VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj)); diff --git a/module/zfs/dsl_destroy.c b/module/zfs/dsl_destroy.c index 50a9c5f31..9765ba155 100644 --- a/module/zfs/dsl_destroy.c +++ b/module/zfs/dsl_destroy.c @@ -214,8 +214,8 @@ dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) if (ds->ds_dir->dd_phys->dd_clones == 0) return; - zc = kmem_alloc(sizeof (zap_cursor_t), KM_PUSHPAGE); - za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(zc, mos, ds->ds_dir->dd_phys->dd_clones); 
zap_cursor_retrieve(zc, za) == 0; @@ -504,7 +504,7 @@ dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, dsda.dsda_snaps = snaps; VERIFY0(nvlist_alloc(&dsda.dsda_successful_snaps, - NV_UNIQUE_NAME, KM_PUSHPAGE)); + NV_UNIQUE_NAME, KM_SLEEP)); dsda.dsda_defer = defer; dsda.dsda_errlist = errlist; @@ -520,11 +520,8 @@ int dsl_destroy_snapshot(const char *name, boolean_t defer) { int error; - nvlist_t *nvl; - nvlist_t *errlist; - - VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); - VERIFY0(nvlist_alloc(&errlist, NV_UNIQUE_NAME, KM_PUSHPAGE)); + nvlist_t *nvl = fnvlist_alloc(); + nvlist_t *errlist = fnvlist_alloc(); fnvlist_add_boolean(nvl, name); error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index 4cbe04e2d..b94b68e15 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -97,7 +97,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, if (dd == NULL) { dsl_dir_t *winner; - dd = kmem_zalloc(sizeof (dsl_dir_t), KM_PUSHPAGE); + dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; @@ -313,7 +313,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, dsl_dir_t *dd; uint64_t ddobj; - buf = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); + buf = kmem_alloc(MAXNAMELEN, KM_SLEEP); err = getcomponent(name, buf, &next); if (err != 0) goto error; @@ -696,7 +696,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, asize - ref_rsrv); mutex_exit(&dd->dd_lock); - tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE); + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_ds = dd; tr->tr_size = asize; list_insert_tail(tr_list, tr); @@ -730,7 +730,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, return (0); } - tr_list = kmem_alloc(sizeof (list_t), KM_PUSHPAGE); + tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); list_create(tr_list, sizeof (struct tempreserve), 
offsetof(struct tempreserve, tr_node)); ASSERT3S(asize, >, 0); @@ -740,7 +740,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, if (err == 0) { struct tempreserve *tr; - tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE); + tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_size = lsize; list_insert_tail(tr_list, tr); } else { diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c index 100d04532..b54c03bc3 100644 --- a/module/zfs/dsl_pool.c +++ b/module/zfs/dsl_pool.c @@ -323,7 +323,7 @@ dsl_pool_close(dsl_pool_t *dp) mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_iput_taskq); if (dp->dp_blkstats) - kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); + vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); kmem_free(dp, sizeof (dsl_pool_t)); } diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index ded0da991..d71247326 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -241,9 +241,9 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname, if (err != 0) return (err); - cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_PUSHPAGE); + cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP); cbr->cbr_ds = ds; - cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_PUSHPAGE); + cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP); (void) strcpy((char *)cbr->cbr_propname, propname); cbr->cbr_func = callback; cbr->cbr_arg = cbarg; @@ -513,7 +513,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, } mutex_exit(&dd->dd_lock); - za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, mos, dd->dd_phys->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, za) == 0; @@ -668,7 +668,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, if (source == ZPROP_SRC_LOCAL) { valstr = value; } else { - tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_PUSHPAGE); + tbuf = kmem_alloc(ZAP_MAXVALUELEN, 
KM_SLEEP); if (dsl_prop_get_ds(ds, propname, 1, ZAP_MAXVALUELEN, tbuf, NULL) == 0) valstr = tbuf; diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index ebb6305eb..8b166bcc6 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -261,8 +261,8 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) /* back to the generic stuff */ if (dp->dp_blkstats == NULL) { - dp->dp_blkstats = kmem_alloc(sizeof (zfs_all_blkstats_t), - KM_PUSHPAGE); + dp->dp_blkstats = + vmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP); } bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); @@ -762,7 +762,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, dsl_pool_t *dp = scn->scn_dp; blkptr_t *bp_toread; - bp_toread = kmem_alloc(sizeof (blkptr_t), KM_PUSHPAGE); + bp_toread = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); *bp_toread = *bp; /* ASSERT(pbuf == NULL || arc_released(pbuf)); */ @@ -1059,7 +1059,7 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx) dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_scan_visit_rootbp(scn, ds, &ds->ds_phys->ds_bp, tx); - dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_PUSHPAGE); + dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP); dsl_dataset_name(ds, dsname); zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; " "pausing=%u", @@ -1325,8 +1325,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) * bookmark so we don't think that we're still trying to resume. 
*/ bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t)); - zc = kmem_alloc(sizeof (zap_cursor_t), KM_PUSHPAGE); - za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* keep pulling things out of the zap-object-as-queue */ while (zap_cursor_init(zc, dp->dp_meta_objset, diff --git a/module/zfs/dsl_userhold.c b/module/zfs/dsl_userhold.c index 93fd5d9e1..1d6c9df89 100644 --- a/module/zfs/dsl_userhold.c +++ b/module/zfs/dsl_userhold.c @@ -166,8 +166,7 @@ dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds, (u_longlong_t)ds->ds_object); if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) { - VERIFY0(nvlist_alloc(&tags, NV_UNIQUE_NAME, - KM_PUSHPAGE)); + tags = fnvlist_alloc(); fnvlist_add_boolean(tags, htag); fnvlist_add_nvlist(tmpholds, name, tags); fnvlist_free(tags); @@ -226,7 +225,7 @@ dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor) } ASSERT(spa != NULL); - ca = kmem_alloc(sizeof (*ca), KM_PUSHPAGE); + ca = kmem_alloc(sizeof (*ca), KM_SLEEP); (void) strlcpy(ca->zhca_spaname, spa_name(spa), sizeof (ca->zhca_spaname)); @@ -243,7 +242,7 @@ dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag, nvlist_t *tmpholds; if (minor != 0) - VERIFY0(nvlist_alloc(&tmpholds, NV_UNIQUE_NAME, KM_PUSHPAGE)); + tmpholds = fnvlist_alloc(); else tmpholds = NULL; dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx); @@ -260,7 +259,7 @@ dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx) uint64_t now = gethrestime_sec(); if (dduha->dduha_minor != 0) - VERIFY0(nvlist_alloc(&tmpholds, NV_UNIQUE_NAME, KM_PUSHPAGE)); + tmpholds = fnvlist_alloc(); else tmpholds = NULL; for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL); @@ -315,8 +314,7 @@ dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist) return (0); dduha.dduha_holds = holds; - 
VERIFY0(nvlist_alloc(&dduha.dduha_chkholds, NV_UNIQUE_NAME, - KM_PUSHPAGE)); + dduha.dduha_chkholds = fnvlist_alloc(); dduha.dduha_errlist = errlist; dduha.dduha_minor = cleanup_minor; @@ -365,7 +363,7 @@ dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura, numholds = 0; mos = ds->ds_dir->dd_pool->dp_meta_objset; zapobj = ds->ds_phys->ds_userrefs_obj; - VERIFY0(nvlist_alloc(&holds_found, NV_UNIQUE_NAME, KM_PUSHPAGE)); + VERIFY0(nvlist_alloc(&holds_found, NV_UNIQUE_NAME, KM_SLEEP)); for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { @@ -605,9 +603,9 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist, ddura.ddura_holds = holds; ddura.ddura_errlist = errlist; VERIFY0(nvlist_alloc(&ddura.ddura_todelete, NV_UNIQUE_NAME, - KM_PUSHPAGE)); + KM_SLEEP)); VERIFY0(nvlist_alloc(&ddura.ddura_chkholds, NV_UNIQUE_NAME, - KM_PUSHPAGE)); + KM_SLEEP)); error = dsl_sync_task(pool, dsl_dataset_user_release_check, dsl_dataset_user_release_sync, &ddura, 0); @@ -657,7 +655,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl) zap_attribute_t *za; zap_cursor_t zc; - za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_phys->ds_userrefs_obj); zap_cursor_retrieve(&zc, za) == 0; diff --git a/module/zfs/fm.c b/module/zfs/fm.c index d38cb067e..56787137b 100644 --- a/module/zfs/fm.c +++ b/module/zfs/fm.c @@ -426,7 +426,7 @@ zfs_zevent_alloc(void) { zevent_t *ev; - ev = kmem_zalloc(sizeof (zevent_t), KM_PUSHPAGE); + ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP); if (ev == NULL) return (NULL); @@ -776,7 +776,7 @@ zfs_zevent_destroy(zfs_zevent_t *ze) static void * i_fm_alloc(nv_alloc_t *nva, size_t size) { - return (kmem_zalloc(size, KM_PUSHPAGE)); + return (kmem_zalloc(size, KM_SLEEP)); } /* ARGSUSED */ @@ -844,7 +844,7 @@ fm_nvlist_create(nv_alloc_t *nva) nv_alloc_t *nvhdl; 
if (nva == NULL) { - nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_PUSHPAGE); + nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP); if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) { kmem_free(nvhdl, sizeof (nv_alloc_t)); diff --git a/module/zfs/lz4.c b/module/zfs/lz4.c index 5c3c6cdb1..cf406b936 100644 --- a/module/zfs/lz4.c +++ b/module/zfs/lz4.c @@ -838,7 +838,7 @@ real_LZ4_compress(const char *source, char *dest, int isize, int osize) int result; ASSERT(lz4_cache != NULL); - ctx = kmem_cache_alloc(lz4_cache, KM_PUSHPAGE); + ctx = kmem_cache_alloc(lz4_cache, KM_SLEEP); /* * out of kernel memory, gently fall through - this will disable diff --git a/module/zfs/lzjb.c b/module/zfs/lzjb.c index 83ff409ce..ae1846701 100644 --- a/module/zfs/lzjb.c +++ b/module/zfs/lzjb.c @@ -57,7 +57,7 @@ lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) uint16_t *hp; uint16_t *lempel; - lempel = kmem_zalloc(LEMPEL_SIZE * sizeof (uint16_t), KM_PUSHPAGE); + lempel = kmem_zalloc(LEMPEL_SIZE * sizeof (uint16_t), KM_SLEEP); while (src < (uchar_t *)s_start + s_len) { if ((copymask <<= 1) == (1 << NBBY)) { if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) { diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index 1d3943c7f..f9eef272b 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -192,7 +192,7 @@ metaslab_class_create(spa_t *spa, metaslab_ops_t *ops) { metaslab_class_t *mc; - mc = kmem_zalloc(sizeof (metaslab_class_t), KM_PUSHPAGE); + mc = kmem_zalloc(sizeof (metaslab_class_t), KM_SLEEP); mc->mc_spa = spa; mc->mc_rotor = NULL; @@ -286,7 +286,7 @@ metaslab_class_histogram_verify(metaslab_class_t *mc) return; mc_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, - KM_PUSHPAGE); + KM_SLEEP); for (c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; @@ -482,7 +482,7 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd) { metaslab_group_t *mg; - mg = kmem_zalloc(sizeof (metaslab_group_t), 
KM_PUSHPAGE); + mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP); mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&mg->mg_metaslab_tree, metaslab_compare, sizeof (metaslab_t), offsetof(struct metaslab, ms_group_node)); @@ -598,7 +598,7 @@ metaslab_group_histogram_verify(metaslab_group_t *mg) return; mg_hist = kmem_zalloc(sizeof (uint64_t) * RANGE_TREE_HISTOGRAM_SIZE, - KM_PUSHPAGE); + KM_SLEEP); ASSERT3U(RANGE_TREE_HISTOGRAM_SIZE, >=, SPACE_MAP_HISTOGRAM_SIZE + ashift); @@ -1246,7 +1246,7 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg, metaslab_t *ms; int error; - ms = kmem_zalloc(sizeof (metaslab_t), KM_PUSHPAGE); + ms = kmem_zalloc(sizeof (metaslab_t), KM_SLEEP); mutex_init(&ms->ms_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&ms->ms_load_cv, NULL, CV_DEFAULT, NULL); ms->ms_id = id; @@ -1639,7 +1639,7 @@ metaslab_group_preload(metaslab_group_t *mg) */ mutex_exit(&mg->mg_lock); VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload, - msp, TQ_PUSHPAGE) != 0); + msp, TQ_SLEEP) != 0); mutex_enter(&mg->mg_lock); msp = msp_next; } diff --git a/module/zfs/range_tree.c b/module/zfs/range_tree.c index 4643d2632..22175e06a 100644 --- a/module/zfs/range_tree.c +++ b/module/zfs/range_tree.c @@ -132,7 +132,7 @@ range_tree_create(range_tree_ops_t *ops, void *arg, kmutex_t *lp) { range_tree_t *rt; - rt = kmem_zalloc(sizeof (range_tree_t), KM_PUSHPAGE); + rt = kmem_zalloc(sizeof (range_tree_t), KM_SLEEP); avl_create(&rt->rt_root, range_tree_seg_compare, sizeof (range_seg_t), offsetof(range_seg_t, rs_node)); @@ -221,7 +221,7 @@ range_tree_add(void *arg, uint64_t start, uint64_t size) rs_after->rs_start = start; rs = rs_after; } else { - rs = kmem_cache_alloc(range_seg_cache, KM_PUSHPAGE); + rs = kmem_cache_alloc(range_seg_cache, KM_SLEEP); rs->rs_start = start; rs->rs_end = end; avl_insert(&rt->rt_root, rs, where); @@ -270,7 +270,7 @@ range_tree_remove(void *arg, uint64_t start, uint64_t size) rt->rt_ops->rtop_remove(rt, rs, 
rt->rt_arg); if (left_over && right_over) { - newseg = kmem_cache_alloc(range_seg_cache, KM_PUSHPAGE); + newseg = kmem_cache_alloc(range_seg_cache, KM_SLEEP); newseg->rs_start = end; newseg->rs_end = rs->rs_end; range_tree_stat_incr(rt, newseg); diff --git a/module/zfs/refcount.c b/module/zfs/refcount.c index 49980efcc..4c460a200 100644 --- a/module/zfs/refcount.c +++ b/module/zfs/refcount.c @@ -121,7 +121,7 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder) int64_t count; if (rc->rc_tracked) { - ref = kmem_cache_alloc(reference_cache, KM_PUSHPAGE); + ref = kmem_cache_alloc(reference_cache, KM_SLEEP); ref->ref_holder = holder; ref->ref_number = number; } @@ -165,7 +165,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder) if (reference_history > 0) { ref->ref_removed = kmem_cache_alloc(reference_history_cache, - KM_PUSHPAGE); + KM_SLEEP); list_insert_head(&rc->rc_removed, ref); rc->rc_removed_count++; if (rc->rc_removed_count > reference_history) { diff --git a/module/zfs/rrwlock.c b/module/zfs/rrwlock.c index 357afbfa5..8e80166c7 100644 --- a/module/zfs/rrwlock.c +++ b/module/zfs/rrwlock.c @@ -103,7 +103,7 @@ rrn_add(rrwlock_t *rrl, void *tag) { rrw_node_t *rn; - rn = kmem_alloc(sizeof (*rn), KM_PUSHPAGE); + rn = kmem_alloc(sizeof (*rn), KM_SLEEP); rn->rn_rrl = rrl; rn->rn_next = tsd_get(rrw_tsd_key); rn->rn_tag = tag; diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 25153a839..ea68e40a2 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -434,10 +434,10 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count, avl_index_t loc; ASSERT(MUTEX_HELD(&sa->sa_lock)); - tb = kmem_zalloc(sizeof (sa_lot_t), KM_PUSHPAGE); + tb = kmem_zalloc(sizeof (sa_lot_t), KM_SLEEP); tb->lot_attr_count = attr_count; tb->lot_attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, - KM_PUSHPAGE); + KM_SLEEP); bcopy(attrs, tb->lot_attrs, sizeof (sa_attr_type_t) * attr_count); tb->lot_num = lot_num; tb->lot_hash = hash; @@ -740,7 
+740,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count, buf_space = hdl->sa_bonus->db_size - hdrsize; attrs_start = attrs = kmem_alloc(sizeof (sa_attr_type_t) * attr_count, - KM_PUSHPAGE); + KM_SLEEP); lot_count = 0; for (i = 0, len_idx = 0, hash = -1ULL; i != attr_count; i++) { @@ -864,7 +864,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) dmu_objset_type_t ostype = dmu_objset_type(os); sa->sa_user_table = - kmem_zalloc(count * sizeof (sa_attr_type_t), KM_PUSHPAGE); + kmem_zalloc(count * sizeof (sa_attr_type_t), KM_SLEEP); sa->sa_user_table_sz = count * sizeof (sa_attr_type_t); if (sa->sa_reg_attr_obj != 0) { @@ -923,7 +923,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) sa->sa_num_attrs = sa_attr_count; tb = sa->sa_attr_table = - kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_PUSHPAGE); + kmem_zalloc(sizeof (sa_attr_table_t) * sa_attr_count, KM_SLEEP); /* * Attribute table is constructed from requested attribute list, @@ -948,7 +948,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) continue; } tb[ATTR_NUM(value)].sa_name = - kmem_zalloc(strlen(za.za_name) +1, KM_PUSHPAGE); + kmem_zalloc(strlen(za.za_name) +1, KM_SLEEP); (void) strlcpy(tb[ATTR_NUM(value)].sa_name, za.za_name, strlen(za.za_name) +1); } @@ -974,7 +974,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) tb[i].sa_registered = B_FALSE; tb[i].sa_name = kmem_zalloc(strlen(sa_legacy_attrs[i].sa_name) +1, - KM_PUSHPAGE); + KM_SLEEP); (void) strlcpy(tb[i].sa_name, sa_legacy_attrs[i].sa_name, strlen(sa_legacy_attrs[i].sa_name) + 1); @@ -992,7 +992,7 @@ sa_attr_table_setup(objset_t *os, sa_attr_reg_t *reg_attrs, int count) tb[attr_id].sa_byteswap = reg_attrs[i].sa_byteswap; tb[attr_id].sa_attr = attr_id; tb[attr_id].sa_name = - kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_PUSHPAGE); + kmem_zalloc(strlen(reg_attrs[i].sa_name) + 1, KM_SLEEP); (void) 
strlcpy(tb[attr_id].sa_name, reg_attrs[i].sa_name, strlen(reg_attrs[i].sa_name) + 1); } @@ -1029,7 +1029,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, return (0); } - sa = kmem_zalloc(sizeof (sa_os_t), KM_PUSHPAGE); + sa = kmem_zalloc(sizeof (sa_os_t), KM_SLEEP); mutex_init(&sa->sa_lock, NULL, MUTEX_DEFAULT, NULL); sa->sa_master_obj = sa_obj; @@ -1077,7 +1077,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count, uint64_t lot_num; lot_attrs = kmem_zalloc(sizeof (sa_attr_type_t) * - za.za_num_integers, KM_PUSHPAGE); + za.za_num_integers, KM_SLEEP); if ((error = (zap_lookup(os, sa->sa_layout_attr_obj, za.za_name, 2, za.za_num_integers, @@ -1563,14 +1563,14 @@ sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data) } /* No such luck, create a new entry */ - idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_PUSHPAGE); + idx_tab = kmem_zalloc(sizeof (sa_idx_tab_t), KM_SLEEP); idx_tab->sa_idx_tab = - kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_PUSHPAGE); + kmem_zalloc(sizeof (uint32_t) * sa->sa_num_attrs, KM_SLEEP); idx_tab->sa_layout = tb; refcount_create(&idx_tab->sa_refcount); if (tb->lot_var_sizes) idx_tab->sa_variable_lengths = kmem_alloc(sizeof (uint16_t) * - tb->lot_var_sizes, KM_PUSHPAGE); + tb->lot_var_sizes, KM_SLEEP); sa_attr_iter(os, hdr, bonustype, sa_build_idx_tab, tb, idx_tab); diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 88641603a..55bcf43f8 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -170,7 +170,7 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval, const char *propname = zpool_prop_to_name(prop); nvlist_t *propval; - VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0); if (strval != NULL) @@ -285,7 +285,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) zap_attribute_t za; int err; - err = 
nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_PUSHPAGE); + err = nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP); if (err) return (err); @@ -337,7 +337,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) strval = kmem_alloc( MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, - KM_PUSHPAGE); + KM_SLEEP); dsl_dataset_name(ds, strval); dsl_dataset_rele(ds, FTAG); dsl_pool_config_exit(dp, FTAG); @@ -356,7 +356,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp) case 1: /* string property */ - strval = kmem_alloc(za.za_num_integers, KM_PUSHPAGE); + strval = kmem_alloc(za.za_num_integers, KM_SLEEP); err = zap_lookup(mos, spa->spa_pool_props_object, za.za_name, 1, za.za_num_integers, strval); if (err) { @@ -609,7 +609,7 @@ spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync) return; dp = kmem_alloc(sizeof (spa_config_dirent_t), - KM_PUSHPAGE); + KM_SLEEP); if (cachefile[0] == '\0') dp->scd_path = spa_strdup(spa_config_path); @@ -1380,7 +1380,7 @@ spa_load_spares(spa_t *spa) * active configuration, then we also mark this vdev as an active spare. 
*/ spa->spa_spares.sav_vdevs = kmem_zalloc(nspares * sizeof (void *), - KM_PUSHPAGE); + KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) { VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0, VDEV_ALLOC_SPARE) == 0); @@ -1428,7 +1428,7 @@ spa_load_spares(spa_t *spa) DATA_TYPE_NVLIST_ARRAY) == 0); spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *), - KM_PUSHPAGE); + KM_SLEEP); for (i = 0; i < spa->spa_spares.sav_count; i++) spares[i] = vdev_config_generate(spa, spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE); @@ -1462,7 +1462,7 @@ spa_load_l2cache(spa_t *spa) if (sav->sav_config != NULL) { VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0); - newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_PUSHPAGE); + newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP); } else { nl2cache = 0; newvdevs = NULL; @@ -1557,7 +1557,7 @@ spa_load_l2cache(spa_t *spa) VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); - l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_PUSHPAGE); + l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) l2cache[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE); @@ -1586,7 +1586,7 @@ load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value) nvsize = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); - packed = kmem_alloc(nvsize, KM_PUSHPAGE); + packed = kmem_alloc(nvsize, KM_SLEEP); error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed, DMU_READ_PREFETCH); if (error == 0) @@ -1643,8 +1643,8 @@ spa_config_valid(spa_t *spa, nvlist_t *config) uint64_t idx = 0; child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **), - KM_PUSHPAGE); - VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + KM_SLEEP); + VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = 
rvd->vdev_child[c]; @@ -2037,7 +2037,7 @@ spa_try_repair(spa_t *spa, nvlist_t *config) &glist, &gcount) != 0) return; - vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_PUSHPAGE); + vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP); /* attempt to online all the vdevs & validate */ attempt_reopen = B_TRUE; @@ -2123,7 +2123,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type, if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) == 0) { VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); } nvlist_free(spa->spa_load_info); @@ -3010,7 +3010,7 @@ spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy, */ if (config != NULL && spa->spa_config) { VERIFY(nvlist_dup(spa->spa_config, config, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO, spa->spa_load_info) == 0); @@ -3434,13 +3434,13 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, &olddevs, &oldndevs) == 0); newdevs = kmem_alloc(sizeof (void *) * - (ndevs + oldndevs), KM_PUSHPAGE); + (ndevs + oldndevs), KM_SLEEP); for (i = 0; i < oldndevs; i++) VERIFY(nvlist_dup(olddevs[i], &newdevs[i], - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); for (i = 0; i < ndevs; i++) VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs], - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); VERIFY(nvlist_remove(sav->sav_config, config, DATA_TYPE_NVLIST_ARRAY) == 0); @@ -3455,7 +3455,7 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs, * Generate a new dev list. 
*/ VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(sav->sav_config, config, devs, ndevs) == 0); } @@ -3610,7 +3610,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); @@ -3625,7 +3625,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props, if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, - NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); @@ -3766,7 +3766,7 @@ spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid) /* * Put this pool's top-level vdevs into a root vdev. 
*/ - VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0); @@ -4074,7 +4074,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, - NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES, spares, nspares) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); @@ -4089,7 +4089,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags) ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0); else VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config, - NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config, ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0); spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); @@ -4180,7 +4180,7 @@ spa_tryimport(nvlist_t *tryconfig) * pools are bootable. 
*/ if ((!error || error == EEXIST) && spa->spa_bootfs) { - char *tmpname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); + char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* * We have to play games with the name since the @@ -4191,7 +4191,7 @@ spa_tryimport(nvlist_t *tryconfig) char *cp; char *dsname; - dsname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); + dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP); cp = strchr(tmpname, '/'); if (cp == NULL) { @@ -4596,7 +4596,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) { spa_strfree(oldvd->vdev_path); oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5, - KM_PUSHPAGE); + KM_SLEEP); (void) sprintf(oldvd->vdev_path, "%s/%s", newvd->vdev_path, "old"); if (oldvd->vdev_devid != NULL) { @@ -4992,8 +4992,8 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0) return (spa_vdev_exit(spa, NULL, txg, EINVAL)); - vml = kmem_zalloc(children * sizeof (vdev_t *), KM_PUSHPAGE); - glist = kmem_zalloc(children * sizeof (uint64_t), KM_PUSHPAGE); + vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP); + glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP); /* then, loop over each vdev and validate it */ for (c = 0; c < children; c++) { @@ -5073,7 +5073,7 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, * Temporarily record the splitting vdevs in the spa config. This * will disappear once the config is regenerated. 
*/ - VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST, glist, children) == 0); kmem_free(glist, children * sizeof (uint64_t)); @@ -5120,7 +5120,7 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, /* if that worked, generate a real config for the new pool */ if (newspa->spa_root_vdev != NULL) { VERIFY(nvlist_alloc(&newspa->spa_config_splitting, - NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(newspa->spa_config_splitting, ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0); spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL, @@ -5231,12 +5231,12 @@ spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count, int i, j; if (count > 1) - newdev = kmem_alloc((count - 1) * sizeof (void *), KM_PUSHPAGE); + newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP); for (i = 0, j = 0; i < count; i++) { if (dev[i] == dev_to_remove) continue; - VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_PUSHPAGE) == 0); + VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0); } VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0); @@ -5910,10 +5910,10 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) * saves us a pre-read to get data we don't actually care about. 
*/ bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE); - packed = vmem_alloc(bufsize, KM_PUSHPAGE); + packed = vmem_alloc(bufsize, KM_SLEEP); VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); bzero(packed + nvsize, bufsize - nvsize); dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx); @@ -5951,11 +5951,11 @@ spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx, &sav->sav_object, tx) == 0); } - VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (sav->sav_count == 0) { VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0); } else { - list = kmem_alloc(sav->sav_count*sizeof (void *), KM_PUSHPAGE); + list = kmem_alloc(sav->sav_count*sizeof (void *), KM_SLEEP); for (i = 0; i < sav->sav_count; i++) list[i] = vdev_config_generate(spa, sav->sav_vdevs[i], B_FALSE, VDEV_CONFIG_L2CACHE); @@ -6283,7 +6283,7 @@ spa_sync(spa_t *spa, uint64_t txg) spa->spa_sync_starttime = gethrtime(); taskq_cancel_id(system_taskq, spa->spa_deadman_tqid); spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq, - spa_deadman, spa, TQ_PUSHPAGE, ddi_get_lbolt() + + spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() + NSEC_TO_TICK(spa->spa_deadman_synctime)); /* diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 818a4308f..ed2344cf3 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -87,7 +87,7 @@ spa_config_load(void) /* * Open the configuration file. */ - pathname = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); + pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); (void) snprintf(pathname, MAXPATHLEN, "%s%s", (rootdir != NULL) ? "./" : "", spa_config_path); @@ -102,7 +102,7 @@ spa_config_load(void) if (kobj_get_filesize(file, &fsize) != 0) goto out; - buf = kmem_alloc(fsize, KM_PUSHPAGE); + buf = kmem_alloc(fsize, KM_SLEEP); /* * Read the nvlist from the file. 
@@ -113,7 +113,7 @@ spa_config_load(void) /* * Unpack the nvlist. */ - if (nvlist_unpack(buf, fsize, &nvlist, KM_PUSHPAGE) != 0) + if (nvlist_unpack(buf, fsize, &nvlist, KM_SLEEP) != 0) goto out; /* @@ -165,11 +165,11 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl) */ VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0); - buf = kmem_alloc(buflen, KM_PUSHPAGE); - temp = kmem_zalloc(MAXPATHLEN, KM_PUSHPAGE); + buf = kmem_alloc(buflen, KM_SLEEP); + temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP); VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); /* * Write the configuration to disk. We need to do the traditional @@ -252,7 +252,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent) if (nvl == NULL) VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) { VERIFY0(nvlist_lookup_string(spa->spa_config, @@ -301,7 +301,7 @@ spa_all_configs(uint64_t *generation) if (*generation == spa_config_generation) return (NULL); - VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0); mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) { @@ -376,7 +376,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) } else pool_name = spa_name(spa); - VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); @@ -463,21 +463,21 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats) ddt_stat_t *dds; ddt_object_t *ddo; - ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_PUSHPAGE); + ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP); ddt_get_dedup_histogram(spa, ddh); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t *)ddh, sizeof (*ddh) / 
sizeof (uint64_t)) == 0); kmem_free(ddh, sizeof (ddt_histogram_t)); - ddo = kmem_zalloc(sizeof (ddt_object_t), KM_PUSHPAGE); + ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP); ddt_get_dedup_object_stats(spa, ddo); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0); kmem_free(ddo, sizeof (ddt_object_t)); - dds = kmem_zalloc(sizeof (ddt_stat_t), KM_PUSHPAGE); + dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP); ddt_get_dedup_stats(spa, dds); VERIFY(nvlist_add_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, diff --git a/module/zfs/spa_history.c b/module/zfs/spa_history.c index 1c434197d..950bf98e2 100644 --- a/module/zfs/spa_history.c +++ b/module/zfs/spa_history.c @@ -260,7 +260,7 @@ spa_history_log_sync(void *arg, dmu_tx_t *tx) } VERIFY3U(nvlist_pack(nvl, &record_packed, &reclen, NV_ENCODE_NATIVE, - KM_PUSHPAGE), ==, 0); + KM_SLEEP), ==, 0); mutex_enter(&spa->spa_history_lock); @@ -289,9 +289,7 @@ int spa_history_log(spa_t *spa, const char *msg) { int err; - nvlist_t *nvl; - - VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + nvlist_t *nvl = fnvlist_alloc(); fnvlist_add_string(nvl, ZPOOL_HIST_CMD, msg); err = spa_history_log_nvl(spa, nvl); @@ -316,7 +314,7 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) return (err); } - VERIFY0(nvlist_dup(nvl, &nvarg, KM_PUSHPAGE)); + VERIFY0(nvlist_dup(nvl, &nvarg, KM_SLEEP)); if (spa_history_zone() != NULL) { fnvlist_add_string(nvarg, ZPOOL_HIST_ZONE, spa_history_zone()); @@ -453,7 +451,7 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, va_copy(adx1, adx); size = vsnprintf(NULL, 0, fmt, adx1) + 1; - msg = kmem_alloc(size, KM_PUSHPAGE); + msg = kmem_alloc(size, KM_SLEEP); va_end(adx1); va_copy(adx1, adx); (void) vsprintf(msg, fmt, adx1); @@ -479,7 +477,6 @@ spa_history_log_internal(spa_t *spa, const char *operation, { dmu_tx_t *htx = tx; va_list adx; - nvlist_t *nvl; /* create a tx if we didn't get one */ if (tx == NULL) { @@ 
-491,8 +488,7 @@ spa_history_log_internal(spa_t *spa, const char *operation, } va_start(adx, fmt); - VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); - log_internal(nvl, operation, spa, htx, fmt, adx); + log_internal(fnvlist_alloc(), operation, spa, htx, fmt, adx); va_end(adx); /* if we didn't get a tx from the caller, commit the one we made */ @@ -506,12 +502,11 @@ spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation, { va_list adx; char namebuf[MAXNAMELEN]; - nvlist_t *nvl; + nvlist_t *nvl = fnvlist_alloc(); ASSERT(tx != NULL); dsl_dataset_name(ds, namebuf); - VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, ds->ds_object); @@ -526,12 +521,11 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, { va_list adx; char namebuf[MAXNAMELEN]; - nvlist_t *nvl; + nvlist_t *nvl = fnvlist_alloc(); ASSERT(tx != NULL); dsl_dir_name(dd, namebuf); - VERIFY0(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); fnvlist_add_string(nvl, ZPOOL_HIST_DSNAME, namebuf); fnvlist_add_uint64(nvl, ZPOOL_HIST_DSID, dd->dd_phys->dd_head_dataset_obj); diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 363f982fe..ce6f02dc4 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -501,7 +501,7 @@ spa_deadman(void *arg) vdev_deadman(spa->spa_root_vdev); spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq, - spa_deadman, spa, TQ_PUSHPAGE, ddi_get_lbolt() + + spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() + NSEC_TO_TICK(spa->spa_deadman_synctime)); } @@ -520,7 +520,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) ASSERT(MUTEX_HELD(&spa_namespace_lock)); - spa = kmem_zalloc(sizeof (spa_t), KM_PUSHPAGE); + spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); @@ -570,12 +570,12 @@ spa_add(const char
*name, nvlist_t *config, const char *altroot) list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), offsetof(spa_config_dirent_t, scd_link)); - dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_PUSHPAGE); + dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP); dp->scd_path = altroot ? NULL : spa_strdup(spa_config_path); list_insert_head(&spa->spa_config_list, dp); VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); if (config != NULL) { nvlist_t *features; @@ -591,7 +591,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot) if (spa->spa_label_features == NULL) { VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, - KM_PUSHPAGE) == 0); + KM_SLEEP) == 0); } spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0); @@ -772,7 +772,7 @@ spa_aux_add(vdev_t *vd, avl_tree_t *avl) if ((aux = avl_find(avl, &search, &where)) != NULL) { aux->aux_count++; } else { - aux = kmem_zalloc(sizeof (spa_aux_t), KM_PUSHPAGE); + aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP); aux->aux_guid = vd->vdev_guid; aux->aux_count = 1; avl_insert(avl, aux, where); @@ -1280,7 +1280,7 @@ spa_strdup(const char *s) char *new; len = strlen(s); - new = kmem_alloc(len + 1, KM_PUSHPAGE); + new = kmem_alloc(len + 1, KM_SLEEP); bcopy(s, new, len); new[len] = '\0'; diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c index 7c97ba105..3e39dba2c 100644 --- a/module/zfs/spa_stats.c +++ b/module/zfs/spa_stats.c @@ -203,7 +203,7 @@ spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags) if (zfs_read_history_hits == 0 && (aflags & ARC_CACHED)) return; - srh = kmem_zalloc(sizeof (spa_read_history_t), KM_PUSHPAGE); + srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP); strlcpy(srh->comm, getcomm(), sizeof (srh->comm)); srh->start = gethrtime(); srh->objset = zb->zb_objset; @@ -423,7 +423,7 @@ spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time) if (zfs_txg_history == 0 && ssh->size == 
0) return; - sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_PUSHPAGE); + sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP); sth->txg = txg; sth->state = TXG_STATE_OPEN; sth->times[TXG_STATE_BIRTH] = birth_time; diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c index fc0df756e..b3aa469bf 100644 --- a/module/zfs/space_map.c +++ b/module/zfs/space_map.c @@ -364,7 +364,7 @@ space_map_open(space_map_t **smp, objset_t *os, uint64_t object, ASSERT(os != NULL); ASSERT(object != 0); - sm = kmem_alloc(sizeof (space_map_t), KM_PUSHPAGE); + sm = kmem_alloc(sizeof (space_map_t), KM_SLEEP); sm->sm_start = start; sm->sm_size = size; diff --git a/module/zfs/space_reftree.c b/module/zfs/space_reftree.c index d20281e16..a508092c5 100644 --- a/module/zfs/space_reftree.c +++ b/module/zfs/space_reftree.c @@ -94,7 +94,7 @@ space_reftree_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt) { space_ref_t *sr; - sr = kmem_alloc(sizeof (*sr), KM_PUSHPAGE); + sr = kmem_alloc(sizeof (*sr), KM_SLEEP); sr->sr_offset = offset; sr->sr_refcnt = refcnt; diff --git a/module/zfs/txg.c b/module/zfs/txg.c index 81afeb373..2977bf9f3 100644 --- a/module/zfs/txg.c +++ b/module/zfs/txg.c @@ -449,7 +449,7 @@ txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg) TASKQ_THREADS_CPU_PCT | TASKQ_PREPOPULATE); } - cb_list = kmem_alloc(sizeof (list_t), KM_PUSHPAGE); + cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP); list_create(cb_list, sizeof (dmu_tx_callback_t), offsetof(dmu_tx_callback_t, dcb_node)); @@ -486,8 +486,8 @@ txg_sync_thread(dsl_pool_t *dp) (void) spl_fstrans_mark(); txg_thread_enter(tx, &cpr); - vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE); - vs2 = kmem_alloc(sizeof (vdev_stat_t), KM_PUSHPAGE); + vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_SLEEP); + vs2 = kmem_alloc(sizeof (vdev_stat_t), KM_SLEEP); start = delta = 0; for (;;) { diff --git a/module/zfs/unique.c b/module/zfs/unique.c index 5c7ca4875..8c1d2e2f9 100644 --- a/module/zfs/unique.c +++ b/module/zfs/unique.c @@ 
-79,7 +79,7 @@ uint64_t unique_insert(uint64_t value) { avl_index_t idx; - unique_t *un = kmem_alloc(sizeof (unique_t), KM_PUSHPAGE); + unique_t *un = kmem_alloc(sizeof (unique_t), KM_SLEEP); un->un_value = value; diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 18a246c33..52198261e 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -200,7 +200,7 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd) pvd->vdev_children = MAX(pvd->vdev_children, id + 1); newsize = pvd->vdev_children * sizeof (vdev_t *); - newchild = kmem_alloc(newsize, KM_PUSHPAGE); + newchild = kmem_alloc(newsize, KM_SLEEP); if (pvd->vdev_child != NULL) { bcopy(pvd->vdev_child, newchild, oldsize); kmem_free(pvd->vdev_child, oldsize); @@ -270,7 +270,7 @@ vdev_compact_children(vdev_t *pvd) if (pvd->vdev_child[c]) newc++; - newchild = kmem_zalloc(newc * sizeof (vdev_t *), KM_PUSHPAGE); + newchild = kmem_zalloc(newc * sizeof (vdev_t *), KM_SLEEP); for (c = newc = 0; c < oldc; c++) { if ((cvd = pvd->vdev_child[c]) != NULL) { @@ -293,7 +293,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops) vdev_t *vd; int t; - vd = kmem_zalloc(sizeof (vdev_t), KM_PUSHPAGE); + vd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP); if (spa->spa_root_vdev == NULL) { ASSERT(ops == &vdev_root_ops); @@ -856,7 +856,7 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg) ASSERT(oldc <= newc); - mspp = kmem_zalloc(newc * sizeof (*mspp), KM_PUSHPAGE); + mspp = kmem_zalloc(newc * sizeof (*mspp), KM_SLEEP); if (oldc != 0) { bcopy(vd->vdev_ms, mspp, oldc * sizeof (*mspp)); @@ -1011,7 +1011,7 @@ vdev_probe(vdev_t *vd, zio_t *zio) mutex_enter(&vd->vdev_probe_lock); if ((pio = vd->vdev_probe_zio) == NULL) { - vps = kmem_zalloc(sizeof (*vps), KM_PUSHPAGE); + vps = kmem_zalloc(sizeof (*vps), KM_SLEEP); vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE | diff --git a/module/zfs/vdev_cache.c b/module/zfs/vdev_cache.c index e4ce3eaef..389fa6fd9 100644 --- 
a/module/zfs/vdev_cache.c +++ b/module/zfs/vdev_cache.c @@ -180,7 +180,7 @@ vdev_cache_allocate(zio_t *zio) vdev_cache_evict(vc, ve); } - ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_PUSHPAGE); + ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP); ve->ve_offset = offset; ve->ve_lastused = ddi_get_lbolt(); ve->ve_data = zio_buf_alloc(VCBS); @@ -277,7 +277,7 @@ vdev_cache_read(zio_t *zio) mutex_enter(&vc->vc_lock); - ve_search = kmem_alloc(sizeof (vdev_cache_entry_t), KM_PUSHPAGE); + ve_search = kmem_alloc(sizeof (vdev_cache_entry_t), KM_SLEEP); ve_search->ve_offset = cache_offset; ve = avl_find(&vc->vc_offset_tree, ve_search, NULL); kmem_free(ve_search, sizeof (vdev_cache_entry_t)); diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c index f290fce52..7f2263457 100644 --- a/module/zfs/vdev_disk.c +++ b/module/zfs/vdev_disk.c @@ -262,7 +262,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize, goto skip_open; } - vd = kmem_zalloc(sizeof (vdev_disk_t), KM_PUSHPAGE); + vd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP); if (vd == NULL) return (ENOMEM); @@ -339,7 +339,7 @@ vdev_disk_dio_alloc(int bio_count) int i; dr = kmem_zalloc(sizeof (dio_request_t) + - sizeof (struct bio *) * bio_count, KM_PUSHPAGE); + sizeof (struct bio *) * bio_count, KM_SLEEP); if (dr) { init_completion(&dr->dr_comp); atomic_set(&dr->dr_ref, 0); @@ -807,7 +807,7 @@ vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config) } size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t); - label = vmem_alloc(sizeof (vdev_label_t), KM_PUSHPAGE); + label = vmem_alloc(sizeof (vdev_label_t), KM_SLEEP); for (i = 0; i < VDEV_LABELS; i++) { uint64_t offset, state, txg = 0; diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c index 8573a3a66..7f43ad800 100644 --- a/module/zfs/vdev_file.c +++ b/module/zfs/vdev_file.c @@ -77,7 +77,7 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, goto skip_open; } - vf = vd->vdev_tsd = kmem_zalloc(sizeof 
(vdev_file_t), KM_PUSHPAGE); + vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); /* * We always open the files from the root of the global zone, even if @@ -215,7 +215,7 @@ vdev_file_io_start(zio_t *zio) } VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio, - TQ_PUSHPAGE), !=, 0); + TQ_SLEEP), !=, 0); return (ZIO_PIPELINE_STOP); } diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index 1c2f00fe2..7f588ed6b 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -216,7 +216,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, { nvlist_t *nv = NULL; - VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + nv = fnvlist_alloc(); fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type); if (!(flags & (VDEV_CONFIG_SPARE | VDEV_CONFIG_L2CACHE))) @@ -314,7 +314,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats, ASSERT(!vd->vdev_ishole); child = kmem_alloc(vd->vdev_children * sizeof (nvlist_t *), - KM_PUSHPAGE); + KM_SLEEP); for (c = 0, idx = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; @@ -396,7 +396,7 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config) uint64_t *array; uint_t c, idx; - array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_PUSHPAGE); + array = kmem_alloc(rvd->vdev_children * sizeof (uint64_t), KM_SLEEP); for (c = 0, idx = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; @@ -728,7 +728,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) * active hot spare (in which case we want to revert the * labels). */ - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); @@ -741,7 +741,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) /* * For level 2 ARC devices, add a special label. 
*/ - VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_PUSHPAGE) == 0); + VERIFY(nvlist_alloc(&label, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(label, ZPOOL_CONFIG_VERSION, spa_version(spa)) == 0); @@ -768,7 +768,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason) buf = vp->vp_nvlist; buflen = sizeof (vp->vp_nvlist); - error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_PUSHPAGE); + error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); if (error != 0) { nvlist_free(label); zio_buf_free(vp, sizeof (vdev_phys_t)); @@ -1118,7 +1118,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags) buf = vp->vp_nvlist; buflen = sizeof (vp->vp_nvlist); - if (!nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_PUSHPAGE)) { + if (!nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP)) { for (; l < VDEV_LABELS; l += 2) { vdev_label_write(zio, vd, l, vp, offsetof(vdev_label_t, vl_vdev_phys), @@ -1151,7 +1151,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags) ASSERT(!vd->vdev_ishole); - good_writes = kmem_zalloc(sizeof (uint64_t), KM_PUSHPAGE); + good_writes = kmem_zalloc(sizeof (uint64_t), KM_SLEEP); vio = zio_null(zio, spa, NULL, (vd->vdev_islog || vd->vdev_aux != NULL) ? 
vdev_label_sync_ignore_done : vdev_label_sync_top_done, diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 17417a0fa..77c3d8d38 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -111,7 +111,7 @@ vdev_mirror_map_alloc(zio_t *zio) c = BP_GET_NDVAS(zio->io_bp); mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), - KM_PUSHPAGE); + KM_SLEEP); mm->mm_children = c; mm->mm_replacing = B_FALSE; mm->mm_preferred = spa_get_random(c); @@ -142,7 +142,7 @@ vdev_mirror_map_alloc(zio_t *zio) c = vd->vdev_children; mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), - KM_PUSHPAGE); + KM_SLEEP); mm->mm_children = c; mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops || vd->vdev_ops == &vdev_spare_ops); diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index 4cd21df89..493b332c4 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -486,7 +486,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols, ASSERT3U(acols, <=, scols); - rm = kmem_alloc(offsetof(raidz_map_t, rm_col[scols]), KM_PUSHPAGE); + rm = kmem_alloc(offsetof(raidz_map_t, rm_col[scols]), KM_SLEEP); rm->rm_cols = acols; rm->rm_scols = scols; @@ -1227,7 +1227,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing, size_t psize; psize = sizeof (invlog[0][0]) * n * nmissing; - p = kmem_alloc(psize, KM_PUSHPAGE); + p = kmem_alloc(psize, KM_SLEEP); for (pp = p, i = 0; i < nmissing; i++) { invlog[i] = pp; @@ -1344,7 +1344,7 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts) psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) * nmissing_rows * n + sizeof (used[0]) * n; - p = kmem_alloc(psize, KM_PUSHPAGE); + p = kmem_alloc(psize, KM_SLEEP); for (pp = p, i = 0; i < nmissing_rows; i++) { rows[i] = pp; diff --git a/module/zfs/zap.c b/module/zfs/zap.c index 4ee75b6d9..5ffa138a6 100644 --- a/module/zfs/zap.c +++ b/module/zfs/zap.c @@ -115,7 +115,7 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, 
zap_flags_t flags) 1<l_dbuf = db; l->l_phys = db->db_data; @@ -392,7 +392,7 @@ static zap_leaf_t * zap_create_leaf(zap_t *zap, dmu_tx_t *tx) { void *winner; - zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_PUSHPAGE); + zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); @@ -454,7 +454,7 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db) ASSERT(blkid != 0); - l = kmem_alloc(sizeof (zap_leaf_t), KM_PUSHPAGE); + l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL); rw_enter(&l->l_rwlock, RW_WRITER); l->l_blkid = blkid; @@ -972,7 +972,7 @@ zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask, if (mask == 0) mask = -1ULL; - za = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE); + za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, os, zapobj); (err = zap_cursor_retrieve(&zc, za)) == 0; zap_cursor_advance(&zc)) { diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c index 0e30ef319..78f05d7a7 100644 --- a/module/zfs/zap_leaf.c +++ b/module/zfs/zap_leaf.c @@ -342,7 +342,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, ASSERT(zn->zn_key_intlen == sizeof (*thiskey)); thiskey = kmem_alloc(array_numints * sizeof (*thiskey), - KM_PUSHPAGE); + KM_SLEEP); zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints, sizeof (*thiskey), array_numints, thiskey); @@ -354,7 +354,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, ASSERT(zn->zn_key_intlen == 1); if (zn->zn_matchtype == MT_FIRST) { - char *thisname = kmem_alloc(array_numints, KM_PUSHPAGE); + char *thisname = kmem_alloc(array_numints, KM_SLEEP); boolean_t match; zap_leaf_array_read(l, chunk, sizeof (char), array_numints, diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c index 5d812bdd9..0c2e76319 100644 --- a/module/zfs/zap_micro.c +++ b/module/zfs/zap_micro.c @@ -172,7 +172,7 @@ zap_name_free(zap_name_t *zn) zap_name_t * zap_name_alloc(zap_t *zap, 
const char *key, matchtype_t mt) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); zn->zn_zap = zap; zn->zn_key_intlen = sizeof (*key); @@ -202,7 +202,7 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) zap_name_t * zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) { - zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); + zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); ASSERT(zap->zap_normflags == 0); zn->zn_zap = zap; @@ -271,7 +271,7 @@ mze_insert(zap_t *zap, int chunkid, uint64_t hash) ASSERT(zap->zap_ismicro); ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); - mze = kmem_alloc(sizeof (mzap_ent_t), KM_PUSHPAGE); + mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); mze->mze_chunkid = chunkid; mze->mze_hash = hash; mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; @@ -365,7 +365,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); - zap = kmem_zalloc(sizeof (zap_t), KM_PUSHPAGE); + zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); rw_enter(&zap->zap_rwlock, RW_WRITER); zap->zap_objset = os; diff --git a/module/zfs/zfs_acl.c b/module/zfs/zfs_acl.c index 7357bfa5c..a208dea15 100644 --- a/module/zfs/zfs_acl.c +++ b/module/zfs/zfs_acl.c @@ -454,7 +454,7 @@ zfs_acl_alloc(int vers) { zfs_acl_t *aclp; - aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_PUSHPAGE); + aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP); list_create(&aclp->z_acl, sizeof (zfs_acl_node_t), offsetof(zfs_acl_node_t, z_next)); aclp->z_version = vers; @@ -470,9 +470,9 @@ zfs_acl_node_alloc(size_t bytes) { zfs_acl_node_t *aclnode; - aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_PUSHPAGE); + aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); if (bytes) { - aclnode->z_acldata = kmem_alloc(bytes, KM_PUSHPAGE); + aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); aclnode->z_allocdata = 
aclnode->z_acldata; aclnode->z_allocsize = bytes; aclnode->z_size = bytes; diff --git a/module/zfs/zfs_debug.c b/module/zfs/zfs_debug.c index e1675c818..e835397e9 100644 --- a/module/zfs/zfs_debug.c +++ b/module/zfs/zfs_debug.c @@ -78,7 +78,7 @@ zfs_dbgmsg(const char *fmt, ...) * There is one byte of string in sizeof (zfs_dbgmsg_t), used * for the terminating null. */ - zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_PUSHPAGE); + zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP); zdm->zdm_timestamp = gethrestime_sec(); va_start(adx, fmt); diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index fb65ec6a6..7e9c473d3 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -321,9 +321,9 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, spare_count = spa->spa_spares.sav_count; spare_paths = kmem_zalloc(sizeof (char *) * spare_count, - KM_PUSHPAGE); + KM_SLEEP); spare_guids = kmem_zalloc(sizeof (uint64_t) * spare_count, - KM_PUSHPAGE); + KM_SLEEP); for (i = 0; i < spare_count; i++) { spare_vd = spa->spa_spares.sav_vdevs[i]; @@ -583,7 +583,7 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, size_t offset = 0; ssize_t start = -1; - zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_PUSHPAGE); + zfs_ecksum_info_t *eip = kmem_zalloc(sizeof (*eip), KM_SLEEP); /* don't do any annotation for injected checksum errors */ if (info != NULL && info->zbc_injected) @@ -752,7 +752,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, uint64_t offset, uint64_t length, void *arg, zio_bad_cksum_t *info) { - zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_PUSHPAGE); + zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP); if (zio->io_vsd != NULL) zio->io_vsd_ops->vsd_cksum_report(zio, report, arg); @@ -761,7 +761,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, /* copy the checksum failure information if it was provided */ if (info != NULL) { - report->zcr_ckinfo = 
kmem_zalloc(sizeof (*info), KM_PUSHPAGE); + report->zcr_ckinfo = kmem_zalloc(sizeof (*info), KM_SLEEP); bcopy(info, report->zcr_ckinfo, sizeof (*info)); } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 7d443ecf1..5b9c8f17b 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -254,7 +254,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...) { const char *newfile; size_t size = 4096; - char *buf = kmem_alloc(size, KM_PUSHPAGE); + char *buf = kmem_alloc(size, KM_SLEEP); char *nl; va_list adx; @@ -5827,7 +5827,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) } } - VERIFY0(nvlist_alloc(&outnvl, NV_UNIQUE_NAME, KM_PUSHPAGE)); + outnvl = fnvlist_alloc(); error = vec->zvec_func(zc->zc_name, innvl, outnvl); if (error == 0 && vec->zvec_allow_log && diff --git a/module/zfs/zfs_onexit.c b/module/zfs/zfs_onexit.c index 2b286e71f..18a0671a8 100644 --- a/module/zfs/zfs_onexit.c +++ b/module/zfs/zfs_onexit.c @@ -156,7 +156,7 @@ zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, if (error) return (error); - ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_PUSHPAGE); + ap = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP); list_link_init(&ap->za_link); ap->za_func = func; ap->za_data = data; diff --git a/module/zfs/zfs_rlock.c b/module/zfs/zfs_rlock.c index 2533ced64..5064eb796 100644 --- a/module/zfs/zfs_rlock.c +++ b/module/zfs/zfs_rlock.c @@ -202,7 +202,7 @@ zfs_range_proxify(avl_tree_t *tree, rl_t *rl) rl->r_cnt = 0; /* create a proxy range lock */ - proxy = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); + proxy = kmem_alloc(sizeof (rl_t), KM_SLEEP); proxy->r_off = rl->r_off; proxy->r_len = rl->r_len; proxy->r_cnt = 1; @@ -231,7 +231,7 @@ zfs_range_split(avl_tree_t *tree, rl_t *rl, uint64_t off) ASSERT(rl->r_read_wanted == B_FALSE); /* create the rear proxy range lock */ - rear = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); + rear = kmem_alloc(sizeof (rl_t), KM_SLEEP); 
rear->r_off = off; rear->r_len = rl->r_off + rl->r_len - off; rear->r_cnt = rl->r_cnt; @@ -256,7 +256,7 @@ zfs_range_new_proxy(avl_tree_t *tree, uint64_t off, uint64_t len) rl_t *rl; ASSERT(len); - rl = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); + rl = kmem_alloc(sizeof (rl_t), KM_SLEEP); rl->r_off = off; rl->r_len = len; rl->r_cnt = 1; @@ -429,7 +429,7 @@ zfs_range_lock(znode_t *zp, uint64_t off, uint64_t len, rl_type_t type) ASSERT(type == RL_READER || type == RL_WRITER || type == RL_APPEND); - new = kmem_alloc(sizeof (rl_t), KM_PUSHPAGE); + new = kmem_alloc(sizeof (rl_t), KM_SLEEP); new->r_zp = zp; new->r_off = off; if (len + off < off) /* overflow */ diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 33cdbb3f5..723d6210f 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -922,7 +922,7 @@ zfs_iput_async(struct inode *ip) if (atomic_read(&ip->i_count) == 1) taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)), - (task_func_t *)iput, ip, TQ_PUSHPAGE); + (task_func_t *)iput, ip, TQ_SLEEP); else iput(ip); } @@ -987,7 +987,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) return (SET_ERROR(ENOENT)); } - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_PUSHPAGE); + zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_zilog = zsb->z_log; zgd->zgd_private = zp; diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index b51d78037..3a7c30db2 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -182,7 +182,7 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx) vattr.va_uid = crgetuid(kcred); vattr.va_gid = crgetgid(kcred); - sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); + sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP); sharezp->z_moved = 0; sharezp->z_unlinked = 0; sharezp->z_atime_dirty = 0; @@ -256,7 +256,7 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip) { znode_t *zp; - zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); + zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 
*ip = ZTOI(zp); return (0); @@ -682,7 +682,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, * order for DMU_OT_ZNODE is critical since it needs to be constructed * in the old znode_phys_t format. Don't change this ordering */ - sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_PUSHPAGE); + sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); if (obj_type == DMU_OT_ZNODE) { SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb), @@ -1696,13 +1696,13 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) vattr.va_uid = crgetuid(cr); vattr.va_gid = crgetgid(cr); - rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE); + rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); rootzp->z_moved = 0; rootzp->z_unlinked = 0; rootzp->z_atime_dirty = 0; rootzp->z_is_sa = USE_SA(version, os); - zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE); + zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP); zsb->z_os = os; zsb->z_parent = zsb; zsb->z_version = version; @@ -1710,7 +1710,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) zsb->z_use_sa = USE_SA(version, os); zsb->z_norm = norm; - sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE); + sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP); sb->s_fs_info = zsb; ZTOI(rootzp)->i_sb = sb; diff --git a/module/zfs/zil.c b/module/zfs/zil.c index 1d8e7cb4d..9bf8c5e5d 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -172,7 +172,7 @@ zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp) if (avl_find(t, dva, &where) != NULL) return (SET_ERROR(EEXIST)); - zn = kmem_alloc(sizeof (zil_bp_node_t), KM_PUSHPAGE); + zn = kmem_alloc(sizeof (zil_bp_node_t), KM_SLEEP); zn->zn_dva = *dva; avl_insert(t, zn, where); @@ -464,7 +464,7 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite) { lwb_t *lwb; - lwb = kmem_cache_alloc(zil_lwb_cache, KM_PUSHPAGE); + lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); lwb->lwb_zilog = zilog; 
lwb->lwb_blk = *bp; lwb->lwb_fastwrite = fastwrite; @@ -815,7 +815,7 @@ zil_add_block(zilog_t *zilog, const blkptr_t *bp) for (i = 0; i < ndvas; i++) { zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); if (avl_find(t, &zvsearch, &where) == NULL) { - zv = kmem_alloc(sizeof (*zv), KM_PUSHPAGE); + zv = kmem_alloc(sizeof (*zv), KM_SLEEP); zv->zv_vdev = zvsearch.zv_vdev; avl_insert(t, zv, where); } @@ -1192,7 +1192,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize) lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t); - itx = vmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_PUSHPAGE); + itx = vmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP); itx->itx_lr.lrc_txtype = txtype; itx->itx_lr.lrc_reclen = lrsize; itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */ @@ -1359,7 +1359,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) ASSERT(itxg->itxg_sod == 0); itxg->itxg_txg = txg; itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t), - KM_PUSHPAGE); + KM_SLEEP); list_create(&itxs->i_sync_list, sizeof (itx_t), offsetof(itx_t, itx_node)); @@ -1380,7 +1380,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx) ian = avl_find(t, &foid, &where); if (ian == NULL) { ian = kmem_alloc(sizeof (itx_async_node_t), - KM_PUSHPAGE); + KM_SLEEP); list_create(&ian->ia_list, sizeof (itx_t), offsetof(itx_t, itx_node)); ian->ia_foid = foid; @@ -1798,7 +1798,7 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys) zilog_t *zilog; int i; - zilog = kmem_zalloc(sizeof (zilog_t), KM_PUSHPAGE); + zilog = kmem_zalloc(sizeof (zilog_t), KM_SLEEP); zilog->zl_header = zh_phys; zilog->zl_os = os; @@ -2204,7 +2204,7 @@ zil_replay(objset_t *os, void *arg, zil_replay_func_t replay_func[TX_MAX_TYPE]) zr.zr_replay = replay_func; zr.zr_arg = arg; zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); - zr.zr_lr = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_PUSHPAGE); + zr.zr_lr = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP); /* * Wait for in-progress removes to sync before starting 
replay. diff --git a/module/zfs/zio.c b/module/zfs/zio.c index d7c8458f4..b4e19067f 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -312,7 +312,7 @@ static void zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize, zio_transform_func_t *transform) { - zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_PUSHPAGE); + zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP); zt->zt_orig_data = zio->io_data; zt->zt_orig_size = zio->io_size; @@ -427,7 +427,7 @@ zio_unique_parent(zio_t *cio) void zio_add_child(zio_t *pio, zio_t *cio) { - zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_PUSHPAGE); + zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP); int w; /* @@ -551,7 +551,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp, ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER)); ASSERT(vd || stage == ZIO_STAGE_OPEN); - zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE); + zio = kmem_cache_alloc(zio_cache, KM_SLEEP); if (vd != NULL) zio->io_child_type = ZIO_CHILD_VDEV; @@ -1757,7 +1757,7 @@ zio_gang_node_alloc(zio_gang_node_t **gnpp) ASSERT(*gnpp == NULL); - gn = kmem_zalloc(sizeof (*gn), KM_PUSHPAGE); + gn = kmem_zalloc(sizeof (*gn), KM_SLEEP); gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE); *gnpp = gn; diff --git a/module/zfs/zpl_xattr.c b/module/zfs/zpl_xattr.c index 526c3f9e6..6a74b8190 100644 --- a/module/zfs/zpl_xattr.c +++ b/module/zfs/zpl_xattr.c @@ -824,7 +824,7 @@ zpl_get_acl(struct inode *ip, int type) size = zpl_xattr_get(ip, name, NULL, 0); if (size > 0) { - value = kmem_alloc(size, KM_PUSHPAGE); + value = kmem_alloc(size, KM_SLEEP); size = zpl_xattr_get(ip, name, value, size); } diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index ddaf520a2..6a84c8ad9 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -825,7 +825,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) ASSERT(zio != NULL); ASSERT(size != 0); - zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), 
KM_PUSHPAGE); + zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); zgd->zgd_zilog = zv->zv_zilog; zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER); @@ -1234,7 +1234,7 @@ zvol_alloc(dev_t dev, const char *name) zvol_state_t *zv; int error = 0; - zv = kmem_zalloc(sizeof (zvol_state_t), KM_PUSHPAGE); + zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP); spin_lock_init(&zv->zv_lock); list_link_init(&zv->zv_next); @@ -1314,7 +1314,7 @@ __zvol_snapdev_hidden(const char *name) char *atp; int error = 0; - parent = kmem_alloc(MAXPATHLEN, KM_PUSHPAGE); + parent = kmem_alloc(MAXPATHLEN, KM_SLEEP); (void) strlcpy(parent, name, MAXPATHLEN); if ((atp = strrchr(parent, '@')) != NULL) { @@ -1353,7 +1353,7 @@ __zvol_create_minor(const char *name, boolean_t ignore_snapdev) goto out; } - doi = kmem_alloc(sizeof (dmu_object_info_t), KM_PUSHPAGE); + doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) @@ -1565,7 +1565,7 @@ zvol_rename_minors(const char *oldname, const char *newname) oldnamelen = strlen(oldname); newnamelen = strlen(newname); - name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); + name = kmem_alloc(MAXNAMELEN, KM_SLEEP); mutex_enter(&zvol_state_lock); From 60e1eda929b04445a0ab0451674f83b86c6ec347 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 8 Dec 2014 19:03:50 -0500 Subject: [PATCH 5/7] Add kmem_cache.h include to default context As part of the spl kmem/vmem refactoring the kmem_cache_* functions were split into their own kmem_cache.h header. This was done in part so that kmem_* consumers would not be forced to include the kmem_cache_* functions which mask several Linux SLAB/SLUB functions. Because of this we now must explicitly include kmem_cache.h in zfs_context.h. However, consumers such as Lustre which need access to the KM_FLAGS but not the kmem_cache_* functions can now safely just include kmem.h.
Signed-off-by: Brian Behlendorf --- include/sys/zfs_context.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 2fde3b5fc..3dc54f1d7 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include From 285b29d959d3792e45d75c2ce228552d396b445f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 12 Dec 2014 16:40:21 -0800 Subject: [PATCH 6/7] Revert "Pre-allocate vdev I/O buffers" Commit 86dd0fd added preallocated I/O buffers. This is no longer required after the recent kmem changes designed to make our memory allocation interfaces behave more like those found on Illumos. A deadlock in this situation is no longer possible. However, these allocations still have the potential to be expensive. So a potential future optimization might be to perform them with KM_NOSLEEP so that they either succeed or fail quickly. Either case is acceptable here because we can safely abort the aggregation. Signed-off-by: Brian Behlendorf --- include/sys/vdev_impl.h | 7 ------- include/sys/zio.h | 2 -- module/zfs/vdev_queue.c | 43 ++--------------------------------------- module/zfs/zio.c | 22 --------------------- 4 files changed, 2 insertions(+), 72 deletions(-) diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index c0c8f5287..a8dc9510e 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -50,7 +50,6 @@ extern "C" { * Forward declarations that lots of things need.
*/ typedef struct vdev_queue vdev_queue_t; -typedef struct vdev_io vdev_io_t; typedef struct vdev_cache vdev_cache_t; typedef struct vdev_cache_entry vdev_cache_entry_t; @@ -117,16 +116,10 @@ struct vdev_queue { uint64_t vq_last_offset; hrtime_t vq_io_complete_ts; /* time last i/o completed */ hrtime_t vq_io_delta_ts; - list_t vq_io_list; zio_t vq_io_search; /* used as local for stack reduction */ kmutex_t vq_lock; }; -struct vdev_io { - char vi_buffer[SPA_MAXBLOCKSIZE]; /* Must be first */ - list_node_t vi_node; -}; - /* * Virtual device descriptor */ diff --git a/include/sys/zio.h b/include/sys/zio.h index e10e965e6..18e7a40a3 100644 --- a/include/sys/zio.h +++ b/include/sys/zio.h @@ -522,8 +522,6 @@ extern void *zio_buf_alloc(size_t size); extern void zio_buf_free(void *buf, size_t size); extern void *zio_data_buf_alloc(size_t size); extern void zio_data_buf_free(void *buf, size_t size); -extern void *zio_vdev_alloc(void); -extern void zio_vdev_free(void *buf); extern void zio_resubmit_stage_async(void *); diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c index 34e4420da..3fa4219f2 100644 --- a/module/zfs/vdev_queue.c +++ b/module/zfs/vdev_queue.c @@ -328,9 +328,7 @@ void vdev_queue_init(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; - int max_active_sum; zio_priority_t p; - int i; mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); vq->vq_vdev = vd; @@ -352,39 +350,18 @@ vdev_queue_init(vdev_t *vd) vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); } - - /* - * A list of buffers which can be used for aggregate I/O, this - * avoids the need to allocate them on demand when memory is low. 
- */ - list_create(&vq->vq_io_list, sizeof (vdev_io_t), - offsetof(vdev_io_t, vi_node)); - - max_active_sum = zfs_vdev_sync_read_max_active + - zfs_vdev_sync_write_max_active + zfs_vdev_async_read_max_active + - zfs_vdev_async_write_max_active + zfs_vdev_scrub_max_active; - for (i = 0; i < max_active_sum; i++) - list_insert_tail(&vq->vq_io_list, zio_vdev_alloc()); } void vdev_queue_fini(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; - vdev_io_t *vi; zio_priority_t p; for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) avl_destroy(&vq->vq_class[p].vqc_queued_tree); avl_destroy(&vq->vq_active_tree); - while ((vi = list_head(&vq->vq_io_list)) != NULL) { - list_remove(&vq->vq_io_list, vi); - zio_vdev_free(vi); - } - - list_destroy(&vq->vq_io_list); - mutex_destroy(&vq->vq_lock); } @@ -468,9 +445,6 @@ vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) static void vdev_queue_agg_io_done(zio_t *aio) { - vdev_queue_t *vq = &aio->io_vd->vdev_queue; - vdev_io_t *vi = aio->io_data; - if (aio->io_type == ZIO_TYPE_READ) { zio_t *pio; while ((pio = zio_walk_parents(aio)) != NULL) { @@ -479,9 +453,7 @@ vdev_queue_agg_io_done(zio_t *aio) } } - mutex_enter(&vq->vq_lock); - list_insert_tail(&vq->vq_io_list, vi); - mutex_exit(&vq->vq_lock); + zio_buf_free(aio->io_data, aio->io_size); } /* @@ -496,7 +468,6 @@ vdev_queue_agg_io_done(zio_t *aio) static zio_t * vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) { - vdev_io_t *vi; zio_t *first, *last, *aio, *dio, *mandatory, *nio; uint64_t maxgap = 0; uint64_t size; @@ -529,12 +500,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) if (zio->io_type == ZIO_TYPE_READ) maxgap = zfs_vdev_read_gap_limit; - vi = list_head(&vq->vq_io_list); - if (vi == NULL) { - vi = zio_vdev_alloc(); - list_insert_head(&vq->vq_io_list, vi); - } - /* * We can aggregate I/Os that are sufficiently adjacent and of * the same flavor, as expressed by the AGG_INHERIT flags. 
@@ -622,13 +587,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) if (first == last) return (NULL); - ASSERT(vi != NULL); - size = IO_SPAN(first, last); ASSERT3U(size, <=, zfs_vdev_aggregation_limit); aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, - vi, size, first->io_type, zio->io_priority, + zio_buf_alloc(size), size, first->io_type, zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); aio->io_timestamp = first->io_timestamp; @@ -655,8 +618,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) zio_execute(dio); } while (dio != last); - list_remove(&vq->vq_io_list, vi); - return (aio); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index b4e19067f..6fa23d2fc 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -55,7 +55,6 @@ const char *zio_type_name[ZIO_TYPES] = { */ kmem_cache_t *zio_cache; kmem_cache_t *zio_link_cache; -kmem_cache_t *zio_vdev_cache; kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; int zio_bulk_flags = 0; @@ -132,8 +131,6 @@ zio_init(void) zio_cons, zio_dest, NULL, NULL, NULL, 0); zio_link_cache = kmem_cache_create("zio_link_cache", sizeof (zio_link_t), 0, NULL, NULL, NULL, NULL, NULL, 0); - zio_vdev_cache = kmem_cache_create("zio_vdev_cache", sizeof (vdev_io_t), - PAGESIZE, NULL, NULL, NULL, NULL, NULL, 0); /* * For small buffers, we want a cache for each multiple of @@ -218,7 +215,6 @@ zio_fini(void) zio_data_buf_cache[c] = NULL; } - kmem_cache_destroy(zio_vdev_cache); kmem_cache_destroy(zio_link_cache); kmem_cache_destroy(zio_cache); @@ -285,24 +281,6 @@ zio_data_buf_free(void *buf, size_t size) kmem_cache_free(zio_data_buf_cache[c], buf); } -/* - * Dedicated I/O buffers to ensure that memory fragmentation never prevents - * or significantly delays the issuing of a zio. These buffers are used - * to aggregate I/O and could be used for raidz stripes. 
- */ -void * -zio_vdev_alloc(void) -{ - return (kmem_cache_alloc(zio_vdev_cache, KM_PUSHPAGE)); -} - -void -zio_vdev_free(void *buf) -{ - kmem_cache_free(zio_vdev_cache, buf); - -} - /* * ========================================================================== * Push and pop I/O transform buffers From 81971b137ada2097ed73a4364cb896a99d71f578 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 16 Dec 2014 11:44:24 -0800 Subject: [PATCH 7/7] Revert "SA spill block cache" The SA spill_cache was originally introduced to avoid the need to perform large kmem or vmem allocations. Instead a small dedicated cache of preallocated SA buffers was kept. This solution was viable while the maximum block size was limited to 128K. But with the planned increase of the maximum block size to 16M callers need to migrate to zio_buf_alloc(). However, they should be aware this interface is expected to change again once the zio buffers are fully backed by scatter-gather lists. Alternately, if the callers know these buffers will never be large or be infrequently accessed they may kmem_alloc() or vmem_alloc() the needed temporary space. This change has the additional benefit of bringing the code back in line with the upstream Illumos source.
Signed-off-by: Brian Behlendorf --- include/sys/sa.h | 2 -- module/zfs/sa.c | 27 ++++----------------------- module/zfs/zfs_sa.c | 8 ++++---- module/zfs/zio.c | 4 ++++ 4 files changed, 12 insertions(+), 29 deletions(-) diff --git a/include/sys/sa.h b/include/sys/sa.h index b8db0c130..7b5b03a56 100644 --- a/include/sys/sa.h +++ b/include/sys/sa.h @@ -150,8 +150,6 @@ int sa_replace_all_by_template_locked(sa_handle_t *, sa_bulk_attr_t *, boolean_t sa_enabled(objset_t *); void sa_cache_init(void); void sa_cache_fini(void); -void *sa_spill_alloc(int); -void sa_spill_free(void *); int sa_set_sa_object(objset_t *, uint64_t); int sa_hdrsize(void *); void sa_handle_lock(sa_handle_t *); diff --git a/module/zfs/sa.c b/module/zfs/sa.c index ea68e40a2..1263d37ab 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -202,7 +202,6 @@ sa_attr_type_t sa_dummy_zpl_layout[] = { 0 }; static int sa_legacy_attr_count = 16; static kmem_cache_t *sa_cache = NULL; -static kmem_cache_t *spill_cache = NULL; /*ARGSUSED*/ static int @@ -234,8 +233,6 @@ sa_cache_init(void) sa_cache = kmem_cache_create("sa_cache", sizeof (sa_handle_t), 0, sa_cache_constructor, sa_cache_destructor, NULL, NULL, NULL, 0); - spill_cache = kmem_cache_create("spill_cache", - SPA_MAXBLOCKSIZE, 0, NULL, NULL, NULL, NULL, NULL, 0); } void @@ -243,21 +240,6 @@ sa_cache_fini(void) { if (sa_cache) kmem_cache_destroy(sa_cache); - - if (spill_cache) - kmem_cache_destroy(spill_cache); -} - -void * -sa_spill_alloc(int flags) -{ - return (kmem_cache_alloc(spill_cache, flags)); -} - -void -sa_spill_free(void *obj) -{ - kmem_cache_free(spill_cache, obj); } static int @@ -1672,6 +1654,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, void *old_data[2]; int bonus_attr_count = 0; int bonus_data_size = 0; + int spill_data_size = 0; int spill_attr_count = 0; int error; uint16_t length; @@ -1701,8 +1684,8 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, /* Bring spill buffer online if it isn't currently */ if 
((error = sa_get_spill(hdl)) == 0) { - ASSERT3U(hdl->sa_spill->db_size, <=, SPA_MAXBLOCKSIZE); - old_data[1] = sa_spill_alloc(KM_SLEEP); + spill_data_size = hdl->sa_spill->db_size; + old_data[1] = zio_buf_alloc(spill_data_size); bcopy(hdl->sa_spill->db_data, old_data[1], hdl->sa_spill->db_size); spill_attr_count = @@ -1787,7 +1770,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, if (old_data[0]) kmem_free(old_data[0], bonus_data_size); if (old_data[1]) - sa_spill_free(old_data[1]); + zio_buf_free(old_data[1], spill_data_size); kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); return (error); @@ -2077,8 +2060,6 @@ EXPORT_SYMBOL(sa_replace_all_by_template_locked); EXPORT_SYMBOL(sa_enabled); EXPORT_SYMBOL(sa_cache_init); EXPORT_SYMBOL(sa_cache_fini); -EXPORT_SYMBOL(sa_spill_alloc); -EXPORT_SYMBOL(sa_spill_free); EXPORT_SYMBOL(sa_set_sa_object); EXPORT_SYMBOL(sa_hdrsize); EXPORT_SYMBOL(sa_handle_lock); diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index ebe92bb3a..257ab4254 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -205,13 +205,13 @@ zfs_sa_get_xattr(znode_t *zp) return (error); } - obj = sa_spill_alloc(KM_SLEEP); + obj = zio_buf_alloc(size); error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size); if (error == 0) error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP); - sa_spill_free(obj); + zio_buf_free(obj, size); return (error); } @@ -233,7 +233,7 @@ zfs_sa_set_xattr(znode_t *zp) if (error) goto out; - obj = sa_spill_alloc(KM_SLEEP); + obj = zio_buf_alloc(size); error = nvlist_pack(zp->z_xattr_cached, &obj, &size, NV_ENCODE_XDR, KM_SLEEP); @@ -256,7 +256,7 @@ zfs_sa_set_xattr(znode_t *zp) dmu_tx_commit(tx); } out_free: - sa_spill_free(obj); + zio_buf_free(obj, size); out: return (error); } diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 6fa23d2fc..7dcb42006 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -3393,6 +3393,10 @@ EXPORT_SYMBOL(zio_handle_fault_injection); 
EXPORT_SYMBOL(zio_handle_device_injection); EXPORT_SYMBOL(zio_handle_label_injection); EXPORT_SYMBOL(zio_type_name); +EXPORT_SYMBOL(zio_buf_alloc); +EXPORT_SYMBOL(zio_data_buf_alloc); +EXPORT_SYMBOL(zio_buf_free); +EXPORT_SYMBOL(zio_data_buf_free); module_param(zio_bulk_flags, int, 0644); MODULE_PARM_DESC(zio_bulk_flags, "Additional flags to pass to bulk buffers");