/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * $FreeBSD$
 */

/*
 * The original include targets were lost to markup stripping; the headers
 * below are reconstructed from what this file actually uses.
 */
#include <sys/types.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/vnode.h>
#include <sys/zfs_znode.h>
#include <sys/byteorder.h>
#include <sys/lock.h>
#include <sys/vm.h>
#include <vm/vm_map.h>

static void
zfs_freeuio(struct uio *uio)
{
#if __FreeBSD_version > 1500013
	freeuio(uio);
#else
	free(uio, M_IOV);
#endif
}

int
zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
{
	ASSERT3U(zfs_uio_rw(uio), ==, dir);
	return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio)));
}

/*
 * Same as zfs_uiomove(), but doesn't modify the uio structure.
 * Returns in cbytes how many bytes were copied.
 */
int
zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
{
	struct iovec small_iovec[1];
	struct uio small_uio_clone;
	struct uio *uio_clone;
	int error;

	ASSERT3U(zfs_uio_rw(uio), ==, rw);
	if (zfs_uio_iovcnt(uio) == 1) {
		/*
		 * Common single-iovec case: clone onto the stack and
		 * avoid the allocation done by cloneuio().
		 */
		small_uio_clone = *(GET_UIO_STRUCT(uio));
		small_iovec[0] = *(GET_UIO_STRUCT(uio)->uio_iov);
		small_uio_clone.uio_iov = small_iovec;
		uio_clone = &small_uio_clone;
	} else {
		uio_clone = cloneuio(GET_UIO_STRUCT(uio));
	}

	error = vn_io_fault_uiomove(p, n, uio_clone);
	*cbytes = zfs_uio_resid(uio) - uio_clone->uio_resid;
	if (uio_clone != &small_uio_clone)
		zfs_freeuio(uio_clone);
	return (error);
}

/*
 * Drop the next n chars out of *uio.
 */
void
zfs_uioskip(zfs_uio_t *uio, size_t n)
{
	zfs_uio_seg_t segflg;

	/* For full compatibility with illumos. */
	if (n > zfs_uio_resid(uio))
		return;

	/*
	 * Temporarily switch to UIO_NOCOPY so that uiomove() advances
	 * the uio's offset, resid, and iovec state without copying data.
	 */
	segflg = zfs_uio_segflg(uio);
	zfs_uio_segflg(uio) = UIO_NOCOPY;
	zfs_uiomove(NULL, n, zfs_uio_rw(uio), uio);
	zfs_uio_segflg(uio) = segflg;
}

int
zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio)
{
	ASSERT3U(zfs_uio_rw(uio), ==, dir);
	return (vn_io_fault_uiomove(p, n, GET_UIO_STRUCT(uio)));
}
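/*
 * The remainder of this file implements the user-page pinning used by
 * Direct I/O: the pages backing a uio are held in the kernel so the DMU
 * can read from or write to user memory directly, without an intermediate
 * copy through the ARC.
 */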
/*
 * Check if the uio is page-aligned in memory.
 */
boolean_t
zfs_uio_page_aligned(zfs_uio_t *uio)
{
	const struct iovec *iov = GET_UIO_STRUCT(uio)->uio_iov;

	for (int i = zfs_uio_iovcnt(uio); i > 0; iov++, i--) {
		uintptr_t addr = (uintptr_t)iov->iov_base;
		size_t size = iov->iov_len;

		/*
		 * Both the base address and the length of every iovec
		 * must be multiples of PAGE_SIZE.
		 */
		if ((addr & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
			return (B_FALSE);
		}
	}

	return (B_TRUE);
}

static void
zfs_uio_set_pages_to_stable(zfs_uio_t *uio)
{
	ASSERT3P(uio->uio_dio.pages, !=, NULL);
	ASSERT3S(uio->uio_dio.npages, >, 0);

	for (int i = 0; i < uio->uio_dio.npages; i++) {
		vm_page_t page = uio->uio_dio.pages[i];
		ASSERT3P(page, !=, NULL);

		MPASS(page == PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(page)));
		vm_page_busy_acquire(page, VM_ALLOC_SBUSY);
		pmap_remove_write(page);
	}
}

static void
zfs_uio_release_stable_pages(zfs_uio_t *uio)
{
	ASSERT3P(uio->uio_dio.pages, !=, NULL);
	for (int i = 0; i < uio->uio_dio.npages; i++) {
		vm_page_t page = uio->uio_dio.pages[i];

		ASSERT3P(page, !=, NULL);
		vm_page_sunbusy(page);
	}
}

/*
 * A UIO_READ means the user pages will be written to, so they must be
 * held with write access; a UIO_WRITE only requires read access.
 */
static int
zfs_uio_hold_pages(unsigned long start, size_t len, int nr_pages,
    zfs_uio_rw_t rw, vm_page_t *pages)
{
	vm_map_t map;
	vm_prot_t prot;
	int count;

	map = &curthread->td_proc->p_vmspace->vm_map;
	ASSERT3S(len, >, 0);

	prot = rw == UIO_READ ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
	count = vm_fault_quick_hold_pages(map, start, len, prot, pages,
	    nr_pages);

	return (count);
}

void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	ASSERT(uio->uio_extflg & UIO_DIRECT);
	ASSERT3P(uio->uio_dio.pages, !=, NULL);
	ASSERT(zfs_uio_rw(uio) == rw);

	if (rw == UIO_WRITE)
		zfs_uio_release_stable_pages(uio);

	vm_page_unhold_pages(&uio->uio_dio.pages[0],
	    uio->uio_dio.npages);

	kmem_free(uio->uio_dio.pages,
	    uio->uio_dio.npages * sizeof (vm_page_t));
}

static int
zfs_uio_get_user_pages(unsigned long start, int nr_pages, size_t len,
    zfs_uio_rw_t rw, vm_page_t *pages)
{
	int count;

	count = zfs_uio_hold_pages(start, len, nr_pages, rw, pages);

	if (count != nr_pages) {
		/* Partial hold: drop what we got and report failure. */
		if (count > 0)
			vm_page_unhold_pages(pages, count);
		return (0);
	}

	ASSERT3S(count, ==, nr_pages);

	return (count);
}

static int
zfs_uio_iov_step(struct iovec v, zfs_uio_t *uio, int *numpages)
{
	unsigned long addr = (unsigned long)(v.iov_base);
	size_t len = v.iov_len;
	int n = DIV_ROUND_UP(len, PAGE_SIZE);

	int res = zfs_uio_get_user_pages(
	    P2ALIGN_TYPED(addr, PAGE_SIZE, unsigned long), n, len,
	    zfs_uio_rw(uio), &uio->uio_dio.pages[uio->uio_dio.npages]);
	if (res != n)
		return (SET_ERROR(EFAULT));

	ASSERT3U(len, ==, res * PAGE_SIZE);
	*numpages = res;

	return (0);
}

static int
zfs_uio_get_dio_pages_impl(zfs_uio_t *uio)
{
	const struct iovec *iovp = GET_UIO_STRUCT(uio)->uio_iov;
	size_t len = zfs_uio_resid(uio);

	for (int i = 0; i < zfs_uio_iovcnt(uio); i++) {
		struct iovec iov;
		int numpages = 0;

		if (iovp->iov_len == 0) {
			iovp++;
			continue;
		}
		iov.iov_len = MIN(len, iovp->iov_len);
		iov.iov_base = iovp->iov_base;
		int error = zfs_uio_iov_step(iov, uio, &numpages);

		if (error)
			return (error);

		uio->uio_dio.npages += numpages;
		len -= iov.iov_len;
		iovp++;
	}

	ASSERT0(len);

	return (0);
}
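/*
 * Illustrative sizing for the allocation below (assuming 4 KiB pages):
 * a uio with a 1 MiB residual needs DIV_ROUND_UP(1048576, 4096) = 256
 * page pointers, i.e. a single 256 * sizeof (vm_page_t) array.
 */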
/*
 * Hold the user pages backing the uio in the kernel.
 *
 * Returns 0 on success, or an error if the pages could not all be held.
 */
int
zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	int error = 0;
	int npages = DIV_ROUND_UP(zfs_uio_resid(uio), PAGE_SIZE);
	size_t size = npages * sizeof (vm_page_t);

	ASSERT(zfs_uio_rw(uio) == rw);

	uio->uio_dio.pages = kmem_alloc(size, KM_SLEEP);

	error = zfs_uio_get_dio_pages_impl(uio);

	if (error) {
		vm_page_unhold_pages(&uio->uio_dio.pages[0],
		    uio->uio_dio.npages);
		kmem_free(uio->uio_dio.pages, size);
		return (error);
	}

	ASSERT3S(uio->uio_dio.npages, >, 0);

	/*
	 * Since we will be writing the user pages we must make sure that
	 * they are stable. That way the contents of the pages cannot change
	 * while we are doing compression, checksumming, encryption, parity
	 * calculation, or deduplication.
	 */
	if (zfs_uio_rw(uio) == UIO_WRITE)
		zfs_uio_set_pages_to_stable(uio);

	uio->uio_extflg |= UIO_DIRECT;

	return (0);
}
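/*
 * An illustrative (hypothetical) caller sequence, for exposition only;
 * the real Direct I/O setup lives in the ZFS vnode code:
 *
 *	if (zfs_uio_page_aligned(uio)) {
 *		error = zfs_uio_get_dio_pages_alloc(uio, UIO_WRITE);
 *		if (error == 0) {
 *			... issue the I/O against uio->uio_dio.pages ...
 *			zfs_uio_free_dio_pages(uio, UIO_WRITE);
 *		}
 *	}
 */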