mirror_ubuntu-kernels/drivers/accel/ivpu/ivpu_mmu_context.c

552 lines
15 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2023 Intel Corporation
*/
#include <linux/bitfield.h>
#include <linux/highmem.h>
#include <linux/set_memory.h>
#include <drm/drm_cache.h>
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
#define IVPU_MMU_VPU_ADDRESS_MASK GENMASK(47, 12)
#define IVPU_MMU_PGD_INDEX_MASK GENMASK(47, 39)
#define IVPU_MMU_PUD_INDEX_MASK GENMASK(38, 30)
#define IVPU_MMU_PMD_INDEX_MASK GENMASK(29, 21)
#define IVPU_MMU_PTE_INDEX_MASK GENMASK(20, 12)
#define IVPU_MMU_ENTRY_FLAGS_MASK (BIT(52) | GENMASK(11, 0))
#define IVPU_MMU_ENTRY_FLAG_CONT BIT(52)
#define IVPU_MMU_ENTRY_FLAG_NG BIT(11)
#define IVPU_MMU_ENTRY_FLAG_AF BIT(10)
#define IVPU_MMU_ENTRY_FLAG_USER BIT(6)
#define IVPU_MMU_ENTRY_FLAG_LLC_COHERENT BIT(2)
#define IVPU_MMU_ENTRY_FLAG_TYPE_PAGE BIT(1)
#define IVPU_MMU_ENTRY_FLAG_VALID BIT(0)
#define IVPU_MMU_PAGE_SIZE SZ_4K
#define IVPU_MMU_CONT_PAGES_SIZE (IVPU_MMU_PAGE_SIZE * 16)
#define IVPU_MMU_PTE_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PAGE_SIZE)
#define IVPU_MMU_PMD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PTE_MAP_SIZE)
#define IVPU_MMU_PUD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PMD_MAP_SIZE)
#define IVPU_MMU_PGD_MAP_SIZE (IVPU_MMU_PGTABLE_ENTRIES * IVPU_MMU_PUD_MAP_SIZE)
#define IVPU_MMU_PGTABLE_SIZE (IVPU_MMU_PGTABLE_ENTRIES * sizeof(u64))
#define IVPU_MMU_DUMMY_ADDRESS 0xdeadb000
#define IVPU_MMU_ENTRY_VALID (IVPU_MMU_ENTRY_FLAG_TYPE_PAGE | IVPU_MMU_ENTRY_FLAG_VALID)
#define IVPU_MMU_ENTRY_INVALID (IVPU_MMU_DUMMY_ADDRESS & ~IVPU_MMU_ENTRY_FLAGS_MASK)
#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
{
dma_addr_t dma_addr;
struct page *page;
void *cpu;
page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
if (!page)
return NULL;
set_pages_array_wc(&page, 1);
dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
if (dma_mapping_error(vdev->drm.dev, dma_addr))
goto err_free_page;
cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
if (!cpu)
goto err_dma_unmap_page;
*dma = dma_addr;
return cpu;
err_dma_unmap_page:
dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
err_free_page:
put_page(page);
return NULL;
}
static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
{
struct page *page;
if (cpu_addr) {
page = vmalloc_to_page(cpu_addr);
vunmap(cpu_addr);
dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
DMA_BIDIRECTIONAL);
set_pages_array_wb(&page, 1);
put_page(page);
}
}
static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
dma_addr_t pgd_dma;
pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
if (!pgtable->pgd_dma_ptr)
return -ENOMEM;
pgtable->pgd_dma = pgd_dma;
return 0;
}
static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
int pgd_idx, pud_idx, pmd_idx;
dma_addr_t pud_dma, pmd_dma, pte_dma;
u64 *pud_dma_ptr, *pmd_dma_ptr, *pte_dma_ptr;
for (pgd_idx = 0; pgd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pgd_idx) {
pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
pud_dma = pgtable->pgd_dma_ptr[pgd_idx];
if (!pud_dma_ptr)
continue;
for (pud_idx = 0; pud_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pud_idx) {
pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
pmd_dma = pgtable->pud_ptrs[pgd_idx][pud_idx];
if (!pmd_dma_ptr)
continue;
for (pmd_idx = 0; pmd_idx < IVPU_MMU_PGTABLE_ENTRIES; ++pmd_idx) {
pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma);
}
kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
}
kfree(pgtable->pmd_ptrs[pgd_idx]);
kfree(pgtable->pte_ptrs[pgd_idx]);
ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
}
ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
}
static u64*
ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx)
{
u64 *pud_dma_ptr = pgtable->pud_ptrs[pgd_idx];
dma_addr_t pud_dma;
if (pud_dma_ptr)
return pud_dma_ptr;
pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
if (!pud_dma_ptr)
return NULL;
drm_WARN_ON(&vdev->drm, pgtable->pmd_ptrs[pgd_idx]);
pgtable->pmd_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
if (!pgtable->pmd_ptrs[pgd_idx])
goto err_free_pud_dma_ptr;
drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx]);
pgtable->pte_ptrs[pgd_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
if (!pgtable->pte_ptrs[pgd_idx])
goto err_free_pmd_ptrs;
pgtable->pud_ptrs[pgd_idx] = pud_dma_ptr;
pgtable->pgd_dma_ptr[pgd_idx] = pud_dma | IVPU_MMU_ENTRY_VALID;
return pud_dma_ptr;
err_free_pmd_ptrs:
kfree(pgtable->pmd_ptrs[pgd_idx]);
err_free_pud_dma_ptr:
ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
return NULL;
}
static u64*
ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, int pgd_idx,
int pud_idx)
{
u64 *pmd_dma_ptr = pgtable->pmd_ptrs[pgd_idx][pud_idx];
dma_addr_t pmd_dma;
if (pmd_dma_ptr)
return pmd_dma_ptr;
pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
if (!pmd_dma_ptr)
return NULL;
drm_WARN_ON(&vdev->drm, pgtable->pte_ptrs[pgd_idx][pud_idx]);
pgtable->pte_ptrs[pgd_idx][pud_idx] = kzalloc(IVPU_MMU_PGTABLE_SIZE, GFP_KERNEL);
if (!pgtable->pte_ptrs[pgd_idx][pud_idx])
goto err_free_pmd_dma_ptr;
pgtable->pmd_ptrs[pgd_idx][pud_idx] = pmd_dma_ptr;
pgtable->pud_ptrs[pgd_idx][pud_idx] = pmd_dma | IVPU_MMU_ENTRY_VALID;
return pmd_dma_ptr;
err_free_pmd_dma_ptr:
ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
return NULL;
}
static u64*
ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
int pgd_idx, int pud_idx, int pmd_idx)
{
u64 *pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
dma_addr_t pte_dma;
if (pte_dma_ptr)
return pte_dma_ptr;
pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
if (!pte_dma_ptr)
return NULL;
pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma_ptr;
pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx] = pte_dma | IVPU_MMU_ENTRY_VALID;
return pte_dma_ptr;
}
static int
ivpu_mmu_context_map_page(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, dma_addr_t dma_addr, u64 prot)
{
u64 *pte;
int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
/* Allocate PUD - second level page table if needed */
if (!ivpu_mmu_ensure_pud(vdev, &ctx->pgtable, pgd_idx))
return -ENOMEM;
/* Allocate PMD - third level page table if needed */
if (!ivpu_mmu_ensure_pmd(vdev, &ctx->pgtable, pgd_idx, pud_idx))
return -ENOMEM;
/* Allocate PTE - fourth level page table if needed */
pte = ivpu_mmu_ensure_pte(vdev, &ctx->pgtable, pgd_idx, pud_idx, pmd_idx);
if (!pte)
return -ENOMEM;
/* Update PTE */
pte[pte_idx] = dma_addr | prot;
return 0;
}
static int
ivpu_mmu_context_map_cont_64k(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr,
dma_addr_t dma_addr, u64 prot)
{
size_t size = IVPU_MMU_CONT_PAGES_SIZE;
drm_WARN_ON(&vdev->drm, !IS_ALIGNED(vpu_addr, size));
drm_WARN_ON(&vdev->drm, !IS_ALIGNED(dma_addr, size));
prot |= IVPU_MMU_ENTRY_FLAG_CONT;
while (size) {
int ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
if (ret)
return ret;
size -= IVPU_MMU_PAGE_SIZE;
vpu_addr += IVPU_MMU_PAGE_SIZE;
dma_addr += IVPU_MMU_PAGE_SIZE;
}
return 0;
}
static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_addr)
{
int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
int pte_idx = FIELD_GET(IVPU_MMU_PTE_INDEX_MASK, vpu_addr);
/* Update PTE with dummy physical address and clear flags */
ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
}
static int
ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
{
int map_size;
int ret;
while (size) {
if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE &&
IS_ALIGNED(vpu_addr | dma_addr, IVPU_MMU_CONT_PAGES_SIZE)) {
ret = ivpu_mmu_context_map_cont_64k(vdev, ctx, vpu_addr, dma_addr, prot);
map_size = IVPU_MMU_CONT_PAGES_SIZE;
} else {
ret = ivpu_mmu_context_map_page(vdev, ctx, vpu_addr, dma_addr, prot);
map_size = IVPU_MMU_PAGE_SIZE;
}
if (ret)
return ret;
vpu_addr += map_size;
dma_addr += map_size;
size -= map_size;
}
return 0;
}
static void ivpu_mmu_context_unmap_pages(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
{
while (size) {
ivpu_mmu_context_unmap_page(ctx, vpu_addr);
vpu_addr += IVPU_MMU_PAGE_SIZE;
size -= IVPU_MMU_PAGE_SIZE;
}
}
int
ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent)
{
struct scatterlist *sg;
int ret;
u64 prot;
u64 i;
if (drm_WARN_ON(&vdev->drm, !ctx))
return -EINVAL;
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
return -EINVAL;
if (vpu_addr & ~IVPU_MMU_VPU_ADDRESS_MASK)
return -EINVAL;
prot = IVPU_MMU_ENTRY_MAPPED;
if (llc_coherent)
prot |= IVPU_MMU_ENTRY_FLAG_LLC_COHERENT;
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
ivpu_dbg(vdev, MMU_MAP, "Map ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n",
ctx->id, dma_addr, vpu_addr, size);
ret = ivpu_mmu_context_map_pages(vdev, ctx, vpu_addr, dma_addr, size, prot);
if (ret) {
ivpu_err(vdev, "Failed to map context pages\n");
mutex_unlock(&ctx->lock);
return ret;
}
vpu_addr += size;
}
/* Ensure page table modifications are flushed from wc buffers to memory */
wmb();
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
if (ret)
ivpu_err(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
return ret;
}
void
ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt)
{
struct scatterlist *sg;
int ret;
u64 i;
if (drm_WARN_ON(&vdev->drm, !ctx))
return;
mutex_lock(&ctx->lock);
for_each_sgtable_dma_sg(sgt, sg, i) {
dma_addr_t dma_addr = sg_dma_address(sg) - sg->offset;
size_t size = sg_dma_len(sg) + sg->offset;
ivpu_dbg(vdev, MMU_MAP, "Unmap ctx: %u dma_addr: 0x%llx vpu_addr: 0x%llx size: %lu\n",
ctx->id, dma_addr, vpu_addr, size);
ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
vpu_addr += size;
}
/* Ensure page table modifications are flushed from wc buffers to memory */
wmb();
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
if (ret)
ivpu_warn(vdev, "Failed to invalidate TLB for ctx %u: %d\n", ctx->id, ret);
}
int
ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
u64 size, struct drm_mm_node *node)
{
int ret;
WARN_ON(!range);
mutex_lock(&ctx->lock);
if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
range->start, range->end, DRM_MM_INSERT_BEST);
if (!ret)
goto unlock;
}
ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
range->start, range->end, DRM_MM_INSERT_BEST);
unlock:
mutex_unlock(&ctx->lock);
return ret;
}
void
ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
{
mutex_lock(&ctx->lock);
drm_mm_remove_node(node);
mutex_unlock(&ctx->lock);
}
static int
ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 context_id)
{
u64 start, end;
int ret;
mutex_init(&ctx->lock);
ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
if (ret) {
ivpu_err(vdev, "Failed to initialize pgtable for ctx %u: %d\n", context_id, ret);
return ret;
}
if (!context_id) {
start = vdev->hw->ranges.global.start;
end = vdev->hw->ranges.shave.end;
} else {
start = vdev->hw->ranges.user.start;
end = vdev->hw->ranges.dma.end;
}
drm_mm_init(&ctx->mm, start, end - start);
ctx->id = context_id;
return 0;
}
static void ivpu_mmu_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
if (drm_WARN_ON(&vdev->drm, !ctx->pgtable.pgd_dma_ptr))
return;
mutex_destroy(&ctx->lock);
ivpu_mmu_pgtables_free(vdev, &ctx->pgtable);
drm_mm_takedown(&ctx->mm);
ctx->pgtable.pgd_dma_ptr = NULL;
ctx->pgtable.pgd_dma = 0;
}
int ivpu_mmu_global_context_init(struct ivpu_device *vdev)
{
return ivpu_mmu_context_init(vdev, &vdev->gctx, IVPU_GLOBAL_CONTEXT_MMU_SSID);
}
void ivpu_mmu_global_context_fini(struct ivpu_device *vdev)
{
return ivpu_mmu_context_fini(vdev, &vdev->gctx);
}
int ivpu_mmu_reserved_context_init(struct ivpu_device *vdev)
{
return ivpu_mmu_user_context_init(vdev, &vdev->rctx, IVPU_RESERVED_CONTEXT_MMU_SSID);
}
void ivpu_mmu_reserved_context_fini(struct ivpu_device *vdev)
{
return ivpu_mmu_user_context_fini(vdev, &vdev->rctx);
}
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid)
{
struct ivpu_file_priv *file_priv;
xa_lock(&vdev->context_xa);
file_priv = xa_load(&vdev->context_xa, ssid);
if (file_priv)
file_priv->has_mmu_faults = true;
xa_unlock(&vdev->context_xa);
}
int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u32 ctx_id)
{
int ret;
drm_WARN_ON(&vdev->drm, !ctx_id);
ret = ivpu_mmu_context_init(vdev, ctx, ctx_id);
if (ret) {
ivpu_err(vdev, "Failed to initialize context %u: %d\n", ctx_id, ret);
return ret;
}
ret = ivpu_mmu_set_pgtable(vdev, ctx_id, &ctx->pgtable);
if (ret) {
ivpu_err(vdev, "Failed to set page table for context %u: %d\n", ctx_id, ret);
goto err_context_fini;
}
return 0;
err_context_fini:
ivpu_mmu_context_fini(vdev, ctx);
return ret;
}
void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
drm_WARN_ON(&vdev->drm, !ctx->id);
ivpu_mmu_clear_pgtable(vdev, ctx->id);
ivpu_mmu_context_fini(vdev, ctx);
}