365 lines
8.3 KiB
C
365 lines
8.3 KiB
C
|
// SPDX-License-Identifier: GPL-2.0
|
||
|
#include <string.h>
|
||
|
#include <fcntl.h>
|
||
|
#include <dirent.h>
|
||
|
#include <sys/ioctl.h>
|
||
|
#include <linux/userfaultfd.h>
|
||
|
#include <linux/fs.h>
|
||
|
#include <sys/syscall.h>
|
||
|
#include <unistd.h>
|
||
|
#include "../kselftest.h"
|
||
|
#include "vm_util.h"
|
||
|
|
||
|
#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
|
||
|
#define SMAP_FILE_PATH "/proc/self/smaps"
|
||
|
#define MAX_LINE_LENGTH 500
|
||
|
|
||
|
unsigned int __page_size;
|
||
|
unsigned int __page_shift;
|
||
|
|
||
|
uint64_t pagemap_get_entry(int fd, char *start)
|
||
|
{
|
||
|
const unsigned long pfn = (unsigned long)start / getpagesize();
|
||
|
uint64_t entry;
|
||
|
int ret;
|
||
|
|
||
|
ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
|
||
|
if (ret != sizeof(entry))
|
||
|
ksft_exit_fail_msg("reading pagemap failed\n");
|
||
|
return entry;
|
||
|
}
|
||
|
|
||
|
static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
|
||
|
{
|
||
|
struct pm_scan_arg arg;
|
||
|
|
||
|
arg.start = (uintptr_t)start;
|
||
|
arg.end = (uintptr_t)(start + psize());
|
||
|
arg.vec = (uintptr_t)r;
|
||
|
arg.vec_len = 1;
|
||
|
arg.flags = 0;
|
||
|
arg.size = sizeof(struct pm_scan_arg);
|
||
|
arg.max_pages = 0;
|
||
|
arg.category_inverted = 0;
|
||
|
arg.category_mask = 0;
|
||
|
arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
|
||
|
PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
|
||
|
PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
|
||
|
arg.return_mask = arg.category_anyof_mask;
|
||
|
|
||
|
return ioctl(fd, PAGEMAP_SCAN, &arg);
|
||
|
}
|
||
|
|
||
|
static uint64_t pagemap_scan_get_categories(int fd, char *start)
|
||
|
{
|
||
|
struct page_region r;
|
||
|
long ret;
|
||
|
|
||
|
ret = __pagemap_scan_get_categories(fd, start, &r);
|
||
|
if (ret < 0)
|
||
|
ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
|
||
|
if (ret == 0)
|
||
|
return 0;
|
||
|
return r.categories;
|
||
|
}
|
||
|
|
||
|
/* `start` is any valid address. */
|
||
|
static bool pagemap_scan_supported(int fd, char *start)
|
||
|
{
|
||
|
static int supported = -1;
|
||
|
int ret;
|
||
|
|
||
|
if (supported != -1)
|
||
|
return supported;
|
||
|
|
||
|
/* Provide an invalid address in order to trigger EFAULT. */
|
||
|
ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL);
|
||
|
if (ret == 0)
|
||
|
ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");
|
||
|
|
||
|
supported = errno == EFAULT;
|
||
|
|
||
|
return supported;
|
||
|
}
|
||
|
|
||
|
static bool page_entry_is(int fd, char *start, char *desc,
|
||
|
uint64_t pagemap_flags, uint64_t pagescan_flags)
|
||
|
{
|
||
|
bool m = pagemap_get_entry(fd, start) & pagemap_flags;
|
||
|
|
||
|
if (pagemap_scan_supported(fd, start)) {
|
||
|
bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
|
||
|
|
||
|
if (m == s)
|
||
|
return m;
|
||
|
|
||
|
ksft_exit_fail_msg(
|
||
|
"read and ioctl return unmatched results for %s: %d %d", desc, m, s);
|
||
|
}
|
||
|
return m;
|
||
|
}
|
||
|
|
||
|
bool pagemap_is_softdirty(int fd, char *start)
|
||
|
{
|
||
|
return page_entry_is(fd, start, "soft-dirty",
|
||
|
PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
|
||
|
}
|
||
|
|
||
|
bool pagemap_is_swapped(int fd, char *start)
|
||
|
{
|
||
|
return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
|
||
|
}
|
||
|
|
||
|
bool pagemap_is_populated(int fd, char *start)
|
||
|
{
|
||
|
return page_entry_is(fd, start, "populated",
|
||
|
PM_PRESENT | PM_SWAP,
|
||
|
PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
|
||
|
}
|
||
|
|
||
|
unsigned long pagemap_get_pfn(int fd, char *start)
|
||
|
{
|
||
|
uint64_t entry = pagemap_get_entry(fd, start);
|
||
|
|
||
|
/* If present (63th bit), PFN is at bit 0 -- 54. */
|
||
|
if (entry & PM_PRESENT)
|
||
|
return entry & 0x007fffffffffffffull;
|
||
|
return -1ul;
|
||
|
}
|
||
|
|
||
|
void clear_softdirty(void)
|
||
|
{
|
||
|
int ret;
|
||
|
const char *ctrl = "4";
|
||
|
int fd = open("/proc/self/clear_refs", O_WRONLY);
|
||
|
|
||
|
if (fd < 0)
|
||
|
ksft_exit_fail_msg("opening clear_refs failed\n");
|
||
|
ret = write(fd, ctrl, strlen(ctrl));
|
||
|
close(fd);
|
||
|
if (ret != strlen(ctrl))
|
||
|
ksft_exit_fail_msg("writing clear_refs failed\n");
|
||
|
}
|
||
|
|
||
|
bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
|
||
|
{
|
||
|
while (fgets(buf, len, fp)) {
|
||
|
if (!strncmp(buf, pattern, strlen(pattern)))
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
uint64_t read_pmd_pagesize(void)
|
||
|
{
|
||
|
int fd;
|
||
|
char buf[20];
|
||
|
ssize_t num_read;
|
||
|
|
||
|
fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
|
||
|
if (fd == -1)
|
||
|
return 0;
|
||
|
|
||
|
num_read = read(fd, buf, 19);
|
||
|
if (num_read < 1) {
|
||
|
close(fd);
|
||
|
return 0;
|
||
|
}
|
||
|
buf[num_read] = '\0';
|
||
|
close(fd);
|
||
|
|
||
|
return strtoul(buf, NULL, 10);
|
||
|
}
|
||
|
|
||
|
bool __check_huge(void *addr, char *pattern, int nr_hpages,
|
||
|
uint64_t hpage_size)
|
||
|
{
|
||
|
uint64_t thp = -1;
|
||
|
int ret;
|
||
|
FILE *fp;
|
||
|
char buffer[MAX_LINE_LENGTH];
|
||
|
char addr_pattern[MAX_LINE_LENGTH];
|
||
|
|
||
|
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
|
||
|
(unsigned long) addr);
|
||
|
if (ret >= MAX_LINE_LENGTH)
|
||
|
ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
|
||
|
|
||
|
fp = fopen(SMAP_FILE_PATH, "r");
|
||
|
if (!fp)
|
||
|
ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
|
||
|
|
||
|
if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
|
||
|
goto err_out;
|
||
|
|
||
|
/*
|
||
|
* Fetch the pattern in the same block and check the number of
|
||
|
* hugepages.
|
||
|
*/
|
||
|
if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
|
||
|
goto err_out;
|
||
|
|
||
|
snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
|
||
|
|
||
|
if (sscanf(buffer, addr_pattern, &thp) != 1)
|
||
|
ksft_exit_fail_msg("Reading smap error\n");
|
||
|
|
||
|
err_out:
|
||
|
fclose(fp);
|
||
|
return thp == (nr_hpages * (hpage_size >> 10));
|
||
|
}
|
||
|
|
||
|
bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
|
||
|
{
|
||
|
return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
|
||
|
}
|
||
|
|
||
|
bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
|
||
|
{
|
||
|
return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
|
||
|
}
|
||
|
|
||
|
bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
|
||
|
{
|
||
|
return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
|
||
|
}
|
||
|
|
||
|
int64_t allocate_transhuge(void *ptr, int pagemap_fd)
|
||
|
{
|
||
|
uint64_t ent[2];
|
||
|
|
||
|
/* drop pmd */
|
||
|
if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
|
||
|
MAP_FIXED | MAP_ANONYMOUS |
|
||
|
MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
|
||
|
errx(2, "mmap transhuge");
|
||
|
|
||
|
if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
|
||
|
err(2, "MADV_HUGEPAGE");
|
||
|
|
||
|
/* allocate transparent huge page */
|
||
|
*(volatile void **)ptr = ptr;
|
||
|
|
||
|
if (pread(pagemap_fd, ent, sizeof(ent),
|
||
|
(uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
|
||
|
err(2, "read pagemap");
|
||
|
|
||
|
if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
|
||
|
PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
|
||
|
!(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
|
||
|
return PAGEMAP_PFN(ent[0]);
|
||
|
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
unsigned long default_huge_page_size(void)
|
||
|
{
|
||
|
unsigned long hps = 0;
|
||
|
char *line = NULL;
|
||
|
size_t linelen = 0;
|
||
|
FILE *f = fopen("/proc/meminfo", "r");
|
||
|
|
||
|
if (!f)
|
||
|
return 0;
|
||
|
while (getline(&line, &linelen, f) > 0) {
|
||
|
if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
|
||
|
hps <<= 10;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
free(line);
|
||
|
fclose(f);
|
||
|
return hps;
|
||
|
}
|
||
|
|
||
|
int detect_hugetlb_page_sizes(size_t sizes[], int max)
|
||
|
{
|
||
|
DIR *dir = opendir("/sys/kernel/mm/hugepages/");
|
||
|
int count = 0;
|
||
|
|
||
|
if (!dir)
|
||
|
return 0;
|
||
|
|
||
|
while (count < max) {
|
||
|
struct dirent *entry = readdir(dir);
|
||
|
size_t kb;
|
||
|
|
||
|
if (!entry)
|
||
|
break;
|
||
|
if (entry->d_type != DT_DIR)
|
||
|
continue;
|
||
|
if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
|
||
|
continue;
|
||
|
sizes[count++] = kb * 1024;
|
||
|
ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
|
||
|
kb);
|
||
|
}
|
||
|
closedir(dir);
|
||
|
return count;
|
||
|
}
|
||
|
|
||
|
/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
|
||
|
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
|
||
|
bool miss, bool wp, bool minor, uint64_t *ioctls)
|
||
|
{
|
||
|
struct uffdio_register uffdio_register = { 0 };
|
||
|
uint64_t mode = 0;
|
||
|
int ret = 0;
|
||
|
|
||
|
if (miss)
|
||
|
mode |= UFFDIO_REGISTER_MODE_MISSING;
|
||
|
if (wp)
|
||
|
mode |= UFFDIO_REGISTER_MODE_WP;
|
||
|
if (minor)
|
||
|
mode |= UFFDIO_REGISTER_MODE_MINOR;
|
||
|
|
||
|
uffdio_register.range.start = (unsigned long)addr;
|
||
|
uffdio_register.range.len = len;
|
||
|
uffdio_register.mode = mode;
|
||
|
|
||
|
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
|
||
|
ret = -errno;
|
||
|
else if (ioctls)
|
||
|
*ioctls = uffdio_register.ioctls;
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
int uffd_register(int uffd, void *addr, uint64_t len,
|
||
|
bool miss, bool wp, bool minor)
|
||
|
{
|
||
|
return uffd_register_with_ioctls(uffd, addr, len,
|
||
|
miss, wp, minor, NULL);
|
||
|
}
|
||
|
|
||
|
int uffd_unregister(int uffd, void *addr, uint64_t len)
|
||
|
{
|
||
|
struct uffdio_range range = { .start = (uintptr_t)addr, .len = len };
|
||
|
int ret = 0;
|
||
|
|
||
|
if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
|
||
|
ret = -errno;
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
unsigned long get_free_hugepages(void)
|
||
|
{
|
||
|
unsigned long fhp = 0;
|
||
|
char *line = NULL;
|
||
|
size_t linelen = 0;
|
||
|
FILE *f = fopen("/proc/meminfo", "r");
|
||
|
|
||
|
if (!f)
|
||
|
return fhp;
|
||
|
while (getline(&line, &linelen, f) > 0) {
|
||
|
if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
free(line);
|
||
|
fclose(f);
|
||
|
return fhp;
|
||
|
}
|