Add PPC cpu feature tests for FreeBSD and Linux

Add needed cpu feature tests for powerpc architecture.

Overview:
zfs_altivec_available() - needed by RAID-Z
zfs_vsx_available()     - needed by BLAKE3
zfs_isa207_available()  - needed by SHA2

Part 1 - Userspace
- use getauxval() for Linux and elf_aux_info() for FreeBSD
- including <sys/auxv.h> directly fails because of duplicate definitions
- so we define the needed functions and constants ourselves

Part 2 - Kernel space FreeBSD
- use exported cpu_features of <powerpc/cpu.h>

Part 3 - Kernel space Linux
- use cpu_has_feature() function of <asm/cpufeature.h>

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Ryan Moeller <ryan@iXsystems.com>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13725
This commit is contained in:
Tino Reichardt 2022-09-07 20:33:59 +02:00 committed by Brian Behlendorf
parent eeca9d27d7
commit 48cf170d5a
6 changed files with 204 additions and 117 deletions

View File

@ -50,6 +50,7 @@ noinst_HEADERS = \
%D%/spl/sys/sid.h \ %D%/spl/sys/sid.h \
%D%/spl/sys/sig.h \ %D%/spl/sys/sig.h \
%D%/spl/sys/simd.h \ %D%/spl/sys/simd.h \
%D%/spl/sys/simd_powerpc.h \
%D%/spl/sys/simd_x86.h \ %D%/spl/sys/simd_x86.h \
%D%/spl/sys/spl_condvar.h \ %D%/spl/sys/spl_condvar.h \
%D%/spl/sys/string.h \ %D%/spl/sys/string.h \

View File

@ -26,13 +26,16 @@
* $FreeBSD$ * $FreeBSD$
*/ */
#ifndef _FREEBSD_SIMD_H #ifndef _FREEBSD_SIMD_H
#define _FREEBSD_SIMD_H #define _FREEBSD_SIMD_H
#if defined(__amd64__) || defined(__i386__) #if defined(__amd64__) || defined(__i386__)
#include <sys/simd_x86.h> #include <sys/simd_x86.h>
#else
#elif defined(__powerpc__)
#include <sys/simd_powerpc.h>
#else
#define kfpu_allowed() 0 #define kfpu_allowed() 0
#define kfpu_initialize(tsk) do {} while (0) #define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0) #define kfpu_begin() do {} while (0)
@ -40,4 +43,5 @@
#define kfpu_init() (0) #define kfpu_init() (0)
#define kfpu_fini() do {} while (0) #define kfpu_fini() do {} while (0)
#endif #endif
#endif #endif

View File

@ -0,0 +1,90 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (C) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
/*
* USER API:
*
* Kernel fpu methods:
* kfpu_allowed()
* kfpu_begin()
* kfpu_end()
* kfpu_init()
* kfpu_fini()
*
* SIMD support:
*
* Following functions should be called to determine whether CPU feature
* is supported. All functions are usable in kernel and user space.
* If a SIMD algorithm is using more than one instruction set
* all relevant feature test functions should be called.
*
* Supported features:
* zfs_altivec_available()
* zfs_vsx_available()
* zfs_isa207_available()
*/
#ifndef _FREEBSD_SIMD_POWERPC_H
#define _FREEBSD_SIMD_POWERPC_H
#include <sys/types.h>
#include <sys/cdefs.h>
#include <machine/pcb.h>
#include <powerpc/cpu.h>
#define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0)
#define kfpu_init() (0)
#define kfpu_fini() do {} while (0)
/*
 * Report whether the CPU advertises AltiVec.
 *
 * The answer is taken from the PPC_FEATURE_HAS_ALTIVEC bit of the
 * cpu_features word; the result is true exactly when that bit is set.
 */
static inline boolean_t
zfs_altivec_available(void)
{
	const boolean_t altivec_present =
	    (cpu_features & PPC_FEATURE_HAS_ALTIVEC) != 0;

	return (altivec_present);
}
/*
 * Report whether the CPU advertises VSX.
 *
 * The answer is taken from the PPC_FEATURE_HAS_VSX bit of the
 * cpu_features word; the result is true exactly when that bit is set.
 */
static inline boolean_t
zfs_vsx_available(void)
{
	const boolean_t vsx_present =
	    (cpu_features & PPC_FEATURE_HAS_VSX) != 0;

	return (vsx_present);
}
/*
 * Report whether POWER ISA 2.07 is available (needed by SHA2).
 *
 * The answer is taken from the PPC_FEATURE2_ARCH_2_07 bit of the
 * cpu_features2 word; the result is true exactly when that bit is set.
 */
static inline boolean_t
zfs_isa207_available(void)
{
	const boolean_t isa207_present =
	    (cpu_features2 & PPC_FEATURE2_ARCH_2_07) != 0;

	return (isa207_present);
}

View File

@ -77,7 +77,7 @@ __simd_state_enabled(const uint64_t state)
boolean_t has_osxsave; boolean_t has_osxsave;
uint64_t xcr0; uint64_t xcr0;
has_osxsave = !!(cpu_feature2 & CPUID2_OSXSAVE); has_osxsave = (cpu_feature2 & CPUID2_OSXSAVE) != 0;
if (!has_osxsave) if (!has_osxsave)
return (B_FALSE); return (B_FALSE);
@ -99,7 +99,7 @@ __simd_state_enabled(const uint64_t state)
static inline boolean_t static inline boolean_t
zfs_sse_available(void) zfs_sse_available(void)
{ {
return (!!(cpu_feature & CPUID_SSE)); return ((cpu_feature & CPUID_SSE) != 0);
} }
/* /*
@ -108,7 +108,7 @@ zfs_sse_available(void)
static inline boolean_t static inline boolean_t
zfs_sse2_available(void) zfs_sse2_available(void)
{ {
return (!!(cpu_feature & CPUID_SSE2)); return ((cpu_feature & CPUID_SSE2) != 0);
} }
/* /*
@ -117,7 +117,7 @@ zfs_sse2_available(void)
static inline boolean_t static inline boolean_t
zfs_sse3_available(void) zfs_sse3_available(void)
{ {
return (!!(cpu_feature2 & CPUID2_SSE3)); return ((cpu_feature2 & CPUID2_SSE3) != 0);
} }
/* /*
@ -126,7 +126,7 @@ zfs_sse3_available(void)
static inline boolean_t static inline boolean_t
zfs_ssse3_available(void) zfs_ssse3_available(void)
{ {
return (!!(cpu_feature2 & CPUID2_SSSE3)); return ((cpu_feature2 & CPUID2_SSSE3) != 0);
} }
/* /*
@ -135,7 +135,7 @@ zfs_ssse3_available(void)
static inline boolean_t static inline boolean_t
zfs_sse4_1_available(void) zfs_sse4_1_available(void)
{ {
return (!!(cpu_feature2 & CPUID2_SSE41)); return ((cpu_feature2 & CPUID2_SSE41) != 0);
} }
/* /*
@ -144,7 +144,7 @@ zfs_sse4_1_available(void)
static inline boolean_t static inline boolean_t
zfs_sse4_2_available(void) zfs_sse4_2_available(void)
{ {
return (!!(cpu_feature2 & CPUID2_SSE42)); return ((cpu_feature2 & CPUID2_SSE42) != 0);
} }
/* /*
@ -155,7 +155,7 @@ zfs_avx_available(void)
{ {
boolean_t has_avx; boolean_t has_avx;
has_avx = !!(cpu_feature2 & CPUID2_AVX); has_avx = (cpu_feature2 & CPUID2_AVX) != 0;
return (has_avx && __ymm_enabled()); return (has_avx && __ymm_enabled());
} }
@ -168,7 +168,7 @@ zfs_avx2_available(void)
{ {
boolean_t has_avx2; boolean_t has_avx2;
has_avx2 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX2); has_avx2 = (cpu_stdext_feature & CPUID_STDEXT_AVX2) != 0;
return (has_avx2 && __ymm_enabled()); return (has_avx2 && __ymm_enabled());
} }
@ -196,7 +196,7 @@ zfs_avx512f_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F); has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -207,8 +207,8 @@ zfs_avx512cd_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512CD); (cpu_stdext_feature & CPUID_STDEXT_AVX512CD) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -219,8 +219,8 @@ zfs_avx512er_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512CD); (cpu_stdext_feature & CPUID_STDEXT_AVX512CD) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -231,8 +231,8 @@ zfs_avx512pf_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512PF); (cpu_stdext_feature & CPUID_STDEXT_AVX512PF) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -243,7 +243,7 @@ zfs_avx512bw_available(void)
{ {
boolean_t has_avx512 = B_FALSE; boolean_t has_avx512 = B_FALSE;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512BW); has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512BW) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -254,8 +254,8 @@ zfs_avx512dq_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512DQ); (cpu_stdext_feature & CPUID_STDEXT_AVX512DQ) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -266,8 +266,8 @@ zfs_avx512vl_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512VL); (cpu_stdext_feature & CPUID_STDEXT_AVX512VL) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -278,8 +278,8 @@ zfs_avx512ifma_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_AVX512IFMA); (cpu_stdext_feature & CPUID_STDEXT_AVX512IFMA) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }
@ -290,8 +290,8 @@ zfs_avx512vbmi_available(void)
{ {
boolean_t has_avx512; boolean_t has_avx512;
has_avx512 = !!(cpu_stdext_feature & CPUID_STDEXT_AVX512F) && has_avx512 = (cpu_stdext_feature & CPUID_STDEXT_AVX512F) != 0 &&
!!(cpu_stdext_feature & CPUID_STDEXT_BMI1); (cpu_stdext_feature & CPUID_STDEXT_BMI1) != 0;
return (has_avx512 && __zmm_enabled()); return (has_avx512 && __zmm_enabled());
} }

View File

@ -21,6 +21,7 @@
/* /*
* Copyright (C) 2019 Romain Dolbeau * Copyright (C) 2019 Romain Dolbeau
* <romain.dolbeau@european-processor-initiative.eu> * <romain.dolbeau@european-processor-initiative.eu>
* Copyright (C) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/ */
/* /*
@ -41,7 +42,9 @@
* all relevant feature test functions should be called. * all relevant feature test functions should be called.
* *
* Supported features: * Supported features:
* zfs_altivec_available() * zfs_altivec_available()
* zfs_vsx_available()
* zfs_isa207_available()
*/ */
#ifndef _LINUX_SIMD_POWERPC_H #ifndef _LINUX_SIMD_POWERPC_H
@ -57,73 +60,65 @@
#include <sys/types.h> #include <sys/types.h>
#include <linux/version.h> #include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
#include <asm/cpufeature.h>
#else
#include <asm/cputable.h>
#endif
#define kfpu_allowed() 1 #define kfpu_allowed() 1
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
#define kfpu_end() \
{ \
disable_kernel_vsx(); \
disable_kernel_altivec(); \
preempt_enable(); \
}
#define kfpu_begin() \ #define kfpu_begin() \
{ \ { \
preempt_disable(); \ preempt_disable(); \
enable_kernel_altivec(); \ enable_kernel_altivec(); \
enable_kernel_vsx(); \ enable_kernel_vsx(); \
enable_kernel_spe(); \
}
#define kfpu_end() \
{ \
disable_kernel_spe(); \
disable_kernel_vsx(); \
disable_kernel_altivec(); \
preempt_enable(); \
} }
#else #else
/* seems that before 4.5 no-one bothered */ /* seems that before 4.5 no-one bothered */
#define kfpu_begin() #define kfpu_begin()
#define kfpu_end() preempt_enable() #define kfpu_end() preempt_enable()
#endif #endif
#define kfpu_init() 0 #define kfpu_init() 0
#define kfpu_fini() ((void) 0) #define kfpu_fini() ((void) 0)
static inline boolean_t
zfs_vsx_available(void)
{
boolean_t res;
#if defined(__powerpc64__)
u64 msr;
#else
u32 msr;
#endif
kfpu_begin();
__asm volatile("mfmsr %0" : "=r"(msr));
res = (msr & 0x800000) != 0;
kfpu_end();
return (res);
}
/* /*
* Check if AltiVec instruction set is available * Check if AltiVec instruction set is available
*/ */
static inline boolean_t static inline boolean_t
zfs_altivec_available(void) zfs_altivec_available(void)
{ {
boolean_t res; return (cpu_has_feature(CPU_FTR_ALTIVEC));
/* suggested by macallan at netbsd dot org */
#if defined(__powerpc64__)
u64 msr;
#else
u32 msr;
#endif
kfpu_begin();
__asm volatile("mfmsr %0" : "=r"(msr));
/*
* 64 bits -> need to check bit 38
* Power ISA Version 3.0B
* p944
* 32 bits -> Need to check bit 6
* AltiVec Technology Programming Environments Manual
* p49 (2-9)
* They are the same, as ppc counts 'backward' ...
*/
res = (msr & 0x2000000) != 0;
kfpu_end();
return (res);
} }
/*
 * Report whether VSX is available.
 *
 * The result is whatever cpu_has_feature() returns for CPU_FTR_VSX.
 */
static inline boolean_t
zfs_vsx_available(void)
{
	const boolean_t vsx_present = cpu_has_feature(CPU_FTR_VSX);

	return (vsx_present);
}
/*
 * Report whether POWER ISA 2.07 is available (needed by SHA2).
 *
 * The result is whatever cpu_has_feature() returns for CPU_FTR_ARCH_207S.
 */
static inline boolean_t
zfs_isa207_available(void)
{
	const boolean_t isa207_present = cpu_has_feature(CPU_FTR_ARCH_207S);

	return (isa207_present);
}
#endif /* defined(__powerpc) */ #endif /* defined(__powerpc) */
#endif /* _LINUX_SIMD_POWERPC_H */ #endif /* _LINUX_SIMD_POWERPC_H */

View File

@ -20,8 +20,8 @@
* CDDL HEADER END * CDDL HEADER END
*/ */
/* /*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms. * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/ */
#ifndef _LIBSPL_SYS_SIMD_H #ifndef _LIBSPL_SYS_SIMD_H
@ -452,63 +452,60 @@ zfs_avx512vbmi_available(void)
#elif defined(__powerpc__) #elif defined(__powerpc__)
/*
 * Including <sys/auxv.h> clashes with AT_UID and others, so the few
 * auxiliary-vector definitions needed here are declared by hand.
 */
#if defined(__FreeBSD__)
#define	AT_HWCAP	25	/* CPU feature flags. */
#define	AT_HWCAP2	26	/* CPU feature flags 2. */
extern int elf_aux_info(int aux, void *buf, int buflen);

/*
 * getauxval() replacement built on top of elf_aux_info().
 *
 * key - auxiliary vector entry to look up (AT_HWCAP, AT_HWCAP2)
 *
 * Returns the value stored for the key, or 0 when elf_aux_info()
 * reports an error.
 *
 * No extern prototype of getauxval() may be visible in this branch:
 * giving one identifier both external and internal linkage is invalid C,
 * so the prototype is declared in the Linux branch only.
 */
static inline unsigned long
getauxval(unsigned long key)
{
	unsigned long val = 0UL;

	if (elf_aux_info((int)key, &val, sizeof (val)) != 0)
		return (0UL);

	return (val);
}
#elif defined(__linux__)
#define	AT_HWCAP	16	/* CPU feature flags. */
#define	AT_HWCAP2	26	/* CPU feature flags 2. */
extern unsigned long getauxval(unsigned long type);
#endif
#define kfpu_allowed() 1 #define kfpu_allowed() 1
#define kfpu_initialize(tsk) do {} while (0) #define kfpu_initialize(tsk) do {} while (0)
#define kfpu_begin() do {} while (0) #define kfpu_begin() do {} while (0)
#define kfpu_end() do {} while (0) #define kfpu_end() do {} while (0)
/* #define PPC_FEATURE_HAS_ALTIVEC 0x10000000
* Check if AltiVec instruction set is available
* No easy way beyond 'altivec works' :-(
*/
#include <signal.h>
#include <setjmp.h>
#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
static jmp_buf env;
static void sigillhandler(int x)
{
(void) x;
longjmp(env, 1);
}
#endif
static inline boolean_t static inline boolean_t
zfs_altivec_available(void) zfs_altivec_available(void)
{ {
boolean_t has_altivec = B_FALSE; unsigned long hwcap = getauxval(AT_HWCAP);
#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
sighandler_t savesig; return (hwcap & PPC_FEATURE_HAS_ALTIVEC);
savesig = signal(SIGILL, sigillhandler);
if (setjmp(env)) {
signal(SIGILL, savesig);
has_altivec = B_FALSE;
} else {
__asm__ __volatile__("vor 0,0,0\n" : : : "v0");
signal(SIGILL, savesig);
has_altivec = B_TRUE;
}
#endif
return (has_altivec);
} }
#define PPC_FEATURE_HAS_VSX 0x00000080
static inline boolean_t static inline boolean_t
zfs_vsx_available(void) zfs_vsx_available(void)
{ {
boolean_t has_vsx = B_FALSE; unsigned long hwcap = getauxval(AT_HWCAP);
#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
sighandler_t savesig; return (hwcap & PPC_FEATURE_HAS_VSX);
savesig = signal(SIGILL, sigillhandler);
if (setjmp(env)) {
signal(SIGILL, savesig);
has_vsx = B_FALSE;
} else {
__asm__ __volatile__("xssubsp 0,0,0\n");
signal(SIGILL, savesig);
has_vsx = B_TRUE;
}
#endif
return (has_vsx);
} }
#define PPC_FEATURE2_ARCH_2_07 0x80000000
static inline boolean_t
zfs_isa207_available(void)
{
unsigned long hwcap = getauxval(AT_HWCAP);
unsigned long hwcap2 = getauxval(AT_HWCAP2);
return ((hwcap & PPC_FEATURE_HAS_VSX) &&
(hwcap2 & PPC_FEATURE2_ARCH_2_07));
}
#else #else
#define kfpu_allowed() 0 #define kfpu_allowed() 0