ABD raidz NEON support

Port NEON implementation of RAID-Z functions to ABD.

Signed-off-by: Romain Dolbeau <romain.dolbeau@atos.net>
This commit is contained in:
Romain Dolbeau 2016-11-22 08:38:34 +01:00 committed by Brian Behlendorf
parent 65d71d4212
commit 88cc2352ea
4 changed files with 234 additions and 99 deletions

View File

@ -64,8 +64,8 @@ const raidz_impl_ops_t *raidz_all_maths[] = {
// &vdev_raidz_avx512bw_impl,
#endif
#if defined(__aarch64__)
// &vdev_raidz_aarch64_neon_impl,
// &vdev_raidz_aarch64_neonx2_impl,
&vdev_raidz_aarch64_neon_impl,
&vdev_raidz_aarch64_neonx2_impl,
#endif
};

View File

@ -25,10 +25,36 @@
#include <sys/isa_defs.h>
#include <sys/types.h>
#if 0 // defined(__aarch64__)
#if defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
#define SYN_STRIDE 4
#define ZERO_STRIDE 4
#define ZERO_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define ZERO_D 0, 1, 2, 3
#define COPY_STRIDE 4
#define COPY_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define COPY_D 0, 1, 2, 3
#define ADD_STRIDE 4
#define ADD_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define ADD_D 0, 1, 2, 3
#define MUL_STRIDE 4
#define MUL_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define MUL_D 0, 1, 2, 3
#define GEN_P_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
@ -39,15 +65,12 @@
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_10_11() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define GEN_PQ_STRIDE 4
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_P 4, 5, 6, 7
#define GEN_PQ_Q 8, 9, 10, 11
#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
@ -55,69 +78,115 @@
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
#define GEN_PQR_STRIDE 2
#define GEN_PQR_D 0, 1
#define GEN_PQR_P 2, 3
#define GEN_PQR_Q 4, 5
#define GEN_PQR_R 6, 7
#define GEN_PQR_STRIDE 4
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_C 4, 5, 6, 7
#define REC_P_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define REC_P_STRIDE 4
#define REC_P_X 0, 1, 2, 3
#define REC_Q_DEFINE() \
#define SYN_Q_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define REC_Q_STRIDE 4
#define REC_Q_X 0, 1, 2, 3
#define SYN_Q_STRIDE 4
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7
#define REC_R_DEFINE() \
#define SYN_R_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define REC_R_STRIDE 4
#define REC_R_X 0, 1, 2, 3
#define SYN_R_STRIDE 4
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7
#define SYN_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_PQ_STRIDE 4
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
#define REC_PQ_STRIDE 2
#define REC_PQ_X 0, 1
#define REC_PQ_Y 2, 3
#define REC_PQ_D 4, 5
#define REC_PQ_T 4, 5
#define REC_PR_DEFINE() REC_PQ_DEFINE()
#define SYN_PR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_PR_STRIDE 4
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
#define REC_PR_STRIDE 2
#define REC_PR_X 0, 1
#define REC_PR_Y 2, 3
#define REC_PR_D 4, 5
#define REC_PR_T 4, 5
#define REC_QR_DEFINE() REC_PQ_DEFINE()
#define SYN_QR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_QR_STRIDE 4
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
#define REC_QR_STRIDE 2
#define REC_QR_X 0, 1
#define REC_QR_Y 2, 3
#define REC_QR_D 4, 5
#define REC_QR_T 4, 5
#define SYN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_PQR_STRIDE 4
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
@ -125,7 +194,6 @@
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
#define REC_PQR_D 6, 7
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
@ -154,7 +222,7 @@ const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
#endif /* defined(__aarch64__) */
#if 0 // defined(__aarch64__)
#if defined(__aarch64__)
const uint8_t
__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {

View File

@ -125,7 +125,7 @@
#define ASM_BUG() ASSERT(0)
#define OFFSET(ptr, val) (((unsigned char *)ptr)+val)
#define OFFSET(ptr, val) (((unsigned char *)(ptr))+val)
extern const uint8_t gf_clmul_mod_lt[4*256][16];
@ -135,20 +135,6 @@ typedef struct v {
uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;
#define PREFETCHNTA(ptr, offset) \
{ \
__asm( \
"prfm pstl1strm, %[MEM]\n" \
: : [MEM] "Q" (*(ptr + offset))); \
}
#define PREFETCH(ptr, offset) \
{ \
__asm( \
"prfm pldl1keep, %[MEM]\n" \
: : [MEM] "Q" (*(ptr + offset))); \
}
#define XOR_ACC(src, r...) \
{ \
switch (REG_CNT(r)) { \
@ -242,6 +228,19 @@ typedef struct v {
#define ZERO(r...) \
{ \
switch (REG_CNT(r)) { \
case 8: \
__asm( \
"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
"eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
"eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
"eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
"eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n" \
"eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n" \
"eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n" \
"eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n" \
: WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \
break; \
case 4: \
__asm( \
"eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \

View File

@ -24,115 +24,183 @@
#include <sys/isa_defs.h>
#if 0 // defined(__aarch64__)
#if defined(__aarch64__)
#include "vdev_raidz_math_aarch64_neon_common.h"
#define GEN_P_DEFINE() \
#define SYN_STRIDE 4
#define ZERO_STRIDE 8
#define ZERO_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7()
#define GEN_P_STRIDE 8
#define GEN_P_P 0, 1, 2, 3, 4, 5, 6, 7
#define ZERO_D 0, 1, 2, 3, 4, 5, 6, 7
#define GEN_PQ_DEFINE() \
#define COPY_STRIDE 8
#define COPY_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7()
#define COPY_D 0, 1, 2, 3, 4, 5, 6, 7
#define ADD_STRIDE 8
#define ADD_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7()
#define ADD_D 0, 1, 2, 3, 4, 5, 6, 7
#define MUL_STRIDE 4
#define MUL_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define MUL_D 0, 1, 2, 3
#define GEN_P_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define GEN_P_STRIDE 4
#define GEN_P_P 0, 1, 2, 3
#define GEN_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_10_11() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define GEN_PQ_STRIDE 4
#define GEN_PQ_D 0, 1, 2, 3
#define GEN_PQ_P 4, 5, 6, 7
#define GEN_PQ_Q 8, 9, 10, 11
#define GEN_PQ_C 4, 5, 6, 7
#define GEN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_22_23() \
GEN_X_DEFINE_24_27() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define GEN_PQR_STRIDE 4
#define GEN_PQR_D 0, 1, 2, 3
#define GEN_PQR_P 4, 5, 6, 7
#define GEN_PQR_Q 8, 9, 22, 23
#define GEN_PQR_R 24, 25, 26, 27
#define GEN_PQR_C 4, 5, 6, 7
#define REC_P_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_33_36()
#define REC_P_STRIDE 4
#define REC_P_X 0, 1, 2, 3
#define REC_Q_DEFINE() \
#define SYN_Q_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define REC_Q_STRIDE 4
#define REC_Q_X 0, 1, 2, 3
#define SYN_Q_STRIDE 4
#define SYN_Q_D 0, 1, 2, 3
#define SYN_Q_X 4, 5, 6, 7
#define REC_R_DEFINE() \
#define SYN_R_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define REC_R_STRIDE 4
#define REC_R_X 0, 1, 2, 3
#define SYN_R_STRIDE 4
#define SYN_R_D 0, 1, 2, 3
#define SYN_R_X 4, 5, 6, 7
#define SYN_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_PQ_STRIDE 4
#define SYN_PQ_D 0, 1, 2, 3
#define SYN_PQ_X 4, 5, 6, 7
#define REC_PQ_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_22_23() \
GEN_X_DEFINE_33_36()
#define REC_PQ_STRIDE 4
#define REC_PQ_X 0, 1, 2, 3
#define REC_PQ_Y 4, 5, 6, 7
#define REC_PQ_D 8, 9, 22, 23
#define REC_PQ_T 8, 9, 22, 23
#define REC_PR_DEFINE() REC_PQ_DEFINE()
#define SYN_PR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_PR_STRIDE 4
#define SYN_PR_D 0, 1, 2, 3
#define SYN_PR_X 4, 5, 6, 7
#define REC_PR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_22_23() \
GEN_X_DEFINE_33_36()
#define REC_PR_STRIDE 4
#define REC_PR_X 0, 1, 2, 3
#define REC_PR_Y 4, 5, 6, 7
#define REC_PR_D 8, 9, 22, 23
#define REC_PR_T 8, 9, 22, 23
#define REC_QR_DEFINE() REC_PQ_DEFINE()
#define SYN_QR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_QR_STRIDE 4
#define SYN_QR_D 0, 1, 2, 3
#define SYN_QR_X 4, 5, 6, 7
#define REC_QR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_22_23() \
GEN_X_DEFINE_33_36()
#define REC_QR_STRIDE 4
#define REC_QR_X 0, 1, 2, 3
#define REC_QR_Y 4, 5, 6, 7
#define REC_QR_D 8, 9, 22, 23
#define REC_QR_T 8, 9, 22, 23
#define SYN_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_33_36()
#define SYN_PQR_STRIDE 4
#define SYN_PQR_D 0, 1, 2, 3
#define SYN_PQR_X 4, 5, 6, 7
#define REC_PQR_DEFINE() \
GEN_X_DEFINE_0_3() \
GEN_X_DEFINE_4_5() \
GEN_X_DEFINE_6_7() \
GEN_X_DEFINE_8_9() \
GEN_X_DEFINE_16() \
GEN_X_DEFINE_17() \
GEN_X_DEFINE_22_23() \
GEN_X_DEFINE_24_27() \
GEN_X_DEFINE_28_30() \
GEN_X_DEFINE_31() \
GEN_X_DEFINE_32() \
GEN_X_DEFINE_33_36()
#define REC_PQR_STRIDE 4
#define REC_PQR_X 0, 1, 2, 3
#define REC_PQR_Y 4, 5, 6, 7
#define REC_PQR_Z 8, 9, 22, 23
#define REC_PQR_D 24, 25, 26, 27
#define REC_PQR_XS 24, 25, 26, 27
#define REC_PQR_YS 28, 29, 30, 31
#define REC_PQR_STRIDE 2
#define REC_PQR_X 0, 1
#define REC_PQR_Y 2, 3
#define REC_PQR_Z 4, 5
#define REC_PQR_XS 6, 7
#define REC_PQR_YS 8, 9
#include <sys/vdev_raidz_impl.h>
#include "vdev_raidz_math_impl.h"