From 118fc3ef07c53a88ea1d4c21142a2b01c4648434 Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Wed, 18 Dec 2019 04:34:52 +0100 Subject: [PATCH] Minor performance fix for NEON RAID-Z The NEON code replicates the SSE code too closely, including a masked 16-bit shift. But NEON, like AltiVec (#9539), has an unsigned 8-bit shift, so use that instead and drop the masking. Reviewed-by: Brian Behlendorf Signed-off-by: Romain Dolbeau Closes #9725 --- module/zfs/vdev_raidz_math_aarch64_neon_common.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/module/zfs/vdev_raidz_math_aarch64_neon_common.h index 5312b9094..92a50b3a0 100644 --- a/module/zfs/vdev_raidz_math_aarch64_neon_common.h +++ b/module/zfs/vdev_raidz_math_aarch64_neon_common.h @@ -479,10 +479,8 @@ typedef struct v { /* upper part */ \ "and v14.16b," VR0(r) ".16b,v15.16b\n" \ "and v13.16b," VR1(r) ".16b,v15.16b\n" \ - "sshr " VR0(r) ".8h," VR0(r) ".8h,#4\n" \ - "sshr " VR1(r) ".8h," VR1(r) ".8h,#4\n" \ - "and " VR0(r) ".16b," VR0(r) ".16b,v15.16b\n" \ - "and " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \ + "ushr " VR0(r) ".16b," VR0(r) ".16b,#4\n" \ + "ushr " VR1(r) ".16b," VR1(r) ".16b,#4\n" \ \ "tbl v12.16b,{v10.16b}," VR0(r) ".16b\n" \ "tbl v10.16b,{v10.16b}," VR1(r) ".16b\n" \