From b844489ec0e35b0a9b3cda5ba72bf29334f81081 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin
Date: Sun, 16 Oct 2022 16:53:22 +0200
Subject: [PATCH] icp: properly fix all RETs in x86_64 Asm code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 43569ee37420 ("Fix objtool: missing int3 after ret warning")
replaced all `ret`s in the x86 asm code with a macro, following what
the Linux kernel does in order to enable SLS. That was done by copying
the upstream macro definitions, and it fixed the objtool complaints of
the time.

Since then, several more mitigations have been introduced, including
Rethunk. It requires each return to be a jump to one of the return
thunks in order to work, so the RET macro was changed once again. And
since the ZFS code copied the definition instead of using the mainline
one, that jump is currently missing. Objtool reminds about it from
time to time (Clang 16, CONFIG_RETHUNK=y):

fs/zfs/lua/zlua.o: warning: objtool: setjmp+0x25: 'naked' return found in RETHUNK build
fs/zfs/lua/zlua.o: warning: objtool: longjmp+0x27: 'naked' return found in RETHUNK build

Do it the following way:

* if we're building under Linux, unconditionally include
  <linux/linkage.h> in the related files. It has been present in the
  x86 sources since even pre-2.6 times, so it doesn't need any
  conftests;
* then, if the RET macro is available, use it directly, so that we
  always have the version matching the kernel we're building against;
* if there's no such macro, define it as a simple `ret`, as it was in
  pre-SLS times.

This ensures we always have an up-to-date definition with no need to
update it manually, and at the same time it is safe for the whole
variety of kernels the ZFS module supports.

Then, there are a couple more "naked" rets left in the code. They are
defined as:

	.byte	0xf3,0xc3

which is in fact just:

	rep ret

`rep ret` instead of plain `ret` was used to mitigate a branch-predictor
performance issue on some old AMD processors and most likely makes no
sense today. Anyway, address those rets as well, so that they are
protected by Rethunk and SLS: include <sys/asm_linkage.h>, which now
always has a RET definition, and replace those constructs with just
RET. This wipes the last couple of places with unpatched rets objtool
has been complaining about.
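For reference, the kernel-side definition the include picks up looks
roughly like the following on RETHUNK-era kernels (a simplified sketch
of arch/x86/include/asm/linkage.h; the exact guards vary between
kernel versions):

	/* simplified: what RET expands to in recent kernels' asm code */
	#ifdef CONFIG_RETHUNK
	#define RET	jmp __x86_return_thunk
	#elif defined(CONFIG_SLS)
	#define RET	ret; int3
	#else
	#define RET	ret
	#endif

so including <linux/linkage.h> automatically provides whichever return
sequence the kernel we're building against expects.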
Reviewed-by: Attila Fülöp
Reviewed-by: Tino Reichardt
Reviewed-by: Richard Yao
Reviewed-by: Brian Behlendorf
Signed-off-by: Alexander Lobakin
Closes #14035
---
 module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S | 13 ++++++++-----
 module/icp/asm-x86_64/modes/ghash-x86_64.S     |  9 ++++++---
 module/icp/include/sys/ia32/asm_linkage.h      |  9 ++++++---
 module/lua/setjmp/setjmp_x86_64.S              | 15 ++++++++-------
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
index 70e419c2e..7414b3540 100644
--- a/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
+++ b/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
@@ -47,6 +47,9 @@
 #if defined(__x86_64__) && defined(HAVE_AVX) && \
     defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
 
+#define _ASM
+#include <sys/asm_linkage.h>
+
 .extern gcm_avx_can_use_movbe
 
 .text
@@ -363,7 +366,7 @@ _aesni_ctr32_ghash_6x:
 	vpxor	16+8(%rsp),%xmm8,%xmm8
 	vpxor	%xmm4,%xmm8,%xmm8
 
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	_aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
 #endif /* ifdef HAVE_MOVBE */
@@ -691,7 +694,7 @@ _aesni_ctr32_ghash_no_movbe_6x:
 	vpxor	16+8(%rsp),%xmm8,%xmm8
 	vpxor	%xmm4,%xmm8,%xmm8
 
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	_aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x
 
@@ -810,7 +813,7 @@ aesni_gcm_decrypt:
 .cfi_def_cfa_register	%rsp
 .Lgcm_dec_abort:
 	movq	%r10,%rax
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	aesni_gcm_decrypt,.-aesni_gcm_decrypt
 .type	_aesni_ctr32_6x,@function
@@ -880,7 +883,7 @@ _aesni_ctr32_6x:
 	vmovups	%xmm14,80(%rsi)
 	leaq	96(%rsi),%rsi
 
-	.byte	0xf3,0xc3
+	RET
 .align	32
 .Lhandle_ctr32_2:
 	vpshufb	%xmm0,%xmm1,%xmm6
@@ -1186,7 +1189,7 @@ aesni_gcm_encrypt:
 .cfi_def_cfa_register	%rsp
 .Lgcm_enc_abort:
 	movq	%r10,%rax
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	aesni_gcm_encrypt,.-aesni_gcm_encrypt
 
diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S
index 90cc36b43..77a3ce185 100644
--- a/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -97,6 +97,9 @@
 #if defined(__x86_64__) && defined(HAVE_AVX) && \
     defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
 
+#define _ASM
+#include <sys/asm_linkage.h>
+
 .text
 
 .globl	gcm_gmult_clmul
@@ -149,7 +152,7 @@ gcm_gmult_clmul:
 	pxor	%xmm1,%xmm0
 .byte	102,15,56,0,197
 	movdqu	%xmm0,(%rdi)
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	gcm_gmult_clmul,.-gcm_gmult_clmul
 
@@ -262,7 +265,7 @@ gcm_init_htab_avx:
 	vmovdqu	%xmm5,-16(%rdi)
 
 	vzeroupper
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	gcm_init_htab_avx,.-gcm_init_htab_avx
 
@@ -649,7 +652,7 @@ gcm_ghash_avx:
 	vpshufb	%xmm13,%xmm10,%xmm10
 	vmovdqu	%xmm10,(%rdi)
 	vzeroupper
-	.byte	0xf3,0xc3
+	RET
 .cfi_endproc
 .size	gcm_ghash_avx,.-gcm_ghash_avx
 .align	64
diff --git a/module/icp/include/sys/ia32/asm_linkage.h b/module/icp/include/sys/ia32/asm_linkage.h
index 58964c5d4..f0aa2dc18 100644
--- a/module/icp/include/sys/ia32/asm_linkage.h
+++ b/module/icp/include/sys/ia32/asm_linkage.h
@@ -30,9 +30,11 @@
 #include <sys/stack.h>
 #include <sys/trap.h>
 
-#if defined(__linux__) && defined(CONFIG_SLS)
-#define	RET	ret; int3
-#else
+#if defined(_KERNEL) && defined(__linux__)
+#include <linux/linkage.h>
+#endif
+
+#ifndef RET
 #define	RET	ret
 #endif
 
@@ -122,6 +124,7 @@ extern "C" {
  * insert the calls to mcount for profiling. ENTRY_NP is identical, but
  * never calls mcount.
  */
+#undef ENTRY
 #define ENTRY(x) \
 	.text; \
 	.align	ASM_ENTRY_ALIGN; \
diff --git a/module/lua/setjmp/setjmp_x86_64.S b/module/lua/setjmp/setjmp_x86_64.S
index fd661d72e..7e13fea05 100644
--- a/module/lua/setjmp/setjmp_x86_64.S
+++ b/module/lua/setjmp/setjmp_x86_64.S
@@ -23,7 +23,15 @@
  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
+#if defined(_KERNEL) && defined(__linux__)
+#include <linux/linkage.h>
+#endif
+#ifndef RET
+#define	RET	ret
+#endif
+
+#undef ENTRY
 #define ENTRY(x) \
 	.text; \
 	.align	8; \
@@ -34,13 +42,6 @@ x:
 
 #define SET_SIZE(x) \
 	.size x, [.-x]
-
-#if defined(__linux__) && defined(CONFIG_SLS)
-#define	RET	ret; int3
-#else
-#define	RET	ret
-#endif
-
 /*
  * Setjmp and longjmp implement non-local gotos using state vectors
  * type label_t.
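As a usage illustration (not part of the patch), a minimal x86_64
routine written against the pattern this change standardizes might
look like the sketch below. The symbol name is hypothetical; ENTRY,
SET_SIZE and RET come from the header above, so under a RETHUNK/SLS
kernel RET expands to the mitigation-aware return sequence:

	#define _ASM
	#include <sys/asm_linkage.h>

	/* hypothetical example: return the first argument unchanged */
	ENTRY(zfs_example_identity)
		movq	%rdi, %rax	/* %rdi = first arg, %rax = return value */
		RET			/* kernel RET when available, plain ret otherwise */
	SET_SIZE(zfs_example_identity)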