x86 asm: Replace .align with .balign

The .align directive used to align storage locations is
ambiguous. On some platforms and assemblers it takes a byte count,
on others the argument is interpreted as a shift value. The current
usage expects the first interpretation.

Replace it with the unambiguous .balign directive which always
expects a byte count, regardless of platform and assembler.

Reviewed-by: Jorgen Lundman <lundman@lundman.net>
Reviewed-by: Tino Reichardt <milky-zfs@mcmilk.de>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Signed-off-by: Attila Fülöp <attila@fueloep.org>
Closes #14422
This commit is contained in:
Attila Fülöp
2023-01-23 20:25:21 +01:00
committed by Brian Behlendorf
parent 58ca7b1011
commit 037e4f2536
16 changed files with 63 additions and 63 deletions
+13 -13
View File
@@ -58,7 +58,7 @@
.text
#ifdef HAVE_MOVBE
.align 32
.balign 32
FUNCTION(_aesni_ctr32_ghash_6x)
.cfi_startproc
ENDBR
@@ -75,7 +75,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.balign 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
@@ -287,7 +287,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.balign 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
@@ -309,7 +309,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.balign 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
@@ -374,7 +374,7 @@ FUNCTION(_aesni_ctr32_ghash_6x)
SET_SIZE(_aesni_ctr32_ghash_6x)
#endif /* ifdef HAVE_MOVBE */
.align 32
.balign 32
FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
.cfi_startproc
ENDBR
@@ -391,7 +391,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x_nmb
.align 32
.balign 32
.Loop6x_nmb:
addl $100663296,%ebx
jc .Lhandle_ctr32_nmb
@@ -615,7 +615,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail_nmb
.align 32
.balign 32
.Lhandle_ctr32_nmb:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
@@ -637,7 +637,7 @@ FUNCTION(_aesni_ctr32_ghash_no_movbe_6x)
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32_nmb
.align 32
.balign 32
.Lenc_tail_nmb:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
@@ -818,7 +818,7 @@ ENTRY_ALIGN(aesni_gcm_decrypt, 32)
.cfi_endproc
SET_SIZE(aesni_gcm_decrypt)
.align 32
.balign 32
FUNCTION(_aesni_ctr32_6x)
.cfi_startproc
ENDBR
@@ -843,7 +843,7 @@ FUNCTION(_aesni_ctr32_6x)
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.balign 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
@@ -886,7 +886,7 @@ FUNCTION(_aesni_ctr32_6x)
leaq 96(%rsi),%rsi
RET
.align 32
.balign 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
@@ -1237,7 +1237,7 @@ SET_SIZE(atomic_toggle_boolean_nv)
SECTION_STATIC
.align 64
.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
@@ -1249,7 +1249,7 @@ SECTION_STATIC
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.balign 64
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
+1 -1
View File
@@ -102,7 +102,7 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
// static uint8_t byte_swap16_mask[] = {
// 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 };
.section .rodata
.align XMM_ALIGN
.balign XMM_ALIGN
.Lbyte_swap16_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+7 -7
View File
@@ -188,7 +188,7 @@ ENTRY_ALIGN(gcm_init_htab_avx, 32)
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
.align 32
.balign 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
@@ -386,7 +386,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
subq $0x80,%rcx
jmp .Loop8x_avx
.align 32
.balign 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
@@ -506,7 +506,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
addq $0x80,%rcx
jmp .Ltail_no_xor_avx
.align 32
.balign 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
@@ -610,7 +610,7 @@ ENTRY_ALIGN(gcm_ghash_avx, 32)
subq $0x10,%rcx
jmp .Ltail_avx
.align 32
.balign 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
@@ -658,7 +658,7 @@ SET_SIZE(gcm_ghash_avx)
#endif /* !_WIN32 || _KERNEL */
SECTION_STATIC
.align 64
.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
@@ -667,7 +667,7 @@ SECTION_STATIC
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
.align 64
.balign 64
SET_OBJ(.Lrem_4bit)
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
@@ -710,7 +710,7 @@ SET_OBJ(.Lrem_8bit)
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
.balign 64
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)