mirror of
				https://git.proxmox.com/git/mirror_zfs.git
				synced 2025-10-26 18:05:04 +03:00 
			
		
		
		
	Sponsored-by: https://despairlabs.com/sponsor/ Signed-off-by: Rob Norris <robn@despairlabs.com> Reviewed-by: Tony Hutter <hutter2@llnl.gov> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
		
			
				
	
	
		
			2301 lines
		
	
	
		
			68 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			2301 lines
		
	
	
		
			68 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
// SPDX-License-Identifier: CDDL-1.0
 | 
						|
/*
 | 
						|
 * CDDL HEADER START
 | 
						|
 *
 | 
						|
 * The contents of this file are subject to the terms of the
 | 
						|
 * Common Development and Distribution License (the "License").
 | 
						|
 * You may not use this file except in compliance with the License.
 | 
						|
 *
 | 
						|
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | 
						|
 * or https://opensource.org/licenses/CDDL-1.0.
 | 
						|
 * See the License for the specific language governing permissions
 | 
						|
 * and limitations under the License.
 | 
						|
 *
 | 
						|
 * When distributing Covered Code, include this CDDL HEADER in each
 | 
						|
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | 
						|
 * If applicable, add the following below this CDDL HEADER, with the
 | 
						|
 * fields enclosed by brackets "[]" replaced with your own identifying
 | 
						|
 * information: Portions Copyright [yyyy] [name of copyright owner]
 | 
						|
 *
 | 
						|
 * CDDL HEADER END
 | 
						|
 */
 | 
						|
 | 
						|
/*
 | 
						|
 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
 | 
						|
 * Copyright (c) 2019-2020 Samuel Neves and Matthew Krupcale
 | 
						|
 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 | 
						|
 */
 | 
						|
 | 
						|
#if defined(HAVE_SSE2)
 | 
						|
 | 
						|
#define _ASM
 | 
						|
#include <sys/asm_linkage.h>
 | 
						|
 | 
						|
.intel_syntax noprefix
 | 
						|
 | 
						|
SECTION_TEXT
 | 
						|
 | 
						|
ENTRY_ALIGN(zfs_blake3_hash_many_sse2, 64)
 | 
						|
        ENDBR
 | 
						|
        push    r15
 | 
						|
        push    r14
 | 
						|
        push    r13
 | 
						|
        push    r12
 | 
						|
        push    rbx
 | 
						|
        push    rbp
 | 
						|
        mov     rbp, rsp
 | 
						|
        sub     rsp, 360
 | 
						|
        and     rsp, 0xFFFFFFFFFFFFFFC0
 | 
						|
        neg     r9d
 | 
						|
        movd    xmm0, r9d
 | 
						|
        pshufd  xmm0, xmm0, 0x00
 | 
						|
        movdqa  xmmword ptr [rsp+0x130], xmm0
 | 
						|
        movdqa  xmm1, xmm0
 | 
						|
        pand    xmm1, xmmword ptr [ADD0+rip]
 | 
						|
        pand    xmm0, xmmword ptr [ADD1+rip]
 | 
						|
        movdqa  xmmword ptr [rsp+0x150], xmm0
 | 
						|
        movd    xmm0, r8d
 | 
						|
        pshufd  xmm0, xmm0, 0x00
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        movdqa  xmmword ptr [rsp+0x110], xmm0
 | 
						|
        pxor    xmm0, xmmword ptr [CMP_MSB_MASK+rip]
 | 
						|
        pxor    xmm1, xmmword ptr [CMP_MSB_MASK+rip]
 | 
						|
        pcmpgtd xmm1, xmm0
 | 
						|
        shr     r8, 32
 | 
						|
        movd    xmm2, r8d
 | 
						|
        pshufd  xmm2, xmm2, 0x00
 | 
						|
        psubd   xmm2, xmm1
 | 
						|
        movdqa  xmmword ptr [rsp+0x120], xmm2
 | 
						|
        mov     rbx, qword ptr [rbp+0x50]
 | 
						|
        mov     r15, rdx
 | 
						|
        shl     r15, 6
 | 
						|
        movzx   r13d, byte ptr [rbp+0x38]
 | 
						|
        movzx   r12d, byte ptr [rbp+0x48]
 | 
						|
        cmp     rsi, 4
 | 
						|
        jc      3f
 | 
						|
2:
 | 
						|
        movdqu  xmm3, xmmword ptr [rcx]
 | 
						|
        pshufd  xmm0, xmm3, 0x00
 | 
						|
        pshufd  xmm1, xmm3, 0x55
 | 
						|
        pshufd  xmm2, xmm3, 0xAA
 | 
						|
        pshufd  xmm3, xmm3, 0xFF
 | 
						|
        movdqu  xmm7, xmmword ptr [rcx+0x10]
 | 
						|
        pshufd  xmm4, xmm7, 0x00
 | 
						|
        pshufd  xmm5, xmm7, 0x55
 | 
						|
        pshufd  xmm6, xmm7, 0xAA
 | 
						|
        pshufd  xmm7, xmm7, 0xFF
 | 
						|
        mov     r8, qword ptr [rdi]
 | 
						|
        mov     r9, qword ptr [rdi+0x8]
 | 
						|
        mov     r10, qword ptr [rdi+0x10]
 | 
						|
        mov     r11, qword ptr [rdi+0x18]
 | 
						|
        movzx   eax, byte ptr [rbp+0x40]
 | 
						|
        or      eax, r13d
 | 
						|
        xor     edx, edx
 | 
						|
9:
 | 
						|
        mov     r14d, eax
 | 
						|
        or      eax, r12d
 | 
						|
        add     rdx, 64
 | 
						|
        cmp     rdx, r15
 | 
						|
        cmovne  eax, r14d
 | 
						|
        movdqu  xmm8, xmmword ptr [r8+rdx-0x40]
 | 
						|
        movdqu  xmm9, xmmword ptr [r9+rdx-0x40]
 | 
						|
        movdqu  xmm10, xmmword ptr [r10+rdx-0x40]
 | 
						|
        movdqu  xmm11, xmmword ptr [r11+rdx-0x40]
 | 
						|
        movdqa  xmm12, xmm8
 | 
						|
        punpckldq xmm8, xmm9
 | 
						|
        punpckhdq xmm12, xmm9
 | 
						|
        movdqa  xmm14, xmm10
 | 
						|
        punpckldq xmm10, xmm11
 | 
						|
        punpckhdq xmm14, xmm11
 | 
						|
        movdqa  xmm9, xmm8
 | 
						|
        punpcklqdq xmm8, xmm10
 | 
						|
        punpckhqdq xmm9, xmm10
 | 
						|
        movdqa  xmm13, xmm12
 | 
						|
        punpcklqdq xmm12, xmm14
 | 
						|
        punpckhqdq xmm13, xmm14
 | 
						|
        movdqa  xmmword ptr [rsp], xmm8
 | 
						|
        movdqa  xmmword ptr [rsp+0x10], xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x20], xmm12
 | 
						|
        movdqa  xmmword ptr [rsp+0x30], xmm13
 | 
						|
        movdqu  xmm8, xmmword ptr [r8+rdx-0x30]
 | 
						|
        movdqu  xmm9, xmmword ptr [r9+rdx-0x30]
 | 
						|
        movdqu  xmm10, xmmword ptr [r10+rdx-0x30]
 | 
						|
        movdqu  xmm11, xmmword ptr [r11+rdx-0x30]
 | 
						|
        movdqa  xmm12, xmm8
 | 
						|
        punpckldq xmm8, xmm9
 | 
						|
        punpckhdq xmm12, xmm9
 | 
						|
        movdqa  xmm14, xmm10
 | 
						|
        punpckldq xmm10, xmm11
 | 
						|
        punpckhdq xmm14, xmm11
 | 
						|
        movdqa  xmm9, xmm8
 | 
						|
        punpcklqdq xmm8, xmm10
 | 
						|
        punpckhqdq xmm9, xmm10
 | 
						|
        movdqa  xmm13, xmm12
 | 
						|
        punpcklqdq xmm12, xmm14
 | 
						|
        punpckhqdq xmm13, xmm14
 | 
						|
        movdqa  xmmword ptr [rsp+0x40], xmm8
 | 
						|
        movdqa  xmmword ptr [rsp+0x50], xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x60], xmm12
 | 
						|
        movdqa  xmmword ptr [rsp+0x70], xmm13
 | 
						|
        movdqu  xmm8, xmmword ptr [r8+rdx-0x20]
 | 
						|
        movdqu  xmm9, xmmword ptr [r9+rdx-0x20]
 | 
						|
        movdqu  xmm10, xmmword ptr [r10+rdx-0x20]
 | 
						|
        movdqu  xmm11, xmmword ptr [r11+rdx-0x20]
 | 
						|
        movdqa  xmm12, xmm8
 | 
						|
        punpckldq xmm8, xmm9
 | 
						|
        punpckhdq xmm12, xmm9
 | 
						|
        movdqa  xmm14, xmm10
 | 
						|
        punpckldq xmm10, xmm11
 | 
						|
        punpckhdq xmm14, xmm11
 | 
						|
        movdqa  xmm9, xmm8
 | 
						|
        punpcklqdq xmm8, xmm10
 | 
						|
        punpckhqdq xmm9, xmm10
 | 
						|
        movdqa  xmm13, xmm12
 | 
						|
        punpcklqdq xmm12, xmm14
 | 
						|
        punpckhqdq xmm13, xmm14
 | 
						|
        movdqa  xmmword ptr [rsp+0x80], xmm8
 | 
						|
        movdqa  xmmword ptr [rsp+0x90], xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0xA0], xmm12
 | 
						|
        movdqa  xmmword ptr [rsp+0xB0], xmm13
 | 
						|
        movdqu  xmm8, xmmword ptr [r8+rdx-0x10]
 | 
						|
        movdqu  xmm9, xmmword ptr [r9+rdx-0x10]
 | 
						|
        movdqu  xmm10, xmmword ptr [r10+rdx-0x10]
 | 
						|
        movdqu  xmm11, xmmword ptr [r11+rdx-0x10]
 | 
						|
        movdqa  xmm12, xmm8
 | 
						|
        punpckldq xmm8, xmm9
 | 
						|
        punpckhdq xmm12, xmm9
 | 
						|
        movdqa  xmm14, xmm10
 | 
						|
        punpckldq xmm10, xmm11
 | 
						|
        punpckhdq xmm14, xmm11
 | 
						|
        movdqa  xmm9, xmm8
 | 
						|
        punpcklqdq xmm8, xmm10
 | 
						|
        punpckhqdq xmm9, xmm10
 | 
						|
        movdqa  xmm13, xmm12
 | 
						|
        punpcklqdq xmm12, xmm14
 | 
						|
        punpckhqdq xmm13, xmm14
 | 
						|
        movdqa  xmmword ptr [rsp+0xC0], xmm8
 | 
						|
        movdqa  xmmword ptr [rsp+0xD0], xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0xE0], xmm12
 | 
						|
        movdqa  xmmword ptr [rsp+0xF0], xmm13
 | 
						|
        movdqa  xmm9, xmmword ptr [BLAKE3_IV_1+rip]
 | 
						|
        movdqa  xmm10, xmmword ptr [BLAKE3_IV_2+rip]
 | 
						|
        movdqa  xmm11, xmmword ptr [BLAKE3_IV_3+rip]
 | 
						|
        movdqa  xmm12, xmmword ptr [rsp+0x110]
 | 
						|
        movdqa  xmm13, xmmword ptr [rsp+0x120]
 | 
						|
        movdqa  xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip]
 | 
						|
        movd    xmm15, eax
 | 
						|
        pshufd  xmm15, xmm15, 0x00
 | 
						|
        prefetcht0 [r8+rdx+0x80]
 | 
						|
        prefetcht0 [r9+rdx+0x80]
 | 
						|
        prefetcht0 [r10+rdx+0x80]
 | 
						|
        prefetcht0 [r11+rdx+0x80]
 | 
						|
        paddd   xmm0, xmmword ptr [rsp]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [BLAKE3_IV_0+rip]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xB0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x50]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x10]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x80]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xF0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x90]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x60]
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm1, xmm5
 | 
						|
        paddd   xmm2, xmm6
 | 
						|
        paddd   xmm3, xmm7
 | 
						|
        pxor    xmm12, xmm0
 | 
						|
        pxor    xmm13, xmm1
 | 
						|
        pxor    xmm14, xmm2
 | 
						|
        pxor    xmm15, xmm3
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        paddd   xmm9, xmm13
 | 
						|
        paddd   xmm10, xmm14
 | 
						|
        paddd   xmm11, xmm15
 | 
						|
        pxor    xmm4, xmm8
 | 
						|
        pxor    xmm5, xmm9
 | 
						|
        pxor    xmm6, xmm10
 | 
						|
        pxor    xmm7, xmm11
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xE0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x20]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x30]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0x70]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        pshuflw xmm15, xmm15, 0xB1
 | 
						|
        pshufhw xmm15, xmm15, 0xB1
 | 
						|
        pshuflw xmm12, xmm12, 0xB1
 | 
						|
        pshufhw xmm12, xmm12, 0xB1
 | 
						|
        pshuflw xmm13, xmm13, 0xB1
 | 
						|
        pshufhw xmm13, xmm13, 0xB1
 | 
						|
        pshuflw xmm14, xmm14, 0xB1
 | 
						|
        pshufhw xmm14, xmm14, 0xB1
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        movdqa  xmmword ptr [rsp+0x100], xmm8
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm5, 20
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm6, 20
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm7, 20
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 12
 | 
						|
        pslld   xmm4, 20
 | 
						|
        por     xmm4, xmm8
 | 
						|
        paddd   xmm0, xmmword ptr [rsp+0xA0]
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0xC0]
 | 
						|
        paddd   xmm2, xmmword ptr [rsp+0x40]
 | 
						|
        paddd   xmm3, xmmword ptr [rsp+0xD0]
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm1, xmm6
 | 
						|
        paddd   xmm2, xmm7
 | 
						|
        paddd   xmm3, xmm4
 | 
						|
        pxor    xmm15, xmm0
 | 
						|
        pxor    xmm12, xmm1
 | 
						|
        pxor    xmm13, xmm2
 | 
						|
        pxor    xmm14, xmm3
 | 
						|
        movdqa  xmm8, xmm15
 | 
						|
        psrld   xmm15, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm15, xmm8
 | 
						|
        movdqa  xmm8, xmm12
 | 
						|
        psrld   xmm12, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm12, xmm8
 | 
						|
        movdqa  xmm8, xmm13
 | 
						|
        psrld   xmm13, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm13, xmm8
 | 
						|
        movdqa  xmm8, xmm14
 | 
						|
        psrld   xmm14, 8
 | 
						|
        pslld   xmm8, 24
 | 
						|
        pxor    xmm14, xmm8
 | 
						|
        paddd   xmm10, xmm15
 | 
						|
        paddd   xmm11, xmm12
 | 
						|
        movdqa  xmm8, xmmword ptr [rsp+0x100]
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        paddd   xmm9, xmm14
 | 
						|
        pxor    xmm5, xmm10
 | 
						|
        pxor    xmm6, xmm11
 | 
						|
        pxor    xmm7, xmm8
 | 
						|
        pxor    xmm4, xmm9
 | 
						|
        pxor    xmm0, xmm8
 | 
						|
        pxor    xmm1, xmm9
 | 
						|
        pxor    xmm2, xmm10
 | 
						|
        pxor    xmm3, xmm11
 | 
						|
        movdqa  xmm8, xmm5
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm5, 25
 | 
						|
        por     xmm5, xmm8
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm6, 25
 | 
						|
        por     xmm6, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm7, 25
 | 
						|
        por     xmm7, xmm8
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        psrld   xmm8, 7
 | 
						|
        pslld   xmm4, 25
 | 
						|
        por     xmm4, xmm8
 | 
						|
        pxor    xmm4, xmm12
 | 
						|
        pxor    xmm5, xmm13
 | 
						|
        pxor    xmm6, xmm14
 | 
						|
        pxor    xmm7, xmm15
 | 
						|
        mov     eax, r13d
 | 
						|
        jne     9b
 | 
						|
        movdqa  xmm9, xmm0
 | 
						|
        punpckldq xmm0, xmm1
 | 
						|
        punpckhdq xmm9, xmm1
 | 
						|
        movdqa  xmm11, xmm2
 | 
						|
        punpckldq xmm2, xmm3
 | 
						|
        punpckhdq xmm11, xmm3
 | 
						|
        movdqa  xmm1, xmm0
 | 
						|
        punpcklqdq xmm0, xmm2
 | 
						|
        punpckhqdq xmm1, xmm2
 | 
						|
        movdqa  xmm3, xmm9
 | 
						|
        punpcklqdq xmm9, xmm11
 | 
						|
        punpckhqdq xmm3, xmm11
 | 
						|
        movdqu  xmmword ptr [rbx], xmm0
 | 
						|
        movdqu  xmmword ptr [rbx+0x20], xmm1
 | 
						|
        movdqu  xmmword ptr [rbx+0x40], xmm9
 | 
						|
        movdqu  xmmword ptr [rbx+0x60], xmm3
 | 
						|
        movdqa  xmm9, xmm4
 | 
						|
        punpckldq xmm4, xmm5
 | 
						|
        punpckhdq xmm9, xmm5
 | 
						|
        movdqa  xmm11, xmm6
 | 
						|
        punpckldq xmm6, xmm7
 | 
						|
        punpckhdq xmm11, xmm7
 | 
						|
        movdqa  xmm5, xmm4
 | 
						|
        punpcklqdq xmm4, xmm6
 | 
						|
        punpckhqdq xmm5, xmm6
 | 
						|
        movdqa  xmm7, xmm9
 | 
						|
        punpcklqdq xmm9, xmm11
 | 
						|
        punpckhqdq xmm7, xmm11
 | 
						|
        movdqu  xmmword ptr [rbx+0x10], xmm4
 | 
						|
        movdqu  xmmword ptr [rbx+0x30], xmm5
 | 
						|
        movdqu  xmmword ptr [rbx+0x50], xmm9
 | 
						|
        movdqu  xmmword ptr [rbx+0x70], xmm7
 | 
						|
        movdqa  xmm1, xmmword ptr [rsp+0x110]
 | 
						|
        movdqa  xmm0, xmm1
 | 
						|
        paddd   xmm1, xmmword ptr [rsp+0x150]
 | 
						|
        movdqa  xmmword ptr [rsp+0x110], xmm1
 | 
						|
        pxor    xmm0, xmmword ptr [CMP_MSB_MASK+rip]
 | 
						|
        pxor    xmm1, xmmword ptr [CMP_MSB_MASK+rip]
 | 
						|
        pcmpgtd xmm0, xmm1
 | 
						|
        movdqa  xmm1, xmmword ptr [rsp+0x120]
 | 
						|
        psubd   xmm1, xmm0
 | 
						|
        movdqa  xmmword ptr [rsp+0x120], xmm1
 | 
						|
        add     rbx, 128
 | 
						|
        add     rdi, 32
 | 
						|
        sub     rsi, 4
 | 
						|
        cmp     rsi, 4
 | 
						|
        jnc     2b
 | 
						|
        test    rsi, rsi
 | 
						|
        jnz     3f
 | 
						|
4:
 | 
						|
        mov     rsp, rbp
 | 
						|
        pop     rbp
 | 
						|
        pop     rbx
 | 
						|
        pop     r12
 | 
						|
        pop     r13
 | 
						|
        pop     r14
 | 
						|
        pop     r15
 | 
						|
        RET
 | 
						|
.p2align 5
 | 
						|
3:
 | 
						|
        test    esi, 0x2
 | 
						|
        je      3f
 | 
						|
        movups  xmm0, xmmword ptr [rcx]
 | 
						|
        movups  xmm1, xmmword ptr [rcx+0x10]
 | 
						|
        movaps  xmm8, xmm0
 | 
						|
        movaps  xmm9, xmm1
 | 
						|
        movd    xmm13, dword ptr [rsp+0x110]
 | 
						|
        movd    xmm14, dword ptr [rsp+0x120]
 | 
						|
        punpckldq xmm13, xmm14
 | 
						|
        movaps  xmmword ptr [rsp], xmm13
 | 
						|
        movd    xmm14, dword ptr [rsp+0x114]
 | 
						|
        movd    xmm13, dword ptr [rsp+0x124]
 | 
						|
        punpckldq xmm14, xmm13
 | 
						|
        movaps  xmmword ptr [rsp+0x10], xmm14
 | 
						|
        mov     r8, qword ptr [rdi]
 | 
						|
        mov     r9, qword ptr [rdi+0x8]
 | 
						|
        movzx   eax, byte ptr [rbp+0x40]
 | 
						|
        or      eax, r13d
 | 
						|
        xor     edx, edx
 | 
						|
2:
 | 
						|
        mov     r14d, eax
 | 
						|
        or      eax, r12d
 | 
						|
        add     rdx, 64
 | 
						|
        cmp     rdx, r15
 | 
						|
        cmovne  eax, r14d
 | 
						|
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
 | 
						|
        movaps  xmm10, xmm2
 | 
						|
        movups  xmm4, xmmword ptr [r8+rdx-0x40]
 | 
						|
        movups  xmm5, xmmword ptr [r8+rdx-0x30]
 | 
						|
        movaps  xmm3, xmm4
 | 
						|
        shufps  xmm4, xmm5, 136
 | 
						|
        shufps  xmm3, xmm5, 221
 | 
						|
        movaps  xmm5, xmm3
 | 
						|
        movups  xmm6, xmmword ptr [r8+rdx-0x20]
 | 
						|
        movups  xmm7, xmmword ptr [r8+rdx-0x10]
 | 
						|
        movaps  xmm3, xmm6
 | 
						|
        shufps  xmm6, xmm7, 136
 | 
						|
        pshufd  xmm6, xmm6, 0x93
 | 
						|
        shufps  xmm3, xmm7, 221
 | 
						|
        pshufd  xmm7, xmm3, 0x93
 | 
						|
        movups  xmm12, xmmword ptr [r9+rdx-0x40]
 | 
						|
        movups  xmm13, xmmword ptr [r9+rdx-0x30]
 | 
						|
        movaps  xmm11, xmm12
 | 
						|
        shufps  xmm12, xmm13, 136
 | 
						|
        shufps  xmm11, xmm13, 221
 | 
						|
        movaps  xmm13, xmm11
 | 
						|
        movups  xmm14, xmmword ptr [r9+rdx-0x20]
 | 
						|
        movups  xmm15, xmmword ptr [r9+rdx-0x10]
 | 
						|
        movaps  xmm11, xmm14
 | 
						|
        shufps  xmm14, xmm15, 136
 | 
						|
        pshufd  xmm14, xmm14, 0x93
 | 
						|
        shufps  xmm11, xmm15, 221
 | 
						|
        pshufd  xmm15, xmm11, 0x93
 | 
						|
        shl     rax, 0x20
 | 
						|
        or      rax, 0x40
 | 
						|
        movq    xmm3, rax
 | 
						|
        movdqa  xmmword ptr [rsp+0x20], xmm3
 | 
						|
        movaps  xmm3, xmmword ptr [rsp]
 | 
						|
        movaps  xmm11, xmmword ptr [rsp+0x10]
 | 
						|
        punpcklqdq xmm3, xmmword ptr [rsp+0x20]
 | 
						|
        punpcklqdq xmm11, xmmword ptr [rsp+0x20]
 | 
						|
        mov     al, 7
 | 
						|
9:
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm8, xmm12
 | 
						|
        movaps  xmmword ptr [rsp+0x20], xmm4
 | 
						|
        movaps  xmmword ptr [rsp+0x30], xmm12
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        paddd   xmm8, xmm9
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pxor    xmm11, xmm8
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        pshuflw xmm11, xmm11, 0xB1
 | 
						|
        pshufhw xmm11, xmm11, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        paddd   xmm10, xmm11
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        pxor    xmm9, xmm10
 | 
						|
        movdqa  xmm4, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm4, 12
 | 
						|
        por     xmm1, xmm4
 | 
						|
        movdqa  xmm4, xmm9
 | 
						|
        pslld   xmm9, 20
 | 
						|
        psrld   xmm4, 12
 | 
						|
        por     xmm9, xmm4
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm8, xmm13
 | 
						|
        movaps  xmmword ptr [rsp+0x40], xmm5
 | 
						|
        movaps  xmmword ptr [rsp+0x50], xmm13
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        paddd   xmm8, xmm9
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pxor    xmm11, xmm8
 | 
						|
        movdqa  xmm13, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm13, 24
 | 
						|
        pxor    xmm3, xmm13
 | 
						|
        movdqa  xmm13, xmm11
 | 
						|
        psrld   xmm11, 8
 | 
						|
        pslld   xmm13, 24
 | 
						|
        pxor    xmm11, xmm13
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        paddd   xmm10, xmm11
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        pxor    xmm9, xmm10
 | 
						|
        movdqa  xmm4, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm4, 7
 | 
						|
        por     xmm1, xmm4
 | 
						|
        movdqa  xmm4, xmm9
 | 
						|
        pslld   xmm9, 25
 | 
						|
        psrld   xmm4, 7
 | 
						|
        por     xmm9, xmm4
 | 
						|
        pshufd  xmm0, xmm0, 0x93
 | 
						|
        pshufd  xmm8, xmm8, 0x93
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm11, xmm11, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x39
 | 
						|
        pshufd  xmm10, xmm10, 0x39
 | 
						|
        paddd   xmm0, xmm6
 | 
						|
        paddd   xmm8, xmm14
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        paddd   xmm8, xmm9
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pxor    xmm11, xmm8
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        pshuflw xmm11, xmm11, 0xB1
 | 
						|
        pshufhw xmm11, xmm11, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        paddd   xmm10, xmm11
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        pxor    xmm9, xmm10
 | 
						|
        movdqa  xmm4, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm4, 12
 | 
						|
        por     xmm1, xmm4
 | 
						|
        movdqa  xmm4, xmm9
 | 
						|
        pslld   xmm9, 20
 | 
						|
        psrld   xmm4, 12
 | 
						|
        por     xmm9, xmm4
 | 
						|
        paddd   xmm0, xmm7
 | 
						|
        paddd   xmm8, xmm15
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        paddd   xmm8, xmm9
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pxor    xmm11, xmm8
 | 
						|
        movdqa  xmm13, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm13, 24
 | 
						|
        pxor    xmm3, xmm13
 | 
						|
        movdqa  xmm13, xmm11
 | 
						|
        psrld   xmm11, 8
 | 
						|
        pslld   xmm13, 24
 | 
						|
        pxor    xmm11, xmm13
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        paddd   xmm10, xmm11
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        pxor    xmm9, xmm10
 | 
						|
        movdqa  xmm4, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm4, 7
 | 
						|
        por     xmm1, xmm4
 | 
						|
        movdqa  xmm4, xmm9
 | 
						|
        pslld   xmm9, 25
 | 
						|
        psrld   xmm4, 7
 | 
						|
        por     xmm9, xmm4
 | 
						|
        pshufd  xmm0, xmm0, 0x39
 | 
						|
        pshufd  xmm8, xmm8, 0x39
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm11, xmm11, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x93
 | 
						|
        pshufd  xmm10, xmm10, 0x93
 | 
						|
        dec     al
 | 
						|
        je      9f
 | 
						|
        movdqa  xmm12, xmmword ptr [rsp+0x20]
 | 
						|
        movdqa  xmm5, xmmword ptr [rsp+0x40]
 | 
						|
        pshufd  xmm13, xmm12, 0x0F
 | 
						|
        shufps  xmm12, xmm5, 214
 | 
						|
        pshufd  xmm4, xmm12, 0x39
 | 
						|
        movdqa  xmm12, xmm6
 | 
						|
        shufps  xmm12, xmm7, 250
 | 
						|
        pand    xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip]
 | 
						|
        pand    xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip]
 | 
						|
        por     xmm13, xmm12
 | 
						|
        movdqa  xmmword ptr [rsp+0x20], xmm13
 | 
						|
        movdqa  xmm12, xmm7
 | 
						|
        punpcklqdq xmm12, xmm5
 | 
						|
        movdqa  xmm13, xmm6
 | 
						|
        pand    xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip]
 | 
						|
        pand    xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip]
 | 
						|
        por     xmm12, xmm13
 | 
						|
        pshufd  xmm12, xmm12, 0x78
 | 
						|
        punpckhdq xmm5, xmm7
 | 
						|
        punpckldq xmm6, xmm5
 | 
						|
        pshufd  xmm7, xmm6, 0x1E
 | 
						|
        movdqa  xmmword ptr [rsp+0x40], xmm12
 | 
						|
        movdqa  xmm5, xmmword ptr [rsp+0x30]
 | 
						|
        movdqa  xmm13, xmmword ptr [rsp+0x50]
 | 
						|
        pshufd  xmm6, xmm5, 0x0F
 | 
						|
        shufps  xmm5, xmm13, 214
 | 
						|
        pshufd  xmm12, xmm5, 0x39
 | 
						|
        movdqa  xmm5, xmm14
 | 
						|
        shufps  xmm5, xmm15, 250
 | 
						|
        pand    xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip]
 | 
						|
        pand    xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip]
 | 
						|
        por     xmm6, xmm5
 | 
						|
        movdqa  xmm5, xmm15
 | 
						|
        punpcklqdq xmm5, xmm13
 | 
						|
        movdqa  xmmword ptr [rsp+0x30], xmm2
 | 
						|
        movdqa  xmm2, xmm14
 | 
						|
        pand    xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip]
 | 
						|
        pand    xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip]
 | 
						|
        por     xmm5, xmm2
 | 
						|
        movdqa  xmm2, xmmword ptr [rsp+0x30]
 | 
						|
        pshufd  xmm5, xmm5, 0x78
 | 
						|
        punpckhdq xmm13, xmm15
 | 
						|
        punpckldq xmm14, xmm13
 | 
						|
        pshufd  xmm15, xmm14, 0x1E
 | 
						|
        movdqa  xmm13, xmm6
 | 
						|
        movdqa  xmm14, xmm5
 | 
						|
        movdqa  xmm5, xmmword ptr [rsp+0x20]
 | 
						|
        movdqa  xmm6, xmmword ptr [rsp+0x40]
 | 
						|
        jmp     9b
 | 
						|
9:
 | 
						|
        pxor    xmm0, xmm2
 | 
						|
        pxor    xmm1, xmm3
 | 
						|
        pxor    xmm8, xmm10
 | 
						|
        pxor    xmm9, xmm11
 | 
						|
        mov     eax, r13d
 | 
						|
        cmp     rdx, r15
 | 
						|
        jne     2b
 | 
						|
        movups  xmmword ptr [rbx], xmm0
 | 
						|
        movups  xmmword ptr [rbx+0x10], xmm1
 | 
						|
        movups  xmmword ptr [rbx+0x20], xmm8
 | 
						|
        movups  xmmword ptr [rbx+0x30], xmm9
 | 
						|
        mov     eax, dword ptr [rsp+0x130]
 | 
						|
        neg     eax
 | 
						|
        mov    r10d, dword ptr [rsp+0x110+8*rax]
 | 
						|
        mov    r11d, dword ptr [rsp+0x120+8*rax]
 | 
						|
        mov dword ptr [rsp+0x110], r10d
 | 
						|
        mov dword ptr [rsp+0x120], r11d
 | 
						|
        add     rdi, 16
 | 
						|
        add     rbx, 64
 | 
						|
        sub     rsi, 2
 | 
						|
3:
 | 
						|
        test    esi, 0x1
 | 
						|
        je      4b
 | 
						|
        movups  xmm0, xmmword ptr [rcx]
 | 
						|
        movups  xmm1, xmmword ptr [rcx+0x10]
 | 
						|
        movd    xmm13, dword ptr [rsp+0x110]
 | 
						|
        movd    xmm14, dword ptr [rsp+0x120]
 | 
						|
        punpckldq xmm13, xmm14
 | 
						|
        mov     r8, qword ptr [rdi]
 | 
						|
        movzx   eax, byte ptr [rbp+0x40]
 | 
						|
        or      eax, r13d
 | 
						|
        xor     edx, edx
 | 
						|
2:
 | 
						|
        mov     r14d, eax
 | 
						|
        or      eax, r12d
 | 
						|
        add     rdx, 64
 | 
						|
        cmp     rdx, r15
 | 
						|
        cmovne  eax, r14d
 | 
						|
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
 | 
						|
        shl     rax, 32
 | 
						|
        or      rax, 64
 | 
						|
        movq    xmm12, rax
 | 
						|
        movdqa  xmm3, xmm13
 | 
						|
        punpcklqdq xmm3, xmm12
 | 
						|
        movups  xmm4, xmmword ptr [r8+rdx-0x40]
 | 
						|
        movups  xmm5, xmmword ptr [r8+rdx-0x30]
 | 
						|
        movaps  xmm8, xmm4
 | 
						|
        shufps  xmm4, xmm5, 136
 | 
						|
        shufps  xmm8, xmm5, 221
 | 
						|
        movaps  xmm5, xmm8
 | 
						|
        movups  xmm6, xmmword ptr [r8+rdx-0x20]
 | 
						|
        movups  xmm7, xmmword ptr [r8+rdx-0x10]
 | 
						|
        movaps  xmm8, xmm6
 | 
						|
        shufps  xmm6, xmm7, 136
 | 
						|
        pshufd  xmm6, xmm6, 0x93
 | 
						|
        shufps  xmm8, xmm7, 221
 | 
						|
        pshufd  xmm7, xmm8, 0x93
 | 
						|
        mov     al, 7
 | 
						|
9:
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm11, 12
 | 
						|
        por     xmm1, xmm11
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        movdqa  xmm14, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm14, 24
 | 
						|
        pxor    xmm3, xmm14
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm11, 7
 | 
						|
        por     xmm1, xmm11
 | 
						|
        pshufd  xmm0, xmm0, 0x93
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x39
 | 
						|
        paddd   xmm0, xmm6
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm11, 12
 | 
						|
        por     xmm1, xmm11
 | 
						|
        paddd   xmm0, xmm7
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        movdqa  xmm14, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm14, 24
 | 
						|
        pxor    xmm3, xmm14
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm11, 7
 | 
						|
        por     xmm1, xmm11
 | 
						|
        pshufd  xmm0, xmm0, 0x39
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x93
 | 
						|
        dec     al
 | 
						|
        jz      9f
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        shufps  xmm8, xmm5, 214
 | 
						|
        pshufd  xmm9, xmm4, 0x0F
 | 
						|
        pshufd  xmm4, xmm8, 0x39
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        shufps  xmm8, xmm7, 250
 | 
						|
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
 | 
						|
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
 | 
						|
        por     xmm9, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        punpcklqdq xmm8, xmm5
 | 
						|
        movdqa  xmm10, xmm6
 | 
						|
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
 | 
						|
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
 | 
						|
        por     xmm8, xmm10
 | 
						|
        pshufd  xmm8, xmm8, 0x78
 | 
						|
        punpckhdq xmm5, xmm7
 | 
						|
        punpckldq xmm6, xmm5
 | 
						|
        pshufd  xmm7, xmm6, 0x1E
 | 
						|
        movdqa  xmm5, xmm9
 | 
						|
        movdqa  xmm6, xmm8
 | 
						|
        jmp     9b
 | 
						|
9:
 | 
						|
        pxor    xmm0, xmm2
 | 
						|
        pxor    xmm1, xmm3
 | 
						|
        mov     eax, r13d
 | 
						|
        cmp     rdx, r15
 | 
						|
        jne     2b
 | 
						|
        movups  xmmword ptr [rbx], xmm0
 | 
						|
        movups  xmmword ptr [rbx+0x10], xmm1
 | 
						|
        jmp     4b
 | 
						|
SET_SIZE(zfs_blake3_hash_many_sse2)
 | 
						|
 | 
						|
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse2, 64)
 | 
						|
        ENDBR
 | 
						|
        movups  xmm0, xmmword ptr [rdi]
 | 
						|
        movups  xmm1, xmmword ptr [rdi+0x10]
 | 
						|
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
 | 
						|
        shl     r8, 32
 | 
						|
        add     rdx, r8
 | 
						|
        movq    xmm3, rcx
 | 
						|
        movq    xmm4, rdx
 | 
						|
        punpcklqdq xmm3, xmm4
 | 
						|
        movups  xmm4, xmmword ptr [rsi]
 | 
						|
        movups  xmm5, xmmword ptr [rsi+0x10]
 | 
						|
        movaps  xmm8, xmm4
 | 
						|
        shufps  xmm4, xmm5, 136
 | 
						|
        shufps  xmm8, xmm5, 221
 | 
						|
        movaps  xmm5, xmm8
 | 
						|
        movups  xmm6, xmmword ptr [rsi+0x20]
 | 
						|
        movups  xmm7, xmmword ptr [rsi+0x30]
 | 
						|
        movaps  xmm8, xmm6
 | 
						|
        shufps  xmm6, xmm7, 136
 | 
						|
        pshufd  xmm6, xmm6, 0x93
 | 
						|
        shufps  xmm8, xmm7, 221
 | 
						|
        pshufd  xmm7, xmm8, 0x93
 | 
						|
        mov     al, 7
 | 
						|
9:
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm11, 12
 | 
						|
        por     xmm1, xmm11
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        movdqa  xmm14, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm14, 24
 | 
						|
        pxor    xmm3, xmm14
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm11, 7
 | 
						|
        por     xmm1, xmm11
 | 
						|
        pshufd  xmm0, xmm0, 0x93
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x39
 | 
						|
        paddd   xmm0, xmm6
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm11, 12
 | 
						|
        por     xmm1, xmm11
 | 
						|
        paddd   xmm0, xmm7
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        movdqa  xmm14, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm14, 24
 | 
						|
        pxor    xmm3, xmm14
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm11, 7
 | 
						|
        por     xmm1, xmm11
 | 
						|
        pshufd  xmm0, xmm0, 0x39
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x93
 | 
						|
        dec     al
 | 
						|
        jz      9f
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        shufps  xmm8, xmm5, 214
 | 
						|
        pshufd  xmm9, xmm4, 0x0F
 | 
						|
        pshufd  xmm4, xmm8, 0x39
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        shufps  xmm8, xmm7, 250
 | 
						|
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
 | 
						|
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
 | 
						|
        por     xmm9, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        punpcklqdq xmm8, xmm5
 | 
						|
        movdqa  xmm10, xmm6
 | 
						|
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
 | 
						|
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
 | 
						|
        por     xmm8, xmm10
 | 
						|
        pshufd  xmm8, xmm8, 0x78
 | 
						|
        punpckhdq xmm5, xmm7
 | 
						|
        punpckldq xmm6, xmm5
 | 
						|
        pshufd  xmm7, xmm6, 0x1E
 | 
						|
        movdqa  xmm5, xmm9
 | 
						|
        movdqa  xmm6, xmm8
 | 
						|
        jmp     9b
 | 
						|
9:
 | 
						|
        pxor    xmm0, xmm2
 | 
						|
        pxor    xmm1, xmm3
 | 
						|
        movups  xmmword ptr [rdi], xmm0
 | 
						|
        movups  xmmword ptr [rdi+0x10], xmm1
 | 
						|
        RET
 | 
						|
SET_SIZE(zfs_blake3_compress_in_place_sse2)
 | 
						|
 | 
						|
ENTRY_ALIGN(zfs_blake3_compress_xof_sse2, 64)
 | 
						|
        ENDBR
 | 
						|
        movups  xmm0, xmmword ptr [rdi]
 | 
						|
        movups  xmm1, xmmword ptr [rdi+0x10]
 | 
						|
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
 | 
						|
        movzx   eax, r8b
 | 
						|
        movzx   edx, dl
 | 
						|
        shl     rax, 32
 | 
						|
        add     rdx, rax
 | 
						|
        movq    xmm3, rcx
 | 
						|
        movq    xmm4, rdx
 | 
						|
        punpcklqdq xmm3, xmm4
 | 
						|
        movups  xmm4, xmmword ptr [rsi]
 | 
						|
        movups  xmm5, xmmword ptr [rsi+0x10]
 | 
						|
        movaps  xmm8, xmm4
 | 
						|
        shufps  xmm4, xmm5, 136
 | 
						|
        shufps  xmm8, xmm5, 221
 | 
						|
        movaps  xmm5, xmm8
 | 
						|
        movups  xmm6, xmmword ptr [rsi+0x20]
 | 
						|
        movups  xmm7, xmmword ptr [rsi+0x30]
 | 
						|
        movaps  xmm8, xmm6
 | 
						|
        shufps  xmm6, xmm7, 136
 | 
						|
        pshufd  xmm6, xmm6, 0x93
 | 
						|
        shufps  xmm8, xmm7, 221
 | 
						|
        pshufd  xmm7, xmm8, 0x93
 | 
						|
        mov     al, 7
 | 
						|
9:
 | 
						|
        paddd   xmm0, xmm4
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm11, 12
 | 
						|
        por     xmm1, xmm11
 | 
						|
        paddd   xmm0, xmm5
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        movdqa  xmm14, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm14, 24
 | 
						|
        pxor    xmm3, xmm14
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm11, 7
 | 
						|
        por     xmm1, xmm11
 | 
						|
        pshufd  xmm0, xmm0, 0x93
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x39
 | 
						|
        paddd   xmm0, xmm6
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        pshuflw xmm3, xmm3, 0xB1
 | 
						|
        pshufhw xmm3, xmm3, 0xB1
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 20
 | 
						|
        psrld   xmm11, 12
 | 
						|
        por     xmm1, xmm11
 | 
						|
        paddd   xmm0, xmm7
 | 
						|
        paddd   xmm0, xmm1
 | 
						|
        pxor    xmm3, xmm0
 | 
						|
        movdqa  xmm14, xmm3
 | 
						|
        psrld   xmm3, 8
 | 
						|
        pslld   xmm14, 24
 | 
						|
        pxor    xmm3, xmm14
 | 
						|
        paddd   xmm2, xmm3
 | 
						|
        pxor    xmm1, xmm2
 | 
						|
        movdqa  xmm11, xmm1
 | 
						|
        pslld   xmm1, 25
 | 
						|
        psrld   xmm11, 7
 | 
						|
        por     xmm1, xmm11
 | 
						|
        pshufd  xmm0, xmm0, 0x39
 | 
						|
        pshufd  xmm3, xmm3, 0x4E
 | 
						|
        pshufd  xmm2, xmm2, 0x93
 | 
						|
        dec     al
 | 
						|
        jz      9f
 | 
						|
        movdqa  xmm8, xmm4
 | 
						|
        shufps  xmm8, xmm5, 214
 | 
						|
        pshufd  xmm9, xmm4, 0x0F
 | 
						|
        pshufd  xmm4, xmm8, 0x39
 | 
						|
        movdqa  xmm8, xmm6
 | 
						|
        shufps  xmm8, xmm7, 250
 | 
						|
        pand    xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
 | 
						|
        pand    xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
 | 
						|
        por     xmm9, xmm8
 | 
						|
        movdqa  xmm8, xmm7
 | 
						|
        punpcklqdq xmm8, xmm5
 | 
						|
        movdqa  xmm10, xmm6
 | 
						|
        pand    xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
 | 
						|
        pand    xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
 | 
						|
        por     xmm8, xmm10
 | 
						|
        pshufd  xmm8, xmm8, 0x78
 | 
						|
        punpckhdq xmm5, xmm7
 | 
						|
        punpckldq xmm6, xmm5
 | 
						|
        pshufd  xmm7, xmm6, 0x1E
 | 
						|
        movdqa  xmm5, xmm9
 | 
						|
        movdqa  xmm6, xmm8
 | 
						|
        jmp     9b
 | 
						|
9:
 | 
						|
        movdqu  xmm4, xmmword ptr [rdi]
 | 
						|
        movdqu  xmm5, xmmword ptr [rdi+0x10]
 | 
						|
        pxor    xmm0, xmm2
 | 
						|
        pxor    xmm1, xmm3
 | 
						|
        pxor    xmm2, xmm4
 | 
						|
        pxor    xmm3, xmm5
 | 
						|
        movups  xmmword ptr [r9], xmm0
 | 
						|
        movups  xmmword ptr [r9+0x10], xmm1
 | 
						|
        movups  xmmword ptr [r9+0x20], xmm2
 | 
						|
        movups  xmmword ptr [r9+0x30], xmm3
 | 
						|
        RET
 | 
						|
SET_SIZE(zfs_blake3_compress_xof_sse2)
 | 
						|
 | 
						|
SECTION_STATIC
 | 
						|
.p2align  6
 | 
						|
BLAKE3_IV:
 | 
						|
        .long  0x6A09E667, 0xBB67AE85
 | 
						|
        .long  0x3C6EF372, 0xA54FF53A
 | 
						|
ADD0:
 | 
						|
        .long  0, 1, 2, 3
 | 
						|
ADD1:
 | 
						|
	.long  4, 4, 4, 4
 | 
						|
BLAKE3_IV_0:
 | 
						|
	.long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
 | 
						|
BLAKE3_IV_1:
 | 
						|
	.long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
 | 
						|
BLAKE3_IV_2:
 | 
						|
	.long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
 | 
						|
BLAKE3_IV_3:
 | 
						|
	.long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
 | 
						|
BLAKE3_BLOCK_LEN:
 | 
						|
	.long  64, 64, 64, 64
 | 
						|
CMP_MSB_MASK:
 | 
						|
	.long  0x80000000, 0x80000000, 0x80000000, 0x80000000
 | 
						|
PBLENDW_0x33_MASK:
 | 
						|
	.long  0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
 | 
						|
PBLENDW_0xCC_MASK:
 | 
						|
	.long  0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
 | 
						|
PBLENDW_0x3F_MASK:
 | 
						|
	.long  0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
 | 
						|
PBLENDW_0xC0_MASK:
 | 
						|
	.long  0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
 | 
						|
 | 
						|
#endif	/* HAVE_SSE2 */
 | 
						|
 | 
						|
#ifdef __ELF__
 | 
						|
.section .note.GNU-stack,"",%progbits
 | 
						|
#endif
 |