2025-01-04 03:04:27 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								// SPDX-License-Identifier: CDDL-1.0
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * CDDL HEADER START
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * The contents of this file are subject to the terms of the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * Common Development and Distribution License (the "License").
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * You may not use this file except in compliance with the License.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
							 | 
						
					
						
							
								
									
										
										
										
											2022-07-12 00:16:13 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								 * or https://opensource.org/licenses/CDDL-1.0.
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * See the License for the specific language governing permissions
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * and limitations under the License.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * When distributing Covered Code, include this CDDL HEADER in each
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * If applicable, add the following below this CDDL HEADER, with the
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * fields enclosed by brackets "[]" replaced with your own identifying
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * information: Portions Copyright [yyyy] [name of copyright owner]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 *
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * CDDL HEADER END
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								/*
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 */
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#if defined(__x86_64) && defined(HAVE_AVX512F)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include <sys/byteorder.h>
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-07 21:28:50 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <sys/frame.h>
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include <sys/spa_checksum.h>
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-22 03:56:46 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <sys/string.h>
							 | 
						
					
						
							
								
									
										
										
										
											2019-09-05 19:34:54 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#include <sys/simd.h>
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#include <zfs_fletcher.h>
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-07-26 06:09:50 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#ifdef __linux__
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#define	__asm __asm__ __volatile__
							 | 
						
					
						
							
								
									
										
										
										
											2020-07-26 06:09:50 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#endif
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								static void
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512f_init(fletcher_4_ctx_t *ctx)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-25 16:26:54 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									memset(ctx->avx512, 0, 4 * sizeof (zfs_fletcher_avx512_t));
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static void
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512f_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									static const uint64_t
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									CcA[] = {   0,   0,   1,   3,   6,  10,  15,  21 },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									CcB[] = {  28,  36,  44,  52,  60,  68,  76,  84 },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									DcA[] = {   0,   0,   0,   1,   4,  10,  20,  35 },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									DcB[] = {  56,  84, 120, 164, 216, 276, 344, 420 },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									DcC[] = { 448, 512, 576, 640, 704, 768, 832, 896 };
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									uint64_t A, B, C, D;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									uint64_t i;
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									A = ctx->avx512[0].v[0];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									B = 8 * ctx->avx512[1].v[0];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									C = 64 * ctx->avx512[2].v[0] - CcB[0] * ctx->avx512[1].v[0];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									D = 512 * ctx->avx512[3].v[0] - DcC[0] * ctx->avx512[2].v[0] +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									    DcB[0] * ctx->avx512[1].v[0];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									for (i = 1; i < 8; i++) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										A += ctx->avx512[0].v[i];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										B += 8 * ctx->avx512[1].v[i] - i * ctx->avx512[0].v[i];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										C += 64 * ctx->avx512[2].v[i] - CcB[i] * ctx->avx512[1].v[i] +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										    CcA[i] * ctx->avx512[0].v[i];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										D += 512 * ctx->avx512[3].v[i] - DcC[i] * ctx->avx512[2].v[i] +
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										    DcB[i] * ctx->avx512[1].v[i] - DcA[i] * ctx->avx512[0].v[i];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									ZIO_SET_CHECKSUM(zcp, A, B, C, D);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#define	FLETCHER_4_AVX512_RESTORE_CTX(ctx)				\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{									\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %0, %%zmm0" :: "m" ((ctx)->avx512[0]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %0, %%zmm1" :: "m" ((ctx)->avx512[1]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %0, %%zmm2" :: "m" ((ctx)->avx512[2]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %0, %%zmm3" :: "m" ((ctx)->avx512[3]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#define	FLETCHER_4_AVX512_SAVE_CTX(ctx)					\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{									\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %%zmm0, %0" : "=m" ((ctx)->avx512[0]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %%zmm1, %0" : "=m" ((ctx)->avx512[1]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %%zmm2, %0" : "=m" ((ctx)->avx512[2]));	\
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %%zmm3, %0" : "=m" ((ctx)->avx512[3]));	\
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static void
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512f_native(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									const uint32_t *ip = buf;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									FLETCHER_4_AVX512_RESTORE_CTX(ctx);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-05 22:00:34 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									do {
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm4, %zmm0, %zmm0");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm0, %zmm1, %zmm1");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm1, %zmm2, %zmm2");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm2, %zmm3, %zmm3");
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-05 22:00:34 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									} while ((ip += 8) < ipend);
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									FLETCHER_4_AVX512_SAVE_CTX(ctx);
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-07 21:28:50 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_native);
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static void
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512f_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    uint64_t size)
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									static const uint64_t byteswap_mask = 0xFFULL;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									const uint32_t *ip = buf;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									FLETCHER_4_AVX512_RESTORE_CTX(ctx);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vpbroadcastq %0, %%zmm8" :: "r" (byteswap_mask));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vpsllq $8, %zmm8, %zmm9");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vpsllq $16, %zmm8, %zmm10");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vpsllq $24, %zmm8, %zmm11");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-05 22:00:34 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									do {
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpmovzxdq %0, %%zmm5"::"m" (*ip));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpsrlq $24, %zmm5, %zmm6");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpandd %zmm8, %zmm6, %zmm6");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpsrlq $8, %zmm5, %zmm7");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpandd %zmm9, %zmm7, %zmm7");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpord %zmm6, %zmm7, %zmm4");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpsllq $8, %zmm5, %zmm6");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpandd %zmm10, %zmm6, %zmm6");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpord %zmm6, %zmm4, %zmm4");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpsllq $24, %zmm5, %zmm5");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpandd %zmm11, %zmm5, %zmm5");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpord %zmm5, %zmm4, %zmm4");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm4, %zmm0, %zmm0");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm0, %zmm1, %zmm1");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm1, %zmm2, %zmm2");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm2, %zmm3, %zmm3");
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-05 22:00:34 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									} while ((ip += 8) < ipend);
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-09-25 01:56:22 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									FLETCHER_4_AVX512_SAVE_CTX(ctx)
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							
								
									
										
										
										
											2017-12-07 21:28:50 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								STACK_FRAME_NON_STANDARD(fletcher_4_avx512f_byteswap);
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static boolean_t
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512f_valid(void)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							
								
									
										
										
										
											2019-07-12 19:31:20 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									return (kfpu_allowed() && zfs_avx512f_available());
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								const fletcher_4_ops_t fletcher_4_avx512f_ops = {
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-12 18:50:54 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									.init_native = fletcher_4_avx512f_init,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.fini_native = fletcher_4_avx512f_fini,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.compute_native = fletcher_4_avx512f_native,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.init_byteswap = fletcher_4_avx512f_init,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.fini_byteswap = fletcher_4_avx512f_fini,
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.compute_byteswap = fletcher_4_avx512f_byteswap,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.valid = fletcher_4_avx512f_valid,
							 | 
						
					
						
							
								
									
										
										
										
											2023-03-14 19:45:28 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									.uses_fpu = B_TRUE,
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.name = "avx512f"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								};
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2019-10-30 22:26:14 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								#if defined(HAVE_AVX512BW)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								static void
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512bw_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    uint64_t size)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									static const zfs_fletcher_avx512_t mask = {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										.v = { 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									};
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									const uint32_t *ip = buf;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									FLETCHER_4_AVX512_RESTORE_CTX(ctx);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									__asm("vmovdqu64 %0, %%zmm5" :: "m" (mask));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-05 22:00:34 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									do {
							 | 
						
					
						
							
								
									
										
										
										
											2019-10-30 22:26:14 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										__asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpshufb %zmm5, %zmm4, %zmm4");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm4, %zmm0, %zmm0");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm0, %zmm1, %zmm1");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm1, %zmm2, %zmm2");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										__asm("vpaddq %zmm2, %zmm3, %zmm3");
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-05 22:00:34 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									} while ((ip += 8) < ipend);
							 | 
						
					
						
							
								
									
										
										
										
											2019-10-30 22:26:14 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									FLETCHER_4_AVX512_SAVE_CTX(ctx)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								STACK_FRAME_NON_STANDARD(fletcher_4_avx512bw_byteswap);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 22:42:42 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								static boolean_t
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								fletcher_4_avx512bw_valid(void)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								{
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									return (fletcher_4_avx512f_valid() && zfs_avx512bw_available());
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2019-10-30 22:26:14 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								const fletcher_4_ops_t fletcher_4_avx512bw_ops = {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.init_native = fletcher_4_avx512f_init,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.fini_native = fletcher_4_avx512f_fini,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.compute_native = fletcher_4_avx512f_native,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.init_byteswap = fletcher_4_avx512f_init,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.fini_byteswap = fletcher_4_avx512f_fini,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									.compute_byteswap = fletcher_4_avx512bw_byteswap,
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 22:42:42 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									.valid = fletcher_4_avx512bw_valid,
							 | 
						
					
						
							
								
									
										
										
										
											2023-03-14 19:45:28 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									.uses_fpu = B_TRUE,
							 | 
						
					
						
							
								
									
										
										
										
											2019-10-30 22:26:14 +03:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									.name = "avx512bw"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								};
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#endif
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2016-07-06 14:42:04 +03:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								#endif /* defined(__x86_64) && defined(HAVE_AVX512F) */
							 |