Linux premium71.web-hosting.com 4.18.0-553.44.1.lve.el8.x86_64 #1 SMP Thu Mar 13 14:29:12 UTC 2025 x86_64
LiteSpeed
Server IP : 198.187.29.8 & Your IP : 216.73.216.95
Domains :
Cant Read [ /etc/named.conf ]
User : cleahvkv
Terminal
Auto Root
Create File
Create Folder
Localroot Suggester
Backdoor Destroyer
Readme
/
lib64 /
llvm17 /
lib /
clang /
17 /
include /
Delete
Unzip
Name
Size
Permission
Date
Action
cuda_wrappers
[ DIR ]
drwxr-xr-x
2025-06-02 12:56
llvm_libc_wrappers
[ DIR ]
drwxr-xr-x
2025-06-02 12:56
openmp_wrappers
[ DIR ]
drwxr-xr-x
2025-06-02 12:56
ppc_wrappers
[ DIR ]
drwxr-xr-x
2025-06-02 12:56
__clang_cuda_builtin_vars.h
4.78
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_cmath.h
18.06
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_complex_builtins.h
9.36
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_device_functions.h
56.68
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_intrinsics.h
29.93
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_libdevice_declares.h
21.87
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_math.h
15.99
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_math_forward_declares.h
8.27
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_runtime_wrapper.h
17.61
KB
-rw-r--r--
2023-11-28 08:52
__clang_cuda_texture_intrinsics.h
31.86
KB
-rw-r--r--
2023-11-28 08:52
__clang_hip_cmath.h
26.34
KB
-rw-r--r--
2023-11-28 08:52
__clang_hip_libdevice_declares.h
19.87
KB
-rw-r--r--
2023-11-28 08:52
__clang_hip_math.h
31.96
KB
-rw-r--r--
2023-11-28 08:52
__clang_hip_runtime_wrapper.h
4.65
KB
-rw-r--r--
2023-11-28 08:52
__clang_hip_stdlib.h
1.19
KB
-rw-r--r--
2023-11-28 08:52
__stddef_max_align_t.h
857
B
-rw-r--r--
2023-11-28 08:52
__wmmintrin_aes.h
5.15
KB
-rw-r--r--
2023-11-28 08:52
__wmmintrin_pclmul.h
1.99
KB
-rw-r--r--
2023-11-28 08:52
adxintrin.h
7.37
KB
-rw-r--r--
2023-11-28 08:52
altivec.h
697.32
KB
-rw-r--r--
2023-11-28 08:52
ammintrin.h
7.54
KB
-rw-r--r--
2023-11-28 08:52
amxcomplexintrin.h
6.81
KB
-rw-r--r--
2023-11-28 08:52
amxfp16intrin.h
1.82
KB
-rw-r--r--
2023-11-28 08:52
amxintrin.h
21.12
KB
-rw-r--r--
2023-11-28 08:52
arm64intr.h
993
B
-rw-r--r--
2023-11-28 08:52
arm_acle.h
25.66
KB
-rw-r--r--
2023-11-28 08:52
arm_bf16.h
548
B
-rw-r--r--
2024-11-06 08:03
arm_cde.h
32.67
KB
-rw-r--r--
2024-11-06 08:03
arm_cmse.h
6.21
KB
-rw-r--r--
2023-11-28 08:52
arm_fp16.h
16.92
KB
-rw-r--r--
2024-11-06 08:03
arm_mve.h
1.48
MB
-rw-r--r--
2024-11-06 08:03
arm_neon.h
2.45
MB
-rw-r--r--
2024-11-06 08:03
arm_neon_sve_bridge.h
9.48
KB
-rw-r--r--
2023-11-28 08:52
arm_sme_draft_spec_subject_to_change.h
60.2
KB
-rw-r--r--
2024-11-06 08:03
arm_sve.h
1.51
MB
-rw-r--r--
2024-11-06 08:03
armintr.h
843
B
-rw-r--r--
2023-11-28 08:52
avx2intrin.h
186.96
KB
-rw-r--r--
2023-11-28 08:52
avx512bf16intrin.h
10.51
KB
-rw-r--r--
2023-11-28 08:52
avx512bitalgintrin.h
2.41
KB
-rw-r--r--
2023-11-28 08:52
avx512bwintrin.h
75.33
KB
-rw-r--r--
2023-11-28 08:52
avx512cdintrin.h
4.12
KB
-rw-r--r--
2023-11-28 08:52
avx512dqintrin.h
58.75
KB
-rw-r--r--
2023-11-28 08:52
avx512erintrin.h
11.83
KB
-rw-r--r--
2023-11-28 08:52
avx512fintrin.h
382.64
KB
-rw-r--r--
2023-11-28 08:52
avx512fp16intrin.h
156.63
KB
-rw-r--r--
2023-11-28 08:52
avx512ifmaintrin.h
2.49
KB
-rw-r--r--
2023-11-28 08:52
avx512ifmavlintrin.h
4.31
KB
-rw-r--r--
2023-11-28 08:52
avx512pfintrin.h
4.53
KB
-rw-r--r--
2023-11-28 08:52
avx512vbmi2intrin.h
13.17
KB
-rw-r--r--
2023-11-28 08:52
avx512vbmiintrin.h
3.72
KB
-rw-r--r--
2023-11-28 08:52
avx512vbmivlintrin.h
6.94
KB
-rw-r--r--
2023-11-28 08:52
avx512vlbf16intrin.h
19.21
KB
-rw-r--r--
2023-11-28 08:52
avx512vlbitalgintrin.h
4.23
KB
-rw-r--r--
2023-11-28 08:52
avx512vlbwintrin.h
121.26
KB
-rw-r--r--
2023-11-28 08:52
avx512vlcdintrin.h
7.66
KB
-rw-r--r--
2023-11-28 08:52
avx512vldqintrin.h
46.41
KB
-rw-r--r--
2023-11-28 08:52
avx512vlfp16intrin.h
85.51
KB
-rw-r--r--
2023-11-28 08:52
avx512vlintrin.h
322.29
KB
-rw-r--r--
2023-11-28 08:52
avx512vlvbmi2intrin.h
25.72
KB
-rw-r--r--
2023-11-28 08:52
avx512vlvnniintrin.h
13.13
KB
-rw-r--r--
2023-11-28 08:52
avx512vlvp2intersectintrin.h
4.44
KB
-rw-r--r--
2023-11-28 08:52
avx512vnniintrin.h
4.21
KB
-rw-r--r--
2023-11-28 08:52
avx512vp2intersectintrin.h
2.9
KB
-rw-r--r--
2023-11-28 08:52
avx512vpopcntdqintrin.h
2
KB
-rw-r--r--
2023-11-28 08:52
avx512vpopcntdqvlintrin.h
3.31
KB
-rw-r--r--
2023-11-28 08:52
avxifmaintrin.h
5.75
KB
-rw-r--r--
2023-11-28 08:52
avxintrin.h
195.41
KB
-rw-r--r--
2023-11-28 08:52
avxneconvertintrin.h
14.09
KB
-rw-r--r--
2023-11-28 08:52
avxvnniint16intrin.h
17.41
KB
-rw-r--r--
2023-11-28 08:52
avxvnniint8intrin.h
18.67
KB
-rw-r--r--
2023-11-28 08:52
avxvnniintrin.h
10.44
KB
-rw-r--r--
2023-11-28 08:52
bmi2intrin.h
7.09
KB
-rw-r--r--
2023-11-28 08:52
bmiintrin.h
14.12
KB
-rw-r--r--
2023-11-28 08:52
builtins.h
741
B
-rw-r--r--
2023-11-28 08:52
cet.h
1.49
KB
-rw-r--r--
2023-11-28 08:52
cetintrin.h
3.27
KB
-rw-r--r--
2023-11-28 08:52
cldemoteintrin.h
1.18
KB
-rw-r--r--
2023-11-28 08:52
clflushoptintrin.h
1.17
KB
-rw-r--r--
2023-11-28 08:52
clwbintrin.h
1.2
KB
-rw-r--r--
2023-11-28 08:52
clzerointrin.h
1.19
KB
-rw-r--r--
2023-11-28 08:52
cmpccxaddintrin.h
2.33
KB
-rw-r--r--
2023-11-28 08:52
cpuid.h
11.01
KB
-rw-r--r--
2023-11-28 08:52
crc32intrin.h
3.27
KB
-rw-r--r--
2023-11-28 08:52
emmintrin.h
192.64
KB
-rw-r--r--
2023-11-28 08:52
enqcmdintrin.h
2.12
KB
-rw-r--r--
2023-11-28 08:52
f16cintrin.h
5.39
KB
-rw-r--r--
2023-11-28 08:52
float.h
5.63
KB
-rw-r--r--
2023-11-28 08:52
fma4intrin.h
6.82
KB
-rw-r--r--
2023-11-28 08:52
fmaintrin.h
28.4
KB
-rw-r--r--
2023-11-28 08:52
fxsrintrin.h
2.82
KB
-rw-r--r--
2023-11-28 08:52
gfniintrin.h
7.57
KB
-rw-r--r--
2023-11-28 08:52
hexagon_circ_brev_intrinsics.h
15.59
KB
-rw-r--r--
2023-11-28 08:52
hexagon_protos.h
374.42
KB
-rw-r--r--
2023-11-28 08:52
hexagon_types.h
130.33
KB
-rw-r--r--
2023-11-28 08:52
hresetintrin.h
1.36
KB
-rw-r--r--
2023-11-28 08:52
htmintrin.h
6.14
KB
-rw-r--r--
2023-11-28 08:52
htmxlintrin.h
9.01
KB
-rw-r--r--
2023-11-28 08:52
hvx_hexagon_protos.h
254.26
KB
-rw-r--r--
2023-11-28 08:52
ia32intrin.h
12.72
KB
-rw-r--r--
2023-11-28 08:52
immintrin.h
23.57
KB
-rw-r--r--
2023-11-28 08:52
intrin.h
28.22
KB
-rw-r--r--
2023-11-28 08:52
inttypes.h
2.26
KB
-rw-r--r--
2023-11-28 08:52
invpcidintrin.h
764
B
-rw-r--r--
2023-11-28 08:52
iso646.h
656
B
-rw-r--r--
2023-11-28 08:52
keylockerintrin.h
17.98
KB
-rw-r--r--
2023-11-28 08:52
larchintrin.h
7.8
KB
-rw-r--r--
2023-11-28 08:52
limits.h
3.61
KB
-rw-r--r--
2023-11-28 08:52
lwpintrin.h
5
KB
-rw-r--r--
2023-11-28 08:52
lzcntintrin.h
3.18
KB
-rw-r--r--
2023-11-28 08:52
mm3dnow.h
4.5
KB
-rw-r--r--
2023-11-28 08:52
mm_malloc.h
1.88
KB
-rw-r--r--
2023-11-28 08:52
mmintrin.h
55.98
KB
-rw-r--r--
2023-11-28 08:52
module.modulemap
3.33
KB
-rw-r--r--
2023-11-28 08:52
movdirintrin.h
1.57
KB
-rw-r--r--
2023-11-28 08:52
msa.h
25.01
KB
-rw-r--r--
2023-11-28 08:52
mwaitxintrin.h
2.19
KB
-rw-r--r--
2023-11-28 08:52
nmmintrin.h
709
B
-rw-r--r--
2023-11-28 08:52
opencl-c-base.h
30.38
KB
-rw-r--r--
2023-11-28 08:52
opencl-c.h
874.39
KB
-rw-r--r--
2023-11-28 08:52
pconfigintrin.h
1.19
KB
-rw-r--r--
2023-11-28 08:52
pkuintrin.h
934
B
-rw-r--r--
2023-11-28 08:52
pmmintrin.h
10.5
KB
-rw-r--r--
2023-11-28 08:52
popcntintrin.h
1.82
KB
-rw-r--r--
2023-11-28 08:52
prfchiintrin.h
2.02
KB
-rw-r--r--
2023-11-28 08:52
prfchwintrin.h
2.06
KB
-rw-r--r--
2023-11-28 08:52
ptwriteintrin.h
1.05
KB
-rw-r--r--
2023-11-28 08:52
raointintrin.h
6.59
KB
-rw-r--r--
2023-11-28 08:52
rdpruintrin.h
1.59
KB
-rw-r--r--
2023-11-28 08:52
rdseedintrin.h
2.85
KB
-rw-r--r--
2023-11-28 08:52
riscv_ntlh.h
855
B
-rw-r--r--
2023-11-28 08:52
rtmintrin.h
1.25
KB
-rw-r--r--
2023-11-28 08:52
s390intrin.h
604
B
-rw-r--r--
2023-11-28 08:52
serializeintrin.h
881
B
-rw-r--r--
2023-11-28 08:52
sgxintrin.h
1.77
KB
-rw-r--r--
2023-11-28 08:52
sha512intrin.h
5.95
KB
-rw-r--r--
2023-11-28 08:52
shaintrin.h
7.37
KB
-rw-r--r--
2023-11-28 08:52
sifive_vector.h
522
B
-rw-r--r--
2023-11-28 08:52
sm3intrin.h
7.29
KB
-rw-r--r--
2023-11-28 08:52
sm4intrin.h
8.2
KB
-rw-r--r--
2023-11-28 08:52
smmintrin.h
99.32
KB
-rw-r--r--
2023-11-28 08:52
stdalign.h
911
B
-rw-r--r--
2023-11-28 08:52
stdarg.h
1.66
KB
-rw-r--r--
2023-11-28 08:52
stdatomic.h
8.3
KB
-rw-r--r--
2023-11-28 08:52
stdbool.h
1.04
KB
-rw-r--r--
2023-11-28 08:52
stddef.h
4.16
KB
-rw-r--r--
2023-11-28 08:52
stdint.h
32.49
KB
-rw-r--r--
2023-11-28 08:52
stdnoreturn.h
1.17
KB
-rw-r--r--
2023-11-28 08:52
tbmintrin.h
3.15
KB
-rw-r--r--
2023-11-28 08:52
tgmath.h
29.68
KB
-rw-r--r--
2023-11-28 08:52
tmmintrin.h
29.51
KB
-rw-r--r--
2023-11-28 08:52
tsxldtrkintrin.h
1.97
KB
-rw-r--r--
2023-11-28 08:52
uintrintrin.h
4.96
KB
-rw-r--r--
2023-11-28 08:52
unwind.h
11.21
KB
-rw-r--r--
2023-11-28 08:52
vadefs.h
1.39
KB
-rw-r--r--
2023-11-28 08:52
vaesintrin.h
2.46
KB
-rw-r--r--
2023-11-28 08:52
varargs.h
477
B
-rw-r--r--
2023-11-28 08:52
vecintrin.h
360.82
KB
-rw-r--r--
2023-11-28 08:52
velintrin.h
2.1
KB
-rw-r--r--
2023-11-28 08:52
velintrin_approx.h
3.54
KB
-rw-r--r--
2023-11-28 08:52
velintrin_gen.h
69.06
KB
-rw-r--r--
2023-11-28 08:52
vpclmulqdqintrin.h
1.06
KB
-rw-r--r--
2023-11-28 08:52
waitpkgintrin.h
1.33
KB
-rw-r--r--
2023-11-28 08:52
wasm_simd128.h
76.25
KB
-rw-r--r--
2023-11-28 08:52
wbnoinvdintrin.h
749
B
-rw-r--r--
2023-11-28 08:52
wmmintrin.h
659
B
-rw-r--r--
2023-11-28 08:52
x86gprintrin.h
2.32
KB
-rw-r--r--
2023-11-28 08:52
x86intrin.h
1.81
KB
-rw-r--r--
2023-11-28 08:52
xmmintrin.h
106.73
KB
-rw-r--r--
2023-11-28 08:52
xopintrin.h
19.96
KB
-rw-r--r--
2023-11-28 08:52
xsavecintrin.h
2.51
KB
-rw-r--r--
2023-11-28 08:52
xsaveintrin.h
1.64
KB
-rw-r--r--
2023-11-28 08:52
xsaveoptintrin.h
1
KB
-rw-r--r--
2023-11-28 08:52
xsavesintrin.h
1.24
KB
-rw-r--r--
2023-11-28 08:52
xtestintrin.h
873
B
-rw-r--r--
2023-11-28 08:52
Save
Rename
/*===---------- avx512vlfp16intrin.h - AVX512-FP16 intrinsics --------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error \ "Never use <avx512vlfp16intrin.h> directly; include <immintrin.h> instead." #endif #ifdef __SSE2__ #ifndef __AVX512VLFP16INTRIN_H #define __AVX512VLFP16INTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16, avx512vl"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512fp16, avx512vl"), \ __min_vector_width__(128))) static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_cvtsh_h(__m128h __a) { return __a[0]; } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_cvtsh_h(__m256h __a) { return __a[0]; } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_set_sh(_Float16 __h) { return __extension__(__m128h){__h, 0, 0, 0, 0, 0, 0, 0}; } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_ph(_Float16 __h) { return (__m128h)(__v8hf){__h, __h, __h, __h, __h, __h, __h, __h}; } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_ph(_Float16 __h) { return (__m256h)(__v16hf){__h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h, __h}; } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8) { return (__m128h)(__v8hf){__h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set1_pch(_Float16 _Complex h) { return (__m256h)_mm256_set1_ps(__builtin_bit_cast(float, h)); } static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_set1_pch(_Float16 _Complex h) { return (__m128h)_mm_set1_ps(__builtin_bit_cast(float, h)); } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4, _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8, _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12, _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16) { return (__m256h)(__v16hf){__h16, __h15, __h14, __h13, __h12, __h11, __h10, __h9, __h8, __h7, __h6, __h5, __h4, __h3, __h2, __h1}; } #define _mm_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8) \ _mm_set_ph((h8), (h7), (h6), (h5), (h4), (h3), (h2), (h1)) #define _mm256_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \ h14, h15, h16) \ _mm256_set_ph((h16), (h15), (h14), (h13), (h12), (h11), (h10), (h9), (h8), \ (h7), (h6), (h5), (h4), (h3), (h2), (h1)) static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_add_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A + (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_add_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_add_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A + (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_add_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_sub_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A - (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_sub_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A - (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_sub_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mul_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A * (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_mul_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A * (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_mul_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_div_ph(__m256h __A, __m256h __B) { return (__m256h)((__v16hf)__A / (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_div_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( __U, (__v16hf)_mm256_div_ph(__A, __B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_ph(__m128h __A, __m128h __B) { return (__m128h)((__v8hf)__A / (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128(__U, (__v8hf)_mm_div_ph(__A, __B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_min_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_min_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_minph256((__v16hf)__A, (__v16hf)__B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_minph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_max_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_max_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)__builtin_ia32_maxph256((__v16hf)__A, (__v16hf)__B), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)__builtin_ia32_maxph128((__v8hf)__A, (__v8hf)__B), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_abs_ph(__m256h __A) { return (__m256h)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), (__m256i)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_abs_ph(__m128h __A) { return (__m128h)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_conj_pch(__m256h __A) { return (__m256h)_mm256_xor_ps((__m256)__A, _mm256_set1_ps(-0.0f)); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_conj_pch(__m256h __W, __mmask8 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectps_256( (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_conj_pch(__mmask8 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectps_256( (__mmask8)__U, (__v8sf)_mm256_conj_pch(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_conj_pch(__m128h __A) { return (__m128h)_mm_xor_ps((__m128)__A, _mm_set1_ps(-0.0f)); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_conj_pch(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectps_128( (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_conj_pch(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectps_128( (__mmask8)__U, (__v4sf)_mm_conj_pch(__A), (__v4sf)_mm_setzero_ps()); } #define _mm256_cmp_ph_mask(a, b, p) \ ((__mmask16)__builtin_ia32_cmpph256_mask( \ (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)-1)) #define _mm256_mask_cmp_ph_mask(m, a, b, p) \ ((__mmask16)__builtin_ia32_cmpph256_mask( \ (__v16hf)(__m256h)(a), (__v16hf)(__m256h)(b), (int)(p), (__mmask16)(m))) #define _mm_cmp_ph_mask(a, b, p) \ ((__mmask8)__builtin_ia32_cmpph128_mask( \ (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)-1)) #define _mm_mask_cmp_ph_mask(m, a, b, p) \ ((__mmask8)__builtin_ia32_cmpph128_mask( \ (__v8hf)(__m128h)(a), (__v8hf)(__m128h)(b), (int)(p), (__mmask8)(m))) static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rcp_ph(__m256h __A) { return (__m256h)__builtin_ia32_rcpph256_mask( (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_rcp_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rcpph256_mask((__v16hf)__A, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rcpph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_ph(__m128h __A) { return (__m128h)__builtin_ia32_rcpph128_mask( (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rcpph128_mask((__v8hf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rcpph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_rsqrt_ph(__m256h __A) { return (__m256h)__builtin_ia32_rsqrtph256_mask( (__v16hf)__A, (__v16hf)_mm256_undefined_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rsqrtph256_mask((__v16hf)__A, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_rsqrtph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_ph(__m128h __A) { return (__m128h)__builtin_ia32_rsqrtph128_mask( (__v8hf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rsqrtph128_mask((__v8hf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_rsqrtph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_ph(__m128h __A) { return (__m128h)__builtin_ia32_getexpph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_getexpph128_mask((__v8hf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_getexpph128_mask( (__v8hf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_getexp_ph(__m256h __A) { return (__m256h)__builtin_ia32_getexpph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_getexpph256_mask((__v16hf)__A, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_getexpph256_mask( (__v16hf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } #define _mm_getmant_ph(A, B, C) \ ((__m128h)__builtin_ia32_getmantph128_mask( \ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1)) #define _mm_mask_getmant_ph(W, U, A, B, C) \ ((__m128h)__builtin_ia32_getmantph128_mask( \ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)(__m128h)(W), \ (__mmask8)(U))) #define _mm_maskz_getmant_ph(U, A, B, C) \ ((__m128h)__builtin_ia32_getmantph128_mask( \ (__v8hf)(__m128h)(A), (int)(((C) << 2) | (B)), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U))) #define _mm256_getmant_ph(A, B, C) \ ((__m256h)__builtin_ia32_getmantph256_mask( \ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ (__v16hf)_mm256_setzero_ph(), (__mmask16)-1)) #define _mm256_mask_getmant_ph(W, U, A, B, C) \ ((__m256h)__builtin_ia32_getmantph256_mask( \ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ (__mmask16)(U))) #define _mm256_maskz_getmant_ph(U, A, B, C) \ ((__m256h)__builtin_ia32_getmantph256_mask( \ (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ (__v16hf)_mm256_setzero_ph(), (__mmask16)(U))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_ph(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefph128_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ph(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefph128_mask((__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ph(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_scalefph128_mask( (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_scalef_ph(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_scalefph256_mask( (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ph(__m256h __W, __mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_scalefph256_mask((__v16hf)__A, (__v16hf)__B, (__v16hf)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ph(__mmask16 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_scalefph256_mask( (__v16hf)__A, (__v16hf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U); } #define _mm_roundscale_ph(A, imm) \ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1)) #define _mm_mask_roundscale_ph(W, U, A, imm) \ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) #define _mm_maskz_roundscale_ph(U, A, imm) \ ((__m128h)__builtin_ia32_rndscaleph_128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U))) #define _mm256_roundscale_ph(A, imm) \ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ (__mmask16)-1)) #define _mm256_mask_roundscale_ph(W, U, A, imm) \ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ (__mmask16)(U))) #define _mm256_maskz_roundscale_ph(U, A, imm) \ ((__m256h)__builtin_ia32_rndscaleph_256_mask( \ (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ (__mmask16)(U))) #define _mm_reduce_ph(A, imm) \ ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ (__v8hf)_mm_setzero_ph(), \ (__mmask8)-1)) #define _mm_mask_reduce_ph(W, U, A, imm) \ ((__m128h)__builtin_ia32_reduceph128_mask( \ (__v8hf)(__m128h)(A), (int)(imm), (__v8hf)(__m128h)(W), (__mmask8)(U))) #define _mm_maskz_reduce_ph(U, A, imm) \ ((__m128h)__builtin_ia32_reduceph128_mask((__v8hf)(__m128h)(A), (int)(imm), \ (__v8hf)_mm_setzero_ph(), \ (__mmask8)(U))) #define _mm256_reduce_ph(A, imm) \ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ (__v16hf)_mm256_setzero_ph(), \ (__mmask16)-1)) #define _mm256_mask_reduce_ph(W, U, A, imm) \ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ (__v16hf)(__m256h)(W), \ (__mmask16)(U))) #define _mm256_maskz_reduce_ph(U, A, imm) \ ((__m256h)__builtin_ia32_reduceph256_mask((__v16hf)(__m256h)(A), (int)(imm), \ (__v16hf)_mm256_setzero_ph(), \ (__mmask16)(U))) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_ph(__m128h __a) { return __builtin_ia32_sqrtph((__v8hf)__a); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ph(__m128h __W, __mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ph(__mmask8 __U, __m128h __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_sqrt_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_sqrt_ph(__m256h __a) { return (__m256h)__builtin_ia32_sqrtph256((__v16hf)__a); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ph(__m256h __W, __mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ph(__mmask16 __U, __m256h __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_sqrt_ph(__A), (__v16hf)_mm256_setzero_ph()); } #define _mm_mask_fpclass_ph_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ (int)(imm), (__mmask8)(U))) #define _mm_fpclass_ph_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclassph128_mask((__v8hf)(__m128h)(A), \ (int)(imm), (__mmask8)-1)) #define _mm256_mask_fpclass_ph_mask(U, A, imm) \ ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ (int)(imm), (__mmask16)(U))) #define _mm256_fpclass_ph_mask(A, imm) \ ((__mmask16)__builtin_ia32_fpclassph256_mask((__v16hf)(__m256h)(A), \ (int)(imm), (__mmask16)-1)) static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpd_ph(__m128d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( (__v2df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m128d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph128_mask((__v2df)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ph(__mmask8 __U, __m128d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph128_mask( (__v2df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtpd_ph(__m256d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( (__v4df)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m256d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph256_mask((__v4df)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ph(__mmask8 __U, __m256d __A) { return (__m128h)__builtin_ia32_vcvtpd2ph256_mask( (__v4df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtph_pd(__m128h __A) { return (__m128d)__builtin_ia32_vcvtph2pd128_mask( (__v8hf)__A, (__v2df)_mm_undefined_pd(), (__mmask8)-1); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_pd(__m128d __W, __mmask8 __U, __m128h __A) { return (__m128d)__builtin_ia32_vcvtph2pd128_mask((__v8hf)__A, (__v2df)__W, (__mmask8)__U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { return (__m128d)__builtin_ia32_vcvtph2pd128_mask( (__v8hf)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtph_pd(__m128h __A) { return (__m256d)__builtin_ia32_vcvtph2pd256_mask( (__v8hf)__A, (__v4df)_mm256_undefined_pd(), (__mmask8)-1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_pd(__m256d __W, __mmask8 __U, __m128h __A) { return (__m256d)__builtin_ia32_vcvtph2pd256_mask((__v8hf)__A, (__v4df)__W, (__mmask8)__U); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_pd(__mmask8 __U, __m128h __A) { return (__m256d)__builtin_ia32_vcvtph2pd256_mask( (__v8hf)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi16(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2w128_mask( (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2w128_mask((__v8hf)__A, (__v8hi)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2w128_mask( (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi16(__m256h __A) { return (__m256i)__builtin_ia32_vcvtph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2w256_mask((__v16hf)__A, (__v16hi)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi16(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2w128_mask( (__v8hf)__A, (__v8hi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2w128_mask((__v8hf)__A, (__v8hi)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2w128_mask( (__v8hf)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi16(__m256h __A) { return (__m256i)__builtin_ia32_vcvttph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2w256_mask((__v16hf)__A, (__v16hi)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2w256_mask( (__v16hf)__A, (__v16hi)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hi)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepi16_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hi)__A, __v16hf); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepi16_ph(__A), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu16(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uw128_mask((__v8hf)__A, (__v8hu)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu16(__m256h __A) { return (__m256i)__builtin_ia32_vcvtph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2uw256_mask((__v16hf)__A, (__v16hu)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvtph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu16(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu16(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uw128_mask((__v8hf)__A, (__v8hu)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu16(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uw128_mask( (__v8hf)__A, (__v8hu)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu16(__m256h __A) { return (__m256i)__builtin_ia32_vcvttph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_undefined_si256(), (__mmask16)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu16(__m256i __W, __mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2uw256_mask((__v16hf)__A, (__v16hu)__W, (__mmask16)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu16(__mmask16 __U, __m256h __A) { return (__m256i)__builtin_ia32_vcvttph2uw256_mask( (__v16hf)__A, (__v16hu)_mm256_setzero_si256(), (__mmask16)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu16_ph(__m128i __A) { return (__m128h) __builtin_convertvector((__v8hu)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm_cvtepu16_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_ph(__m256i __A) { return (__m256h) __builtin_convertvector((__v16hu)__A, __v16hf); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_ph(__m256h __W, __mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)__W); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_ph(__mmask16 __U, __m256i __A) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)_mm256_cvtepu16_ph(__A), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi32(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2dq128_mask( (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2dq128_mask((__v8hf)__A, (__v4si)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2dq128_mask( (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi32(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2dq256_mask((__v8hf)__A, (__v8si)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu32(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2udq128_mask( (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2udq128_mask((__v8hf)__A, (__v4su)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2udq128_mask( (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu32(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2udq256_mask((__v8hf)__A, (__v8su)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi32_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( (__v4si)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtdq2ph128_mask((__v4si)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtdq2ph128_mask( (__v4si)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_ph(__m256i __A) { return (__m128h) __builtin_convertvector((__v8si)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepi32_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( (__v4su)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtudq2ph128_mask((__v4su)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtudq2ph128_mask( (__v4su)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ph(__m256i __A) { return (__m128h) __builtin_convertvector((__v8su)__A, __v8hf); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)__W); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, (__v8hf)_mm256_cvtepu32_ph(__A), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi32(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2dq128_mask( (__v8hf)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2dq128_mask((__v8hf)__A, (__v4si)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2dq128_mask( (__v8hf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi32(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2dq256_mask((__v8hf)__A, (__v8si)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2dq256_mask( (__v8hf)__A, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu32(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2udq128_mask( (__v8hf)__A, (__v4su)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu32(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2udq128_mask((__v8hf)__A, (__v4su)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2udq128_mask( (__v8hf)__A, (__v4su)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu32(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu32(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2udq256_mask((__v8hf)__A, (__v8su)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu32(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2udq256_mask( (__v8hf)__A, (__v8su)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( (__v2di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph128_mask((__v2di)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph128_mask( (__v2di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ph(__m256i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( (__v4di)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph256_mask((__v4di)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtqq2ph256_mask( (__v4di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epi64(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2qq128_mask( (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2qq128_mask((__v8hf)__A, (__v2di)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2qq128_mask( (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epi64(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2qq256_mask((__v8hf)__A, (__v4di)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ph(__m128i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( (__v2du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask((__v2du)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ph(__mmask8 __U, __m128i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph128_mask( (__v2du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ph(__m256i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( (__v4du)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask((__v4du)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ph(__mmask8 __U, __m256i __A) { return (__m128h)__builtin_ia32_vcvtuqq2ph256_mask( (__v4du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_epu64(__m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uqq128_mask((__v8hf)__A, (__v2du)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvtph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtph_epu64(__m128h __A) { return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2uqq256_mask((__v8hf)__A, (__v4du)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvtph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epi64(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2qq128_mask( (__v8hf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epi64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2qq128_mask((__v8hf)__A, (__v2di)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2qq128_mask( (__v8hf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epi64(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epi64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2qq256_mask((__v8hf)__A, (__v4di)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2qq256_mask( (__v8hf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttph_epu64(__m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttph_epu64(__m128i __W, __mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uqq128_mask((__v8hf)__A, (__v2du)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { return (__m128i)__builtin_ia32_vcvttph2uqq128_mask( (__v8hf)__A, (__v2du)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttph_epu64(__m128h __A) { return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_undefined_si256(), (__mmask8)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttph_epu64(__m256i __W, __mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2uqq256_mask((__v8hf)__A, (__v4du)__W, (__mmask8)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) { return (__m256i)__builtin_ia32_vcvttph2uqq256_mask( (__v8hf)__A, (__v4du)_mm256_setzero_si256(), (__mmask8)__U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtxph_ps(__m128h __A) { return (__m128)__builtin_ia32_vcvtph2psx128_mask( (__v8hf)__A, (__v4sf)_mm_undefined_ps(), (__mmask8)-1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtxph_ps(__m128 __W, __mmask8 __U, __m128h __A) { return (__m128)__builtin_ia32_vcvtph2psx128_mask((__v8hf)__A, (__v4sf)__W, (__mmask8)__U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { return (__m128)__builtin_ia32_vcvtph2psx128_mask( (__v8hf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtxph_ps(__m128h __A) { return (__m256)__builtin_ia32_vcvtph2psx256_mask( (__v8hf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtxph_ps(__m256 __W, __mmask8 __U, __m128h __A) { return (__m256)__builtin_ia32_vcvtph2psx256_mask((__v8hf)__A, (__v8sf)__W, (__mmask8)__U); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtxph_ps(__mmask8 __U, __m128h __A) { return (__m256)__builtin_ia32_vcvtph2psx256_mask( (__v8hf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtxps_ph(__m128 __A) { return (__m128h)__builtin_ia32_vcvtps2phx128_mask( (__v4sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m128 __A) { return (__m128h)__builtin_ia32_vcvtps2phx128_mask((__v4sf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtxps_ph(__mmask8 __U, __m128 __A) { return (__m128h)__builtin_ia32_vcvtps2phx128_mask( (__v4sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_cvtxps_ph(__m256 __A) { return (__m128h)__builtin_ia32_vcvtps2phx256_mask( (__v8sf)__A, (__v8hf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_mask_cvtxps_ph(__m128h __W, __mmask8 __U, __m256 __A) { return (__m128h)__builtin_ia32_vcvtps2phx256_mask((__v8sf)__A, (__v8hf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtxps_ph(__mmask8 __U, __m256 __A) { return (__m128h)__builtin_ia32_vcvtps2phx256_mask( (__v8sf)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, _mm_fmsub_ph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph(-(__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsub_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256(-(__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, (__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ph(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)_mm_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, (__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ph(__mmask16 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)_mm256_setzero_ph()); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddsubph((__v8hf)__A, (__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddsubph256((__v16hf)__A, (__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C), (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, (__v16hf)__C), (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ph(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ph(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_selectph_128( (__mmask8)__U, __builtin_ia32_vfmaddph((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C), (__v8hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ph(__m256h __A, __mmask16 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ph(__m256h __A, __m256h __B, __m256h __C, __mmask16 __U) { return (__m256h)__builtin_ia32_selectph_256( (__mmask16)__U, __builtin_ia32_vfmaddph256((__v16hf)__A, -(__v16hf)__B, -(__v16hf)__C), (__v16hf)__C); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_pch(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfcmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS128 _mm256_fcmul_pch(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfcmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fcmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfcmulcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__W, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fcmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfcmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fcmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectps_128( __U, __builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)(__m128h)__B, (__v4sf)__C, (__mmask8)__U), (__v4sf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fcmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_vfcmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fcmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfcmaddcph128_maskz( (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fcmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectps_256( __U, __builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U), (__v8sf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fcmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { return (__m256h)__builtin_ia32_vfcmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fcmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfcmaddcph256_maskz( (__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_pch(__m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_pch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__W, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmul_pch(__mmask8 __U, __m128h __A, __m128h __B) { return (__m128h)__builtin_ia32_vfmulcph128_mask( (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmul_pch(__m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_undefined_ph(), (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmul_pch(__m256h __W, __mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfmulcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__W, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmul_pch(__mmask8 __U, __m256h __A, __m256h __B) { return (__m256h)__builtin_ia32_vfmulcph256_mask( (__v8sf)__A, (__v8sf)__B, (__v8sf)_mm256_setzero_ph(), (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_pch(__m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)-1); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_selectps_128( __U, __builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U), (__v4sf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) { return (__m128h)__builtin_ia32_vfmaddcph128_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) { return (__m128h)__builtin_ia32_vfmaddcph128_maskz((__v4sf)__A, (__v4sf)__B, (__v4sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_fmadd_pch(__m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pch(__m256h __A, __mmask8 __U, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_selectps_256( __U, __builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U), (__v8sf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pch(__m256h __A, __m256h __B, __m256h __C, __mmask8 __U) { return (__m256h)__builtin_ia32_vfmaddcph256_mask((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pch(__mmask8 __U, __m256h __A, __m256h __B, __m256h __C) { return (__m256h)__builtin_ia32_vfmaddcph256_maskz((__v8sf)__A, (__v8sf)__B, (__v8sf)__C, (__mmask8)__U); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_blend_ph(__mmask8 __U, __m128h __A, __m128h __W) { return (__m128h)__builtin_ia32_selectph_128((__mmask8)__U, (__v8hf)__W, (__v8hf)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ph(__mmask16 __U, __m256h __A, __m256h __W) { return (__m256h)__builtin_ia32_selectph_256((__mmask16)__U, (__v16hf)__W, (__v16hf)__A); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_permutex2var_ph(__m128h __A, __m128i __I, __m128h __B) { return (__m128h)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi)__B); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ph(__m256h __A, __m256i __I, __m256h __B) { return (__m256h)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_permutexvar_ph(__m128i __A, __m128h __B) { return (__m128h)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_permutexvar_ph(__m256i __A, __m256h __B) { return (__m256h)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_add_ph(__m256h __W) { return __builtin_ia32_reduce_fadd_ph256(-0.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_mul_ph(__m256h __W) { return __builtin_ia32_reduce_fmul_ph256(1.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_max_ph(__m256h __V) { return __builtin_ia32_reduce_fmax_ph256(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS256 _mm256_reduce_min_ph(__m256h __V) { return __builtin_ia32_reduce_fmin_ph256(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_add_ph(__m128h __W) { return __builtin_ia32_reduce_fadd_ph128(-0.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_mul_ph(__m128h __W) { return __builtin_ia32_reduce_fmul_ph128(1.0f16, __W); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_max_ph(__m128h __V) { return __builtin_ia32_reduce_fmax_ph128(__V); } static __inline__ _Float16 __DEFAULT_FN_ATTRS128 _mm_reduce_min_ph(__m128h __V) { return __builtin_ia32_reduce_fmin_ph128(__V); } // intrinsics below are alias for f*mul_*ch #define _mm_mul_pch(A, B) _mm_fmul_pch(A, B) #define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch(W, U, A, B) #define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch(U, A, B) #define _mm256_mul_pch(A, B) _mm256_fmul_pch(A, B) #define _mm256_mask_mul_pch(W, U, A, B) _mm256_mask_fmul_pch(W, U, A, B) #define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch(U, A, B) #define _mm_cmul_pch(A, B) _mm_fcmul_pch(A, B) #define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch(W, U, A, B) #define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch(U, A, B) #define _mm256_cmul_pch(A, B) _mm256_fcmul_pch(A, B) #define _mm256_mask_cmul_pch(W, U, A, B) _mm256_mask_fcmul_pch(W, U, A, B) #define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch(U, A, B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif #endif