core/stdarch/crates/core_arch/src/x86/avx512dq.rs

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem::transmute,
};

// And //

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let and = _mm_and_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, and, src.as_f64x2()))
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let and = _mm_and_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, and, f64x2::ZERO))
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let and = _mm256_and_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, and, src.as_f64x4()))
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let and = _mm256_and_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, and, f64x4::ZERO))
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let and = _mm512_and_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, and, src.as_f64x8()))
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let and = _mm512_and_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, and, f64x8::ZERO))
}
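
// Illustrative sketch (added for this write-up, not part of the upstream stdarch
// file): ANDing a double with the bit pattern 0x7fff_ffff_ffff_ffff clears only
// the IEEE-754 sign bit, so the writemask form gives |x| in lanes whose mask bit
// is set and a copy of `src` elsewhere. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_mask_abs_pd(src: __m512d, k: __mmask8, x: __m512d) -> __m512d {
    let abs_mask = _mm512_castsi512_pd(_mm512_set1_epi64(0x7fff_ffff_ffff_ffff));
    _mm512_mask_and_pd(src, k, x, abs_mask)
}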

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let and = _mm_and_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, and, src.as_f32x4()))
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let and = _mm_and_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, and, f32x4::ZERO))
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let and = _mm256_and_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, and, src.as_f32x8()))
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let and = _mm256_and_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, and, f32x8::ZERO))
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_and(
        transmute::<_, u32x16>(a),
        transmute::<_, u32x16>(b),
    ))
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let and = _mm512_and_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, and, src.as_f32x16()))
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let and = _mm512_and_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, and, f32x16::ZERO))
}
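
// Usage note (ours, not upstream): every intrinsic in this module is `unsafe`
// because the caller must guarantee that the CPU really supports the enabled
// target features. With `std` available, that guarantee typically comes from a
// runtime check along these lines; `no_std` callers need an equivalent
// CPUID-based check from their environment:
//
//     if is_x86_feature_detected!("avx512dq") {
//         // SAFETY: avx512dq was just detected on the running CPU.
//         let r = unsafe { _mm512_and_ps(a, b) };
//     }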

// Andnot

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let andnot = _mm_andnot_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let andnot = _mm_andnot_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let andnot = _mm256_andnot_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let andnot = _mm256_andnot_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
    _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b)
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let andnot = _mm512_andnot_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let andnot = _mm512_andnot_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let andnot = _mm_andnot_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let andnot = _mm_andnot_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let andnot = _mm256_andnot_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let andnot = _mm256_andnot_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
    _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b)
}
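
// Illustrative sketch (ours, not upstream): `-0.0f32` has only the sign bit set,
// so `andnot(-0.0, x)` evaluates `!sign_bit & x`, i.e. the absolute value of
// every lane. It also shows that the NOT applies to the first operand, matching
// the "NOT of a and then bitwise AND with b" wording in the docs above.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_abs_ps(x: __m512) -> __m512 {
    _mm512_andnot_ps(_mm512_set1_ps(-0.0), x)
}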

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let andnot = _mm512_andnot_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let andnot = _mm512_andnot_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
}

// Or

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let or = _mm_or_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, or, src.as_f64x2()))
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let or = _mm_or_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, or, f64x2::ZERO))
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let or = _mm256_or_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, or, src.as_f64x4()))
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let or = _mm256_or_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, or, f64x4::ZERO))
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let or = _mm512_or_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, or, src.as_f64x8()))
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let or = _mm512_or_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, or, f64x8::ZERO))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let or = _mm_or_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, or, src.as_f32x4()))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let or = _mm_or_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, or, f32x4::ZERO))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let or = _mm256_or_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, or, src.as_f32x8()))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let or = _mm256_or_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, or, f32x8::ZERO))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_or(
        transmute::<_, u32x16>(a),
        transmute::<_, u32x16>(b),
    ))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let or = _mm512_or_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, or, src.as_f32x16()))
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let or = _mm512_or_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, or, f32x16::ZERO))
}
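
// Illustrative sketch (ours, not upstream): combining ANDNOT, AND and OR gives a
// branch-free `copysign`: magnitude bits from `x`, sign bit from `y`. `-0.0` is
// the sign-bit-only pattern.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_copysign_ps(x: __m512, y: __m512) -> __m512 {
    let sign = _mm512_set1_ps(-0.0);
    _mm512_or_ps(_mm512_andnot_ps(sign, x), _mm512_and_ps(sign, y))
}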

// Xor

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let xor = _mm_xor_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let xor = _mm_xor_pd(a, b).as_f64x2();
    transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let xor = _mm256_xor_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    let xor = _mm256_xor_pd(a, b).as_f64x4();
    transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorp))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b)))
}
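
// Illustrative sketch (ours, not upstream): XOR with the sign-bit pattern flips
// the sign of every lane, so the masked forms below act as "negate where the
// mask bit is set, keep `src` (or zero) elsewhere" without any arithmetic.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq")]
unsafe fn _example_mask_neg_pd(src: __m512d, k: __mmask8, x: __m512d) -> __m512d {
    _mm512_mask_xor_pd(src, k, x, _mm512_set1_pd(-0.0))
}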

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let xor = _mm512_xor_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let xor = _mm512_xor_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let xor = _mm_xor_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let xor = _mm_xor_ps(a, b).as_f32x4();
    transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let xor = _mm256_xor_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    let xor = _mm256_xor_ps(a, b).as_f32x8();
    transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_xor(
        transmute::<_, u32x16>(a),
        transmute::<_, u32x16>(b),
    ))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let xor = _mm512_xor_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let xor = _mm512_xor_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
}

// Broadcast

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
    let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
    transmute(b)
}
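
// Illustrative sketch (ours, not upstream): the shuffle indices `[0, 1, 0, 1, ...]`
// repeat the two lowest lanes, so broadcasting a `(re, im)` pair held in the low
// half of an `__m128` fills the destination with that complex number four times,
// a common setup step for interleaved complex arithmetic.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _example_splat_complex(re: f32, im: f32) -> __m256 {
    // `_mm_set_ps` takes its arguments from the highest lane down to lane 0.
    _mm256_broadcast_f32x2(_mm_set_ps(0.0, 0.0, im, re))
}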

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    let b = _mm256_broadcast_f32x2(a).as_f32x8();
    transmute(simd_select_bitmask(k, b, src.as_f32x8()))
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
    let b = _mm256_broadcast_f32x2(a).as_f32x8();
    transmute(simd_select_bitmask(k, b, f32x8::ZERO))
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
    let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
    transmute(b)
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    let b = _mm512_broadcast_f32x2(a).as_f32x16();
    transmute(simd_select_bitmask(k, b, src.as_f32x16()))
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
    let b = _mm512_broadcast_f32x2(a).as_f32x16();
    transmute(simd_select_bitmask(k, b, f32x16::ZERO))
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
    let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(b)
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
    let b = _mm512_broadcast_f32x8(a).as_f32x16();
    transmute(simd_select_bitmask(k, b, src.as_f32x16()))
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
    let b = _mm512_broadcast_f32x8(a).as_f32x16();
    transmute(simd_select_bitmask(k, b, f32x16::ZERO))
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
    let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
    transmute(b)
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    let b = _mm256_broadcast_f64x2(a).as_f64x4();
    transmute(simd_select_bitmask(k, b, src.as_f64x4()))
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
    let b = _mm256_broadcast_f64x2(a).as_f64x4();
    transmute(simd_select_bitmask(k, b, f64x4::ZERO))
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
    let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
    transmute(b)
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    let b = _mm512_broadcast_f64x2(a).as_f64x8();
    transmute(simd_select_bitmask(k, b, src.as_f64x8()))
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
#[inline]
#[target_feature(enable = "avx512dq")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
    let b = _mm512_broadcast_f64x2(a).as_f64x8();
    transmute(simd_select_bitmask(k, b, f64x8::ZERO))
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
    let a = a.as_i32x4();
    let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
    transmute(b)
}
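
// Illustrative sketch (ours, not upstream): with a writemask the broadcast only
// overwrites the selected lanes. Mask 0b0011 replaces the low two 32-bit lanes of
// `dst` with lanes 0 and 1 of `a` and leaves the upper two lanes untouched.
#[allow(dead_code)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _example_merge_low_pair(dst: __m128i, a: __m128i) -> __m128i {
    _mm_mask_broadcast_i32x2(dst, 0b0011, a)
}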
971
972/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
973/// (elements are copied from src if the corresponding bit is not set).
974///
975/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
976#[inline]
977#[target_feature(enable = "avx512dq,avx512vl")]
978#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
980pub unsafe fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
981    let b = _mm_broadcast_i32x2(a).as_i32x4();
982    transmute(simd_select_bitmask(k, b, src.as_i32x4()))
983}
984
985/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
986/// (elements are zeroed out if the corresponding bit is not set).
987///
988/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
989#[inline]
990#[target_feature(enable = "avx512dq,avx512vl")]
991#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
993pub unsafe fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
994    let b = _mm_broadcast_i32x2(a).as_i32x4();
995    transmute(simd_select_bitmask(k, b, i32x4::ZERO))
996}
997
998/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
999///
1000/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
1001#[inline]
1002#[target_feature(enable = "avx512dq,avx512vl")]
1003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1004pub unsafe fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
1005    let a = a.as_i32x4();
1006    let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
1007    transmute(b)
1008}
1009
1010/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
1011/// (elements are copied from src if the corresponding bit is not set).
1012///
1013/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
1014#[inline]
1015#[target_feature(enable = "avx512dq,avx512vl")]
1016#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1018pub unsafe fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
1019    let b = _mm256_broadcast_i32x2(a).as_i32x8();
1020    transmute(simd_select_bitmask(k, b, src.as_i32x8()))
1021}
1022
1023/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
1024/// (elements are zeroed out if the corresponding bit is not set).
1025///
1026/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
1027#[inline]
1028#[target_feature(enable = "avx512dq,avx512vl")]
1029#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1031pub unsafe fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
1032    let b = _mm256_broadcast_i32x2(a).as_i32x8();
1033    transmute(simd_select_bitmask(k, b, i32x8::ZERO))
1034}
1035
1036/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
1037///
1038/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
1039#[inline]
1040#[target_feature(enable = "avx512dq")]
1041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1042pub unsafe fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
1043    let a = a.as_i32x4();
1044    let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
1045    transmute(b)
1046}
1047
1048/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
1049/// (elements are copied from src if the corresponding bit is not set).
1050///
1051/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
1052#[inline]
1053#[target_feature(enable = "avx512dq")]
1054#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1056pub unsafe fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
1057    let b = _mm512_broadcast_i32x2(a).as_i32x16();
1058    transmute(simd_select_bitmask(k, b, src.as_i32x16()))
1059}
1060
1061/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
1062/// (elements are zeroed out if the corresponding bit is not set).
1063///
1064/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
1065#[inline]
1066#[target_feature(enable = "avx512dq")]
1067#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
1068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1069pub unsafe fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
1070    let b = _mm512_broadcast_i32x2(a).as_i32x16();
1071    transmute(simd_select_bitmask(k, b, i32x16::ZERO))
1072}
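
// Illustrative sketch (not part of the library): assuming the usual
// `core::arch::x86_64` set/setr helpers and a CPU supporting `avx512dq`, the
// i32x2 broadcast repeats the low pair of 32-bit lanes across the destination,
// and the masked variants then blend with `src` or zero:
//
//     let a = _mm_setr_epi32(10, 20, 30, 40);
//     let b = _mm512_broadcast_i32x2(a);
//     // b = [10, 20, 10, 20, ..., 10, 20] (eight copies of the low pair)
//     let z = _mm512_maskz_broadcast_i32x2(0b0000_0000_0000_0011, a);
//     // z = [10, 20, 0, 0, ..., 0]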
1073
1074/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
1075///
1076/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
1077#[inline]
1078#[target_feature(enable = "avx512dq")]
1079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1080pub unsafe fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
1081    let a = a.as_i32x8();
1082    let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
1083    transmute(b)
1084}
1085
1086/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
1087/// (elements are copied from src if the corresponding bit is not set).
1088///
1089/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
1090#[inline]
1091#[target_feature(enable = "avx512dq")]
1092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1093pub unsafe fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
1094    let b = _mm512_broadcast_i32x8(a).as_i32x16();
1095    transmute(simd_select_bitmask(k, b, src.as_i32x16()))
1096}
1097
1098/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
1099/// (elements are zeroed out if the corresponding bit is not set).
1100///
1101/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
1102#[inline]
1103#[target_feature(enable = "avx512dq")]
1104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1105pub unsafe fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
1106    let b = _mm512_broadcast_i32x8(a).as_i32x16();
1107    transmute(simd_select_bitmask(k, b, i32x16::ZERO))
1108}
1109
1110/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
1111///
1112/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
1113#[inline]
1114#[target_feature(enable = "avx512dq,avx512vl")]
1115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1116pub unsafe fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
1117    let a = a.as_i64x2();
1118    let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1119    transmute(b)
1120}
1121
1122/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
1123/// (elements are copied from src if the corresponding bit is not set).
1124///
1125/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
1126#[inline]
1127#[target_feature(enable = "avx512dq,avx512vl")]
1128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1129pub unsafe fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
1130    let b = _mm256_broadcast_i64x2(a).as_i64x4();
1131    transmute(simd_select_bitmask(k, b, src.as_i64x4()))
1132}
1133
1134/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
1135/// (elements are zeroed out if the corresponding bit is not set).
1136///
1137/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
1138#[inline]
1139#[target_feature(enable = "avx512dq,avx512vl")]
1140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1141pub unsafe fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
1142    let b = _mm256_broadcast_i64x2(a).as_i64x4();
1143    transmute(simd_select_bitmask(k, b, i64x4::ZERO))
1144}
1145
1146/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
1147///
1148/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
1149#[inline]
1150#[target_feature(enable = "avx512dq")]
1151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1152pub unsafe fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
1153    let a = a.as_i64x2();
1154    let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
1155    transmute(b)
1156}
1157
1158/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
1159/// (elements are copied from src if the corresponding bit is not set).
1160///
1161/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
1162#[inline]
1163#[target_feature(enable = "avx512dq")]
1164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1165pub unsafe fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
1166    let b = _mm512_broadcast_i64x2(a).as_i64x8();
1167    transmute(simd_select_bitmask(k, b, src.as_i64x8()))
1168}
1169
1170/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
1171/// (elements are zeroed out if the corresponding bit is not set).
1172///
1173/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
1174#[inline]
1175#[target_feature(enable = "avx512dq")]
1176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1177pub unsafe fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
1178    let b = _mm512_broadcast_i64x2(a).as_i64x8();
1179    transmute(simd_select_bitmask(k, b, i64x8::ZERO))
1180}
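
// Illustrative sketch (same assumptions as above): the i64x2 broadcasts
// duplicate the whole 128-bit source into every 128-bit lane of the
// destination:
//
//     let a = _mm_set_epi64x(2, 1); // lanes [1, 2]
//     let b = _mm256_broadcast_i64x2(a); // [1, 2, 1, 2]
//     let c = _mm512_broadcast_i64x2(a); // [1, 2, 1, 2, 1, 2, 1, 2]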
1181
1182// Extract
1183
1184/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
1185/// selected with IMM8, and stores the result in dst.
1186///
1187/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
1188#[inline]
1189#[target_feature(enable = "avx512dq")]
1190#[rustc_legacy_const_generics(1)]
1191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1192pub unsafe fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
1193    static_assert_uimm_bits!(IMM8, 1);
1194    match IMM8 & 1 {
1195        0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
1196        _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
1197    }
1198}
1199
1200/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
1201/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
1202/// if the corresponding bit is not set).
1203///
1204/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
1205#[inline]
1206#[target_feature(enable = "avx512dq")]
1207#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
1208#[rustc_legacy_const_generics(3)]
1209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1210pub unsafe fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
1211    src: __m256,
1212    k: __mmask8,
1213    a: __m512,
1214) -> __m256 {
1215    static_assert_uimm_bits!(IMM8, 1);
1216    let b = _mm512_extractf32x8_ps::<IMM8>(a);
1217    transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
1218}
1219
1220/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
1221/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
1222/// corresponding bit is not set).
1223///
1224/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
1225#[inline]
1226#[target_feature(enable = "avx512dq")]
1227#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
1228#[rustc_legacy_const_generics(2)]
1229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1230pub unsafe fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
1231    static_assert_uimm_bits!(IMM8, 1);
1232    let b = _mm512_extractf32x8_ps::<IMM8>(a);
1233    transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
1234}
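
// Illustrative sketch (hypothetical values): IMM8 selects which 256-bit half
// of the 512-bit source is returned, 0 for the lower half and 1 for the upper:
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let lo = _mm512_extractf32x8_ps::<0>(a); // [0., 1., ..., 7.]
//     let hi = _mm512_extractf32x8_ps::<1>(a); // [8., 9., ..., 15.]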
1235
1236/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1237/// selected with IMM8, and stores the result in dst.
1238///
1239/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
1240#[inline]
1241#[target_feature(enable = "avx512dq,avx512vl")]
1242#[rustc_legacy_const_generics(1)]
1243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1244pub unsafe fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
1245    static_assert_uimm_bits!(IMM8, 1);
1246    match IMM8 & 1 {
1247        0 => simd_shuffle!(a, a, [0, 1]),
1248        _ => simd_shuffle!(a, a, [2, 3]),
1249    }
1250}
1251
1252/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1253/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
1254/// if the corresponding bit is not set).
1255///
1256/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
1257#[inline]
1258#[target_feature(enable = "avx512dq,avx512vl")]
1259#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
1260#[rustc_legacy_const_generics(3)]
1261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1262pub unsafe fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
1263    src: __m128d,
1264    k: __mmask8,
1265    a: __m256d,
1266) -> __m128d {
1267    static_assert_uimm_bits!(IMM8, 1);
1268    let b = _mm256_extractf64x2_pd::<IMM8>(a);
1269    transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
1270}
1271
1272/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1273/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
1274/// corresponding bit is not set).
1275///
1276/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
1277#[inline]
1278#[target_feature(enable = "avx512dq,avx512vl")]
1279#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
1280#[rustc_legacy_const_generics(2)]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282pub unsafe fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
1283    static_assert_uimm_bits!(IMM8, 1);
1284    let b = _mm256_extractf64x2_pd::<IMM8>(a);
1285    transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
1286}
1287
1288/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1289/// selected with IMM8, and stores the result in dst.
1290///
1291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
1292#[inline]
1293#[target_feature(enable = "avx512dq")]
1294#[rustc_legacy_const_generics(1)]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296pub unsafe fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
1297    static_assert_uimm_bits!(IMM8, 2);
1298    match IMM8 & 3 {
1299        0 => simd_shuffle!(a, a, [0, 1]),
1300        1 => simd_shuffle!(a, a, [2, 3]),
1301        2 => simd_shuffle!(a, a, [4, 5]),
1302        _ => simd_shuffle!(a, a, [6, 7]),
1303    }
1304}
1305
1306/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1307/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
1308/// if the corresponding bit is not set).
1309///
1310/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
1311#[inline]
1312#[target_feature(enable = "avx512dq")]
1313#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
1314#[rustc_legacy_const_generics(3)]
1315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1316pub unsafe fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
1317    src: __m128d,
1318    k: __mmask8,
1319    a: __m512d,
1320) -> __m128d {
1321    static_assert_uimm_bits!(IMM8, 2);
1322    let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
1323    transmute(simd_select_bitmask(k, b, src.as_f64x2()))
1324}
1325
1326/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
1327/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
1328/// corresponding bit is not set).
1329///
1330/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
1331#[inline]
1332#[target_feature(enable = "avx512dq")]
1333#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
1334#[rustc_legacy_const_generics(2)]
1335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1336pub unsafe fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
1337    static_assert_uimm_bits!(IMM8, 2);
1338    let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
1339    transmute(simd_select_bitmask(k, b, f64x2::ZERO))
1340}
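
// Illustrative sketch: for the 512-bit source the two-bit IMM8 picks one of
// four 128-bit lanes:
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let lane2 = _mm512_extractf64x2_pd::<2>(a); // [4., 5.]
//     let lane3 = _mm512_extractf64x2_pd::<3>(a); // [6., 7.]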
1341
1342/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
1343/// the result in dst.
1344///
1345/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
1346#[inline]
1347#[target_feature(enable = "avx512dq")]
1348#[rustc_legacy_const_generics(1)]
1349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1350pub unsafe fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
1351    static_assert_uimm_bits!(IMM8, 1);
1352    let a = a.as_i32x16();
1353    let b: i32x8 = match IMM8 & 1 {
1354        0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
1355        _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
1356    };
1357    transmute(b)
1358}
1359
1360/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
1361/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
1362///
1363/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
1364#[inline]
1365#[target_feature(enable = "avx512dq")]
1366#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
1367#[rustc_legacy_const_generics(3)]
1368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1369pub unsafe fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
1370    src: __m256i,
1371    k: __mmask8,
1372    a: __m512i,
1373) -> __m256i {
1374    static_assert_uimm_bits!(IMM8, 1);
1375    let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
1376    transmute(simd_select_bitmask(k, b, src.as_i32x8()))
1377}
1378
1379/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
1380/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1381///
1382/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
1383#[inline]
1384#[target_feature(enable = "avx512dq")]
1385#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
1386#[rustc_legacy_const_generics(2)]
1387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1388pub unsafe fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
1389    static_assert_uimm_bits!(IMM8, 1);
1390    let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
1391    transmute(simd_select_bitmask(k, b, i32x8::ZERO))
1392}
1393
1394/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1395/// the result in dst.
1396///
1397/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
1398#[inline]
1399#[target_feature(enable = "avx512dq,avx512vl")]
1400#[rustc_legacy_const_generics(1)]
1401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1402pub unsafe fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
1403    static_assert_uimm_bits!(IMM8, 1);
1404    let a = a.as_i64x4();
1405    match IMM8 & 1 {
1406        0 => simd_shuffle!(a, a, [0, 1]),
1407        _ => simd_shuffle!(a, a, [2, 3]),
1408    }
1409}
1410
1411/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1412/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
1413///
1414/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
1415#[inline]
1416#[target_feature(enable = "avx512dq,avx512vl")]
1417#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
1418#[rustc_legacy_const_generics(3)]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420pub unsafe fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
1421    src: __m128i,
1422    k: __mmask8,
1423    a: __m256i,
1424) -> __m128i {
1425    static_assert_uimm_bits!(IMM8, 1);
1426    let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1427    transmute(simd_select_bitmask(k, b, src.as_i64x2()))
1428}
1429
1430/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1431/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1432///
1433/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
1434#[inline]
1435#[target_feature(enable = "avx512dq,avx512vl")]
1436#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
1437#[rustc_legacy_const_generics(2)]
1438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1439pub unsafe fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
1440    static_assert_uimm_bits!(IMM8, 1);
1441    let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1442    transmute(simd_select_bitmask(k, b, i64x2::ZERO))
1443}
1444
1445/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1446/// the result in dst.
1447///
1448/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
1449#[inline]
1450#[target_feature(enable = "avx512dq")]
1451#[rustc_legacy_const_generics(1)]
1452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1453pub unsafe fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
1454    static_assert_uimm_bits!(IMM8, 2);
1455    let a = a.as_i64x8();
1456    match IMM8 & 3 {
1457        0 => simd_shuffle!(a, a, [0, 1]),
1458        1 => simd_shuffle!(a, a, [2, 3]),
1459        2 => simd_shuffle!(a, a, [4, 5]),
1460        _ => simd_shuffle!(a, a, [6, 7]),
1461    }
1462}
1463
1464/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1465/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
1466///
1467/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
1468#[inline]
1469#[target_feature(enable = "avx512dq")]
1470#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
1471#[rustc_legacy_const_generics(3)]
1472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1473pub unsafe fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
1474    src: __m128i,
1475    k: __mmask8,
1476    a: __m512i,
1477) -> __m128i {
1478    static_assert_uimm_bits!(IMM8, 2);
1479    let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1480    transmute(simd_select_bitmask(k, b, src.as_i64x2()))
1481}
1482
1483/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
1484/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1485///
1486/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
1487#[inline]
1488#[target_feature(enable = "avx512dq")]
1489#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
1490#[rustc_legacy_const_generics(2)]
1491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1492pub unsafe fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
1493    static_assert_uimm_bits!(IMM8, 2);
1494    let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
1495    transmute(simd_select_bitmask(k, b, i64x2::ZERO))
1496}
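
// Illustrative sketch: the integer extracts mirror the floating-point ones,
// and the write-masked forms blend the extracted lane with `src`:
//
//     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
//     let src = _mm_set_epi64x(-2, -1); // lanes [-1, -2]
//     let r = _mm512_mask_extracti64x2_epi64::<1>(src, 0b01, a);
//     // r = [12, -2]: element 0 comes from the extract, element 1 from src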
1497
1498// Insert
1499
1500/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
1501/// elements) from b into dst at the location specified by IMM8.
1502///
1503/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
1504#[inline]
1505#[target_feature(enable = "avx512dq")]
1506#[rustc_legacy_const_generics(2)]
1507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1508pub unsafe fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
1509    static_assert_uimm_bits!(IMM8, 1);
1510    let b = _mm512_castps256_ps512(b);
1511    match IMM8 & 1 {
1512        0 => simd_shuffle!(
1513            a,
1514            b,
1515            [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
1516        ),
1517        _ => simd_shuffle!(
1518            a,
1519            b,
1520            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
1521        ),
1522    }
1523}
1524
1525/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
1526/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
1527/// (elements are copied from src if the corresponding bit is not set).
1528///
1529/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
1530#[inline]
1531#[target_feature(enable = "avx512dq")]
1532#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
1533#[rustc_legacy_const_generics(4)]
1534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1535pub unsafe fn _mm512_mask_insertf32x8<const IMM8: i32>(
1536    src: __m512,
1537    k: __mmask16,
1538    a: __m512,
1539    b: __m256,
1540) -> __m512 {
1541    static_assert_uimm_bits!(IMM8, 1);
1542    let c = _mm512_insertf32x8::<IMM8>(a, b);
1543    transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
1544}
1545
1546/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
1547/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
1548/// (elements are zeroed out if the corresponding bit is not set).
1549///
1550/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
1551#[inline]
1552#[target_feature(enable = "avx512dq")]
1553#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
1554#[rustc_legacy_const_generics(3)]
1555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1556pub unsafe fn _mm512_maskz_insertf32x8<const IMM8: i32>(
1557    k: __mmask16,
1558    a: __m512,
1559    b: __m256,
1560) -> __m512 {
1561    static_assert_uimm_bits!(IMM8, 1);
1562    let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
1563    transmute(simd_select_bitmask(k, c, f32x16::ZERO))
1564}
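
// Illustrative sketch: insertion is the inverse of the extract above; IMM8
// chooses which 256-bit half of `a` is replaced by `b`:
//
//     let a = _mm512_set1_ps(0.0);
//     let b = _mm256_set1_ps(1.0);
//     let r = _mm512_insertf32x8::<1>(a, b);
//     // r = [0.0; 8] in the lower half, [1.0; 8] in the upper half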
1565
1566/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1567/// elements) from b into dst at the location specified by IMM8.
1568///
1569/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
1570#[inline]
1571#[target_feature(enable = "avx512dq,avx512vl")]
1572#[rustc_legacy_const_generics(2)]
1573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1574pub unsafe fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
1575    static_assert_uimm_bits!(IMM8, 1);
1576    let b = _mm256_castpd128_pd256(b);
1577    match IMM8 & 1 {
1578        0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
1579        _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
1580    }
1581}
1582
1583/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1584/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
1585/// (elements are copied from src if the corresponding bit is not set).
1586///
1587/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
1588#[inline]
1589#[target_feature(enable = "avx512dq,avx512vl")]
1590#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
1591#[rustc_legacy_const_generics(4)]
1592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1593pub unsafe fn _mm256_mask_insertf64x2<const IMM8: i32>(
1594    src: __m256d,
1595    k: __mmask8,
1596    a: __m256d,
1597    b: __m128d,
1598) -> __m256d {
1599    static_assert_uimm_bits!(IMM8, 1);
1600    let c = _mm256_insertf64x2::<IMM8>(a, b);
1601    transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
1602}
1603
1604/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1605/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
1606/// (elements are zeroed out if the corresponding bit is not set).
1607///
1608/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
1609#[inline]
1610#[target_feature(enable = "avx512dq,avx512vl")]
1611#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
1612#[rustc_legacy_const_generics(3)]
1613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1614pub unsafe fn _mm256_maskz_insertf64x2<const IMM8: i32>(
1615    k: __mmask8,
1616    a: __m256d,
1617    b: __m128d,
1618) -> __m256d {
1619    static_assert_uimm_bits!(IMM8, 1);
1620    let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
1621    transmute(simd_select_bitmask(k, c, f64x4::ZERO))
1622}
1623
1624/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1625/// elements) from b into dst at the location specified by IMM8.
1626///
1627/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
1628#[inline]
1629#[target_feature(enable = "avx512dq")]
1630#[rustc_legacy_const_generics(2)]
1631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1632pub unsafe fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
1633    static_assert_uimm_bits!(IMM8, 2);
1634    let b = _mm512_castpd128_pd512(b);
1635    match IMM8 & 3 {
1636        0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
1637        1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
1638        2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
1639        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
1640    }
1641}
1642
1643/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1644/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
1645/// (elements are copied from src if the corresponding bit is not set).
1646///
1647/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
1648#[inline]
1649#[target_feature(enable = "avx512dq")]
1650#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
1651#[rustc_legacy_const_generics(4)]
1652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1653pub unsafe fn _mm512_mask_insertf64x2<const IMM8: i32>(
1654    src: __m512d,
1655    k: __mmask8,
1656    a: __m512d,
1657    b: __m128d,
1658) -> __m512d {
1659    static_assert_uimm_bits!(IMM8, 2);
1660    let c = _mm512_insertf64x2::<IMM8>(a, b);
1661    transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
1662}
1663
1664/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
1665/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
1666/// (elements are zeroed out if the corresponding bit is not set).
1667///
1668/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
1669#[inline]
1670#[target_feature(enable = "avx512dq")]
1671#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
1672#[rustc_legacy_const_generics(3)]
1673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1674pub unsafe fn _mm512_maskz_insertf64x2<const IMM8: i32>(
1675    k: __mmask8,
1676    a: __m512d,
1677    b: __m128d,
1678) -> __m512d {
1679    static_assert_uimm_bits!(IMM8, 2);
1680    let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
1681    transmute(simd_select_bitmask(k, c, f64x8::ZERO))
1682}
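
// Illustrative sketch: for the 512-bit double-precision insert the two-bit
// IMM8 selects which 128-bit lane receives `b`:
//
//     let a = _mm512_set1_pd(0.0);
//     let b = _mm_set1_pd(9.0);
//     let r = _mm512_insertf64x2::<2>(a, b);
//     // r = [0., 0., 0., 0., 9., 9., 0., 0.]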
1683
1684/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
1685/// location specified by IMM8.
1686///
1687/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
1688#[inline]
1689#[target_feature(enable = "avx512dq")]
1690#[rustc_legacy_const_generics(2)]
1691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1692pub unsafe fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
1693    static_assert_uimm_bits!(IMM8, 1);
1694    let a = a.as_i32x16();
1695    let b = _mm512_castsi256_si512(b).as_i32x16();
1696    let r: i32x16 = match IMM8 & 1 {
1697        0 => simd_shuffle!(
1698            a,
1699            b,
1700            [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
1701        ),
1702        _ => simd_shuffle!(
1703            a,
1704            b,
1705            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
1706        ),
1707    };
1708    transmute(r)
1709}
1710
1711/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
1712/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
1713/// the corresponding bit is not set).
1714///
1715/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
1716#[inline]
1717#[target_feature(enable = "avx512dq")]
1718#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
1719#[rustc_legacy_const_generics(4)]
1720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1721pub unsafe fn _mm512_mask_inserti32x8<const IMM8: i32>(
1722    src: __m512i,
1723    k: __mmask16,
1724    a: __m512i,
1725    b: __m256i,
1726) -> __m512i {
1727    static_assert_uimm_bits!(IMM8, 1);
1728    let c = _mm512_inserti32x8::<IMM8>(a, b);
1729    transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
1730}
1731
1732/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
1733/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
1734/// corresponding bit is not set).
1735///
1736/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
1737#[inline]
1738#[target_feature(enable = "avx512dq")]
1739#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
1740#[rustc_legacy_const_generics(3)]
1741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1742pub unsafe fn _mm512_maskz_inserti32x8<const IMM8: i32>(
1743    k: __mmask16,
1744    a: __m512i,
1745    b: __m256i,
1746) -> __m512i {
1747    static_assert_uimm_bits!(IMM8, 1);
1748    let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
1749    transmute(simd_select_bitmask(k, c, i32x16::ZERO))
1750}
1751
1752/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
1753/// location specified by IMM8.
1754///
1755/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
1756#[inline]
1757#[target_feature(enable = "avx512dq,avx512vl")]
1758#[rustc_legacy_const_generics(2)]
1759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1760pub unsafe fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
1761    static_assert_uimm_bits!(IMM8, 1);
1762    let a = a.as_i64x4();
1763    let b = _mm256_castsi128_si256(b).as_i64x4();
1764    match IMM8 & 1 {
1765        0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
1766        _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
1767    }
1768}
1769
1770/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
1771/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
1772/// the corresponding bit is not set).
1773///
1774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
1775#[inline]
1776#[target_feature(enable = "avx512dq,avx512vl")]
1777#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
1778#[rustc_legacy_const_generics(4)]
1779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1780pub unsafe fn _mm256_mask_inserti64x2<const IMM8: i32>(
1781    src: __m256i,
1782    k: __mmask8,
1783    a: __m256i,
1784    b: __m128i,
1785) -> __m256i {
1786    static_assert_uimm_bits!(IMM8, 1);
1787    let c = _mm256_inserti64x2::<IMM8>(a, b);
1788    transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
1789}
1790
1791/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
1792/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
1793/// corresponding bit is not set).
1794///
1795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
1796#[inline]
1797#[target_feature(enable = "avx512dq,avx512vl")]
1798#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
1799#[rustc_legacy_const_generics(3)]
1800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1801pub unsafe fn _mm256_maskz_inserti64x2<const IMM8: i32>(
1802    k: __mmask8,
1803    a: __m256i,
1804    b: __m128i,
1805) -> __m256i {
1806    static_assert_uimm_bits!(IMM8, 1);
1807    let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
1808    transmute(simd_select_bitmask(k, c, i64x4::ZERO))
1809}
1810
1811/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
1812/// location specified by IMM8.
1813///
1814/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
1815#[inline]
1816#[target_feature(enable = "avx512dq")]
1817#[rustc_legacy_const_generics(2)]
1818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1819pub unsafe fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
1820    static_assert_uimm_bits!(IMM8, 2);
1821    let a = a.as_i64x8();
1822    let b = _mm512_castsi128_si512(b).as_i64x8();
1823    match IMM8 & 3 {
1824        0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
1825        1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
1826        2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
1827        _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
1828    }
1829}
1830
1831/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
1832/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
1833/// the corresponding bit is not set).
1834///
1835/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
1836#[inline]
1837#[target_feature(enable = "avx512dq")]
1838#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
1839#[rustc_legacy_const_generics(4)]
1840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1841pub unsafe fn _mm512_mask_inserti64x2<const IMM8: i32>(
1842    src: __m512i,
1843    k: __mmask8,
1844    a: __m512i,
1845    b: __m128i,
1846) -> __m512i {
1847    static_assert_uimm_bits!(IMM8, 2);
1848    let c = _mm512_inserti64x2::<IMM8>(a, b);
1849    transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
1850}
1851
1852/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
1853/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
1854/// corresponding bit is not set).
1855///
1856/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
1857#[inline]
1858#[target_feature(enable = "avx512dq")]
1859#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
1860#[rustc_legacy_const_generics(3)]
1861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1862pub unsafe fn _mm512_maskz_inserti64x2<const IMM8: i32>(
1863    k: __mmask8,
1864    a: __m512i,
1865    b: __m128i,
1866) -> __m512i {
1867    static_assert_uimm_bits!(IMM8, 2);
1868    let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
1869    transmute(simd_select_bitmask(k, c, i64x8::ZERO))
1870}
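
// Illustrative sketch: the masked integer inserts first build the inserted
// vector, then blend it element-wise with `src` or zero under the mask:
//
//     let a = _mm512_set1_epi64(0);
//     let b = _mm_set1_epi64x(7);
//     let r = _mm512_maskz_inserti64x2::<0>(0b0000_0001, a, b);
//     // only element 0 survives the zeromask: r = [7, 0, 0, 0, 0, 0, 0, 0]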
1871
1872// Convert
1873
1874/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1875/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
1876///
1877/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1878/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1879/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1880/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1881/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1882///
1883/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
1884#[inline]
1885#[target_feature(enable = "avx512dq")]
1886#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
1887#[rustc_legacy_const_generics(1)]
1888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1889pub unsafe fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
1890    static_assert_rounding!(ROUNDING);
1891    transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
1892}
1893
1894/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1895/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
1896/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
1897///
1898/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1899/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1900/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1901/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1902/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1903///
1904/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
1905#[inline]
1906#[target_feature(enable = "avx512dq")]
1907#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
1908#[rustc_legacy_const_generics(3)]
1909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1910pub unsafe fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
1911    src: __m512d,
1912    k: __mmask8,
1913    a: __m512i,
1914) -> __m512d {
1915    static_assert_rounding!(ROUNDING);
1916    let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
1917    transmute(simd_select_bitmask(k, b, src.as_f64x8()))
1918}
1919
1920/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1921/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1922/// Rounding is done according to the ROUNDING parameter, which can be one of:
1923///
1924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
1925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
1926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
1927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
1928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
1929///
1930/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
1931#[inline]
1932#[target_feature(enable = "avx512dq")]
1933#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
1934#[rustc_legacy_const_generics(2)]
1935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1936pub unsafe fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(
1937    k: __mmask8,
1938    a: __m512i,
1939) -> __m512d {
1940    static_assert_rounding!(ROUNDING);
1941    let b = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
1942    transmute(simd_select_bitmask(k, b, f64x8::ZERO))
1943}
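
// Illustrative sketch: the rounding mode is a const generic built from the
// `_MM_FROUND_*` flags listed above, e.g. round-to-nearest with exceptions
// suppressed:
//
//     let a = _mm512_set1_epi64(3);
//     let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
//     // r = [3.0; 8]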
1944
1945/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1946/// and store the results in dst.
1947///
1948/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
1949#[inline]
1950#[target_feature(enable = "avx512dq,avx512vl")]
1951#[cfg_attr(test, assert_instr(vcvtqq2pd))]
1952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1953pub unsafe fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
1954    transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION))
1955}
1956
1957/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1958/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
1959/// not set).
1960///
1961/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706)
1962#[inline]
1963#[target_feature(enable = "avx512dq,avx512vl")]
1964#[cfg_attr(test, assert_instr(vcvtqq2pd))]
1965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1966pub unsafe fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
1967    let b = _mm_cvtepi64_pd(a).as_f64x2();
1968    transmute(simd_select_bitmask(k, b, src.as_f64x2()))
1969}
1970
1971/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1972/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1973///
1974/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707)
1975#[inline]
1976#[target_feature(enable = "avx512dq,avx512vl")]
1977#[cfg_attr(test, assert_instr(vcvtqq2pd))]
1978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1979pub unsafe fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d {
1980    let b = _mm_cvtepi64_pd(a).as_f64x2();
1981    transmute(simd_select_bitmask(k, b, f64x2::ZERO))
1982}
1983
1984/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1985/// and store the results in dst.
1986///
1987/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708)
1988#[inline]
1989#[target_feature(enable = "avx512dq,avx512vl")]
1990#[cfg_attr(test, assert_instr(vcvtqq2pd))]
1991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1992pub unsafe fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d {
1993    transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION))
1994}
1995
1996/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
1997/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
1998/// not set).
1999///
2000/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709)
2001#[inline]
2002#[target_feature(enable = "avx512dq,avx512vl")]
2003#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2005pub unsafe fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2006    let b = _mm256_cvtepi64_pd(a).as_f64x4();
2007    transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2008}
2009
2010/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2011/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2012///
2013/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710)
2014#[inline]
2015#[target_feature(enable = "avx512dq,avx512vl")]
2016#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2018pub unsafe fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d {
2019    let b = _mm256_cvtepi64_pd(a).as_f64x4();
2020    transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2021}
2022
2023/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2024/// and store the results in dst.
2025///
2026/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711)
2027#[inline]
2028#[target_feature(enable = "avx512dq")]
2029#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2031pub unsafe fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d {
2032    transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION))
2033}
2034
2035/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2036/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2037/// not set).
2038///
2039/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712)
2040#[inline]
2041#[target_feature(enable = "avx512dq")]
2042#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2044pub unsafe fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2045    let b = _mm512_cvtepi64_pd(a).as_f64x8();
2046    transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2047}
2048
2049/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2050/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2051///
2052/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713)
2053#[inline]
2054#[target_feature(enable = "avx512dq")]
2055#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2057pub unsafe fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d {
2058    let b = _mm512_cvtepi64_pd(a).as_f64x8();
2059    transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2060}
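
// Illustrative sketch: without an explicit rounding argument these conversions
// round according to the current MXCSR setting:
//
//     let a = _mm256_setr_epi64x(10, 20, 30, 40);
//     let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
//     // r = [0.0, 20.0, 30.0, 0.0]: only elements 1 and 2 are converted and kept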
2061
2062/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2063/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2064///
2065/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2066/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2067/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2068/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2069/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2070///
2071/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
2072#[inline]
2073#[target_feature(enable = "avx512dq")]
2074#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2075#[rustc_legacy_const_generics(1)]
2076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2077pub unsafe fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2078    static_assert_rounding!(ROUNDING);
2079    transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
2080}
2081
2082/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2083/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2084/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2085///
2086/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2087/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2088/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2089/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2090/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2091///
2092/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
2093#[inline]
2094#[target_feature(enable = "avx512dq")]
2095#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2096#[rustc_legacy_const_generics(3)]
2097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2098pub unsafe fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
2099    src: __m256,
2100    k: __mmask8,
2101    a: __m512i,
2102) -> __m256 {
2103    static_assert_rounding!(ROUNDING);
2104    let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
2105    transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2106}
2107
2108/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2109/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2110/// Rounding is done according to the ROUNDING parameter, which can be one of:
2111///
2112/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2113/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2114/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2115/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2116/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2117///
2118/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
2119#[inline]
2120#[target_feature(enable = "avx512dq")]
2121#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2122#[rustc_legacy_const_generics(2)]
2123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2124pub unsafe fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(
2125    k: __mmask8,
2126    a: __m512i,
2127) -> __m256 {
2128    static_assert_rounding!(ROUNDING);
2129    let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
2130    transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2131}
2132
2133/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2134/// and store the results in dst.
2135///
2136/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
2137#[inline]
2138#[target_feature(enable = "avx512dq,avx512vl")]
2139#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2141pub unsafe fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
2142    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
2143}
2144
2145/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2146/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2147/// not set).
2148///
2149/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724)
2150#[inline]
2151#[target_feature(enable = "avx512dq,avx512vl")]
2152#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2154pub unsafe fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
2155    transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k))
2156}
2157
2158/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2159/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2160///
2161/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725)
2162#[inline]
2163#[target_feature(enable = "avx512dq,avx512vl")]
2164#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2166pub unsafe fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 {
2167    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
2168}
2169
2170/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2171/// and store the results in dst.
2172///
2173/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726)
2174#[inline]
2175#[target_feature(enable = "avx512dq,avx512vl")]
2176#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2177#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2178pub unsafe fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 {
2179    transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION))
2180}
2181
2182/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2183/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2184/// not set).
2185///
2186/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727)
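///
/// A minimal usage sketch, not part of Intel's documentation, showing how the writemask keeps
/// `src` lanes where the mask bit is clear; it assumes a nightly toolchain with the
/// `stdarch_x86_avx512` feature and a CPU that supports `avx512dq` and `avx512vl`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set1_ps(9.0);
///     // Element 0 is the last argument of `_mm256_set_epi64x`.
///     let a = _mm256_set_epi64x(40, 30, 20, 10);
///     // Mask bits 0 and 2 are set, so lanes 0 and 2 are converted; lanes 1 and 3
///     // are copied from `src`.
///     let r = _mm256_mask_cvtepi64_ps(src, 0b0101, a);
///     let mut out = [0.0f32; 4];
///     _mm_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out, [10.0, 9.0, 30.0, 9.0]);
/// }
/// ```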
2187#[inline]
2188#[target_feature(enable = "avx512dq,avx512vl")]
2189#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191pub unsafe fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2192    let b = _mm256_cvtepi64_ps(a).as_f32x4();
2193    transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2194}
2195
2196/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2197/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2198///
2199/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728)
2200#[inline]
2201#[target_feature(enable = "avx512dq,avx512vl")]
2202#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2204pub unsafe fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 {
2205    let b = _mm256_cvtepi64_ps(a).as_f32x4();
2206    transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2207}
2208
2209/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2210/// and store the results in dst.
2211///
2212/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729)
2213#[inline]
2214#[target_feature(enable = "avx512dq")]
2215#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2217pub unsafe fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 {
2218    transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION))
2219}
2220
2221/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2222/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2223/// not set).
2224///
2225/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730)
2226#[inline]
2227#[target_feature(enable = "avx512dq")]
2228#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2230pub unsafe fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
2231    let b = _mm512_cvtepi64_ps(a).as_f32x8();
2232    transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2233}
2234
2235/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2236/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2237///
2238/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
2239#[inline]
2240#[target_feature(enable = "avx512dq")]
2241#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2243pub unsafe fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
2244    let b = _mm512_cvtepi64_ps(a).as_f32x8();
2245    transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2246}
2247
2248/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2249/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2250///
2251/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2252/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2253/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2254/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2256///
2257/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
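///
/// A minimal usage sketch, not part of Intel's documentation, showing a case where the rounding
/// mode is observable; it assumes a nightly toolchain with the `stdarch_x86_avx512` feature and
/// a CPU that supports `avx512dq`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // (1 << 53) + 1 has no exact f64 representation, so the rounding mode decides
///     // which neighboring value is produced.
///     let a = _mm512_set1_epi64((1 << 53) + 1);
///     let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [9007199254740994.0; 8]);
/// }
/// ```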
2258#[inline]
2259#[target_feature(enable = "avx512dq")]
2260#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2261#[rustc_legacy_const_generics(1)]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263pub unsafe fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
2264    static_assert_rounding!(ROUNDING);
2265    transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
2266}
2267
2268/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2269/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2270/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2271///
2272/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2273/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2274/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2275/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2276/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2277///
2278/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
2279#[inline]
2280#[target_feature(enable = "avx512dq")]
2281#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2282#[rustc_legacy_const_generics(3)]
2283#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2284pub unsafe fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
2285    src: __m512d,
2286    k: __mmask8,
2287    a: __m512i,
2288) -> __m512d {
2289    static_assert_rounding!(ROUNDING);
2290    let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
2291    transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2292}
2293
2294/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2295/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2296/// Rounding is done according to the ROUNDING parameter, which can be one of:
2297///
2298/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2299/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2300/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2301/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2302/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2303///
2304/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
2305#[inline]
2306#[target_feature(enable = "avx512dq")]
2307#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2308#[rustc_legacy_const_generics(2)]
2309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2310pub unsafe fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(
2311    k: __mmask8,
2312    a: __m512i,
2313) -> __m512d {
2314    static_assert_rounding!(ROUNDING);
2315    let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
2316    transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2317}
2318
2319/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2320/// and store the results in dst.
2321///
2322/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
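///
/// A minimal usage sketch, not part of Intel's documentation, showing that the input lanes are
/// read as unsigned; it assumes a nightly toolchain with the `stdarch_x86_avx512` feature, a CPU
/// that supports `avx512dq` and `avx512vl`, and the default `MXCSR` rounding mode:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // The bit pattern of -1 is read as u64::MAX, not as -1.
///     let a = _mm_set_epi64x(-1, 2);
///     let r = _mm_cvtepu64_pd(a);
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out[0], 2.0);
///     // With round-to-nearest this matches Rust's own `u64::MAX as f64`.
///     assert_eq!(out[1], u64::MAX as f64);
/// }
/// ```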
2323#[inline]
2324#[target_feature(enable = "avx512dq,avx512vl")]
2325#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2326#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2327pub unsafe fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
2328    transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION))
2329}
2330
2331/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2332/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2333/// not set).
2334///
2335/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828)
2336#[inline]
2337#[target_feature(enable = "avx512dq,avx512vl")]
2338#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2340pub unsafe fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2341    let b = _mm_cvtepu64_pd(a).as_f64x2();
2342    transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2343}
2344
2345/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2346/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2347///
2348/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829)
2349#[inline]
2350#[target_feature(enable = "avx512dq,avx512vl")]
2351#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2353pub unsafe fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d {
2354    let b = _mm_cvtepu64_pd(a).as_f64x2();
2355    transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2356}
2357
2358/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2359/// and store the results in dst.
2360///
2361/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830)
2362#[inline]
2363#[target_feature(enable = "avx512dq,avx512vl")]
2364#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2366pub unsafe fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d {
2367    transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION))
2368}
2369
2370/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2371/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2372/// not set).
2373///
2374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831)
2375#[inline]
2376#[target_feature(enable = "avx512dq,avx512vl")]
2377#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379pub unsafe fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2380    let b = _mm256_cvtepu64_pd(a).as_f64x4();
2381    transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2382}
2383
2384/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2385/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2386///
2387/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832)
2388#[inline]
2389#[target_feature(enable = "avx512dq,avx512vl")]
2390#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2392pub unsafe fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d {
2393    let b = _mm256_cvtepu64_pd(a).as_f64x4();
2394    transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2395}
2396
2397/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2398/// and store the results in dst.
2399///
2400/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833)
2401#[inline]
2402#[target_feature(enable = "avx512dq")]
2403#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2405pub unsafe fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d {
2406    transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION))
2407}
2408
2409/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2410/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2411/// not set).
2412///
2413/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834)
2414#[inline]
2415#[target_feature(enable = "avx512dq")]
2416#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2418pub unsafe fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2419    let b = _mm512_cvtepu64_pd(a).as_f64x8();
2420    transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2421}
2422
2423/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2424/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2425///
2426/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835)
2427#[inline]
2428#[target_feature(enable = "avx512dq")]
2429#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2431pub unsafe fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d {
2432    let b = _mm512_cvtepu64_pd(a).as_f64x8();
2433    transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2434}
2435
2436/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2437/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2438///
2439/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2440/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2441/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2442/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2443/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2444///
2445/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
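///
/// A minimal usage sketch, not part of Intel's documentation, showing truncation on a value that
/// has no exact f32 representation; it assumes a nightly toolchain with the `stdarch_x86_avx512`
/// feature and a CPU that supports `avx512dq`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // (1 << 24) + 1 = 16777217 is not representable as f32; truncation rounds it down.
///     let a = _mm512_set1_epi64((1 << 24) + 1);
///     let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///     let mut out = [0.0f32; 8];
///     _mm256_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out, [16777216.0; 8]);
/// }
/// ```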
2446#[inline]
2447#[target_feature(enable = "avx512dq")]
2448#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2449#[rustc_legacy_const_generics(1)]
2450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2451pub unsafe fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2452    static_assert_rounding!(ROUNDING);
2453    transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
2454}
2455
2456/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2457/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2458/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2459///
2460/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2461/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2462/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2463/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2464/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2465///
2466/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
2467#[inline]
2468#[target_feature(enable = "avx512dq")]
2469#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2470#[rustc_legacy_const_generics(3)]
2471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2472pub unsafe fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
2473    src: __m256,
2474    k: __mmask8,
2475    a: __m512i,
2476) -> __m256 {
2477    static_assert_rounding!(ROUNDING);
2478    let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
2479    transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2480}
2481
2482/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2483/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2484/// Rounding is done according to the ROUNDING parameter, which can be one of:
2485///
2486/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2487/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2488/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2489/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2490/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2491///
2492/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
2493#[inline]
2494#[target_feature(enable = "avx512dq")]
2495#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2496#[rustc_legacy_const_generics(2)]
2497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2498pub unsafe fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(
2499    k: __mmask8,
2500    a: __m512i,
2501) -> __m256 {
2502    static_assert_rounding!(ROUNDING);
2503    let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
2504    transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2505}
2506
2507/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2508/// and store the results in dst.
2509///
2510/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
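///
/// A minimal usage sketch, not part of Intel's documentation; it assumes a nightly toolchain
/// with the `stdarch_x86_avx512` feature and a CPU that supports `avx512dq` and `avx512vl`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi64x(5, 2);
///     let r = _mm_cvtepu64_ps(a);
///     let mut out = [0.0f32; 4];
///     _mm_storeu_ps(out.as_mut_ptr(), r);
///     // The converted values occupy the low lanes; Intel documents the upper lanes as zeroed.
///     assert_eq!(out, [2.0, 5.0, 0.0, 0.0]);
/// }
/// ```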
2511#[inline]
2512#[target_feature(enable = "avx512dq,avx512vl")]
2513#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2515pub unsafe fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
2516    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
2517}
2518
2519/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2520/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2521/// not set).
2522///
2523/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846)
2524#[inline]
2525#[target_feature(enable = "avx512dq,avx512vl")]
2526#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2528pub unsafe fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
2529    transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k))
2530}
2531
2532/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2533/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2534///
2535/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847)
2536#[inline]
2537#[target_feature(enable = "avx512dq,avx512vl")]
2538#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2540pub unsafe fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 {
2541    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
2542}
2543
2544/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2545/// and store the results in dst.
2546///
2547/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848)
2548#[inline]
2549#[target_feature(enable = "avx512dq,avx512vl")]
2550#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2552pub unsafe fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 {
2553    transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION))
2554}
2555
2556/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2557/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2558/// not set).
2559///
2560/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849)
2561#[inline]
2562#[target_feature(enable = "avx512dq,avx512vl")]
2563#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2565pub unsafe fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2566    let b = _mm256_cvtepu64_ps(a).as_f32x4();
2567    transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2568}
2569
2570/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2571/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2572///
2573/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850)
2574#[inline]
2575#[target_feature(enable = "avx512dq,avx512vl")]
2576#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2578pub unsafe fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 {
2579    let b = _mm256_cvtepu64_ps(a).as_f32x4();
2580    transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2581}
2582
2583/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2584/// and store the results in dst.
2585///
2586/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851)
2587#[inline]
2588#[target_feature(enable = "avx512dq")]
2589#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2591pub unsafe fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 {
2592    transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION))
2593}
2594
2595/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2596/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2597/// not set).
2598///
2599/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852)
2600#[inline]
2601#[target_feature(enable = "avx512dq")]
2602#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2604pub unsafe fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
2605    let b = _mm512_cvtepu64_ps(a).as_f32x8();
2606    transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2607}
2608
2609/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2610/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2611///
2612/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
2613#[inline]
2614#[target_feature(enable = "avx512dq")]
2615#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2617pub unsafe fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
2618    let b = _mm512_cvtepu64_ps(a).as_f32x8();
2619    transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2620}
2621
2622/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2623/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2624///
2625/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2626/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2627/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2628/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2630///
2631/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
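///
/// A minimal usage sketch, not part of Intel's documentation, comparing two rounding modes on a
/// halfway value; it assumes a nightly toolchain with the `stdarch_x86_avx512` feature and a CPU
/// that supports `avx512dq`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(2.5);
///     // Ties round to even with TO_NEAREST_INT, while TO_POS_INF always rounds up.
///     let nearest = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///     let up = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(_mm512_cmpeq_epi64_mask(nearest, _mm512_set1_epi64(2)), 0xff);
///     assert_eq!(_mm512_cmpeq_epi64_mask(up, _mm512_set1_epi64(3)), 0xff);
/// }
/// ```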
2632#[inline]
2633#[target_feature(enable = "avx512dq")]
2634#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
2635#[rustc_legacy_const_generics(1)]
2636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2637pub unsafe fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
2638    static_assert_rounding!(ROUNDING);
2639    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
2640}
2641
2642/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2643/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2644/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2645///
2646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2651///
2652/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
2653#[inline]
2654#[target_feature(enable = "avx512dq")]
2655#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
2656#[rustc_legacy_const_generics(3)]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658pub unsafe fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
2659    src: __m512i,
2660    k: __mmask8,
2661    a: __m512d,
2662) -> __m512i {
2663    static_assert_rounding!(ROUNDING);
2664    transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
2665}
2666
2667/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2668/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2669/// Rounding is done according to the ROUNDING parameter, which can be one of:
2670///
2671/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2672/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2673/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2674/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2675/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2676///
2677/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
2678#[inline]
2679#[target_feature(enable = "avx512dq")]
2680#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
2681#[rustc_legacy_const_generics(2)]
2682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2683pub unsafe fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(
2684    k: __mmask8,
2685    a: __m512d,
2686) -> __m512i {
2687    static_assert_rounding!(ROUNDING);
2688    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
2689}
2690
2691/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2692/// and store the results in dst.
2693///
2694/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
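///
/// A minimal usage sketch, not part of Intel's documentation; it assumes a nightly toolchain
/// with the `stdarch_x86_avx512` feature, a CPU that supports `avx512dq` and `avx512vl`, and the
/// default `MXCSR` rounding mode (round to nearest):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Element 0 is the last argument of `_mm_set_pd`.
///     let a = _mm_set_pd(-1.75, 3.25);
///     let r = _mm_cvtpd_epi64(a);
///     let mut out = [0i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     assert_eq!(out, [3, -2]);
/// }
/// ```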
2695#[inline]
2696#[target_feature(enable = "avx512dq,avx512vl")]
2697#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2699pub unsafe fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
2700    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
2701}
2702
2703/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2704/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2705/// not set).
2706///
2707/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942)
2708#[inline]
2709#[target_feature(enable = "avx512dq,avx512vl")]
2710#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2712pub unsafe fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
2713    transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k))
2714}
2715
2716/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2717/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2718///
2719/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943)
2720#[inline]
2721#[target_feature(enable = "avx512dq,avx512vl")]
2722#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2724pub unsafe fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
2725    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
2726}
2727
2728/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2729/// and store the results in dst.
2730///
2731/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944)
2732#[inline]
2733#[target_feature(enable = "avx512dq,avx512vl")]
2734#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2736pub unsafe fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i {
2737    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
2738}
2739
2740/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2741/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2742/// not set).
2743///
2744/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945)
2745#[inline]
2746#[target_feature(enable = "avx512dq,avx512vl")]
2747#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2749pub unsafe fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
2750    transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k))
2751}
2752
2753/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2754/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2755///
2756/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946)
2757#[inline]
2758#[target_feature(enable = "avx512dq,avx512vl")]
2759#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2761pub unsafe fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
2762    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
2763}
2764
2765/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2766/// and store the results in dst.
2767///
2768/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947)
2769#[inline]
2770#[target_feature(enable = "avx512dq")]
2771#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773pub unsafe fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i {
2774    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
2775}
2776
2777/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2778/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2779/// not set).
2780///
2781/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948)
2782#[inline]
2783#[target_feature(enable = "avx512dq")]
2784#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2786pub unsafe fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
2787    transmute(vcvtpd2qq_512(
2788        a.as_f64x8(),
2789        src.as_i64x8(),
2790        k,
2791        _MM_FROUND_CUR_DIRECTION,
2792    ))
2793}
2794
2795/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
2796/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2797///
2798/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949)
2799#[inline]
2800#[target_feature(enable = "avx512dq")]
2801#[cfg_attr(test, assert_instr(vcvtpd2qq))]
2802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2803pub unsafe fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
2804    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
2805}
2806
2807/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2808/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2809///
2810/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2811/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2812/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2813/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2814/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2815///
2816/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
2817#[inline]
2818#[target_feature(enable = "avx512dq")]
2819#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
2820#[rustc_legacy_const_generics(1)]
2821#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2822pub unsafe fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
2823    static_assert_rounding!(ROUNDING);
2824    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
2825}
2826
2827/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2828/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2829/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2830///
2831/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2832/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2833/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2834/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2835/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2836///
2837/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
2838#[inline]
2839#[target_feature(enable = "avx512dq")]
2840#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
2841#[rustc_legacy_const_generics(3)]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843pub unsafe fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
2844    src: __m512i,
2845    k: __mmask8,
2846    a: __m256,
2847) -> __m512i {
2848    static_assert_rounding!(ROUNDING);
2849    transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
2850}
2851
2852/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2853/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2854/// Rounding is done according to the ROUNDING parameter, which can be one of:
2855///
2856/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2857/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2858/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2859/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2860/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2861///
2862/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
2863#[inline]
2864#[target_feature(enable = "avx512dq")]
2865#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
2866#[rustc_legacy_const_generics(2)]
2867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2868pub unsafe fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(
2869    k: __mmask8,
2870    a: __m256,
2871) -> __m512i {
2872    static_assert_rounding!(ROUNDING);
2873    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
2874}
2875
2876/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2877/// and store the results in dst.
2878///
2879/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
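///
/// A minimal usage sketch, not part of Intel's documentation; it assumes a nightly toolchain
/// with the `stdarch_x86_avx512` feature, a CPU that supports `avx512dq` and `avx512vl`, and the
/// default `MXCSR` rounding mode (round to nearest):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Only the two low single-precision lanes are converted.
///     let a = _mm_set_ps(99.0, 99.0, -8.5, 6.0);
///     let r = _mm_cvtps_epi64(a);
///     let mut out = [0i64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     // -8.5 rounds to the even value -8 under round-to-nearest.
///     assert_eq!(out, [6, -8]);
/// }
/// ```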
2880#[inline]
2881#[target_feature(enable = "avx512dq,avx512vl")]
2882#[cfg_attr(test, assert_instr(vcvtps2qq))]
2883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2884pub unsafe fn _mm_cvtps_epi64(a: __m128) -> __m128i {
2885    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
2886}
2887
2888/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2889/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2890/// not set).
2891///
2892/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076)
2893#[inline]
2894#[target_feature(enable = "avx512dq,avx512vl")]
2895#[cfg_attr(test, assert_instr(vcvtps2qq))]
2896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2897pub unsafe fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
2898    transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k))
2899}
2900
2901/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2902/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2903///
2904/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077)
2905#[inline]
2906#[target_feature(enable = "avx512dq,avx512vl")]
2907#[cfg_attr(test, assert_instr(vcvtps2qq))]
2908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2909pub unsafe fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
2910    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
2911}
2912
2913/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2914/// and store the results in dst.
2915///
2916/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078)
2917#[inline]
2918#[target_feature(enable = "avx512dq,avx512vl")]
2919#[cfg_attr(test, assert_instr(vcvtps2qq))]
2920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2921pub unsafe fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
2922    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
2923}
2924
2925/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2926/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2927/// not set).
2928///
2929/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079)
2930#[inline]
2931#[target_feature(enable = "avx512dq,avx512vl")]
2932#[cfg_attr(test, assert_instr(vcvtps2qq))]
2933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2934pub unsafe fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
2935    transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k))
2936}
2937
2938/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2939/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2940///
2941/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080)
2942#[inline]
2943#[target_feature(enable = "avx512dq,avx512vl")]
2944#[cfg_attr(test, assert_instr(vcvtps2qq))]
2945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2946pub unsafe fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
2947    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
2948}
2949
2950/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2951/// and store the results in dst.
2952///
2953/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081)
2954#[inline]
2955#[target_feature(enable = "avx512dq")]
2956#[cfg_attr(test, assert_instr(vcvtps2qq))]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958pub unsafe fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
2959    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
2960}
2961
2962/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2963/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2964/// not set).
2965///
2966/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082)
2967#[inline]
2968#[target_feature(enable = "avx512dq")]
2969#[cfg_attr(test, assert_instr(vcvtps2qq))]
2970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2971pub unsafe fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
2972    transmute(vcvtps2qq_512(
2973        a.as_f32x8(),
2974        src.as_i64x8(),
2975        k,
2976        _MM_FROUND_CUR_DIRECTION,
2977    ))
2978}
2979
2980/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
2981/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2982///
2983/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083)
2984#[inline]
2985#[target_feature(enable = "avx512dq")]
2986#[cfg_attr(test, assert_instr(vcvtps2qq))]
2987#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2988pub unsafe fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
2989    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
2990}
2991
2992/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
2993/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2994///
2995/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2996/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2997/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2998/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3000///
3001/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478)
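///
/// A minimal usage sketch, not part of Intel's documentation; it assumes a nightly toolchain
/// with the `stdarch_x86_avx512` feature and a CPU that supports `avx512dq`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_pd(7.9);
///     // Rounding toward negative infinity gives 7 in every lane.
///     let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_set1_epi64(7)), 0xff);
/// }
/// ```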
3002#[inline]
3003#[target_feature(enable = "avx512dq")]
3004#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3005#[rustc_legacy_const_generics(1)]
3006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3007pub unsafe fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
3008    static_assert_rounding!(ROUNDING);
3009    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3010}
3011
3012/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3013/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3014/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3015///
3016/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3017/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3018/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3019/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3020/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3021///
3022/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479)
3023#[inline]
3024#[target_feature(enable = "avx512dq")]
3025#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3026#[rustc_legacy_const_generics(3)]
3027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3028pub unsafe fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
3029    src: __m512i,
3030    k: __mmask8,
3031    a: __m512d,
3032) -> __m512i {
3033    static_assert_rounding!(ROUNDING);
3034    transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
3035}
3036
3037/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3038/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3039/// Rounding is done according to the ROUNDING parameter, which can be one of:
3040///
3041/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3042/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3043/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3044/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3045/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3046///
3047/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480)
3048#[inline]
3049#[target_feature(enable = "avx512dq")]
3050#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3051#[rustc_legacy_const_generics(2)]
3052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3053pub unsafe fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(
3054    k: __mmask8,
3055    a: __m512d,
3056) -> __m512i {
3057    static_assert_rounding!(ROUNDING);
3058    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3059}
3060
3061/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3062/// and store the results in dst.
3063///
3064/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959)
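///
/// A minimal usage sketch, not part of Intel's documentation; it assumes a nightly toolchain
/// with the `stdarch_x86_avx512` feature, a CPU that supports `avx512dq` and `avx512vl`, and the
/// default `MXCSR` rounding mode (round to nearest):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Element 0 is the last argument of `_mm_set_pd`.
///     let a = _mm_set_pd(1.5, 250.0);
///     let r = _mm_cvtpd_epu64(a);
///     let mut out = [0u64; 2];
///     _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///     // 1.5 rounds to the even value 2 under round-to-nearest.
///     assert_eq!(out, [250, 2]);
/// }
/// ```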
3065#[inline]
3066#[target_feature(enable = "avx512dq,avx512vl")]
3067#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3069pub unsafe fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
3070    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
3071}
3072
3073/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3074/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3075/// not set).
3076///
3077/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960)
3078#[inline]
3079#[target_feature(enable = "avx512dq,avx512vl")]
3080#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3082pub unsafe fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
3083    transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k))
3084}
3085
3086/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3087/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3088///
3089/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961)
3090#[inline]
3091#[target_feature(enable = "avx512dq,avx512vl")]
3092#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3094pub unsafe fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
3095    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
3096}
3097
3098/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3099/// and store the results in dst.
3100///
3101/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962)
3102#[inline]
3103#[target_feature(enable = "avx512dq,avx512vl")]
3104#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3106pub unsafe fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
3107    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
3108}
3109
3110/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3111/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3112/// not set).
3113///
3114/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963)
3115#[inline]
3116#[target_feature(enable = "avx512dq,avx512vl")]
3117#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3119pub unsafe fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
3120    transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k))
3121}
3122
3123/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3124/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3125///
3126/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964)
3127#[inline]
3128#[target_feature(enable = "avx512dq,avx512vl")]
3129#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3131pub unsafe fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
3132    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
3133}
3134
3135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3136/// and store the results in dst.
3137///
3138/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965)
3139#[inline]
3140#[target_feature(enable = "avx512dq")]
3141#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3143pub unsafe fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
3144    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
3145}
3146
3147/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3148/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3149/// not set).
3150///
3151/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966)
3152#[inline]
3153#[target_feature(enable = "avx512dq")]
3154#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3156pub unsafe fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3157    transmute(vcvtpd2uqq_512(
3158        a.as_f64x8(),
3159        src.as_u64x8(),
3160        k,
3161        _MM_FROUND_CUR_DIRECTION,
3162    ))
3163}
3164
3165/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3166/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3167///
3168/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967)
3169#[inline]
3170#[target_feature(enable = "avx512dq")]
3171#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3173pub unsafe fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
3174    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
3175}
3176
3177/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3178/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3179///
3180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3185///
3186/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520)
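///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq`
/// target feature is available):
///
/// ```ignore
/// let a = _mm256_set1_ps(2.9);
/// // Round every element toward negative infinity and suppress exceptions.
/// let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
/// // Every 64-bit lane of r is 2.
/// ```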
3187#[inline]
3188#[target_feature(enable = "avx512dq")]
3189#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3190#[rustc_legacy_const_generics(1)]
3191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3192pub unsafe fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
3193    static_assert_rounding!(ROUNDING);
3194    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3195}
3196
3197/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3198/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3199/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3200///
3201/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3202/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3203/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3204/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3205/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3206///
3207/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521)
3208#[inline]
3209#[target_feature(enable = "avx512dq")]
3210#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3211#[rustc_legacy_const_generics(3)]
3212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3213pub unsafe fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
3214    src: __m512i,
3215    k: __mmask8,
3216    a: __m256,
3217) -> __m512i {
3218    static_assert_rounding!(ROUNDING);
3219    transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
3220}
3221
3222/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3223/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3224/// Rounding is done according to the ROUNDING parameter, which can be one of:
3225///
3226/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3227/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3228/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3229/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3230/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3231///
3232/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522)
3233#[inline]
3234#[target_feature(enable = "avx512dq")]
3235#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3236#[rustc_legacy_const_generics(2)]
3237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3238pub unsafe fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(
3239    k: __mmask8,
3240    a: __m256,
3241) -> __m512i {
3242    static_assert_rounding!(ROUNDING);
3243    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3244}
3245
3246/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3247/// and store the results in dst.
3248///
3249/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093)
3250#[inline]
3251#[target_feature(enable = "avx512dq,avx512vl")]
3252#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3254pub unsafe fn _mm_cvtps_epu64(a: __m128) -> __m128i {
3255    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
3256}
3257
3258/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3259/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3260/// not set).
3261///
3262/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094)
3263#[inline]
3264#[target_feature(enable = "avx512dq,avx512vl")]
3265#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267pub unsafe fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
3268    transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k))
3269}
3270
3271/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3272/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3273///
3274/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095)
3275#[inline]
3276#[target_feature(enable = "avx512dq,avx512vl")]
3277#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3279pub unsafe fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
3280    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
3281}
3282
3283/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3284/// and store the results in dst.
3285///
3286/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096)
3287#[inline]
3288#[target_feature(enable = "avx512dq,avx512vl")]
3289#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3291pub unsafe fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
3292    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
3293}
3294
3295/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3296/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3297/// not set).
3298///
3299/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097)
3300#[inline]
3301#[target_feature(enable = "avx512dq,avx512vl")]
3302#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3304pub unsafe fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
3305    transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k))
3306}
3307
3308/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3309/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3310///
3311/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098)
3312#[inline]
3313#[target_feature(enable = "avx512dq,avx512vl")]
3314#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3316pub unsafe fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
3317    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
3318}
3319
3320/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3321/// and store the results in dst.
3322///
3323/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099)
3324#[inline]
3325#[target_feature(enable = "avx512dq")]
3326#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3328pub unsafe fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
3329    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
3330}
3331
3332/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3333/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3334/// not set).
3335///
3336/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100)
3337#[inline]
3338#[target_feature(enable = "avx512dq")]
3339#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3341pub unsafe fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
3342    transmute(vcvtps2uqq_512(
3343        a.as_f32x8(),
3344        src.as_u64x8(),
3345        k,
3346        _MM_FROUND_CUR_DIRECTION,
3347    ))
3348}
3349
3350/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3351/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3352///
3353/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101)
3354#[inline]
3355#[target_feature(enable = "avx512dq")]
3356#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3358pub unsafe fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
3359    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
3360}
3361
3362/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3363/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3364/// to the sae parameter.
3365///
3366/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264)
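///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq`
/// target feature is available):
///
/// ```ignore
/// let a = _mm512_set1_pd(-7.9);
/// // Truncate toward zero without raising floating-point exceptions.
/// let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
/// // Every lane of r is -7.
/// ```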
3367#[inline]
3368#[target_feature(enable = "avx512dq")]
3369#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3370#[rustc_legacy_const_generics(1)]
3371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3372pub unsafe fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
3373    static_assert_sae!(SAE);
3374    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
3375}
3376
3377/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3378/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3379/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3380///
3381/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265)
3382#[inline]
3383#[target_feature(enable = "avx512dq")]
3384#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3385#[rustc_legacy_const_generics(3)]
3386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3387pub unsafe fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
3388    src: __m512i,
3389    k: __mmask8,
3390    a: __m512d,
3391) -> __m512i {
3392    static_assert_sae!(SAE);
3393    transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
3394}
3395
3396/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3397/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3398/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3399///
3400/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266)
3401#[inline]
3402#[target_feature(enable = "avx512dq")]
3403#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3404#[rustc_legacy_const_generics(2)]
3405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3406pub unsafe fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
3407    static_assert_sae!(SAE);
3408    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
3409}
3410
3411/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3412/// with truncation, and store the result in dst.
3413///
3414/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329)
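///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq` and
/// `avx512vl` target features are available):
///
/// ```ignore
/// let a = _mm_set_pd(-2.7, 1.9); // lanes: [1.9, -2.7]
/// let r = _mm_cvttpd_epi64(a);
/// // Truncation toward zero gives lanes [1, -2].
/// ```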
3415#[inline]
3416#[target_feature(enable = "avx512dq,avx512vl")]
3417#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3419pub unsafe fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
3420    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
3421}
3422
3423/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3424/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3425/// corresponding bit is not set).
3426///
3427/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330)
3428#[inline]
3429#[target_feature(enable = "avx512dq,avx512vl")]
3430#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3432pub unsafe fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
3433    transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k))
3434}
3435
3436/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3437/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3438/// bit is not set).
3439///
3440/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331)
3441#[inline]
3442#[target_feature(enable = "avx512dq,avx512vl")]
3443#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3445pub unsafe fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
3446    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
3447}
3448
3449/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3450/// with truncation, and store the result in dst.
3451///
3452/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332)
3453#[inline]
3454#[target_feature(enable = "avx512dq,avx512vl")]
3455#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3457pub unsafe fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
3458    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
3459}
3460
3461/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3462/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3463/// corresponding bit is not set).
3464///
3465/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333)
3466#[inline]
3467#[target_feature(enable = "avx512dq,avx512vl")]
3468#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3470pub unsafe fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
3471    transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k))
3472}
3473
3474/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3475/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3476/// bit is not set).
3477///
3478/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334)
3479#[inline]
3480#[target_feature(enable = "avx512dq,avx512vl")]
3481#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3483pub unsafe fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
3484    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
3485}
3486
3487/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3488/// with truncation, and store the result in dst.
3489///
3490/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335)
3491#[inline]
3492#[target_feature(enable = "avx512dq")]
3493#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495pub unsafe fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
3496    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
3497}
3498
3499/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3500/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3501/// corresponding bit is not set).
3502///
3503/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336)
3504#[inline]
3505#[target_feature(enable = "avx512dq")]
3506#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3508pub unsafe fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3509    transmute(vcvttpd2qq_512(
3510        a.as_f64x8(),
3511        src.as_i64x8(),
3512        k,
3513        _MM_FROUND_CUR_DIRECTION,
3514    ))
3515}
3516
3517/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3518/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3519/// bit is not set).
3520///
3521/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337)
3522#[inline]
3523#[target_feature(enable = "avx512dq")]
3524#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3526pub unsafe fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
3527    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
3528}
3529
3530/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3531/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3532/// to the sae parameter.
3533///
3534/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294)
3535#[inline]
3536#[target_feature(enable = "avx512dq")]
3537#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3538#[rustc_legacy_const_generics(1)]
3539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3540pub unsafe fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
3541    static_assert_sae!(SAE);
3542    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
3543}
3544
3545/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3546/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3547/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3548///
3549/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295)
3550#[inline]
3551#[target_feature(enable = "avx512dq")]
3552#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3553#[rustc_legacy_const_generics(3)]
3554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3555pub unsafe fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
3556    src: __m512i,
3557    k: __mmask8,
3558    a: __m256,
3559) -> __m512i {
3560    static_assert_sae!(SAE);
3561    transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
3562}
3563
3564/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3565/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3566/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3567///
3568/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296)
3569#[inline]
3570#[target_feature(enable = "avx512dq")]
3571#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3572#[rustc_legacy_const_generics(2)]
3573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3574pub unsafe fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
3575    static_assert_sae!(SAE);
3576    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
3577}
3578
3579/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3580/// with truncation, and store the result in dst.
3581///
3582/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420)
3583#[inline]
3584#[target_feature(enable = "avx512dq,avx512vl")]
3585#[cfg_attr(test, assert_instr(vcvttps2qq))]
3586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3587pub unsafe fn _mm_cvttps_epi64(a: __m128) -> __m128i {
3588    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
3589}
3590
3591/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3592/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3593/// corresponding bit is not set).
3594///
3595/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421)
3596#[inline]
3597#[target_feature(enable = "avx512dq,avx512vl")]
3598#[cfg_attr(test, assert_instr(vcvttps2qq))]
3599#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3600pub unsafe fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
3601    transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k))
3602}
3603
3604/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3605/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3606/// bit is not set).
3607///
3608/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422)
3609#[inline]
3610#[target_feature(enable = "avx512dq,avx512vl")]
3611#[cfg_attr(test, assert_instr(vcvttps2qq))]
3612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3613pub unsafe fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
3614    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
3615}
3616
3617/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3618/// with truncation, and store the result in dst.
3619///
3620/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423)
3621#[inline]
3622#[target_feature(enable = "avx512dq,avx512vl")]
3623#[cfg_attr(test, assert_instr(vcvttps2qq))]
3624#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3625pub unsafe fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
3626    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
3627}
3628
3629/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3630/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3631/// corresponding bit is not set).
3632///
3633/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424)
3634#[inline]
3635#[target_feature(enable = "avx512dq,avx512vl")]
3636#[cfg_attr(test, assert_instr(vcvttps2qq))]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638pub unsafe fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
3639    transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k))
3640}
3641
3642/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3643/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3644/// bit is not set).
3645///
3646/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425)
3647#[inline]
3648#[target_feature(enable = "avx512dq,avx512vl")]
3649#[cfg_attr(test, assert_instr(vcvttps2qq))]
3650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3651pub unsafe fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
3652    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
3653}
3654
3655/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3656/// with truncation, and store the result in dst.
3657///
3658/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426)
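///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq`
/// target feature is available):
///
/// ```ignore
/// let a = _mm256_set_ps(8.0, 7.5, 6.1, 5.0, 4.9, 3.5, 2.1, -0.99);
/// let r = _mm512_cvttps_epi64(a);
/// // Truncation toward zero gives lanes [0, 2, 3, 4, 5, 6, 7, 8].
/// ```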
3659#[inline]
3660#[target_feature(enable = "avx512dq")]
3661#[cfg_attr(test, assert_instr(vcvttps2qq))]
3662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3663pub unsafe fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
3664    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
3665}
3666
3667/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3668/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3669/// corresponding bit is not set).
3670///
3671/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427)
3672#[inline]
3673#[target_feature(enable = "avx512dq")]
3674#[cfg_attr(test, assert_instr(vcvttps2qq))]
3675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3676pub unsafe fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
3677    transmute(vcvttps2qq_512(
3678        a.as_f32x8(),
3679        src.as_i64x8(),
3680        k,
3681        _MM_FROUND_CUR_DIRECTION,
3682    ))
3683}
3684
3685/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3686/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3687/// bit is not set).
3688///
3689/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428)
3690#[inline]
3691#[target_feature(enable = "avx512dq")]
3692#[cfg_attr(test, assert_instr(vcvttps2qq))]
3693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3694pub unsafe fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
3695    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
3696}
3697
3698/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3699/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3700/// to the sae parameter.
3701///
3702/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
3703#[inline]
3704#[target_feature(enable = "avx512dq")]
3705#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
3706#[rustc_legacy_const_generics(1)]
3707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3708pub unsafe fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
3709    static_assert_sae!(SAE);
3710    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
3711}
3712
3713/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3714/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3715/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3716///
3717/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
3718#[inline]
3719#[target_feature(enable = "avx512dq")]
3720#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
3721#[rustc_legacy_const_generics(3)]
3722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3723pub unsafe fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
3724    src: __m512i,
3725    k: __mmask8,
3726    a: __m512d,
3727) -> __m512i {
3728    static_assert_sae!(SAE);
3729    transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
3730}
3731
3732/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3733/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3734/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3735///
3736/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
3737#[inline]
3738#[target_feature(enable = "avx512dq")]
3739#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
3740#[rustc_legacy_const_generics(2)]
3741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3742pub unsafe fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
3743    static_assert_sae!(SAE);
3744    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
3745}
3746
3747/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3748/// with truncation, and store the result in dst.
3749///
3750/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
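///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq` and
/// `avx512vl` target features are available and in-range, non-negative inputs):
///
/// ```ignore
/// let a = _mm_set_pd(9.99, 0.5); // lanes: [0.5, 9.99]
/// let r = _mm_cvttpd_epu64(a);
/// // Truncation toward zero gives lanes [0, 9].
/// ```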
3751#[inline]
3752#[target_feature(enable = "avx512dq,avx512vl")]
3753#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3755pub unsafe fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
3756    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
3757}
3758
3759/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3760/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
3761/// bit is not set).
3762///
3763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348)
3764#[inline]
3765#[target_feature(enable = "avx512dq,avx512vl")]
3766#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3768pub unsafe fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
3769    transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k))
3770}
3771
3772/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3773/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3774/// bit is not set).
3775///
3776/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349)
3777#[inline]
3778#[target_feature(enable = "avx512dq,avx512vl")]
3779#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3781pub unsafe fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
3782    _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
3783}
3784
3785/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3786/// with truncation, and store the result in dst.
3787///
3788/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350)
3789#[inline]
3790#[target_feature(enable = "avx512dq,avx512vl")]
3791#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3793pub unsafe fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i {
3794    _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
3795}
3796
3797/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3798/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
3799/// bit is not set).
3800///
3801/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351)
3802#[inline]
3803#[target_feature(enable = "avx512dq,avx512vl")]
3804#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3806pub unsafe fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
3807    transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k))
3808}
3809
3810/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3811/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3812/// bit is not set).
3813///
3814/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352)
3815#[inline]
3816#[target_feature(enable = "avx512dq,avx512vl")]
3817#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3819pub unsafe fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
3820    _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
3821}
3822
3823/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3824/// with truncation, and store the result in dst.
3825///
3826/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353)
3827#[inline]
3828#[target_feature(enable = "avx512dq")]
3829#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3831pub unsafe fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i {
3832    _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
3833}
3834
3835/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3836/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
3837/// bit is not set).
3838///
3839/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
3840#[inline]
3841#[target_feature(enable = "avx512dq")]
3842#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3844pub unsafe fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3845    transmute(vcvttpd2uqq_512(
3846        a.as_f64x8(),
3847        src.as_u64x8(),
3848        k,
3849        _MM_FROUND_CUR_DIRECTION,
3850    ))
3851}
3852
3853/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
3854/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3855/// bit is not set).
3856///
3857/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
3858#[inline]
3859#[target_feature(enable = "avx512dq")]
3860#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
3861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3862pub unsafe fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
3863    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
3864}
3865
3866/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3867/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3868/// to the sae parameter.
3869///
3870/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
3871#[inline]
3872#[target_feature(enable = "avx512dq")]
3873#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
3874#[rustc_legacy_const_generics(1)]
3875#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3876pub unsafe fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
3877    static_assert_sae!(SAE);
3878    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
3879}
3880
3881/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3882/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3883/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3884///
3885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
3886#[inline]
3887#[target_feature(enable = "avx512dq")]
3888#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
3889#[rustc_legacy_const_generics(3)]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891pub unsafe fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
3892    src: __m512i,
3893    k: __mmask8,
3894    a: __m256,
3895) -> __m512i {
3896    static_assert_sae!(SAE);
3897    transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
3898}
3899
3900/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3901/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3902/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3903///
3904/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
3905#[inline]
3906#[target_feature(enable = "avx512dq")]
3907#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
3908#[rustc_legacy_const_generics(2)]
3909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3910pub unsafe fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
3911    static_assert_sae!(SAE);
3912    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
3913}
3914
3915/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3916/// with truncation, and store the result in dst.
3917///
3918/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
3919#[inline]
3920#[target_feature(enable = "avx512dq,avx512vl")]
3921#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3923pub unsafe fn _mm_cvttps_epu64(a: __m128) -> __m128i {
3924    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
3925}
3926
3927/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3928/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3929/// corresponding bit is not set).
3930///
3931/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439)
3932#[inline]
3933#[target_feature(enable = "avx512dq,avx512vl")]
3934#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3936pub unsafe fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
3937    transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k))
3938}
3939
3940/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3941/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3942/// bit is not set).
3943///
3944/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440)
3945#[inline]
3946#[target_feature(enable = "avx512dq,avx512vl")]
3947#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3949pub unsafe fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i {
3950    _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
3951}
3952
3953/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3954/// with truncation, and store the result in dst.
3955///
3956/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441)
3957#[inline]
3958#[target_feature(enable = "avx512dq,avx512vl")]
3959#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3961pub unsafe fn _mm256_cvttps_epu64(a: __m128) -> __m256i {
3962    _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
3963}
3964
3965/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3966/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3967/// corresponding bit is not set).
3968///
3969/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442)
3970#[inline]
3971#[target_feature(enable = "avx512dq,avx512vl")]
3972#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3974pub unsafe fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
3975    transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k))
3976}
3977
3978/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3979/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3980/// bit is not set).
3981///
3982/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443)
3983#[inline]
3984#[target_feature(enable = "avx512dq,avx512vl")]
3985#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3987pub unsafe fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i {
3988    _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
3989}
3990
3991/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
3992/// with truncation, and store the result in dst.
3993///
3994/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444)
3995#[inline]
3996#[target_feature(enable = "avx512dq")]
3997#[cfg_attr(test, assert_instr(vcvttps2uqq))]
3998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3999pub unsafe fn _mm512_cvttps_epu64(a: __m256) -> __m512i {
4000    _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
4001}
4002
4003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4004/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4005/// corresponding bit is not set).
4006///
4007/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445)
4008#[inline]
4009#[target_feature(enable = "avx512dq")]
4010#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4012pub unsafe fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
4013    transmute(vcvttps2uqq_512(
4014        a.as_f32x8(),
4015        src.as_u64x8(),
4016        k,
4017        _MM_FROUND_CUR_DIRECTION,
4018    ))
4019}
4020
4021/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4022/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4023/// bit is not set).
4024///
4025/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446)
4026#[inline]
4027#[target_feature(enable = "avx512dq")]
4028#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4030pub unsafe fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
4031    _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
4032}
4033
4034// Multiply-Low
4035
4036/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4037/// the low 64 bits of the intermediate integers in `dst`.
4038///
4039/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778)
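///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq` and
/// `avx512vl` target features are available):
///
/// ```ignore
/// let a = _mm_set_epi64x(7, -3);  // lanes: [-3, 7]
/// let b = _mm_set_epi64x(2, 10);  // lanes: [10, 2]
/// let r = _mm_mullo_epi64(a, b);
/// // Lanes of r: [-30, 14]; only the low 64 bits of each product are kept.
/// ```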
4040#[inline]
4041#[target_feature(enable = "avx512dq,avx512vl")]
4042#[cfg_attr(test, assert_instr(vpmullq))]
4043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4044pub unsafe fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
4045    transmute(simd_mul(a.as_i64x2(), b.as_i64x2()))
4046}
4047
4048/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4049/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4050/// `src` if the corresponding bit is not set).
4051///
4052/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776)
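///
/// A short usage sketch (not part of Intel's documentation; assumes the `avx512dq` and
/// `avx512vl` target features are available):
///
/// ```ignore
/// let src = _mm_set1_epi64x(-1);
/// let a = _mm_set1_epi64x(6);
/// let b = _mm_set1_epi64x(7);
/// // Only mask bit 0 is set, so lane 1 is copied from src.
/// let r = _mm_mask_mullo_epi64(src, 0b01, a, b);
/// // Lanes of r: [42, -1].
/// ```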
4053#[inline]
4054#[target_feature(enable = "avx512dq,avx512vl")]
4055#[cfg_attr(test, assert_instr(vpmullq))]
4056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4057pub unsafe fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4058    let b = _mm_mullo_epi64(a, b).as_i64x2();
4059    transmute(simd_select_bitmask(k, b, src.as_i64x2()))
4060}
4061
4062/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4063/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4064/// the corresponding bit is not set).
4065///
4066/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777)
4067#[inline]
4068#[target_feature(enable = "avx512dq,avx512vl")]
4069#[cfg_attr(test, assert_instr(vpmullq))]
4070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4071pub unsafe fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4072    let b = _mm_mullo_epi64(a, b).as_i64x2();
4073    transmute(simd_select_bitmask(k, b, i64x2::ZERO))
4074}
4075
4076/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4077/// the low 64 bits of the intermediate integers in `dst`.
4078///
4079/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781)
4080#[inline]
4081#[target_feature(enable = "avx512dq,avx512vl")]
4082#[cfg_attr(test, assert_instr(vpmullq))]
4083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4084pub unsafe fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
4085    transmute(simd_mul(a.as_i64x4(), b.as_i64x4()))
4086}
4087
4088/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4089/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4090/// `src` if the corresponding bit is not set).
4091///
4092/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779)
4093#[inline]
4094#[target_feature(enable = "avx512dq,avx512vl")]
4095#[cfg_attr(test, assert_instr(vpmullq))]
4096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4097pub unsafe fn _mm256_mask_mullo_epi64(
4098    src: __m256i,
4099    k: __mmask8,
4100    a: __m256i,
4101    b: __m256i,
4102) -> __m256i {
4103    let b = _mm256_mullo_epi64(a, b).as_i64x4();
4104    transmute(simd_select_bitmask(k, b, src.as_i64x4()))
4105}
4106
4107/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4108/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4109/// the corresponding bit is not set).
4110///
4111/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780)
4112#[inline]
4113#[target_feature(enable = "avx512dq,avx512vl")]
4114#[cfg_attr(test, assert_instr(vpmullq))]
4115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4116pub unsafe fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4117    let b = _mm256_mullo_epi64(a, b).as_i64x4();
4118    transmute(simd_select_bitmask(k, b, i64x4::ZERO))
4119}
4120
4121/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4122/// the low 64 bits of the intermediate integers in `dst`.
4123///
4124/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784)
4125#[inline]
4126#[target_feature(enable = "avx512dq")]
4127#[cfg_attr(test, assert_instr(vpmullq))]
4128#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4129pub unsafe fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
4130    transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
4131}
4132
4133/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4134/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4135/// `src` if the corresponding bit is not set).
4136///
4137/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782)
4138#[inline]
4139#[target_feature(enable = "avx512dq")]
4140#[cfg_attr(test, assert_instr(vpmullq))]
4141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4142pub unsafe fn _mm512_mask_mullo_epi64(
4143    src: __m512i,
4144    k: __mmask8,
4145    a: __m512i,
4146    b: __m512i,
4147) -> __m512i {
4148    let b = _mm512_mullo_epi64(a, b).as_i64x8();
4149    transmute(simd_select_bitmask(k, b, src.as_i64x8()))
4150}
4151
4152/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4153/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4154/// the corresponding bit is not set).
4155///
4156/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783)
4157#[inline]
4158#[target_feature(enable = "avx512dq")]
4159#[cfg_attr(test, assert_instr(vpmullq))]
4160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4161pub unsafe fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
4162    let b = _mm512_mullo_epi64(a, b).as_i64x8();
4163    transmute(simd_select_bitmask(k, b, i64x8::ZERO))
4164}
4165
4166// Mask Registers
4167
4168/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst.
4169///
4170/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891)
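///
/// A short usage sketch (not part of Intel's documentation):
///
/// ```ignore
/// let k: __mmask8 = 0b1010_0001;
/// assert_eq!(_cvtmask8_u32(k), 0xA1);
/// ```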
4171#[inline]
4172#[target_feature(enable = "avx512dq")]
4173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4174pub unsafe fn _cvtmask8_u32(a: __mmask8) -> u32 {
4175    a as u32
4176}
4177
4178/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst.
4179///
4180/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467)
4181#[inline]
4182#[target_feature(enable = "avx512dq")]
4183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4184pub unsafe fn _cvtu32_mask8(a: u32) -> __mmask8 {
4185    a as __mmask8
4186}
4187
4188/// Add 16-bit masks a and b, and store the result in dst.
4189///
4190/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903)
4191#[inline]
4192#[target_feature(enable = "avx512dq")]
4193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4194pub unsafe fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
4195    a.wrapping_add(b)
4196}
4197
4198/// Add 8-bit masks a and b, and store the result in dst.
4199///
4200/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906)
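///
/// Illustrative sketch (arbitrary mask values chosen so the sum fits in 8 bits):
///
/// ```ignore
/// let a: __mmask8 = 0b0000_0011; // 3
/// let b: __mmask8 = 0b0000_0101; // 5
/// let sum = unsafe { _kadd_mask8(a, b) }; // 0b0000_1000 (8)
/// ```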
4201#[inline]
4202#[target_feature(enable = "avx512dq")]
4203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4204pub unsafe fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4205    a.wrapping_add(b)
4206}
4207
4208/// Bitwise AND of 8-bit masks a and b, and store the result in dst.
4209///
4210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911)
4211#[inline]
4212#[target_feature(enable = "avx512dq")]
4213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4214pub unsafe fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4215    a & b
4216}
4217
4218/// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst.
4219///
4220/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916)
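///
/// Illustrative sketch (arbitrary values); the operation computed is `!a & b`:
///
/// ```ignore
/// let a: __mmask8 = 0b0000_1100;
/// let b: __mmask8 = 0b0000_1010;
/// let r = unsafe { _kandn_mask8(a, b) }; // !a & b = 0b0000_0010
/// ```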
4221#[inline]
4222#[target_feature(enable = "avx512dq")]
4223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4224pub unsafe fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4225    _knot_mask8(a) & b
4226}
4227
4228/// Bitwise NOT of 8-bit mask a, and store the result in dst.
4229///
4230/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922)
4231#[inline]
4232#[target_feature(enable = "avx512dq")]
4233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4234pub unsafe fn _knot_mask8(a: __mmask8) -> __mmask8 {
4235    a ^ 0b11111111
4236}
4237
4238/// Bitwise OR of 8-bit masks a and b, and store the result in dst.
4239///
4240/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927)
4241#[inline]
4242#[target_feature(enable = "avx512dq")]
4243#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4244pub unsafe fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4245    a | b
4246}
4247
4248/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst.
4249///
4250/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969)
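///
/// Illustrative sketch (arbitrary values); XNOR sets exactly the bits on which
/// `a` and `b` agree:
///
/// ```ignore
/// let a: __mmask8 = 0b0000_1100;
/// let b: __mmask8 = 0b0000_1010;
/// let r = unsafe { _kxnor_mask8(a, b) }; // !(a ^ b) = 0b1111_1001
/// ```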
4251#[inline]
4252#[target_feature(enable = "avx512dq")]
4253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4254pub unsafe fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4255    _knot_mask8(_kxor_mask8(a, b))
4256}
4257
4258/// Bitwise XOR of 8-bit masks a and b, and store the result in dst.
4259///
4260/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974)
4261#[inline]
4262#[target_feature(enable = "avx512dq")]
4263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4264pub unsafe fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4265    a ^ b
4266}
4267
4268/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4269/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
4270///
4271/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931)
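///
/// Illustrative sketch of the two outputs (arbitrary values):
///
/// ```ignore
/// let mut all_ones = 0u8;
/// let a: __mmask8 = 0b1111_0000;
/// let b: __mmask8 = 0b0000_1111;
/// // a | b == 0xff, so all_ones becomes 1; the OR is not zero, so the return value is 0.
/// let is_zero = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
/// assert_eq!((is_zero, all_ones), (0, 1));
/// ```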
4272#[inline]
4273#[target_feature(enable = "avx512dq")]
4274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4275pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
4276    let tmp = _kor_mask8(a, b);
4277    *all_ones = (tmp == 0xff) as u8;
4278    (tmp == 0) as u8
4279}
4280
4281/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
4282/// store 0 in dst.
4283///
4284/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936)
4285#[inline]
4286#[target_feature(enable = "avx512dq")]
4287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4288pub unsafe fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4289    (_kor_mask8(a, b) == 0xff) as u8
4290}
4291
4292/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4293/// store 0 in dst.
4294///
4295/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
4296#[inline]
4297#[target_feature(enable = "avx512dq")]
4298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4299pub unsafe fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4300    (_kor_mask8(a, b) == 0) as u8
4301}
4302
4303/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
4304///
4305/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
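///
/// Illustrative sketch (COUNT is a const generic; the value here is arbitrary):
///
/// ```ignore
/// let a: __mmask8 = 0b0000_0101;
/// let r = unsafe { _kshiftli_mask8::<2>(a) }; // 0b0001_0100
/// ```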
4306#[inline]
4307#[target_feature(enable = "avx512dq")]
4308#[rustc_legacy_const_generics(1)]
4309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4310pub unsafe fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4311    if COUNT > 7 { 0 } else { a << COUNT }
4312}
4313
4314/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
4315///
4316/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
4317#[inline]
4318#[target_feature(enable = "avx512dq")]
4319#[rustc_legacy_const_generics(1)]
4320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4321pub unsafe fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4322    if COUNT > 7 { 0 } else { a >> COUNT }
4323}
4324
4325/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
4326/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
4327/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4328///
4329/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950)
4330#[inline]
4331#[target_feature(enable = "avx512dq")]
4332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4333pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
4334    *and_not = (_kandn_mask16(a, b) == 0) as u8;
4335    (_kand_mask16(a, b) == 0) as u8
4336}
4337
4338/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
4339/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
4340/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4341///
4342/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953)
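///
/// Illustrative sketch (arbitrary values); the return value reports whether
/// `a & b == 0`, while `and_not` reports whether `!a & b == 0`:
///
/// ```ignore
/// let mut and_not = 0u8;
/// let a: __mmask8 = 0b0000_1111;
/// let b: __mmask8 = 0b0000_0011;
/// // a & b = 0b0000_0011 (non-zero) so the return value is 0; !a & b = 0 so and_not is 1.
/// let zf = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
/// assert_eq!((zf, and_not), (0, 1));
/// ```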
4343#[inline]
4344#[target_feature(enable = "avx512dq")]
4345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4346pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
4347    *and_not = (_kandn_mask8(a, b) == 0) as u8;
4348    (_kand_mask8(a, b) == 0) as u8
4349}
4350
4351/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b, if the result is all
4352/// zeros, store 1 in dst, otherwise store 0 in dst.
4353///
4354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954)
4355#[inline]
4356#[target_feature(enable = "avx512dq")]
4357#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4358pub unsafe fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4359    (_kandn_mask16(a, b) == 0) as u8
4360}
4361
4362/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b, if the result is all
4363/// zeros, store 1 in dst, otherwise store 0 in dst.
4364///
4365/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957)
4366#[inline]
4367#[target_feature(enable = "avx512dq")]
4368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4369pub unsafe fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4370    (_kandn_mask8(a, b) == 0) as u8
4371}
4372
4373/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst, otherwise
4374/// store 0 in dst.
4375///
4376/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958)
4377#[inline]
4378#[target_feature(enable = "avx512dq")]
4379#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4380pub unsafe fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4381    (_kand_mask16(a, b) == 0) as u8
4382}
4383
4384/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst, otherwise
4385/// store 0 in dst.
4386///
4387/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961)
4388#[inline]
4389#[target_feature(enable = "avx512dq")]
4390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4391pub unsafe fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4392    (_kand_mask8(a, b) == 0) as u8
4393}
4394
4395/// Load an 8-bit mask from memory.
4396///
4397/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999)
4398#[inline]
4399#[target_feature(enable = "avx512dq")]
4400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4401pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
4402    *mem_addr
4403}
4404
4405/// Store an 8-bit mask to memory.
4406///
4407/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468)
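///
/// Illustrative sketch storing a mask and reading it back with `_load_mask8`
/// (arbitrary value):
///
/// ```ignore
/// let mut slot: __mmask8 = 0;
/// unsafe {
///     _store_mask8(&mut slot, 0b1100_0011);
///     assert_eq!(_load_mask8(&slot), 0b1100_0011);
/// }
/// ```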
4408#[inline]
4409#[target_feature(enable = "avx512dq")]
4410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4411pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
4412    *mem_addr = a;
4413}
4414
4415/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4416/// integer in a.
4417///
4418/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612)
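///
/// Illustrative sketch (arbitrary values); each mask bit is the sign bit of the
/// corresponding 32-bit lane:
///
/// ```ignore
/// // Lanes, from element 0 upward: 4, -3, 2, -1 -> sign bits 0, 1, 0, 1.
/// let v = _mm_set_epi32(-1, 2, -3, 4);
/// let k = unsafe { _mm_movepi32_mask(v) }; // 0b1010
/// ```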
4419#[inline]
4420#[target_feature(enable = "avx512dq,avx512vl")]
4421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4422pub unsafe fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
4423    let zero = _mm_setzero_si128();
4424    _mm_cmplt_epi32_mask(a, zero)
4425}
4426
4427/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4428/// integer in a.
4429///
4430/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613)
4431#[inline]
4432#[target_feature(enable = "avx512dq,avx512vl")]
4433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4434pub unsafe fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
4435    let zero = _mm256_setzero_si256();
4436    _mm256_cmplt_epi32_mask(a, zero)
4437}
4438
4439/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4440/// integer in a.
4441///
4442/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614)
4443#[inline]
4444#[target_feature(enable = "avx512dq")]
4445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4446pub unsafe fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
4447    let zero = _mm512_setzero_si512();
4448    _mm512_cmplt_epi32_mask(a, zero)
4449}
4450
4451/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4452/// integer in a.
4453///
4454/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615)
4455#[inline]
4456#[target_feature(enable = "avx512dq,avx512vl")]
4457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4458pub unsafe fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
4459    let zero = _mm_setzero_si128();
4460    _mm_cmplt_epi64_mask(a, zero)
4461}
4462
4463/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4464/// integer in a.
4465///
4466/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616)
4467#[inline]
4468#[target_feature(enable = "avx512dq,avx512vl")]
4469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4470pub unsafe fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
4471    let zero = _mm256_setzero_si256();
4472    _mm256_cmplt_epi64_mask(a, zero)
4473}
4474
4475/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4476/// integer in a.
4477///
4478/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617)
4479#[inline]
4480#[target_feature(enable = "avx512dq")]
4481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4482pub unsafe fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
4483    let zero = _mm512_setzero_si512();
4484    _mm512_cmplt_epi64_mask(a, zero)
4485}
4486
4487/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4488/// bit in k.
4489///
4490/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625)
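///
/// Illustrative sketch (arbitrary mask); set bits become all-ones lanes and
/// clear bits become zero lanes:
///
/// ```ignore
/// let v = unsafe { _mm_movm_epi32(0b0101) };
/// // Elements 0 and 2 are -1 (all ones), elements 1 and 3 are 0,
/// // i.e. the same vector as _mm_set_epi32(0, -1, 0, -1).
/// ```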
4491#[inline]
4492#[target_feature(enable = "avx512dq,avx512vl")]
4493#[cfg_attr(test, assert_instr(vpmovm2d))]
4494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4495pub unsafe fn _mm_movm_epi32(k: __mmask8) -> __m128i {
4496    let ones = _mm_set1_epi32(-1);
4497    _mm_maskz_mov_epi32(k, ones)
4498}
4499
4500/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4501/// bit in k.
4502///
4503/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626)
4504#[inline]
4505#[target_feature(enable = "avx512dq,avx512vl")]
4506#[cfg_attr(test, assert_instr(vpmovm2d))]
4507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4508pub unsafe fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
4509    let ones = _mm256_set1_epi32(-1);
4510    _mm256_maskz_mov_epi32(k, ones)
4511}
4512
4513/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4514/// bit in k.
4515///
4516/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627)
4517#[inline]
4518#[target_feature(enable = "avx512dq")]
4519#[cfg_attr(test, assert_instr(vpmovm2d))]
4520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4521pub unsafe fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
4522    let ones = _mm512_set1_epi32(-1);
4523    _mm512_maskz_mov_epi32(k, ones)
4524}
4525
4526/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
4527/// bit in k.
4528///
4529/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628)
4530#[inline]
4531#[target_feature(enable = "avx512dq,avx512vl")]
4532#[cfg_attr(test, assert_instr(vpmovm2q))]
4533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4534pub unsafe fn _mm_movm_epi64(k: __mmask8) -> __m128i {
4535    let ones = _mm_set1_epi64x(-1);
4536    _mm_maskz_mov_epi64(k, ones)
4537}
4538
4539/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
4540/// bit in k.
4541///
4542/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629)
4543#[inline]
4544#[target_feature(enable = "avx512dq,avx512vl")]
4545#[cfg_attr(test, assert_instr(vpmovm2q))]
4546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4547pub unsafe fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
4548    let ones = _mm256_set1_epi64x(-1);
4549    _mm256_maskz_mov_epi64(k, ones)
4550}
4551
4552/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
4553/// bit in k.
4554///
4555/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630)
4556#[inline]
4557#[target_feature(enable = "avx512dq")]
4558#[cfg_attr(test, assert_instr(vpmovm2q))]
4559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4560pub unsafe fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
4561    let ones = _mm512_set1_epi64(-1);
4562    _mm512_maskz_mov_epi64(k, ones)
4563}
4564
4565// Range
4566
4567/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4568/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
4569/// Lower 2 bits of IMM8 specifies the operation control:
4570///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4571/// Upper 2 bits of IMM8 specifies the sign control:
4572///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4573/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4574///
4575/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
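///
/// Illustrative sketch: IMM8 = 0b1011 selects "absolute max, clear sign bit"
/// (the larger magnitude as a positive value), and _MM_FROUND_NO_EXC suppresses
/// exceptions (values are arbitrary):
///
/// ```ignore
/// let a = _mm512_set1_pd(-5.0);
/// let b = _mm512_set1_pd(3.0);
/// // Every lane becomes 5.0: |-5.0| > |3.0|, and the sign bit is cleared.
/// let r = unsafe { _mm512_range_round_pd::<0b1011, _MM_FROUND_NO_EXC>(a, b) };
/// ```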
4576#[inline]
4577#[target_feature(enable = "avx512dq")]
4578#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
4579#[rustc_legacy_const_generics(2, 3)]
4580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4581pub unsafe fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(
4582    a: __m512d,
4583    b: __m512d,
4584) -> __m512d {
4585    static_assert_uimm_bits!(IMM8, 4);
4586    static_assert_sae!(SAE);
4587    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
4588}
4589
4590/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4591/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4592/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
4593/// Lower 2 bits of IMM8 specifies the operation control:
4594///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4595/// Upper 2 bits of IMM8 specifies the sign control:
4596///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4597/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4598///
4599/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
4600#[inline]
4601#[target_feature(enable = "avx512dq")]
4602#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
4603#[rustc_legacy_const_generics(4, 5)]
4604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4605pub unsafe fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
4606    src: __m512d,
4607    k: __mmask8,
4608    a: __m512d,
4609    b: __m512d,
4610) -> __m512d {
4611    static_assert_uimm_bits!(IMM8, 4);
4612    static_assert_sae!(SAE);
4613    transmute(vrangepd_512(
4614        a.as_f64x8(),
4615        b.as_f64x8(),
4616        IMM8,
4617        src.as_f64x8(),
4618        k,
4619        SAE,
4620    ))
4621}
4622
4623/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4624/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4625/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
4626/// Lower 2 bits of IMM8 specifies the operation control:
4627///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4628/// Upper 2 bits of IMM8 specifies the sign control:
4629///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4630/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4631///
4632/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
4633#[inline]
4634#[target_feature(enable = "avx512dq")]
4635#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
4636#[rustc_legacy_const_generics(3, 4)]
4637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4638pub unsafe fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
4639    k: __mmask8,
4640    a: __m512d,
4641    b: __m512d,
4642) -> __m512d {
4643    static_assert_uimm_bits!(IMM8, 4);
4644    static_assert_sae!(SAE);
4645    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
4646}
4647
4648/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4649/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
4650/// Lower 2 bits of IMM8 specifies the operation control:
4651///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4652/// Upper 2 bits of IMM8 specifies the sign control:
4653///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4654///
4655/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
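///
/// Illustrative sketch: IMM8 = 0b0101 selects "max, sign from the compare
/// result", i.e. a plain per-lane maximum (values are arbitrary):
///
/// ```ignore
/// let a = _mm_set_pd(2.0, -8.0);
/// let b = _mm_set_pd(-4.0, 1.0);
/// // Lane 0: max(-8.0, 1.0) = 1.0; lane 1: max(2.0, -4.0) = 2.0.
/// let r = unsafe { _mm_range_pd::<0b0101>(a, b) };
/// ```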
4656#[inline]
4657#[target_feature(enable = "avx512dq,avx512vl")]
4658#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4659#[rustc_legacy_const_generics(2)]
4660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4661pub unsafe fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
4662    static_assert_uimm_bits!(IMM8, 4);
4663    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
4664}
4665
4666/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4667/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4668/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
4669/// Lower 2 bits of IMM8 specifies the operation control:
4670///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4671/// Upper 2 bits of IMM8 specifies the sign control:
4672///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4673///
4674/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
4675#[inline]
4676#[target_feature(enable = "avx512dq,avx512vl")]
4677#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4678#[rustc_legacy_const_generics(4)]
4679#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4680pub unsafe fn _mm_mask_range_pd<const IMM8: i32>(
4681    src: __m128d,
4682    k: __mmask8,
4683    a: __m128d,
4684    b: __m128d,
4685) -> __m128d {
4686    static_assert_uimm_bits!(IMM8, 4);
4687    transmute(vrangepd_128(
4688        a.as_f64x2(),
4689        b.as_f64x2(),
4690        IMM8,
4691        src.as_f64x2(),
4692        k,
4693    ))
4694}
4695
4696/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4697/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4698/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
4699/// Lower 2 bits of IMM8 specifies the operation control:
4700///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4701/// Upper 2 bits of IMM8 specifies the sign control:
4702///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4703///
4704/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
4705#[inline]
4706#[target_feature(enable = "avx512dq,avx512vl")]
4707#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4708#[rustc_legacy_const_generics(3)]
4709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4710pub unsafe fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
4711    static_assert_uimm_bits!(IMM8, 4);
4712    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
4713}
4714
4715/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4716/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
4717/// Lower 2 bits of IMM8 specifies the operation control:
4718///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4719/// Upper 2 bits of IMM8 specifies the sign control:
4720///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4721///
4722/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
4723#[inline]
4724#[target_feature(enable = "avx512dq,avx512vl")]
4725#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4726#[rustc_legacy_const_generics(2)]
4727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4728pub unsafe fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
4729    static_assert_uimm_bits!(IMM8, 4);
4730    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
4731}
4732
4733/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4734/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4735/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
4736/// Lower 2 bits of IMM8 specifies the operation control:
4737///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4738/// Upper 2 bits of IMM8 specifies the sign control:
4739///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4740///
4741/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
4742#[inline]
4743#[target_feature(enable = "avx512dq,avx512vl")]
4744#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4745#[rustc_legacy_const_generics(4)]
4746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4747pub unsafe fn _mm256_mask_range_pd<const IMM8: i32>(
4748    src: __m256d,
4749    k: __mmask8,
4750    a: __m256d,
4751    b: __m256d,
4752) -> __m256d {
4753    static_assert_uimm_bits!(IMM8, 4);
4754    transmute(vrangepd_256(
4755        a.as_f64x4(),
4756        b.as_f64x4(),
4757        IMM8,
4758        src.as_f64x4(),
4759        k,
4760    ))
4761}
4762
4763/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4764/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4765/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
4766/// Lower 2 bits of IMM8 specifies the operation control:
4767///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4768/// Upper 2 bits of IMM8 specifies the sign control:
4769///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4770///
4771/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
4772#[inline]
4773#[target_feature(enable = "avx512dq,avx512vl")]
4774#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4775#[rustc_legacy_const_generics(3)]
4776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4777pub unsafe fn _mm256_maskz_range_pd<const IMM8: i32>(
4778    k: __mmask8,
4779    a: __m256d,
4780    b: __m256d,
4781) -> __m256d {
4782    static_assert_uimm_bits!(IMM8, 4);
4783    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
4784}
4785
4786/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4787/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
4788/// Lower 2 bits of IMM8 specifies the operation control:
4789///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4790/// Upper 2 bits of IMM8 specifies the sign control:
4791///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4792///
4793/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
4794#[inline]
4795#[target_feature(enable = "avx512dq")]
4796#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4797#[rustc_legacy_const_generics(2)]
4798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4799pub unsafe fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
4800    static_assert_uimm_bits!(IMM8, 4);
4801    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
4802}
4803
4804/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4805/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4806/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
4807/// Lower 2 bits of IMM8 specifies the operation control:
4808///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4809/// Upper 2 bits of IMM8 specifies the sign control:
4810///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4811///
4812/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
4813#[inline]
4814#[target_feature(enable = "avx512dq")]
4815#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4816#[rustc_legacy_const_generics(4)]
4817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4818pub unsafe fn _mm512_mask_range_pd<const IMM8: i32>(
4819    src: __m512d,
4820    k: __mmask8,
4821    a: __m512d,
4822    b: __m512d,
4823) -> __m512d {
4824    static_assert_uimm_bits!(IMM8, 4);
4825    transmute(vrangepd_512(
4826        a.as_f64x8(),
4827        b.as_f64x8(),
4828        IMM8,
4829        src.as_f64x8(),
4830        k,
4831        _MM_FROUND_CUR_DIRECTION,
4832    ))
4833}
4834
4835/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4836/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
4837/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
4838/// Lower 2 bits of IMM8 specifies the operation control:
4839///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4840/// Upper 2 bits of IMM8 specifies the sign control:
4841///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4842///
4843/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
4844#[inline]
4845#[target_feature(enable = "avx512dq")]
4846#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
4847#[rustc_legacy_const_generics(3)]
4848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4849pub unsafe fn _mm512_maskz_range_pd<const IMM8: i32>(
4850    k: __mmask8,
4851    a: __m512d,
4852    b: __m512d,
4853) -> __m512d {
4854    static_assert_uimm_bits!(IMM8, 4);
4855    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
4856}
4857
4858/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4859/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
4860/// Lower 2 bits of IMM8 specifies the operation control:
4861///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4862/// Upper 2 bits of IMM8 specifies the sign control:
4863///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4865///
4866/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
4867#[inline]
4868#[target_feature(enable = "avx512dq")]
4869#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
4870#[rustc_legacy_const_generics(2, 3)]
4871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4872pub unsafe fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(
4873    a: __m512,
4874    b: __m512,
4875) -> __m512 {
4876    static_assert_uimm_bits!(IMM8, 4);
4877    static_assert_sae!(SAE);
4878    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
4879}
4880
4881/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4882/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
4883/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
4884/// Lower 2 bits of IMM8 specifies the operation control:
4885///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4886/// Upper 2 bits of IMM8 specifies the sign control:
4887///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4888///
4889/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
4890#[inline]
4891#[target_feature(enable = "avx512dq")]
4892#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
4893#[rustc_legacy_const_generics(4, 5)]
4894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4895pub unsafe fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
4896    src: __m512,
4897    k: __mmask16,
4898    a: __m512,
4899    b: __m512,
4900) -> __m512 {
4901    static_assert_uimm_bits!(IMM8, 4);
4902    static_assert_sae!(SAE);
4903    transmute(vrangeps_512(
4904        a.as_f32x16(),
4905        b.as_f32x16(),
4906        IMM8,
4907        src.as_f32x16(),
4908        k,
4909        SAE,
4910    ))
4911}
4912
4913/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4914/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
4915/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
4916/// Lower 2 bits of IMM8 specifies the operation control:
4917///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4918/// Upper 2 bits of IMM8 specifies the sign control:
4919///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4920///
4921/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
4922#[inline]
4923#[target_feature(enable = "avx512dq")]
4924#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
4925#[rustc_legacy_const_generics(3, 4)]
4926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4927pub unsafe fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
4928    k: __mmask16,
4929    a: __m512,
4930    b: __m512,
4931) -> __m512 {
4932    static_assert_uimm_bits!(IMM8, 4);
4933    static_assert_sae!(SAE);
4934    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
4935}
4936
4937/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4938/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
4939/// Lower 2 bits of IMM8 specifies the operation control:
4940///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4941/// Upper 2 bits of IMM8 specifies the sign control:
4942///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4943///
4944/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
4945#[inline]
4946#[target_feature(enable = "avx512dq,avx512vl")]
4947#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
4948#[rustc_legacy_const_generics(2)]
4949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4950pub unsafe fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
4951    static_assert_uimm_bits!(IMM8, 4);
4952    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
4953}
4954
4955/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4956/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
4957/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
4958/// Lower 2 bits of IMM8 specifies the operation control:
4959///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4960/// Upper 2 bits of IMM8 specifies the sign control:
4961///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4962///
4963/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
4964#[inline]
4965#[target_feature(enable = "avx512dq,avx512vl")]
4966#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
4967#[rustc_legacy_const_generics(4)]
4968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4969pub unsafe fn _mm_mask_range_ps<const IMM8: i32>(
4970    src: __m128,
4971    k: __mmask8,
4972    a: __m128,
4973    b: __m128,
4974) -> __m128 {
4975    static_assert_uimm_bits!(IMM8, 4);
4976    transmute(vrangeps_128(
4977        a.as_f32x4(),
4978        b.as_f32x4(),
4979        IMM8,
4980        src.as_f32x4(),
4981        k,
4982    ))
4983}
4984
4985/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
4986/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
4987/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
4988/// Lower 2 bits of IMM8 specifies the operation control:
4989///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
4990/// Upper 2 bits of IMM8 specifies the sign control:
4991///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
4992///
4993/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
4994#[inline]
4995#[target_feature(enable = "avx512dq,avx512vl")]
4996#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
4997#[rustc_legacy_const_generics(3)]
4998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4999pub unsafe fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5000    static_assert_uimm_bits!(IMM8, 4);
5001    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
5002}
5003
5004/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5005/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5006/// Lower 2 bits of IMM8 specifies the operation control:
5007///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5008/// Upper 2 bits of IMM8 specifies the sign control:
5009///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5010///
5011/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
5012#[inline]
5013#[target_feature(enable = "avx512dq,avx512vl")]
5014#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5015#[rustc_legacy_const_generics(2)]
5016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5017pub unsafe fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
5018    static_assert_uimm_bits!(IMM8, 4);
5019    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
5020}
5021
5022/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5023/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5024/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5025/// Lower 2 bits of IMM8 specifies the operation control:
5026///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5027/// Upper 2 bits of IMM8 specifies the sign control:
5028///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5029///
5030/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202)
5031#[inline]
5032#[target_feature(enable = "avx512dq,avx512vl")]
5033#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5034#[rustc_legacy_const_generics(4)]
5035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5036pub unsafe fn _mm256_mask_range_ps<const IMM8: i32>(
5037    src: __m256,
5038    k: __mmask8,
5039    a: __m256,
5040    b: __m256,
5041) -> __m256 {
5042    static_assert_uimm_bits!(IMM8, 4);
5043    transmute(vrangeps_256(
5044        a.as_f32x8(),
5045        b.as_f32x8(),
5046        IMM8,
5047        src.as_f32x8(),
5048        k,
5049    ))
5050}
5051
5052/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5053/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5054/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5055/// Lower 2 bits of IMM8 specifies the operation control:
5056///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5057/// Upper 2 bits of IMM8 specifies the sign control:
5058///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5059///
5060/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203)
5061#[inline]
5062#[target_feature(enable = "avx512dq,avx512vl")]
5063#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5064#[rustc_legacy_const_generics(3)]
5065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5066pub unsafe fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
5067    static_assert_uimm_bits!(IMM8, 4);
5068    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b)
5069}
5070
5071/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5072/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5073/// Lower 2 bits of IMM8 specifies the operation control:
5074///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5075/// Upper 2 bits of IMM8 specifies the sign control:
5076///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5077///
5078/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207)
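///
/// Illustrative sketch: IMM8 = 0b1010 selects "absolute min, clear sign bit",
/// i.e. the smaller magnitude as a positive value (values are arbitrary):
///
/// ```ignore
/// let a = _mm512_set1_ps(-0.5);
/// let b = _mm512_set1_ps(2.0);
/// // Every lane becomes 0.5: |-0.5| < |2.0|, and the sign bit is cleared.
/// let r = unsafe { _mm512_range_ps::<0b1010>(a, b) };
/// ```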
5079#[inline]
5080#[target_feature(enable = "avx512dq")]
5081#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5082#[rustc_legacy_const_generics(2)]
5083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5084pub unsafe fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 {
5085    static_assert_uimm_bits!(IMM8, 4);
5086    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b)
5087}
5088
5089/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5090/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5091/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5092/// Lower 2 bits of IMM8 specifies the operation control:
5093///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5094/// Upper 2 bits of IMM8 specifies the sign control:
5095///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5096///
5097/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205)
5098#[inline]
5099#[target_feature(enable = "avx512dq")]
5100#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5101#[rustc_legacy_const_generics(4)]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103pub unsafe fn _mm512_mask_range_ps<const IMM8: i32>(
5104    src: __m512,
5105    k: __mmask16,
5106    a: __m512,
5107    b: __m512,
5108) -> __m512 {
5109    static_assert_uimm_bits!(IMM8, 4);
5110    transmute(vrangeps_512(
5111        a.as_f32x16(),
5112        b.as_f32x16(),
5113        IMM8,
5114        src.as_f32x16(),
5115        k,
5116        _MM_FROUND_CUR_DIRECTION,
5117    ))
5118}
5119
5120/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5121/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5122/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5123/// Lower 2 bits of IMM8 specifies the operation control:
5124///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5125/// Upper 2 bits of IMM8 specifies the sign control:
5126///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5127///
5128/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206)
5129#[inline]
5130#[target_feature(enable = "avx512dq")]
5131#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5132#[rustc_legacy_const_generics(3)]
5133#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5134pub unsafe fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
5135    static_assert_uimm_bits!(IMM8, 4);
5136    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b)
5137}
5138
5139/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5140/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5141/// of dst, and copy the upper element from a to the upper element of dst.
5142/// Lower 2 bits of IMM8 specifies the operation control:
5143///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5144/// Upper 2 bits of IMM8 specifies the sign control:
5145///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5146/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5147///
5148/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
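///
/// Illustrative sketch: IMM8 = 0b1011 selects "absolute max, clear sign bit";
/// only the lower element takes part and the upper element is copied from a
/// (values are arbitrary):
///
/// ```ignore
/// let a = _mm_set_pd(9.0, -5.0); // upper 9.0, lower -5.0
/// let b = _mm_set_pd(1.0, 3.0);  // upper ignored, lower 3.0
/// // Lower result element: 5.0 (|-5.0| > |3.0|, sign cleared); upper: 9.0 from a.
/// let r = unsafe { _mm_range_round_sd::<0b1011, _MM_FROUND_NO_EXC>(a, b) };
/// ```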
5149#[inline]
5150#[target_feature(enable = "avx512dq")]
5151#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5152#[rustc_legacy_const_generics(2, 3)]
5153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5154pub unsafe fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(
5155    a: __m128d,
5156    b: __m128d,
5157) -> __m128d {
5158    static_assert_uimm_bits!(IMM8, 4);
5159    static_assert_sae!(SAE);
5160    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
5161}
5162
5163/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5164/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5165/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5166/// upper element from a to the upper element of dst.
5167/// Lower 2 bits of IMM8 specifies the operation control:
5168///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5169/// Upper 2 bits of IMM8 specifies the sign control:
5170///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5171/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5172///
5173/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
5174#[inline]
5175#[target_feature(enable = "avx512dq")]
5176#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5177#[rustc_legacy_const_generics(4, 5)]
5178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5179pub unsafe fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
5180    src: __m128d,
5181    k: __mmask8,
5182    a: __m128d,
5183    b: __m128d,
5184) -> __m128d {
5185    static_assert_uimm_bits!(IMM8, 4);
5186    static_assert_sae!(SAE);
5187    transmute(vrangesd(
5188        a.as_f64x2(),
5189        b.as_f64x2(),
5190        src.as_f64x2(),
5191        k,
5192        IMM8,
5193        SAE,
5194    ))
5195}
5196
5197/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5198/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5199/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5200/// element from a to the upper element of dst.
5201/// Lower 2 bits of IMM8 specifies the operation control:
5202///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5203/// Upper 2 bits of IMM8 specifies the sign control:
5204///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5205/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5206///
5207/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
5208#[inline]
5209#[target_feature(enable = "avx512dq")]
5210#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5211#[rustc_legacy_const_generics(3, 4)]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213pub unsafe fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
5214    k: __mmask8,
5215    a: __m128d,
5216    b: __m128d,
5217) -> __m128d {
5218    static_assert_uimm_bits!(IMM8, 4);
5219    static_assert_sae!(SAE);
5220    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
5221}
5222
5223/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5224/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5225/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5226/// upper element from a to the upper element of dst.
5227/// Lower 2 bits of IMM8 specifies the operation control:
5228///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5229/// Upper 2 bits of IMM8 specifies the sign control:
5230///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5231///
5232/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
5233#[inline]
5234#[target_feature(enable = "avx512dq")]
5235#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5236#[rustc_legacy_const_generics(4)]
5237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5238pub unsafe fn _mm_mask_range_sd<const IMM8: i32>(
5239    src: __m128d,
5240    k: __mmask8,
5241    a: __m128d,
5242    b: __m128d,
5243) -> __m128d {
5244    static_assert_uimm_bits!(IMM8, 4);
5245    transmute(vrangesd(
5246        a.as_f64x2(),
5247        b.as_f64x2(),
5248        src.as_f64x2(),
5249        k,
5250        IMM8,
5251        _MM_FROUND_CUR_DIRECTION,
5252    ))
5253}
5254
5255/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5256/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5257/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5258/// element from a to the upper element of dst.
5259/// Lower 2 bits of IMM8 specifies the operation control:
5260///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5261/// Upper 2 bits of IMM8 specifies the sign control:
5262///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5263///
5264/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
5265#[inline]
5266#[target_feature(enable = "avx512dq")]
5267#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5268#[rustc_legacy_const_generics(3)]
5269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5270pub unsafe fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5271    static_assert_uimm_bits!(IMM8, 4);
5272    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
5273}
5274
5275/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5276/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5277/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
5278/// Lower 2 bits of IMM8 specifies the operation control:
5279///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5280/// Upper 2 bits of IMM8 specifies the sign control:
5281///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5282/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5283///
5284/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
5285#[inline]
5286#[target_feature(enable = "avx512dq")]
5287#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5288#[rustc_legacy_const_generics(2, 3)]
5289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5290pub unsafe fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
5291    static_assert_uimm_bits!(IMM8, 4);
5292    static_assert_sae!(SAE);
5293    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
5294}
5295
5296/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5297/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5298/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5299/// upper 3 packed elements from a to the upper elements of dst.
5300/// Lower 2 bits of IMM8 specifies the operation control:
5301///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5302/// Upper 2 bits of IMM8 specifies the sign control:
5303///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5304/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5305///
5306/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
5307#[inline]
5308#[target_feature(enable = "avx512dq")]
5309#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5310#[rustc_legacy_const_generics(4, 5)]
5311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5312pub unsafe fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
5313    src: __m128,
5314    k: __mmask8,
5315    a: __m128,
5316    b: __m128,
5317) -> __m128 {
5318    static_assert_uimm_bits!(IMM8, 4);
5319    static_assert_sae!(SAE);
5320    transmute(vrangess(
5321        a.as_f32x4(),
5322        b.as_f32x4(),
5323        src.as_f32x4(),
5324        k,
5325        IMM8,
5326        SAE,
5327    ))
5328}
5329
5330/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5331/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5332/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5333/// 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
5335///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
5337///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5338/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5339///
5340/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
5341#[inline]
5342#[target_feature(enable = "avx512dq")]
5343#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5344#[rustc_legacy_const_generics(3, 4)]
5345#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5346pub unsafe fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
5347    k: __mmask8,
5348    a: __m128,
5349    b: __m128,
5350) -> __m128 {
5351    static_assert_uimm_bits!(IMM8, 4);
5352    static_assert_sae!(SAE);
5353    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
5354}
5355
5356/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5357/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5358/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5359/// upper 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
5361///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
5363///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5364///
5365/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
5366#[inline]
5367#[target_feature(enable = "avx512dq")]
5368#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5369#[rustc_legacy_const_generics(4)]
5370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5371pub unsafe fn _mm_mask_range_ss<const IMM8: i32>(
5372    src: __m128,
5373    k: __mmask8,
5374    a: __m128,
5375    b: __m128,
5376) -> __m128 {
5377    static_assert_uimm_bits!(IMM8, 4);
5378    transmute(vrangess(
5379        a.as_f32x4(),
5380        b.as_f32x4(),
5381        src.as_f32x4(),
5382        k,
5383        IMM8,
5384        _MM_FROUND_CUR_DIRECTION,
5385    ))
5386}
5387
5388/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5389/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5390/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5391/// 3 packed elements from a to the upper elements of dst.
/// Lower 2 bits of IMM8 specify the operation control:
5393///     00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
/// Upper 2 bits of IMM8 specify the sign control:
5395///     00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5396///
5397/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
5398#[inline]
5399#[target_feature(enable = "avx512dq")]
5400#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5401#[rustc_legacy_const_generics(3)]
5402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5403pub unsafe fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5404    static_assert_uimm_bits!(IMM8, 4);
5405    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
5406}
5407
5408// Reduce
5409
5410/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5411/// the number of bits specified by imm8, and store the results in dst.
5412/// Rounding is done according to the imm8 parameter, which can be one of:
5413///
5414/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5415/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5416/// * [`_MM_FROUND_TO_POS_INF`] : round up
5417/// * [`_MM_FROUND_TO_ZERO`] : truncate
5418/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5419///
5420/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5421///
5422/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
5423#[inline]
5424#[target_feature(enable = "avx512dq")]
5425#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5426#[rustc_legacy_const_generics(1, 2)]
5427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5428pub unsafe fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
5429    static_assert_uimm_bits!(IMM8, 8);
5430    static_assert_sae!(SAE);
5431    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
5432}
5433
5434/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5435/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5436/// copied from src to dst if the corresponding mask bit is not set).
5437/// Rounding is done according to the imm8 parameter, which can be one of:
5438///
5439/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5440/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5441/// * [`_MM_FROUND_TO_POS_INF`] : round up
5442/// * [`_MM_FROUND_TO_ZERO`] : truncate
5443/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5444///
5445/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5446///
5447/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
5448#[inline]
5449#[target_feature(enable = "avx512dq")]
5450#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5451#[rustc_legacy_const_generics(3, 4)]
5452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5453pub unsafe fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5454    src: __m512d,
5455    k: __mmask8,
5456    a: __m512d,
5457) -> __m512d {
5458    static_assert_uimm_bits!(IMM8, 8);
5459    static_assert_sae!(SAE);
5460    transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
5461}
5462
5463/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5464/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5465/// zeroed out if the corresponding mask bit is not set).
5466/// Rounding is done according to the imm8 parameter, which can be one of:
5467///
5468/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5469/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5470/// * [`_MM_FROUND_TO_POS_INF`] : round up
5471/// * [`_MM_FROUND_TO_ZERO`] : truncate
5472/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5473///
5474/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5475///
5476/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
5477#[inline]
5478#[target_feature(enable = "avx512dq")]
5479#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5480#[rustc_legacy_const_generics(2, 3)]
5481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5482pub unsafe fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5483    k: __mmask8,
5484    a: __m512d,
5485) -> __m512d {
5486    static_assert_uimm_bits!(IMM8, 8);
5487    static_assert_sae!(SAE);
5488    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
5489}
5490
5491/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5492/// the number of bits specified by imm8, and store the results in dst.
5493/// Rounding is done according to the imm8 parameter, which can be one of:
5494///
5495/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5496/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5497/// * [`_MM_FROUND_TO_POS_INF`] : round up
5498/// * [`_MM_FROUND_TO_ZERO`] : truncate
5499/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5500///
5501/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
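///
/// A rough sketch of what the "reduced argument" means in practice (illustration only, not
/// compiled as a doctest; assumes a nightly toolchain with `stdarch_x86_avx512`, an x86_64 target,
/// AVX-512DQ + AVX-512VL hardware, and that the fixed-point length sits in imm8[7:4] as in the
/// VREDUCEPD encoding; the helper name is invented):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512dq,avx512vl")]
/// unsafe fn fractional_parts(a: __m128d) -> __m128d {
///     // Keep 0 fraction bits and truncate: each lane becomes a - trunc(a),
///     // e.g. 3.75 -> 0.75 and -3.75 -> -0.75.
///     _mm_reduce_pd::<{ _MM_FROUND_TO_ZERO }>(a)
/// }
/// ```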
5502#[inline]
5503#[target_feature(enable = "avx512dq,avx512vl")]
5504#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5505#[rustc_legacy_const_generics(1)]
5506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5507pub unsafe fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5508    static_assert_uimm_bits!(IMM8, 8);
5509    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
5510}
5511
5512/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5513/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5514/// copied from src to dst if the corresponding mask bit is not set).
5515/// Rounding is done according to the imm8 parameter, which can be one of:
5516///
5517/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5518/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5519/// * [`_MM_FROUND_TO_POS_INF`] : round up
5520/// * [`_MM_FROUND_TO_ZERO`] : truncate
5521/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5522///
5523/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
5524#[inline]
5525#[target_feature(enable = "avx512dq,avx512vl")]
5526#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5527#[rustc_legacy_const_generics(3)]
5528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5529pub unsafe fn _mm_mask_reduce_pd<const IMM8: i32>(
5530    src: __m128d,
5531    k: __mmask8,
5532    a: __m128d,
5533) -> __m128d {
5534    static_assert_uimm_bits!(IMM8, 8);
5535    transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
5536}
5537
5538/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5539/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5540/// zeroed out if the corresponding mask bit is not set).
5541/// Rounding is done according to the imm8 parameter, which can be one of:
5542///
5543/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5544/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5545/// * [`_MM_FROUND_TO_POS_INF`] : round up
5546/// * [`_MM_FROUND_TO_ZERO`] : truncate
5547/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5548///
5549/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
5550#[inline]
5551#[target_feature(enable = "avx512dq,avx512vl")]
5552#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5553#[rustc_legacy_const_generics(2)]
5554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5555pub unsafe fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5556    static_assert_uimm_bits!(IMM8, 8);
5557    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
5558}
5559
5560/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5561/// the number of bits specified by imm8, and store the results in dst.
5562/// Rounding is done according to the imm8 parameter, which can be one of:
5563///
5564/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5565/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5566/// * [`_MM_FROUND_TO_POS_INF`] : round up
5567/// * [`_MM_FROUND_TO_ZERO`] : truncate
5568/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5569///
5570/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
5571#[inline]
5572#[target_feature(enable = "avx512dq,avx512vl")]
5573#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5574#[rustc_legacy_const_generics(1)]
5575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5576pub unsafe fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5577    static_assert_uimm_bits!(IMM8, 8);
5578    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
5579}
5580
5581/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5582/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5583/// copied from src to dst if the corresponding mask bit is not set).
5584/// Rounding is done according to the imm8 parameter, which can be one of:
5585///
5586/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5587/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5588/// * [`_MM_FROUND_TO_POS_INF`] : round up
5589/// * [`_MM_FROUND_TO_ZERO`] : truncate
5590/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5591///
5592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
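///
/// A brief sketch of the writemask behaviour (illustration only, not compiled as a doctest;
/// assumes a nightly toolchain with `stdarch_x86_avx512`, an x86_64 target, and AVX-512DQ +
/// AVX-512VL hardware; the helper name is invented):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512dq,avx512vl")]
/// unsafe fn reduce_even_lanes(src: __m256d, a: __m256d) -> __m256d {
///     // k = 0b0101: lanes 0 and 2 receive the reduced argument of `a`,
///     // lanes 1 and 3 are copied through from `src` unchanged.
///     _mm256_mask_reduce_pd::<_MM_FROUND_TO_NEAREST_INT>(src, 0b0101, a)
/// }
/// ```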
5593#[inline]
5594#[target_feature(enable = "avx512dq,avx512vl")]
5595#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5596#[rustc_legacy_const_generics(3)]
5597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5598pub unsafe fn _mm256_mask_reduce_pd<const IMM8: i32>(
5599    src: __m256d,
5600    k: __mmask8,
5601    a: __m256d,
5602) -> __m256d {
5603    static_assert_uimm_bits!(IMM8, 8);
5604    transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
5605}
5606
5607/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5608/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5609/// zeroed out if the corresponding mask bit is not set).
5610/// Rounding is done according to the imm8 parameter, which can be one of:
5611///
5612/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5613/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5614/// * [`_MM_FROUND_TO_POS_INF`] : round up
5615/// * [`_MM_FROUND_TO_ZERO`] : truncate
5616/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5617///
5618/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
5619#[inline]
5620#[target_feature(enable = "avx512dq,avx512vl")]
5621#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5622#[rustc_legacy_const_generics(2)]
5623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5624pub unsafe fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5625    static_assert_uimm_bits!(IMM8, 8);
5626    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
5627}
5628
5629/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5630/// the number of bits specified by imm8, and store the results in dst.
5631/// Rounding is done according to the imm8 parameter, which can be one of:
5632///
5633/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5634/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5635/// * [`_MM_FROUND_TO_POS_INF`] : round up
5636/// * [`_MM_FROUND_TO_ZERO`] : truncate
5637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5638///
5639/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
5640#[inline]
5641#[target_feature(enable = "avx512dq")]
5642#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5643#[rustc_legacy_const_generics(1)]
5644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5645pub unsafe fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5646    static_assert_uimm_bits!(IMM8, 8);
5647    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
5648}
5649
5650/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5651/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5652/// copied from src to dst if the corresponding mask bit is not set).
5653/// Rounding is done according to the imm8 parameter, which can be one of:
5654///
5655/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5656/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5657/// * [`_MM_FROUND_TO_POS_INF`] : round up
5658/// * [`_MM_FROUND_TO_ZERO`] : truncate
5659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5660///
5661/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
5662#[inline]
5663#[target_feature(enable = "avx512dq")]
5664#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5665#[rustc_legacy_const_generics(3)]
5666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5667pub unsafe fn _mm512_mask_reduce_pd<const IMM8: i32>(
5668    src: __m512d,
5669    k: __mmask8,
5670    a: __m512d,
5671) -> __m512d {
5672    static_assert_uimm_bits!(IMM8, 8);
5673    transmute(vreducepd_512(
5674        a.as_f64x8(),
5675        IMM8,
5676        src.as_f64x8(),
5677        k,
5678        _MM_FROUND_CUR_DIRECTION,
5679    ))
5680}
5681
5682/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5683/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5684/// zeroed out if the corresponding mask bit is not set).
5685/// Rounding is done according to the imm8 parameter, which can be one of:
5686///
5687/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5688/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5689/// * [`_MM_FROUND_TO_POS_INF`] : round up
5690/// * [`_MM_FROUND_TO_ZERO`] : truncate
5691/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5692///
5693/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
5694#[inline]
5695#[target_feature(enable = "avx512dq")]
5696#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
5697#[rustc_legacy_const_generics(2)]
5698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5699pub unsafe fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5700    static_assert_uimm_bits!(IMM8, 8);
5701    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
5702}
5703
5704/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5705/// the number of bits specified by imm8, and store the results in dst.
5706/// Rounding is done according to the imm8 parameter, which can be one of:
5707///
5708/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5709/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5710/// * [`_MM_FROUND_TO_POS_INF`] : round up
5711/// * [`_MM_FROUND_TO_ZERO`] : truncate
5712/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5713///
5714/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5715///
5716/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
5717#[inline]
5718#[target_feature(enable = "avx512dq")]
5719#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
5720#[rustc_legacy_const_generics(1, 2)]
5721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5722pub unsafe fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
5723    static_assert_uimm_bits!(IMM8, 8);
5724    static_assert_sae!(SAE);
5725    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
5726}
5727
5728/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5729/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5730/// copied from src to dst if the corresponding mask bit is not set).
5731/// Rounding is done according to the imm8 parameter, which can be one of:
5732///
5733/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5734/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5735/// * [`_MM_FROUND_TO_POS_INF`] : round up
5736/// * [`_MM_FROUND_TO_ZERO`] : truncate
5737/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5738///
5739/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5740///
5741/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
5742#[inline]
5743#[target_feature(enable = "avx512dq")]
5744#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
5745#[rustc_legacy_const_generics(3, 4)]
5746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5747pub unsafe fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
5748    src: __m512,
5749    k: __mmask16,
5750    a: __m512,
5751) -> __m512 {
5752    static_assert_uimm_bits!(IMM8, 8);
5753    static_assert_sae!(SAE);
5754    transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
5755}
5756
5757/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5758/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5759/// zeroed out if the corresponding mask bit is not set).
5760/// Rounding is done according to the imm8 parameter, which can be one of:
5761///
5762/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5763/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5764/// * [`_MM_FROUND_TO_POS_INF`] : round up
5765/// * [`_MM_FROUND_TO_ZERO`] : truncate
5766/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5767///
5768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5769///
5770/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443)
5771#[inline]
5772#[target_feature(enable = "avx512dq")]
5773#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
5774#[rustc_legacy_const_generics(2, 3)]
5775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5776pub unsafe fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>(
5777    k: __mmask16,
5778    a: __m512,
5779) -> __m512 {
5780    static_assert_uimm_bits!(IMM8, 8);
5781    static_assert_sae!(SAE);
5782    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a)
5783}
5784
5785/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5786/// the number of bits specified by imm8, and store the results in dst.
5787/// Rounding is done according to the imm8 parameter, which can be one of:
5788///
5789/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5790/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5791/// * [`_MM_FROUND_TO_POS_INF`] : round up
5792/// * [`_MM_FROUND_TO_ZERO`] : truncate
5793/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5794///
5795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429)
5796#[inline]
5797#[target_feature(enable = "avx512dq,avx512vl")]
5798#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5799#[rustc_legacy_const_generics(1)]
5800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5801pub unsafe fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 {
5802    static_assert_uimm_bits!(IMM8, 8);
5803    _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
5804}
5805
5806/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5807/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5808/// copied from src to dst if the corresponding mask bit is not set).
5809/// Rounding is done according to the imm8 parameter, which can be one of:
5810///
5811/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5812/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5813/// * [`_MM_FROUND_TO_POS_INF`] : round up
5814/// * [`_MM_FROUND_TO_ZERO`] : truncate
5815/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5816///
5817/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427)
5818#[inline]
5819#[target_feature(enable = "avx512dq,avx512vl")]
5820#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5821#[rustc_legacy_const_generics(3)]
5822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5823pub unsafe fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5824    static_assert_uimm_bits!(IMM8, 8);
5825    transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
5826}
5827
5828/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5829/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5830/// zeroed out if the corresponding mask bit is not set).
5831/// Rounding is done according to the imm8 parameter, which can be one of:
5832///
5833/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5834/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5835/// * [`_MM_FROUND_TO_POS_INF`] : round up
5836/// * [`_MM_FROUND_TO_ZERO`] : truncate
5837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5838///
5839/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428)
5840#[inline]
5841#[target_feature(enable = "avx512dq,avx512vl")]
5842#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5843#[rustc_legacy_const_generics(2)]
5844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5845pub unsafe fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5846    static_assert_uimm_bits!(IMM8, 8);
5847    _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
5848}
5849
5850/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5851/// the number of bits specified by imm8, and store the results in dst.
5852/// Rounding is done according to the imm8 parameter, which can be one of:
5853///
5854/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5855/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5856/// * [`_MM_FROUND_TO_POS_INF`] : round up
5857/// * [`_MM_FROUND_TO_ZERO`] : truncate
5858/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5859///
5860/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432)
5861#[inline]
5862#[target_feature(enable = "avx512dq,avx512vl")]
5863#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5864#[rustc_legacy_const_generics(1)]
5865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5866pub unsafe fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 {
5867    static_assert_uimm_bits!(IMM8, 8);
5868    _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
5869}
5870
5871/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5872/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5873/// copied from src to dst if the corresponding mask bit is not set).
5874/// Rounding is done according to the imm8 parameter, which can be one of:
5875///
5876/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5877/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5878/// * [`_MM_FROUND_TO_POS_INF`] : round up
5879/// * [`_MM_FROUND_TO_ZERO`] : truncate
5880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5881///
5882/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430)
5883#[inline]
5884#[target_feature(enable = "avx512dq,avx512vl")]
5885#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5886#[rustc_legacy_const_generics(3)]
5887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5888pub unsafe fn _mm256_mask_reduce_ps<const IMM8: i32>(
5889    src: __m256,
5890    k: __mmask8,
5891    a: __m256,
5892) -> __m256 {
5893    static_assert_uimm_bits!(IMM8, 8);
5894    transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
5895}
5896
5897/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5898/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5899/// zeroed out if the corresponding mask bit is not set).
5900/// Rounding is done according to the imm8 parameter, which can be one of:
5901///
5902/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5903/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5904/// * [`_MM_FROUND_TO_POS_INF`] : round up
5905/// * [`_MM_FROUND_TO_ZERO`] : truncate
5906/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5907///
5908/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431)
5909#[inline]
5910#[target_feature(enable = "avx512dq,avx512vl")]
5911#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5912#[rustc_legacy_const_generics(2)]
5913#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5914pub unsafe fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5915    static_assert_uimm_bits!(IMM8, 8);
5916    _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
5917}
5918
5919/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5920/// the number of bits specified by imm8, and store the results in dst.
5921/// Rounding is done according to the imm8 parameter, which can be one of:
5922///
5923/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5924/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5925/// * [`_MM_FROUND_TO_POS_INF`] : round up
5926/// * [`_MM_FROUND_TO_ZERO`] : truncate
5927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5928///
5929/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435)
5930#[inline]
5931#[target_feature(enable = "avx512dq")]
5932#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5933#[rustc_legacy_const_generics(1)]
5934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5935pub unsafe fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 {
5936    static_assert_uimm_bits!(IMM8, 8);
5937    _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
5938}
5939
5940/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5941/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5942/// copied from src to dst if the corresponding mask bit is not set).
5943/// Rounding is done according to the imm8 parameter, which can be one of:
5944///
5945/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5946/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5947/// * [`_MM_FROUND_TO_POS_INF`] : round up
5948/// * [`_MM_FROUND_TO_ZERO`] : truncate
5949/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5950///
5951/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433)
5952#[inline]
5953#[target_feature(enable = "avx512dq")]
5954#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5955#[rustc_legacy_const_generics(3)]
5956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5957pub unsafe fn _mm512_mask_reduce_ps<const IMM8: i32>(
5958    src: __m512,
5959    k: __mmask16,
5960    a: __m512,
5961) -> __m512 {
5962    static_assert_uimm_bits!(IMM8, 8);
5963    transmute(vreduceps_512(
5964        a.as_f32x16(),
5965        IMM8,
5966        src.as_f32x16(),
5967        k,
5968        _MM_FROUND_CUR_DIRECTION,
5969    ))
5970}
5971
5972/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
5973/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5974/// zeroed out if the corresponding mask bit is not set).
5975/// Rounding is done according to the imm8 parameter, which can be one of:
5976///
5977/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5978/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5979/// * [`_MM_FROUND_TO_POS_INF`] : round up
5980/// * [`_MM_FROUND_TO_ZERO`] : truncate
5981/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5982///
5983/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434)
5984#[inline]
5985#[target_feature(enable = "avx512dq")]
5986#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
5987#[rustc_legacy_const_generics(2)]
5988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5989pub unsafe fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5990    static_assert_uimm_bits!(IMM8, 8);
5991    _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
5992}
5993
5994/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
5995/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
5996/// the upper element from a to the upper element of dst.
5997/// Rounding is done according to the imm8 parameter, which can be one of:
5998///
5999/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6000/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6001/// * [`_MM_FROUND_TO_POS_INF`] : round up
6002/// * [`_MM_FROUND_TO_ZERO`] : truncate
6003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6004///
6005/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6006///
6007/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447)
6008#[inline]
6009#[target_feature(enable = "avx512dq")]
6010#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6011#[rustc_legacy_const_generics(2, 3)]
6012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6013pub unsafe fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6014    a: __m128d,
6015    b: __m128d,
6016) -> __m128d {
6017    static_assert_uimm_bits!(IMM8, 8);
6018    static_assert_sae!(SAE);
6019    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
6020}
6021
6022/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6023/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6024/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6025/// to the upper element of dst.
6026/// Rounding is done according to the imm8 parameter, which can be one of:
6027///
6028/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6029/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6030/// * [`_MM_FROUND_TO_POS_INF`] : round up
6031/// * [`_MM_FROUND_TO_ZERO`] : truncate
6032/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6033///
6034/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6035///
6036/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445)
6037#[inline]
6038#[target_feature(enable = "avx512dq")]
6039#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6040#[rustc_legacy_const_generics(4, 5)]
6041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6042pub unsafe fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6043    src: __m128d,
6044    k: __mmask8,
6045    a: __m128d,
6046    b: __m128d,
6047) -> __m128d {
6048    static_assert_uimm_bits!(IMM8, 8);
6049    static_assert_sae!(SAE);
6050    transmute(vreducesd(
6051        a.as_f64x2(),
6052        b.as_f64x2(),
6053        src.as_f64x2(),
6054        k,
6055        IMM8,
6056        SAE,
6057    ))
6058}
6059
6060/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6061/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6062/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6063/// to the upper element of dst.
6064/// Rounding is done according to the imm8 parameter, which can be one of:
6065///
6066/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6067/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6068/// * [`_MM_FROUND_TO_POS_INF`] : round up
6069/// * [`_MM_FROUND_TO_ZERO`] : truncate
6070/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6071///
6072/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6073///
6074/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446)
6075#[inline]
6076#[target_feature(enable = "avx512dq")]
6077#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6078#[rustc_legacy_const_generics(3, 4)]
6079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6080pub unsafe fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6081    k: __mmask8,
6082    a: __m128d,
6083    b: __m128d,
6084) -> __m128d {
6085    static_assert_uimm_bits!(IMM8, 8);
6086    static_assert_sae!(SAE);
6087    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
6088}
6089
6090/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper element from a to the upper element of dst.
6094/// Rounding is done according to the imm8 parameter, which can be one of:
6095///
6096/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6097/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6098/// * [`_MM_FROUND_TO_POS_INF`] : round up
6099/// * [`_MM_FROUND_TO_ZERO`] : truncate
6100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6101///
6102/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456)
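///
/// A small sketch of the scalar lane behaviour (illustration only, not compiled as a doctest;
/// assumes a nightly toolchain with `stdarch_x86_avx512`, an x86_64 target, and AVX-512DQ
/// hardware; the helper name is invented):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512dq")]
/// unsafe fn lower_fraction(a: __m128d, b: __m128d) -> __m128d {
///     // Lane 0: reduced argument of b[0] (here its truncated fractional part);
///     // lane 1: copied from a[1].
///     _mm_reduce_sd::<{ _MM_FROUND_TO_ZERO }>(a, b)
/// }
/// ```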
6103#[inline]
6104#[target_feature(enable = "avx512dq")]
6105#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6106#[rustc_legacy_const_generics(2)]
6107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6108pub unsafe fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
6109    static_assert_uimm_bits!(IMM8, 8);
6110    _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
6111}
6112
6113/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6114/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6115/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6116/// to the upper element of dst.
6117/// Rounding is done according to the imm8 parameter, which can be one of:
6118///
6119/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6120/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6121/// * [`_MM_FROUND_TO_POS_INF`] : round up
6122/// * [`_MM_FROUND_TO_ZERO`] : truncate
6123/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6124///
6125/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454)
6126#[inline]
6127#[target_feature(enable = "avx512dq")]
6128#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6129#[rustc_legacy_const_generics(4)]
6130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6131pub unsafe fn _mm_mask_reduce_sd<const IMM8: i32>(
6132    src: __m128d,
6133    k: __mmask8,
6134    a: __m128d,
6135    b: __m128d,
6136) -> __m128d {
6137    static_assert_uimm_bits!(IMM8, 8);
6138    transmute(vreducesd(
6139        a.as_f64x2(),
6140        b.as_f64x2(),
6141        src.as_f64x2(),
6142        k,
6143        IMM8,
6144        _MM_FROUND_CUR_DIRECTION,
6145    ))
6146}
6147
6148/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6149/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6150/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6151/// to the upper element of dst.
6152/// Rounding is done according to the imm8 parameter, which can be one of:
6153///
6154/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6155/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6156/// * [`_MM_FROUND_TO_POS_INF`] : round up
6157/// * [`_MM_FROUND_TO_ZERO`] : truncate
6158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6159///
6160/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455)
6161#[inline]
6162#[target_feature(enable = "avx512dq")]
6163#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6164#[rustc_legacy_const_generics(3)]
6165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6166pub unsafe fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6167    static_assert_uimm_bits!(IMM8, 8);
6168    _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
6169}
6170
6171/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6172/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper 3 packed elements from a to the upper elements of dst.
6175/// Rounding is done according to the imm8 parameter, which can be one of:
6176///
6177/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6178/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6179/// * [`_MM_FROUND_TO_POS_INF`] : round up
6180/// * [`_MM_FROUND_TO_ZERO`] : truncate
6181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6182///
6183/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6184///
6185/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453)
6186#[inline]
6187#[target_feature(enable = "avx512dq")]
6188#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6189#[rustc_legacy_const_generics(2, 3)]
6190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6191pub unsafe fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
6192    static_assert_uimm_bits!(IMM8, 8);
6193    static_assert_sae!(SAE);
6194    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
6195}
6196
6197/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6198/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
/// elements from a to the upper elements of dst.
6201/// Rounding is done according to the imm8 parameter, which can be one of:
6202///
6203/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6204/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6205/// * [`_MM_FROUND_TO_POS_INF`] : round up
6206/// * [`_MM_FROUND_TO_ZERO`] : truncate
6207/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6208///
6209/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6210///
6211/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451)
6212#[inline]
6213#[target_feature(enable = "avx512dq")]
6214#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6215#[rustc_legacy_const_generics(4, 5)]
6216#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6217pub unsafe fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6218    src: __m128,
6219    k: __mmask8,
6220    a: __m128,
6221    b: __m128,
6222) -> __m128 {
6223    static_assert_uimm_bits!(IMM8, 8);
6224    static_assert_sae!(SAE);
6225    transmute(vreducess(
6226        a.as_f32x4(),
6227        b.as_f32x4(),
6228        src.as_f32x4(),
6229        k,
6230        IMM8,
6231        SAE,
6232    ))
6233}
6234
6235/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6236/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements
/// from a to the upper elements of dst.
6239/// Rounding is done according to the imm8 parameter, which can be one of:
6240///
6241/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6242/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6243/// * [`_MM_FROUND_TO_POS_INF`] : round up
6244/// * [`_MM_FROUND_TO_ZERO`] : truncate
6245/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6246///
6247/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6248///
6249/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452)
6250#[inline]
6251#[target_feature(enable = "avx512dq")]
6252#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6253#[rustc_legacy_const_generics(3, 4)]
6254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6255pub unsafe fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6256    k: __mmask8,
6257    a: __m128,
6258    b: __m128,
6259) -> __m128 {
6260    static_assert_uimm_bits!(IMM8, 8);
6261    static_assert_sae!(SAE);
6262    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
6263}
6264
6265/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6266/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper 3 packed elements from a to the upper elements of dst.
6269/// Rounding is done according to the imm8 parameter, which can be one of:
6270///
6271/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6272/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6273/// * [`_MM_FROUND_TO_POS_INF`] : round up
6274/// * [`_MM_FROUND_TO_ZERO`] : truncate
6275/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6276///
6277/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462)
6278#[inline]
6279#[target_feature(enable = "avx512dq")]
6280#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6281#[rustc_legacy_const_generics(2)]
6282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6283pub unsafe fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
6284    static_assert_uimm_bits!(IMM8, 8);
6285    _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
6286}
6287
6288/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6289/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed
/// elements from a to the upper elements of dst.
6292/// Rounding is done according to the imm8 parameter, which can be one of:
6293///
6294/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6295/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6296/// * [`_MM_FROUND_TO_POS_INF`] : round up
6297/// * [`_MM_FROUND_TO_ZERO`] : truncate
6298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6299///
6300/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460)
6301#[inline]
6302#[target_feature(enable = "avx512dq")]
6303#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6304#[rustc_legacy_const_generics(4)]
6305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6306pub unsafe fn _mm_mask_reduce_ss<const IMM8: i32>(
6307    src: __m128,
6308    k: __mmask8,
6309    a: __m128,
6310    b: __m128,
6311) -> __m128 {
6312    static_assert_uimm_bits!(IMM8, 8);
6313    transmute(vreducess(
6314        a.as_f32x4(),
6315        b.as_f32x4(),
6316        src.as_f32x4(),
6317        k,
6318        IMM8,
6319        _MM_FROUND_CUR_DIRECTION,
6320    ))
6321}
6322
6323/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6324/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements
/// from a to the upper elements of dst.
6327/// Rounding is done according to the imm8 parameter, which can be one of:
6328///
6329/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6330/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6331/// * [`_MM_FROUND_TO_POS_INF`] : round up
6332/// * [`_MM_FROUND_TO_ZERO`] : truncate
6333/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6334///
6335/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461)
6336#[inline]
6337#[target_feature(enable = "avx512dq")]
6338#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6339#[rustc_legacy_const_generics(3)]
6340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6341pub unsafe fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6342    static_assert_uimm_bits!(IMM8, 8);
6343    _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
6344}
6345
6346// FP-Class
6347
6348/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6349/// by imm8, and store the results in mask vector k.
6350/// imm can be a combination of:
6351///
6352///     - 0x01 // QNaN
6353///     - 0x02 // Positive Zero
6354///     - 0x04 // Negative Zero
6355///     - 0x08 // Positive Infinity
6356///     - 0x10 // Negative Infinity
6357///     - 0x20 // Denormal
6358///     - 0x40 // Negative
6359///     - 0x80 // SNaN
6360///
6361/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493)
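///
/// A minimal sketch of combining category bits (illustration only, not compiled as a doctest;
/// assumes a nightly toolchain with `stdarch_x86_avx512`, an x86_64 target, and AVX-512DQ +
/// AVX-512VL hardware; the helper name is invented):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512dq,avx512vl")]
/// unsafe fn nan_or_infinite(a: __m128d) -> __mmask8 {
///     // 0x01 | 0x80 matches any NaN (quiet or signaling),
///     // 0x08 | 0x10 matches positive or negative infinity.
///     _mm_fpclass_pd_mask::<{ 0x01 | 0x80 | 0x08 | 0x10 }>(a)
/// }
/// ```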
6362#[inline]
6363#[target_feature(enable = "avx512dq,avx512vl")]
6364#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6365#[rustc_legacy_const_generics(1)]
6366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6367pub unsafe fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
6368    static_assert_uimm_bits!(IMM8, 8);
6369    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6370}
6371
6372/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6373/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6374/// corresponding mask bit is not set).
6375/// imm can be a combination of:
6376///
6377///     - 0x01 // QNaN
6378///     - 0x02 // Positive Zero
6379///     - 0x04 // Negative Zero
6380///     - 0x08 // Positive Infinity
6381///     - 0x10 // Negative Infinity
6382///     - 0x20 // Denormal
6383///     - 0x40 // Negative
6384///     - 0x80 // SNaN
6385///
6386/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494)
6387#[inline]
6388#[target_feature(enable = "avx512dq,avx512vl")]
6389#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6390#[rustc_legacy_const_generics(2)]
6391#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6392pub unsafe fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
6393    static_assert_uimm_bits!(IMM8, 8);
6394    transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
6395}
6396
6397/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6398/// by imm8, and store the results in mask vector k.
6399/// imm can be a combination of:
6400///
6401///     - 0x01 // QNaN
6402///     - 0x02 // Positive Zero
6403///     - 0x04 // Negative Zero
6404///     - 0x08 // Positive Infinity
6405///     - 0x10 // Negative Infinity
6406///     - 0x20 // Denormal
6407///     - 0x40 // Negative
6408///     - 0x80 // SNaN
6409///
6410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495)
6411#[inline]
6412#[target_feature(enable = "avx512dq,avx512vl")]
6413#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6414#[rustc_legacy_const_generics(1)]
6415#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6416pub unsafe fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
6417    static_assert_uimm_bits!(IMM8, 8);
6418    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6419}
6420
6421/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6422/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6423/// corresponding mask bit is not set).
6424/// imm can be a combination of:
6425///
6426///     - 0x01 // QNaN
6427///     - 0x02 // Positive Zero
6428///     - 0x04 // Negative Zero
6429///     - 0x08 // Positive Infinity
6430///     - 0x10 // Negative Infinity
6431///     - 0x20 // Denormal
6432///     - 0x40 // Negative
6433///     - 0x80 // SNaN
6434///
6435/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496)
6436#[inline]
6437#[target_feature(enable = "avx512dq,avx512vl")]
6438#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6439#[rustc_legacy_const_generics(2)]
6440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6441pub unsafe fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
6442    static_assert_uimm_bits!(IMM8, 8);
6443    transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
6444}
6445
6446/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6447/// by imm8, and store the results in mask vector k.
6448/// imm8 can be a combination of:
6449///
6450///     - 0x01 // QNaN
6451///     - 0x02 // Positive Zero
6452///     - 0x04 // Negative Zero
6453///     - 0x08 // Positive Infinity
6454///     - 0x10 // Negative Infinity
6455///     - 0x20 // Denormal
6456///     - 0x40 // Negative
6457///     - 0x80 // SNaN
6458///
6459/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497)
6460#[inline]
6461#[target_feature(enable = "avx512dq")]
6462#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6463#[rustc_legacy_const_generics(1)]
6464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6465pub unsafe fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
6466    static_assert_uimm_bits!(IMM8, 8);
6467    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6468}
6469
6470/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6471/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6472/// corresponding mask bit is not set).
6473/// imm8 can be a combination of:
6474///
6475///     - 0x01 // QNaN
6476///     - 0x02 // Positive Zero
6477///     - 0x04 // Negative Zero
6478///     - 0x08 // Positive Infinity
6479///     - 0x10 // Negative Infinity
6480///     - 0x20 // Denormal
6481///     - 0x40 // Negative
6482///     - 0x80 // SNaN
6483///
6484/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498)
6485#[inline]
6486#[target_feature(enable = "avx512dq")]
6487#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6488#[rustc_legacy_const_generics(2)]
6489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6490pub unsafe fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
6491    static_assert_uimm_bits!(IMM8, 8);
6492    transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
6493}
6494
6495/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6496/// by imm8, and store the results in mask vector k.
6497/// imm8 can be a combination of:
6498///
6499///     - 0x01 // QNaN
6500///     - 0x02 // Positive Zero
6501///     - 0x04 // Negative Zero
6502///     - 0x08 // Positive Infinity
6503///     - 0x10 // Negative Infinity
6504///     - 0x20 // Denormal
6505///     - 0x40 // Negative
6506///     - 0x80 // SNaN
6507///
6508/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505)
6509#[inline]
6510#[target_feature(enable = "avx512dq,avx512vl")]
6511#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6512#[rustc_legacy_const_generics(1)]
6513#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6514pub unsafe fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
6515    static_assert_uimm_bits!(IMM8, 8);
6516    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
6517}
6518
6519/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6520/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6521/// corresponding mask bit is not set).
6522/// imm8 can be a combination of:
6523///
6524///     - 0x01 // QNaN
6525///     - 0x02 // Positive Zero
6526///     - 0x04 // Negative Zero
6527///     - 0x08 // Positive Infinity
6528///     - 0x10 // Negative Infinity
6529///     - 0x20 // Denormal
6530///     - 0x40 // Negative
6531///     - 0x80 // SNaN
6532///
6533/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506)
6534#[inline]
6535#[target_feature(enable = "avx512dq,avx512vl")]
6536#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6537#[rustc_legacy_const_generics(2)]
6538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6539pub unsafe fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
6540    static_assert_uimm_bits!(IMM8, 8);
6541    transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
6542}
6543
6544/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6545/// by imm8, and store the results in mask vector k.
6546/// imm8 can be a combination of:
6547///
6548///     - 0x01 // QNaN
6549///     - 0x02 // Positive Zero
6550///     - 0x04 // Negative Zero
6551///     - 0x08 // Positive Infinity
6552///     - 0x10 // Negative Infinity
6553///     - 0x20 // Denormal
6554///     - 0x40 // Negative
6555///     - 0x80 // SNaN
6556///
6557/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507)
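///
/// # Examples
///
/// An illustrative sketch (not compiled as a doctest here); it assumes the
/// `avx512dq` and `avx512vl` target features are available and that
/// denormals-are-zero (DAZ) is left disabled in MXCSR, as it is by default:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // 2^-127 is a single-precision denormal, so category 0x20 flags every lane.
/// let v = _mm256_set1_ps(f32::MIN_POSITIVE / 2.0);
/// let k = unsafe { _mm256_fpclass_ps_mask::<0x20>(v) };
/// assert_eq!(k, 0xff);
/// ```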
6558#[inline]
6559#[target_feature(enable = "avx512dq,avx512vl")]
6560#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6561#[rustc_legacy_const_generics(1)]
6562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6563pub unsafe fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
6564    static_assert_uimm_bits!(IMM8, 8);
6565    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
6566}
6567
6568/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6569/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6570/// corresponding mask bit is not set).
6571/// imm8 can be a combination of:
6572///
6573///     - 0x01 // QNaN
6574///     - 0x02 // Positive Zero
6575///     - 0x04 // Negative Zero
6576///     - 0x08 // Positive Infinity
6577///     - 0x10 // Negative Infinity
6578///     - 0x20 // Denormal
6579///     - 0x40 // Negative
6580///     - 0x80 // SNaN
6581///
6582/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508)
6583#[inline]
6584#[target_feature(enable = "avx512dq,avx512vl")]
6585#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6586#[rustc_legacy_const_generics(2)]
6587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6588pub unsafe fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
6589    static_assert_uimm_bits!(IMM8, 8);
6590    transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
6591}
6592
6593/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6594/// by imm8, and store the results in mask vector k.
6595/// imm8 can be a combination of:
6596///
6597///     - 0x01 // QNaN
6598///     - 0x02 // Positive Zero
6599///     - 0x04 // Negative Zero
6600///     - 0x08 // Positive Infinity
6601///     - 0x10 // Negative Infinity
6602///     - 0x20 // Denormal
6603///     - 0x40 // Negative
6604///     - 0x80 // SNaN
6605///
6606/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509)
6607#[inline]
6608#[target_feature(enable = "avx512dq")]
6609#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6610#[rustc_legacy_const_generics(1)]
6611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6612pub unsafe fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
6613    static_assert_uimm_bits!(IMM8, 8);
6614    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
6615}
6616
6617/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
6618/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6619/// corresponding mask bit is not set).
6620/// imm8 can be a combination of:
6621///
6622///     - 0x01 // QNaN
6623///     - 0x02 // Positive Zero
6624///     - 0x04 // Negative Zero
6625///     - 0x08 // Positive Infinity
6626///     - 0x10 // Negative Infinity
6627///     - 0x20 // Denormal
6628///     - 0x40 // Negative
6629///     - 0x80 // SNaN
6630///
6631/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510)
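///
/// # Examples
///
/// An illustrative sketch (not compiled as a doctest here); it assumes the
/// `avx512dq` target feature is available at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Flag lanes holding an infinity of either sign (0x08 | 0x10 = 0x18), but
/// // only in the lower eight lanes selected by k1.
/// let v = _mm512_set1_ps(f32::INFINITY);
/// let k = unsafe { _mm512_mask_fpclass_ps_mask::<0x18>(0x00ff, v) };
/// assert_eq!(k, 0x00ff);
/// ```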
6632#[inline]
6633#[target_feature(enable = "avx512dq")]
6634#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
6635#[rustc_legacy_const_generics(2)]
6636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6637pub unsafe fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
6638    static_assert_uimm_bits!(IMM8, 8);
6639    transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
6640}
6641
6642/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
6643/// by imm8, and store the results in mask vector k.
6644/// imm8 can be a combination of:
6645///
6646///     - 0x01 // QNaN
6647///     - 0x02 // Positive Zero
6648///     - 0x04 // Negative Zero
6649///     - 0x08 // Positive Infinity
6650///     - 0x10 // Negative Infinity
6651///     - 0x20 // Denormal
6652///     - 0x40 // Negative
6653///     - 0x80 // SNaN
6654///
6655/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511)
6656#[inline]
6657#[target_feature(enable = "avx512dq")]
6658#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
6659#[rustc_legacy_const_generics(1)]
6660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6661pub unsafe fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
6662    static_assert_uimm_bits!(IMM8, 8);
6663    _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
6664}
6665
6666/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
6667/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6668/// corresponding mask bit is not set).
6669/// imm8 can be a combination of:
6670///
6671///     - 0x01 // QNaN
6672///     - 0x02 // Positive Zero
6673///     - 0x04 // Negative Zero
6674///     - 0x08 // Positive Infinity
6675///     - 0x10 // Negative Infinity
6676///     - 0x20 // Denormal
6677///     - 0x40 // Negative
6678///     - 0x80 // SNaN
6679///
6680/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512)
6681#[inline]
6682#[target_feature(enable = "avx512dq")]
6683#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
6684#[rustc_legacy_const_generics(2)]
6685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6686pub unsafe fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
6687    static_assert_uimm_bits!(IMM8, 8);
6688    vfpclasssd(a.as_f64x2(), IMM8, k1)
6689}
6690
6691/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
6692/// by imm8, and store the results in mask vector k.
6693/// imm8 can be a combination of:
6694///
6695///     - 0x01 // QNaN
6696///     - 0x02 // Positive Zero
6697///     - 0x04 // Negative Zero
6698///     - 0x08 // Positive Infinity
6699///     - 0x10 // Negative Infinity
6700///     - 0x20 // Denormal
6701///     - 0x40 // Negative
6702///     - 0x80 // SNaN
6703///
6704/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515)
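///
/// # Examples
///
/// An illustrative sketch (not compiled as a doctest here); it assumes the
/// `avx512dq` target feature is available at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Only the lowest lane is classified; here it is negative zero (0x04),
/// // so bit 0 of the returned mask is set.
/// let v = _mm_set_ss(-0.0);
/// let k = unsafe { _mm_fpclass_ss_mask::<0x04>(v) };
/// assert_eq!(k, 0b1);
/// ```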
6705#[inline]
6706#[target_feature(enable = "avx512dq")]
6707#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
6708#[rustc_legacy_const_generics(1)]
6709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6710pub unsafe fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
6711    static_assert_uimm_bits!(IMM8, 8);
6712    _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
6713}
6714
6715/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
6716/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6717/// corresponding mask bit is not set).
6718/// imm8 can be a combination of:
6719///
6720///     - 0x01 // QNaN
6721///     - 0x02 // Positive Zero
6722///     - 0x04 // Negative Zero
6723///     - 0x08 // Positive Infinity
6724///     - 0x10 // Negative Infinity
6725///     - 0x20 // Denormal
6726///     - 0x40 // Negative
6727///     - 0x80 // SNaN
6728///
6729/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516)
6730#[inline]
6731#[target_feature(enable = "avx512dq")]
6732#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
6733#[rustc_legacy_const_generics(2)]
6734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6735pub unsafe fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
6736    static_assert_uimm_bits!(IMM8, 8);
6737    vfpclassss(a.as_f32x4(), IMM8, k1)
6738}
6739
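// The declarations below bind the LLVM intrinsics that back the AVX-512DQ
// operations defined above. Two signature patterns appear: the `sitofp.round.*`
// and `uitofp.round.*` conversions encode the element types in their names and
// take a rounding-mode immediate, while the `mask.*` intrinsics take their
// writemask (and, where merging applies, the merge source) as explicit
// arguments, with the 512-bit forms carrying an extra rounding or SAE immediate.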
6740#[allow(improper_ctypes)]
6741extern "C" {
6742    #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"]
6743    fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2;
6744    #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"]
6745    fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4;
6746    #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"]
6747    fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8;
6748
6749    #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"]
6750    fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4;
6751    #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"]
6752    fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4;
6753    #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"]
6754    fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8;
6755
6756    #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2u64"]
6757    fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2;
6758    #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4u64"]
6759    fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4;
6760    #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8u64"]
6761    fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8;
6762
6763    #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"]
6764    fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4;
6765    #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4u64"]
6766    fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4;
6767    #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8u64"]
6768    fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8;
6769
6770    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"]
6771    fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
6772    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"]
6773    fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
6774    #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"]
6775    fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
6776
6777    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"]
6778    fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
6779    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"]
6780    fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
6781    #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"]
6782    fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
6783
6784    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"]
6785    fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
6786    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"]
6787    fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
6788    #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"]
6789    fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
6790
6791    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"]
6792    fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
6793    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"]
6794    fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
6795    #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"]
6796    fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
6797
6798    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"]
6799    fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
6800    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"]
6801    fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
6802    #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"]
6803    fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
6804
6805    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"]
6806    fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
6807    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"]
6808    fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
6809    #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"]
6810    fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
6811
6812    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"]
6813    fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
6814    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"]
6815    fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
6816    #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"]
6817    fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
6818
6819    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"]
6820    fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
6821    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"]
6822    fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
6823    #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"]
6824    fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
6825
6826    #[link_name = "llvm.x86.avx512.mask.range.pd.128"]
6827    fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
6828    #[link_name = "llvm.x86.avx512.mask.range.pd.256"]
6829    fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
6830    #[link_name = "llvm.x86.avx512.mask.range.pd.512"]
6831    fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
6832
6833    #[link_name = "llvm.x86.avx512.mask.range.ps.128"]
6834    fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
6835    #[link_name = "llvm.x86.avx512.mask.range.ps.256"]
6836    fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
6837    #[link_name = "llvm.x86.avx512.mask.range.ps.512"]
6838    fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32)
6839        -> f32x16;
6840
6841    #[link_name = "llvm.x86.avx512.mask.range.sd"]
6842    fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
6843    #[link_name = "llvm.x86.avx512.mask.range.ss"]
6844    fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
6845
6846    #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"]
6847    fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
6848    #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"]
6849    fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
6850    #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"]
6851    fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
6852
6853    #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"]
6854    fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
6855    #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"]
6856    fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
6857    #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"]
6858    fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16;
6859
6860    #[link_name = "llvm.x86.avx512.mask.reduce.sd"]
6861    fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
6862    #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
6863    fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
6864
6865    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
6866    fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
6867    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
6868    fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
6869    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
6870    fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;
6871
6872    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
6873    fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
6874    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
6875    fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
6876    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
6877    fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;
6878
6879    #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
6880    fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
6881    #[link_name = "llvm.x86.avx512.mask.fpclass.ss"]
6882    fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
6883}
6884
6885#[cfg(test)]
6886mod tests {
6887    use super::*;
6888
6889    use stdarch_test::simd_test;
6890
6891    use crate::core_arch::x86::*;
6892    use crate::mem::transmute;
6893
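    // Operand bit patterns are chosen so that each bitwise operation yields a
    // distinct, recognizable pattern in every byte:
    // 0x33 & 0x55 = 0x11, !0x33 & 0x55 = 0x44, 0x33 | 0x55 = 0x77, 0x33 ^ 0x55 = 0x66.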
6894    const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
6895    const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };
6896
6897    const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
6898    const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
6899    const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
6900    const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };
6901
6902    const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
6903    const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };
6904
6905    const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
6906    const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
6907    const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
6908    const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
6909
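    // A note on conventions used throughout these tests: the `_mm*_set_*`
    // constructors list elements from the highest lane down to lane 0, while
    // mask bit i always refers to lane i. With `src = _mm_set_pd(1., 2.)` and
    // mask 0b01, for example, lane 0 (the value 2.) receives the computed
    // result and lane 1 keeps the `src` value 1.
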
6910    #[simd_test(enable = "avx512dq,avx512vl")]
6911    unsafe fn test_mm_mask_and_pd() {
6912        let a = _mm_set1_pd(OPRND1_64);
6913        let b = _mm_set1_pd(OPRND2_64);
6914        let src = _mm_set_pd(1., 2.);
6915        let r = _mm_mask_and_pd(src, 0b01, a, b);
6916        let e = _mm_set_pd(1., AND_64);
6917        assert_eq_m128d(r, e);
6918    }
6919
6920    #[simd_test(enable = "avx512dq,avx512vl")]
6921    unsafe fn test_mm_maskz_and_pd() {
6922        let a = _mm_set1_pd(OPRND1_64);
6923        let b = _mm_set1_pd(OPRND2_64);
6924        let r = _mm_maskz_and_pd(0b01, a, b);
6925        let e = _mm_set_pd(0.0, AND_64);
6926        assert_eq_m128d(r, e);
6927    }
6928
6929    #[simd_test(enable = "avx512dq,avx512vl")]
6930    unsafe fn test_mm256_mask_and_pd() {
6931        let a = _mm256_set1_pd(OPRND1_64);
6932        let b = _mm256_set1_pd(OPRND2_64);
6933        let src = _mm256_set_pd(1., 2., 3., 4.);
6934        let r = _mm256_mask_and_pd(src, 0b0101, a, b);
6935        let e = _mm256_set_pd(1., AND_64, 3., AND_64);
6936        assert_eq_m256d(r, e);
6937    }
6938
6939    #[simd_test(enable = "avx512dq,avx512vl")]
6940    unsafe fn test_mm256_maskz_and_pd() {
6941        let a = _mm256_set1_pd(OPRND1_64);
6942        let b = _mm256_set1_pd(OPRND2_64);
6943        let r = _mm256_maskz_and_pd(0b0101, a, b);
6944        let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64);
6945        assert_eq_m256d(r, e);
6946    }
6947
6948    #[simd_test(enable = "avx512dq")]
6949    unsafe fn test_mm512_and_pd() {
6950        let a = _mm512_set1_pd(OPRND1_64);
6951        let b = _mm512_set1_pd(OPRND2_64);
6952        let r = _mm512_and_pd(a, b);
6953        let e = _mm512_set1_pd(AND_64);
6954        assert_eq_m512d(r, e);
6955    }
6956
6957    #[simd_test(enable = "avx512dq")]
6958    unsafe fn test_mm512_mask_and_pd() {
6959        let a = _mm512_set1_pd(OPRND1_64);
6960        let b = _mm512_set1_pd(OPRND2_64);
6961        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
6962        let r = _mm512_mask_and_pd(src, 0b01010101, a, b);
6963        let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64);
6964        assert_eq_m512d(r, e);
6965    }
6966
6967    #[simd_test(enable = "avx512dq")]
6968    unsafe fn test_mm512_maskz_and_pd() {
6969        let a = _mm512_set1_pd(OPRND1_64);
6970        let b = _mm512_set1_pd(OPRND2_64);
6971        let r = _mm512_maskz_and_pd(0b01010101, a, b);
6972        let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64);
6973        assert_eq_m512d(r, e);
6974    }
6975
6976    #[simd_test(enable = "avx512dq,avx512vl")]
6977    unsafe fn test_mm_mask_and_ps() {
6978        let a = _mm_set1_ps(OPRND1_32);
6979        let b = _mm_set1_ps(OPRND2_32);
6980        let src = _mm_set_ps(1., 2., 3., 4.);
6981        let r = _mm_mask_and_ps(src, 0b0101, a, b);
6982        let e = _mm_set_ps(1., AND_32, 3., AND_32);
6983        assert_eq_m128(r, e);
6984    }
6985
6986    #[simd_test(enable = "avx512dq,avx512vl")]
6987    unsafe fn test_mm_maskz_and_ps() {
6988        let a = _mm_set1_ps(OPRND1_32);
6989        let b = _mm_set1_ps(OPRND2_32);
6990        let r = _mm_maskz_and_ps(0b0101, a, b);
6991        let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32);
6992        assert_eq_m128(r, e);
6993    }
6994
6995    #[simd_test(enable = "avx512dq,avx512vl")]
6996    unsafe fn test_mm256_mask_and_ps() {
6997        let a = _mm256_set1_ps(OPRND1_32);
6998        let b = _mm256_set1_ps(OPRND2_32);
6999        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7000        let r = _mm256_mask_and_ps(src, 0b01010101, a, b);
7001        let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32);
7002        assert_eq_m256(r, e);
7003    }
7004
7005    #[simd_test(enable = "avx512dq,avx512vl")]
7006    unsafe fn test_mm256_maskz_and_ps() {
7007        let a = _mm256_set1_ps(OPRND1_32);
7008        let b = _mm256_set1_ps(OPRND2_32);
7009        let r = _mm256_maskz_and_ps(0b01010101, a, b);
7010        let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32);
7011        assert_eq_m256(r, e);
7012    }
7013
7014    #[simd_test(enable = "avx512dq")]
7015    unsafe fn test_mm512_and_ps() {
7016        let a = _mm512_set1_ps(OPRND1_32);
7017        let b = _mm512_set1_ps(OPRND2_32);
7018        let r = _mm512_and_ps(a, b);
7019        let e = _mm512_set1_ps(AND_32);
7020        assert_eq_m512(r, e);
7021    }
7022
7023    #[simd_test(enable = "avx512dq")]
7024    unsafe fn test_mm512_mask_and_ps() {
7025        let a = _mm512_set1_ps(OPRND1_32);
7026        let b = _mm512_set1_ps(OPRND2_32);
7027        let src = _mm512_set_ps(
7028            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7029        );
7030        let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
7031        let e = _mm512_set_ps(
7032            1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
7033            15., AND_32,
7034        );
7035        assert_eq_m512(r, e);
7036    }
7037
7038    #[simd_test(enable = "avx512dq")]
7039    unsafe fn test_mm512_maskz_and_ps() {
7040        let a = _mm512_set1_ps(OPRND1_32);
7041        let b = _mm512_set1_ps(OPRND2_32);
7042        let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
7043        let e = _mm512_set_ps(
7044            0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
7045            AND_32,
7046        );
7047        assert_eq_m512(r, e);
7048    }
7049
7050    #[simd_test(enable = "avx512dq,avx512vl")]
7051    unsafe fn test_mm_mask_andnot_pd() {
7052        let a = _mm_set1_pd(OPRND1_64);
7053        let b = _mm_set1_pd(OPRND2_64);
7054        let src = _mm_set_pd(1., 2.);
7055        let r = _mm_mask_andnot_pd(src, 0b01, a, b);
7056        let e = _mm_set_pd(1., ANDN_64);
7057        assert_eq_m128d(r, e);
7058    }
7059
7060    #[simd_test(enable = "avx512dq,avx512vl")]
7061    unsafe fn test_mm_maskz_andnot_pd() {
7062        let a = _mm_set1_pd(OPRND1_64);
7063        let b = _mm_set1_pd(OPRND2_64);
7064        let r = _mm_maskz_andnot_pd(0b01, a, b);
7065        let e = _mm_set_pd(0.0, ANDN_64);
7066        assert_eq_m128d(r, e);
7067    }
7068
7069    #[simd_test(enable = "avx512dq,avx512vl")]
7070    unsafe fn test_mm256_mask_andnot_pd() {
7071        let a = _mm256_set1_pd(OPRND1_64);
7072        let b = _mm256_set1_pd(OPRND2_64);
7073        let src = _mm256_set_pd(1., 2., 3., 4.);
7074        let r = _mm256_mask_andnot_pd(src, 0b0101, a, b);
7075        let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64);
7076        assert_eq_m256d(r, e);
7077    }
7078
7079    #[simd_test(enable = "avx512dq,avx512vl")]
7080    unsafe fn test_mm256_maskz_andnot_pd() {
7081        let a = _mm256_set1_pd(OPRND1_64);
7082        let b = _mm256_set1_pd(OPRND2_64);
7083        let r = _mm256_maskz_andnot_pd(0b0101, a, b);
7084        let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64);
7085        assert_eq_m256d(r, e);
7086    }
7087
7088    #[simd_test(enable = "avx512dq")]
7089    unsafe fn test_mm512_andnot_pd() {
7090        let a = _mm512_set1_pd(OPRND1_64);
7091        let b = _mm512_set1_pd(OPRND2_64);
7092        let r = _mm512_andnot_pd(a, b);
7093        let e = _mm512_set1_pd(ANDN_64);
7094        assert_eq_m512d(r, e);
7095    }
7096
7097    #[simd_test(enable = "avx512dq")]
7098    unsafe fn test_mm512_mask_andnot_pd() {
7099        let a = _mm512_set1_pd(OPRND1_64);
7100        let b = _mm512_set1_pd(OPRND2_64);
7101        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7102        let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b);
7103        let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64);
7104        assert_eq_m512d(r, e);
7105    }
7106
7107    #[simd_test(enable = "avx512dq")]
7108    unsafe fn test_mm512_maskz_andnot_pd() {
7109        let a = _mm512_set1_pd(OPRND1_64);
7110        let b = _mm512_set1_pd(OPRND2_64);
7111        let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
7112        let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64);
7113        assert_eq_m512d(r, e);
7114    }
7115
7116    #[simd_test(enable = "avx512dq,avx512vl")]
7117    unsafe fn test_mm_mask_andnot_ps() {
7118        let a = _mm_set1_ps(OPRND1_32);
7119        let b = _mm_set1_ps(OPRND2_32);
7120        let src = _mm_set_ps(1., 2., 3., 4.);
7121        let r = _mm_mask_andnot_ps(src, 0b0101, a, b);
7122        let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32);
7123        assert_eq_m128(r, e);
7124    }
7125
7126    #[simd_test(enable = "avx512dq,avx512vl")]
7127    unsafe fn test_mm_maskz_andnot_ps() {
7128        let a = _mm_set1_ps(OPRND1_32);
7129        let b = _mm_set1_ps(OPRND2_32);
7130        let r = _mm_maskz_andnot_ps(0b0101, a, b);
7131        let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32);
7132        assert_eq_m128(r, e);
7133    }
7134
7135    #[simd_test(enable = "avx512dq,avx512vl")]
7136    unsafe fn test_mm256_mask_andnot_ps() {
7137        let a = _mm256_set1_ps(OPRND1_32);
7138        let b = _mm256_set1_ps(OPRND2_32);
7139        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7140        let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b);
7141        let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32);
7142        assert_eq_m256(r, e);
7143    }
7144
7145    #[simd_test(enable = "avx512dq,avx512vl")]
7146    unsafe fn test_mm256_maskz_andnot_ps() {
7147        let a = _mm256_set1_ps(OPRND1_32);
7148        let b = _mm256_set1_ps(OPRND2_32);
7149        let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
7150        let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32);
7151        assert_eq_m256(r, e);
7152    }
7153
7154    #[simd_test(enable = "avx512dq")]
7155    unsafe fn test_mm512_andnot_ps() {
7156        let a = _mm512_set1_ps(OPRND1_32);
7157        let b = _mm512_set1_ps(OPRND2_32);
7158        let r = _mm512_andnot_ps(a, b);
7159        let e = _mm512_set1_ps(ANDN_32);
7160        assert_eq_m512(r, e);
7161    }
7162
7163    #[simd_test(enable = "avx512dq")]
7164    unsafe fn test_mm512_mask_andnot_ps() {
7165        let a = _mm512_set1_ps(OPRND1_32);
7166        let b = _mm512_set1_ps(OPRND2_32);
7167        let src = _mm512_set_ps(
7168            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7169        );
7170        let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
7171        let e = _mm512_set_ps(
7172            1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
7173            ANDN_32, 15., ANDN_32,
7174        );
7175        assert_eq_m512(r, e);
7176    }
7177
7178    #[simd_test(enable = "avx512dq")]
7179    unsafe fn test_mm512_maskz_andnot_ps() {
7180        let a = _mm512_set1_ps(OPRND1_32);
7181        let b = _mm512_set1_ps(OPRND2_32);
7182        let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
7183        let e = _mm512_set_ps(
7184            0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
7185            ANDN_32, 0., ANDN_32,
7186        );
7187        assert_eq_m512(r, e);
7188    }
7189
7190    #[simd_test(enable = "avx512dq,avx512vl")]
7191    unsafe fn test_mm_mask_or_pd() {
7192        let a = _mm_set1_pd(OPRND1_64);
7193        let b = _mm_set1_pd(OPRND2_64);
7194        let src = _mm_set_pd(1., 2.);
7195        let r = _mm_mask_or_pd(src, 0b01, a, b);
7196        let e = _mm_set_pd(1., OR_64);
7197        assert_eq_m128d(r, e);
7198    }
7199
7200    #[simd_test(enable = "avx512dq,avx512vl")]
7201    unsafe fn test_mm_maskz_or_pd() {
7202        let a = _mm_set1_pd(OPRND1_64);
7203        let b = _mm_set1_pd(OPRND2_64);
7204        let r = _mm_maskz_or_pd(0b01, a, b);
7205        let e = _mm_set_pd(0.0, OR_64);
7206        assert_eq_m128d(r, e);
7207    }
7208
7209    #[simd_test(enable = "avx512dq,avx512vl")]
7210    unsafe fn test_mm256_mask_or_pd() {
7211        let a = _mm256_set1_pd(OPRND1_64);
7212        let b = _mm256_set1_pd(OPRND2_64);
7213        let src = _mm256_set_pd(1., 2., 3., 4.);
7214        let r = _mm256_mask_or_pd(src, 0b0101, a, b);
7215        let e = _mm256_set_pd(1., OR_64, 3., OR_64);
7216        assert_eq_m256d(r, e);
7217    }
7218
7219    #[simd_test(enable = "avx512dq,avx512vl")]
7220    unsafe fn test_mm256_maskz_or_pd() {
7221        let a = _mm256_set1_pd(OPRND1_64);
7222        let b = _mm256_set1_pd(OPRND2_64);
7223        let r = _mm256_maskz_or_pd(0b0101, a, b);
7224        let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64);
7225        assert_eq_m256d(r, e);
7226    }
7227
7228    #[simd_test(enable = "avx512dq")]
7229    unsafe fn test_mm512_or_pd() {
7230        let a = _mm512_set1_pd(OPRND1_64);
7231        let b = _mm512_set1_pd(OPRND2_64);
7232        let r = _mm512_or_pd(a, b);
7233        let e = _mm512_set1_pd(OR_64);
7234        assert_eq_m512d(r, e);
7235    }
7236
7237    #[simd_test(enable = "avx512dq")]
7238    unsafe fn test_mm512_mask_or_pd() {
7239        let a = _mm512_set1_pd(OPRND1_64);
7240        let b = _mm512_set1_pd(OPRND2_64);
7241        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7242        let r = _mm512_mask_or_pd(src, 0b01010101, a, b);
7243        let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64);
7244        assert_eq_m512d(r, e);
7245    }
7246
7247    #[simd_test(enable = "avx512dq")]
7248    unsafe fn test_mm512_maskz_or_pd() {
7249        let a = _mm512_set1_pd(OPRND1_64);
7250        let b = _mm512_set1_pd(OPRND2_64);
7251        let r = _mm512_maskz_or_pd(0b01010101, a, b);
7252        let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64);
7253        assert_eq_m512d(r, e);
7254    }
7255
7256    #[simd_test(enable = "avx512dq,avx512vl")]
7257    unsafe fn test_mm_mask_or_ps() {
7258        let a = _mm_set1_ps(OPRND1_32);
7259        let b = _mm_set1_ps(OPRND2_32);
7260        let src = _mm_set_ps(1., 2., 3., 4.);
7261        let r = _mm_mask_or_ps(src, 0b0101, a, b);
7262        let e = _mm_set_ps(1., OR_32, 3., OR_32);
7263        assert_eq_m128(r, e);
7264    }
7265
7266    #[simd_test(enable = "avx512dq,avx512vl")]
7267    unsafe fn test_mm_maskz_or_ps() {
7268        let a = _mm_set1_ps(OPRND1_32);
7269        let b = _mm_set1_ps(OPRND2_32);
7270        let r = _mm_maskz_or_ps(0b0101, a, b);
7271        let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32);
7272        assert_eq_m128(r, e);
7273    }
7274
7275    #[simd_test(enable = "avx512dq,avx512vl")]
7276    unsafe fn test_mm256_mask_or_ps() {
7277        let a = _mm256_set1_ps(OPRND1_32);
7278        let b = _mm256_set1_ps(OPRND2_32);
7279        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7280        let r = _mm256_mask_or_ps(src, 0b01010101, a, b);
7281        let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32);
7282        assert_eq_m256(r, e);
7283    }
7284
7285    #[simd_test(enable = "avx512dq,avx512vl")]
7286    unsafe fn test_mm256_maskz_or_ps() {
7287        let a = _mm256_set1_ps(OPRND1_32);
7288        let b = _mm256_set1_ps(OPRND2_32);
7289        let r = _mm256_maskz_or_ps(0b01010101, a, b);
7290        let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32);
7291        assert_eq_m256(r, e);
7292    }
7293
7294    #[simd_test(enable = "avx512dq")]
7295    unsafe fn test_mm512_or_ps() {
7296        let a = _mm512_set1_ps(OPRND1_32);
7297        let b = _mm512_set1_ps(OPRND2_32);
7298        let r = _mm512_or_ps(a, b);
7299        let e = _mm512_set1_ps(OR_32);
7300        assert_eq_m512(r, e);
7301    }
7302
7303    #[simd_test(enable = "avx512dq")]
7304    unsafe fn test_mm512_mask_or_ps() {
7305        let a = _mm512_set1_ps(OPRND1_32);
7306        let b = _mm512_set1_ps(OPRND2_32);
7307        let src = _mm512_set_ps(
7308            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7309        );
7310        let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
7311        let e = _mm512_set_ps(
7312            1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
7313            OR_32,
7314        );
7315        assert_eq_m512(r, e);
7316    }
7317
7318    #[simd_test(enable = "avx512dq")]
7319    unsafe fn test_mm512_maskz_or_ps() {
7320        let a = _mm512_set1_ps(OPRND1_32);
7321        let b = _mm512_set1_ps(OPRND2_32);
7322        let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
7323        let e = _mm512_set_ps(
7324            0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
7325        );
7326        assert_eq_m512(r, e);
7327    }
7328
7329    #[simd_test(enable = "avx512dq,avx512vl")]
7330    unsafe fn test_mm_mask_xor_pd() {
7331        let a = _mm_set1_pd(OPRND1_64);
7332        let b = _mm_set1_pd(OPRND2_64);
7333        let src = _mm_set_pd(1., 2.);
7334        let r = _mm_mask_xor_pd(src, 0b01, a, b);
7335        let e = _mm_set_pd(1., XOR_64);
7336        assert_eq_m128d(r, e);
7337    }
7338
7339    #[simd_test(enable = "avx512dq,avx512vl")]
7340    unsafe fn test_mm_maskz_xor_pd() {
7341        let a = _mm_set1_pd(OPRND1_64);
7342        let b = _mm_set1_pd(OPRND2_64);
7343        let r = _mm_maskz_xor_pd(0b01, a, b);
7344        let e = _mm_set_pd(0.0, XOR_64);
7345        assert_eq_m128d(r, e);
7346    }
7347
7348    #[simd_test(enable = "avx512dq,avx512vl")]
7349    unsafe fn test_mm256_mask_xor_pd() {
7350        let a = _mm256_set1_pd(OPRND1_64);
7351        let b = _mm256_set1_pd(OPRND2_64);
7352        let src = _mm256_set_pd(1., 2., 3., 4.);
7353        let r = _mm256_mask_xor_pd(src, 0b0101, a, b);
7354        let e = _mm256_set_pd(1., XOR_64, 3., XOR_64);
7355        assert_eq_m256d(r, e);
7356    }
7357
7358    #[simd_test(enable = "avx512dq,avx512vl")]
7359    unsafe fn test_mm256_maskz_xor_pd() {
7360        let a = _mm256_set1_pd(OPRND1_64);
7361        let b = _mm256_set1_pd(OPRND2_64);
7362        let r = _mm256_maskz_xor_pd(0b0101, a, b);
7363        let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64);
7364        assert_eq_m256d(r, e);
7365    }
7366
7367    #[simd_test(enable = "avx512dq")]
7368    unsafe fn test_mm512_xor_pd() {
7369        let a = _mm512_set1_pd(OPRND1_64);
7370        let b = _mm512_set1_pd(OPRND2_64);
7371        let r = _mm512_xor_pd(a, b);
7372        let e = _mm512_set1_pd(XOR_64);
7373        assert_eq_m512d(r, e);
7374    }
7375
7376    #[simd_test(enable = "avx512dq")]
7377    unsafe fn test_mm512_mask_xor_pd() {
7378        let a = _mm512_set1_pd(OPRND1_64);
7379        let b = _mm512_set1_pd(OPRND2_64);
7380        let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7381        let r = _mm512_mask_xor_pd(src, 0b01010101, a, b);
7382        let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64);
7383        assert_eq_m512d(r, e);
7384    }
7385
7386    #[simd_test(enable = "avx512dq")]
7387    unsafe fn test_mm512_maskz_xor_pd() {
7388        let a = _mm512_set1_pd(OPRND1_64);
7389        let b = _mm512_set1_pd(OPRND2_64);
7390        let r = _mm512_maskz_xor_pd(0b01010101, a, b);
7391        let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64);
7392        assert_eq_m512d(r, e);
7393    }
7394
7395    #[simd_test(enable = "avx512dq,avx512vl")]
7396    unsafe fn test_mm_mask_xor_ps() {
7397        let a = _mm_set1_ps(OPRND1_32);
7398        let b = _mm_set1_ps(OPRND2_32);
7399        let src = _mm_set_ps(1., 2., 3., 4.);
7400        let r = _mm_mask_xor_ps(src, 0b0101, a, b);
7401        let e = _mm_set_ps(1., XOR_32, 3., XOR_32);
7402        assert_eq_m128(r, e);
7403    }
7404
7405    #[simd_test(enable = "avx512dq,avx512vl")]
7406    unsafe fn test_mm_maskz_xor_ps() {
7407        let a = _mm_set1_ps(OPRND1_32);
7408        let b = _mm_set1_ps(OPRND2_32);
7409        let r = _mm_maskz_xor_ps(0b0101, a, b);
7410        let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32);
7411        assert_eq_m128(r, e);
7412    }
7413
7414    #[simd_test(enable = "avx512dq,avx512vl")]
7415    unsafe fn test_mm256_mask_xor_ps() {
7416        let a = _mm256_set1_ps(OPRND1_32);
7417        let b = _mm256_set1_ps(OPRND2_32);
7418        let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7419        let r = _mm256_mask_xor_ps(src, 0b01010101, a, b);
7420        let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32);
7421        assert_eq_m256(r, e);
7422    }
7423
7424    #[simd_test(enable = "avx512dq,avx512vl")]
7425    unsafe fn test_mm256_maskz_xor_ps() {
7426        let a = _mm256_set1_ps(OPRND1_32);
7427        let b = _mm256_set1_ps(OPRND2_32);
7428        let r = _mm256_maskz_xor_ps(0b01010101, a, b);
7429        let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32);
7430        assert_eq_m256(r, e);
7431    }
7432
7433    #[simd_test(enable = "avx512dq")]
7434    unsafe fn test_mm512_xor_ps() {
7435        let a = _mm512_set1_ps(OPRND1_32);
7436        let b = _mm512_set1_ps(OPRND2_32);
7437        let r = _mm512_xor_ps(a, b);
7438        let e = _mm512_set1_ps(XOR_32);
7439        assert_eq_m512(r, e);
7440    }
7441
7442    #[simd_test(enable = "avx512dq")]
7443    unsafe fn test_mm512_mask_xor_ps() {
7444        let a = _mm512_set1_ps(OPRND1_32);
7445        let b = _mm512_set1_ps(OPRND2_32);
7446        let src = _mm512_set_ps(
7447            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7448        );
7449        let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
7450        let e = _mm512_set_ps(
7451            1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
7452            15., XOR_32,
7453        );
7454        assert_eq_m512(r, e);
7455    }
7456
7457    #[simd_test(enable = "avx512dq")]
7458    unsafe fn test_mm512_maskz_xor_ps() {
7459        let a = _mm512_set1_ps(OPRND1_32);
7460        let b = _mm512_set1_ps(OPRND2_32);
7461        let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
7462        let e = _mm512_set_ps(
7463            0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
7464            XOR_32,
7465        );
7466        assert_eq_m512(r, e);
7467    }
7468
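    // The `broadcast_*x2` tests below rely on these intrinsics replicating the
    // two lowest lanes of the source. For `_mm_set_ps(1., 2., 3., 4.)` those
    // lanes hold 4. and 3., so the expected pattern reads `3., 4.` repeated
    // when written in `_mm256_set_ps` (high-to-low) order.
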
7469    #[simd_test(enable = "avx512dq,avx512vl")]
7470    unsafe fn test_mm256_broadcast_f32x2() {
7471        let a = _mm_set_ps(1., 2., 3., 4.);
7472        let r = _mm256_broadcast_f32x2(a);
7473        let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
7474        assert_eq_m256(r, e);
7475    }
7476
7477    #[simd_test(enable = "avx512dq,avx512vl")]
7478    unsafe fn test_mm256_mask_broadcast_f32x2() {
7479        let a = _mm_set_ps(1., 2., 3., 4.);
7480        let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
7481        let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
7482        let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
7483        assert_eq_m256(r, e);
7484    }
7485
7486    #[simd_test(enable = "avx512dq,avx512vl")]
7487    unsafe fn test_mm256_maskz_broadcast_f32x2() {
7488        let a = _mm_set_ps(1., 2., 3., 4.);
7489        let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
7490        let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
7491        assert_eq_m256(r, e);
7492    }
7493
7494    #[simd_test(enable = "avx512dq")]
7495    unsafe fn test_mm512_broadcast_f32x2() {
7496        let a = _mm_set_ps(1., 2., 3., 4.);
7497        let r = _mm512_broadcast_f32x2(a);
7498        let e = _mm512_set_ps(
7499            3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
7500        );
7501        assert_eq_m512(r, e);
7502    }
7503
7504    #[simd_test(enable = "avx512dq")]
7505    unsafe fn test_mm512_mask_broadcast_f32x2() {
7506        let a = _mm_set_ps(1., 2., 3., 4.);
7507        let b = _mm512_set_ps(
7508            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
7509        );
7510        let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
7511        let e = _mm512_set_ps(
7512            5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
7513        );
7514        assert_eq_m512(r, e);
7515    }
7516
7517    #[simd_test(enable = "avx512dq")]
7518    unsafe fn test_mm512_maskz_broadcast_f32x2() {
7519        let a = _mm_set_ps(1., 2., 3., 4.);
7520        let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
7521        let e = _mm512_set_ps(
7522            0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
7523        );
7524        assert_eq_m512(r, e);
7525    }
7526
7527    #[simd_test(enable = "avx512dq")]
7528    unsafe fn test_mm512_broadcast_f32x8() {
7529        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7530        let r = _mm512_broadcast_f32x8(a);
7531        let e = _mm512_set_ps(
7532            1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
7533        );
7534        assert_eq_m512(r, e);
7535    }
7536
7537    #[simd_test(enable = "avx512dq")]
7538    unsafe fn test_mm512_mask_broadcast_f32x8() {
7539        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7540        let b = _mm512_set_ps(
7541            9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
7542        );
7543        let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
7544        let e = _mm512_set_ps(
7545            9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
7546        );
7547        assert_eq_m512(r, e);
7548    }
7549
7550    #[simd_test(enable = "avx512dq")]
7551    unsafe fn test_mm512_maskz_broadcast_f32x8() {
7552        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7553        let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
7554        let e = _mm512_set_ps(
7555            0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
7556        );
7557        assert_eq_m512(r, e);
7558    }
7559
7560    #[simd_test(enable = "avx512dq,avx512vl")]
7561    unsafe fn test_mm256_broadcast_f64x2() {
7562        let a = _mm_set_pd(1., 2.);
7563        let r = _mm256_broadcast_f64x2(a);
7564        let e = _mm256_set_pd(1., 2., 1., 2.);
7565        assert_eq_m256d(r, e);
7566    }
7567
7568    #[simd_test(enable = "avx512dq,avx512vl")]
7569    unsafe fn test_mm256_mask_broadcast_f64x2() {
7570        let a = _mm_set_pd(1., 2.);
7571        let b = _mm256_set_pd(3., 4., 5., 6.);
7572        let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
7573        let e = _mm256_set_pd(3., 2., 1., 6.);
7574        assert_eq_m256d(r, e);
7575    }
7576
7577    #[simd_test(enable = "avx512dq,avx512vl")]
7578    unsafe fn test_mm256_maskz_broadcast_f64x2() {
7579        let a = _mm_set_pd(1., 2.);
7580        let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
7581        let e = _mm256_set_pd(0., 2., 1., 0.);
7582        assert_eq_m256d(r, e);
7583    }
7584
7585    #[simd_test(enable = "avx512dq")]
7586    unsafe fn test_mm512_broadcast_f64x2() {
7587        let a = _mm_set_pd(1., 2.);
7588        let r = _mm512_broadcast_f64x2(a);
7589        let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
7590        assert_eq_m512d(r, e);
7591    }
7592
7593    #[simd_test(enable = "avx512dq")]
7594    unsafe fn test_mm512_mask_broadcast_f64x2() {
7595        let a = _mm_set_pd(1., 2.);
7596        let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
7597        let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
7598        let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
7599        assert_eq_m512d(r, e);
7600    }
7601
7602    #[simd_test(enable = "avx512dq")]
7603    unsafe fn test_mm512_maskz_broadcast_f64x2() {
7604        let a = _mm_set_pd(1., 2.);
7605        let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
7606        let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
7607        assert_eq_m512d(r, e);
7608    }
7609
7610    #[simd_test(enable = "avx512dq,avx512vl")]
7611    unsafe fn test_mm_broadcast_i32x2() {
7612        let a = _mm_set_epi32(1, 2, 3, 4);
7613        let r = _mm_broadcast_i32x2(a);
7614        let e = _mm_set_epi32(3, 4, 3, 4);
7615        assert_eq_m128i(r, e);
7616    }
7617
7618    #[simd_test(enable = "avx512dq,avx512vl")]
7619    unsafe fn test_mm_mask_broadcast_i32x2() {
7620        let a = _mm_set_epi32(1, 2, 3, 4);
7621        let b = _mm_set_epi32(5, 6, 7, 8);
7622        let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
7623        let e = _mm_set_epi32(5, 4, 3, 8);
7624        assert_eq_m128i(r, e);
7625    }
7626
7627    #[simd_test(enable = "avx512dq,avx512vl")]
7628    unsafe fn test_mm_maskz_broadcast_i32x2() {
7629        let a = _mm_set_epi32(1, 2, 3, 4);
7630        let r = _mm_maskz_broadcast_i32x2(0b0110, a);
7631        let e = _mm_set_epi32(0, 4, 3, 0);
7632        assert_eq_m128i(r, e);
7633    }
7634
7635    #[simd_test(enable = "avx512dq,avx512vl")]
7636    unsafe fn test_mm256_broadcast_i32x2() {
7637        let a = _mm_set_epi32(1, 2, 3, 4);
7638        let r = _mm256_broadcast_i32x2(a);
7639        let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
7640        assert_eq_m256i(r, e);
7641    }
7642
7643    #[simd_test(enable = "avx512dq,avx512vl")]
7644    unsafe fn test_mm256_mask_broadcast_i32x2() {
7645        let a = _mm_set_epi32(1, 2, 3, 4);
7646        let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
7647        let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
7648        let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
7649        assert_eq_m256i(r, e);
7650    }
7651
7652    #[simd_test(enable = "avx512dq,avx512vl")]
7653    unsafe fn test_mm256_maskz_broadcast_i32x2() {
7654        let a = _mm_set_epi32(1, 2, 3, 4);
7655        let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
7656        let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
7657        assert_eq_m256i(r, e);
7658    }
7659
7660    #[simd_test(enable = "avx512dq")]
7661    unsafe fn test_mm512_broadcast_i32x2() {
7662        let a = _mm_set_epi32(1, 2, 3, 4);
7663        let r = _mm512_broadcast_i32x2(a);
7664        let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
7665        assert_eq_m512i(r, e);
7666    }
7667
7668    #[simd_test(enable = "avx512dq")]
7669    unsafe fn test_mm512_mask_broadcast_i32x2() {
7670        let a = _mm_set_epi32(1, 2, 3, 4);
7671        let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
7672        let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
7673        let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
7674        assert_eq_m512i(r, e);
7675    }
7676
7677    #[simd_test(enable = "avx512dq")]
7678    unsafe fn test_mm512_maskz_broadcast_i32x2() {
7679        let a = _mm_set_epi32(1, 2, 3, 4);
7680        let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
7681        let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
7682        assert_eq_m512i(r, e);
7683    }
7684
7685    #[simd_test(enable = "avx512dq")]
7686    unsafe fn test_mm512_broadcast_i32x8() {
7687        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
7688        let r = _mm512_broadcast_i32x8(a);
7689        let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
7690        assert_eq_m512i(r, e);
7691    }
7692
7693    #[simd_test(enable = "avx512dq")]
7694    unsafe fn test_mm512_mask_broadcast_i32x8() {
7695        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
7696        let b = _mm512_set_epi32(
7697            9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
7698        );
7699        let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
7700        let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
7701        assert_eq_m512i(r, e);
7702    }
7703
7704    #[simd_test(enable = "avx512dq")]
7705    unsafe fn test_mm512_maskz_broadcast_i32x8() {
7706        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
7707        let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
7708        let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
7709        assert_eq_m512i(r, e);
7710    }
7711
7712    #[simd_test(enable = "avx512dq,avx512vl")]
7713    unsafe fn test_mm256_broadcast_i64x2() {
7714        let a = _mm_set_epi64x(1, 2);
7715        let r = _mm256_broadcast_i64x2(a);
7716        let e = _mm256_set_epi64x(1, 2, 1, 2);
7717        assert_eq_m256i(r, e);
7718    }
7719
7720    #[simd_test(enable = "avx512dq,avx512vl")]
7721    unsafe fn test_mm256_mask_broadcast_i64x2() {
7722        let a = _mm_set_epi64x(1, 2);
7723        let b = _mm256_set_epi64x(3, 4, 5, 6);
7724        let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
7725        let e = _mm256_set_epi64x(3, 2, 1, 6);
7726        assert_eq_m256i(r, e);
7727    }
7728
7729    #[simd_test(enable = "avx512dq,avx512vl")]
7730    unsafe fn test_mm256_maskz_broadcast_i64x2() {
7731        let a = _mm_set_epi64x(1, 2);
7732        let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
7733        let e = _mm256_set_epi64x(0, 2, 1, 0);
7734        assert_eq_m256i(r, e);
7735    }
7736
7737    #[simd_test(enable = "avx512dq")]
7738    unsafe fn test_mm512_broadcast_i64x2() {
7739        let a = _mm_set_epi64x(1, 2);
7740        let r = _mm512_broadcast_i64x2(a);
7741        let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
7742        assert_eq_m512i(r, e);
7743    }
7744
7745    #[simd_test(enable = "avx512dq")]
7746    unsafe fn test_mm512_mask_broadcast_i64x2() {
7747        let a = _mm_set_epi64x(1, 2);
7748        let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
7749        let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
7750        let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
7751        assert_eq_m512i(r, e);
7752    }
7753
7754    #[simd_test(enable = "avx512dq")]
7755    unsafe fn test_mm512_maskz_broadcast_i64x2() {
7756        let a = _mm_set_epi64x(1, 2);
7757        let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
7758        let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
7759        assert_eq_m512i(r, e);
7760    }
7761
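    // `_mm512_extractf32x8_ps::<1>` selects the upper 256-bit half (elements 8..=15), i.e. the
    // eight values written first in `_mm512_set_ps`.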
7762    #[simd_test(enable = "avx512dq")]
7763    unsafe fn test_mm512_extractf32x8_ps() {
7764        let a = _mm512_set_ps(
7765            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7766        );
7767        let r = _mm512_extractf32x8_ps::<1>(a);
7768        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7769        assert_eq_m256(r, e);
7770    }
7771
7772    #[simd_test(enable = "avx512dq")]
7773    unsafe fn test_mm512_mask_extractf32x8_ps() {
7774        let a = _mm512_set_ps(
7775            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7776        );
7777        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
7778        let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
7779        let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
7780        assert_eq_m256(r, e);
7781    }
7782
7783    #[simd_test(enable = "avx512dq")]
7784    unsafe fn test_mm512_maskz_extractf32x8_ps() {
7785        let a = _mm512_set_ps(
7786            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7787        );
7788        let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
7789        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
7790        assert_eq_m256(r, e);
7791    }
7792
7793    #[simd_test(enable = "avx512dq,avx512vl")]
7794    unsafe fn test_mm256_extractf64x2_pd() {
7795        let a = _mm256_set_pd(1., 2., 3., 4.);
7796        let r = _mm256_extractf64x2_pd::<1>(a);
7797        let e = _mm_set_pd(1., 2.);
7798        assert_eq_m128d(r, e);
7799    }
7800
7801    #[simd_test(enable = "avx512dq,avx512vl")]
7802    unsafe fn test_mm256_mask_extractf64x2_pd() {
7803        let a = _mm256_set_pd(1., 2., 3., 4.);
7804        let b = _mm_set_pd(5., 6.);
7805        let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
7806        let e = _mm_set_pd(5., 2.);
7807        assert_eq_m128d(r, e);
7808    }
7809
7810    #[simd_test(enable = "avx512dq,avx512vl")]
7811    unsafe fn test_mm256_maskz_extractf64x2_pd() {
7812        let a = _mm256_set_pd(1., 2., 3., 4.);
7813        let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
7814        let e = _mm_set_pd(0., 2.);
7815        assert_eq_m128d(r, e);
7816    }
7817
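    // Index 2 selects the third 128-bit lane of the 512-bit source, i.e. elements 4 and 5.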
7818    #[simd_test(enable = "avx512dq")]
7819    unsafe fn test_mm512_extractf64x2_pd() {
7820        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7821        let r = _mm512_extractf64x2_pd::<2>(a);
7822        let e = _mm_set_pd(3., 4.);
7823        assert_eq_m128d(r, e);
7824    }
7825
7826    #[simd_test(enable = "avx512dq")]
7827    unsafe fn test_mm512_mask_extractf64x2_pd() {
7828        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7829        let b = _mm_set_pd(9., 10.);
7830        let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
7831        let e = _mm_set_pd(9., 4.);
7832        assert_eq_m128d(r, e);
7833    }
7834
7835    #[simd_test(enable = "avx512dq")]
7836    unsafe fn test_mm512_maskz_extractf64x2_pd() {
7837        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7838        let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
7839        let e = _mm_set_pd(0., 4.);
7840        assert_eq_m128d(r, e);
7841    }
7842
7843    #[simd_test(enable = "avx512dq")]
7844    unsafe fn test_mm512_extracti32x8_epi32() {
7845        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
7846        let r = _mm512_extracti32x8_epi32::<1>(a);
7847        let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
7848        assert_eq_m256i(r, e);
7849    }
7850
7851    #[simd_test(enable = "avx512dq")]
7852    unsafe fn test_mm512_mask_extracti32x8_epi32() {
7853        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
7854        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
7855        let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
7856        let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
7857        assert_eq_m256i(r, e);
7858    }
7859
7860    #[simd_test(enable = "avx512dq")]
7861    unsafe fn test_mm512_maskz_extracti32x8_epi32() {
7862        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
7863        let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
7864        let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
7865        assert_eq_m256i(r, e);
7866    }
7867
7868    #[simd_test(enable = "avx512dq,avx512vl")]
7869    unsafe fn test_mm256_extracti64x2_epi64() {
7870        let a = _mm256_set_epi64x(1, 2, 3, 4);
7871        let r = _mm256_extracti64x2_epi64::<1>(a);
7872        let e = _mm_set_epi64x(1, 2);
7873        assert_eq_m128i(r, e);
7874    }
7875
7876    #[simd_test(enable = "avx512dq,avx512vl")]
7877    unsafe fn test_mm256_mask_extracti64x2_epi64() {
7878        let a = _mm256_set_epi64x(1, 2, 3, 4);
7879        let b = _mm_set_epi64x(5, 6);
7880        let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
7881        let e = _mm_set_epi64x(5, 2);
7882        assert_eq_m128i(r, e);
7883    }
7884
7885    #[simd_test(enable = "avx512dq,avx512vl")]
7886    unsafe fn test_mm256_maskz_extracti64x2_epi64() {
7887        let a = _mm256_set_epi64x(1, 2, 3, 4);
7888        let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
7889        let e = _mm_set_epi64x(0, 2);
7890        assert_eq_m128i(r, e);
7891    }
7892
7893    #[simd_test(enable = "avx512dq")]
7894    unsafe fn test_mm512_extracti64x2_epi64() {
7895        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
7896        let r = _mm512_extracti64x2_epi64::<2>(a);
7897        let e = _mm_set_epi64x(3, 4);
7898        assert_eq_m128i(r, e);
7899    }
7900
7901    #[simd_test(enable = "avx512dq")]
7902    unsafe fn test_mm512_mask_extracti64x2_epi64() {
7903        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
7904        let b = _mm_set_epi64x(9, 10);
7905        let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
7906        let e = _mm_set_epi64x(9, 4);
7907        assert_eq_m128i(r, e);
7908    }
7909
7910    #[simd_test(enable = "avx512dq")]
7911    unsafe fn test_mm512_maskz_extracti64x2_epi64() {
7912        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
7913        let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
7914        let e = _mm_set_epi64x(0, 4);
7915        assert_eq_m128i(r, e);
7916    }
7917
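    // `_mm512_insertf32x8::<1>` overwrites the upper 256-bit half of `a` with `b` and keeps the
    // lower half unchanged.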
7918    #[simd_test(enable = "avx512dq")]
7919    unsafe fn test_mm512_insertf32x8() {
7920        let a = _mm512_set_ps(
7921            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7922        );
7923        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
7924        let r = _mm512_insertf32x8::<1>(a, b);
7925        let e = _mm512_set_ps(
7926            17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
7927        );
7928        assert_eq_m512(r, e);
7929    }
7930
7931    #[simd_test(enable = "avx512dq")]
7932    unsafe fn test_mm512_mask_insertf32x8() {
7933        let a = _mm512_set_ps(
7934            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7935        );
7936        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
7937        let src = _mm512_set_ps(
7938            25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
7939        );
7940        let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
7941        let e = _mm512_set_ps(
7942            25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
7943        );
7944        assert_eq_m512(r, e);
7945    }
7946
7947    #[simd_test(enable = "avx512dq")]
7948    unsafe fn test_mm512_maskz_insertf32x8() {
7949        let a = _mm512_set_ps(
7950            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7951        );
7952        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
7953        let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
7954        let e = _mm512_set_ps(
7955            0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
7956        );
7957        assert_eq_m512(r, e);
7958    }
7959
7960    #[simd_test(enable = "avx512dq,avx512vl")]
7961    unsafe fn test_mm256_insertf64x2() {
7962        let a = _mm256_set_pd(1., 2., 3., 4.);
7963        let b = _mm_set_pd(5., 6.);
7964        let r = _mm256_insertf64x2::<1>(a, b);
7965        let e = _mm256_set_pd(5., 6., 3., 4.);
7966        assert_eq_m256d(r, e);
7967    }
7968
7969    #[simd_test(enable = "avx512dq,avx512vl")]
7970    unsafe fn test_mm256_mask_insertf64x2() {
7971        let a = _mm256_set_pd(1., 2., 3., 4.);
7972        let b = _mm_set_pd(5., 6.);
7973        let src = _mm256_set_pd(7., 8., 9., 10.);
7974        let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
7975        let e = _mm256_set_pd(7., 6., 3., 10.);
7976        assert_eq_m256d(r, e);
7977    }
7978
7979    #[simd_test(enable = "avx512dq,avx512vl")]
7980    unsafe fn test_mm256_maskz_insertf64x2() {
7981        let a = _mm256_set_pd(1., 2., 3., 4.);
7982        let b = _mm_set_pd(5., 6.);
7983        let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
7984        let e = _mm256_set_pd(0., 6., 3., 0.);
7985        assert_eq_m256d(r, e);
7986    }
7987
7988    #[simd_test(enable = "avx512dq")]
7989    unsafe fn test_mm512_insertf64x2() {
7990        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7991        let b = _mm_set_pd(9., 10.);
7992        let r = _mm512_insertf64x2::<2>(a, b);
7993        let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
7994        assert_eq_m512d(r, e);
7995    }
7996
7997    #[simd_test(enable = "avx512dq")]
7998    unsafe fn test_mm512_mask_insertf64x2() {
7999        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8000        let b = _mm_set_pd(9., 10.);
8001        let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
8002        let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
8003        let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
8004        assert_eq_m512d(r, e);
8005    }
8006
8007    #[simd_test(enable = "avx512dq")]
8008    unsafe fn test_mm512_maskz_insertf64x2() {
8009        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8010        let b = _mm_set_pd(9., 10.);
8011        let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
8012        let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
8013        assert_eq_m512d(r, e);
8014    }
8015
8016    #[simd_test(enable = "avx512dq")]
8017    unsafe fn test_mm512_inserti32x8() {
8018        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8019        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8020        let r = _mm512_inserti32x8::<1>(a, b);
8021        let e = _mm512_set_epi32(
8022            17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16,
8023        );
8024        assert_eq_m512i(r, e);
8025    }
8026
8027    #[simd_test(enable = "avx512dq")]
8028    unsafe fn test_mm512_mask_inserti32x8() {
8029        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8030        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8031        let src = _mm512_set_epi32(
8032            25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
8033        );
8034        let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
8035        let e = _mm512_set_epi32(
8036            25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40,
8037        );
8038        assert_eq_m512i(r, e);
8039    }
8040
8041    #[simd_test(enable = "avx512dq")]
8042    unsafe fn test_mm512_maskz_inserti32x8() {
8043        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8044        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8045        let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
8046        let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
8047        assert_eq_m512i(r, e);
8048    }
8049
8050    #[simd_test(enable = "avx512dq,avx512vl")]
8051    unsafe fn test_mm256_inserti64x2() {
8052        let a = _mm256_set_epi64x(1, 2, 3, 4);
8053        let b = _mm_set_epi64x(5, 6);
8054        let r = _mm256_inserti64x2::<1>(a, b);
8055        let e = _mm256_set_epi64x(5, 6, 3, 4);
8056        assert_eq_m256i(r, e);
8057    }
8058
8059    #[simd_test(enable = "avx512dq,avx512vl")]
8060    unsafe fn test_mm256_mask_inserti64x2() {
8061        let a = _mm256_set_epi64x(1, 2, 3, 4);
8062        let b = _mm_set_epi64x(5, 6);
8063        let src = _mm256_set_epi64x(7, 8, 9, 10);
8064        let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
8065        let e = _mm256_set_epi64x(7, 6, 3, 10);
8066        assert_eq_m256i(r, e);
8067    }
8068
8069    #[simd_test(enable = "avx512dq,avx512vl")]
8070    unsafe fn test_mm256_maskz_inserti64x2() {
8071        let a = _mm256_set_epi64x(1, 2, 3, 4);
8072        let b = _mm_set_epi64x(5, 6);
8073        let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
8074        let e = _mm256_set_epi64x(0, 6, 3, 0);
8075        assert_eq_m256i(r, e);
8076    }
8077
8078    #[simd_test(enable = "avx512dq")]
8079    unsafe fn test_mm512_inserti64x2() {
8080        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8081        let b = _mm_set_epi64x(9, 10);
8082        let r = _mm512_inserti64x2::<2>(a, b);
8083        let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
8084        assert_eq_m512i(r, e);
8085    }
8086
8087    #[simd_test(enable = "avx512dq")]
8088    unsafe fn test_mm512_mask_inserti64x2() {
8089        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8090        let b = _mm_set_epi64x(9, 10);
8091        let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
8092        let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
8093        let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
8094        assert_eq_m512i(r, e);
8095    }
8096
8097    #[simd_test(enable = "avx512dq")]
8098    unsafe fn test_mm512_maskz_inserti64x2() {
8099        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8100        let b = _mm_set_epi64x(9, 10);
8101        let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
8102        let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
8103        assert_eq_m512i(r, e);
8104    }
8105
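    // The rounding mode chosen below is irrelevant for these inputs: 1..=8 convert exactly.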
8106    #[simd_test(enable = "avx512dq")]
8107    unsafe fn test_mm512_cvt_roundepi64_pd() {
8108        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8109        let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8110        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8111        assert_eq_m512d(r, e);
8112    }
8113
8114    #[simd_test(enable = "avx512dq")]
8115    unsafe fn test_mm512_mask_cvt_roundepi64_pd() {
8116        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8117        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8118        let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8119            b, 0b01101001, a,
8120        );
8121        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8122        assert_eq_m512d(r, e);
8123    }
8124
8125    #[simd_test(enable = "avx512dq")]
8126    unsafe fn test_mm512_maskz_cvt_roundepi64_pd() {
8127        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8128        let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8129            0b01101001, a,
8130        );
8131        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8132        assert_eq_m512d(r, e);
8133    }
8134
8135    #[simd_test(enable = "avx512dq,avx512vl")]
8136    unsafe fn test_mm_cvtepi64_pd() {
8137        let a = _mm_set_epi64x(1, 2);
8138        let r = _mm_cvtepi64_pd(a);
8139        let e = _mm_set_pd(1., 2.);
8140        assert_eq_m128d(r, e);
8141    }
8142
8143    #[simd_test(enable = "avx512dq,avx512vl")]
8144    unsafe fn test_mm_mask_cvtepi64_pd() {
8145        let a = _mm_set_epi64x(1, 2);
8146        let b = _mm_set_pd(3., 4.);
8147        let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
8148        let e = _mm_set_pd(3., 2.);
8149        assert_eq_m128d(r, e);
8150    }
8151
8152    #[simd_test(enable = "avx512dq,avx512vl")]
8153    unsafe fn test_mm_maskz_cvtepi64_pd() {
8154        let a = _mm_set_epi64x(1, 2);
8155        let r = _mm_maskz_cvtepi64_pd(0b01, a);
8156        let e = _mm_set_pd(0., 2.);
8157        assert_eq_m128d(r, e);
8158    }
8159
8160    #[simd_test(enable = "avx512dq,avx512vl")]
8161    unsafe fn test_mm256_cvtepi64_pd() {
8162        let a = _mm256_set_epi64x(1, 2, 3, 4);
8163        let r = _mm256_cvtepi64_pd(a);
8164        let e = _mm256_set_pd(1., 2., 3., 4.);
8165        assert_eq_m256d(r, e);
8166    }
8167
8168    #[simd_test(enable = "avx512dq,avx512vl")]
8169    unsafe fn test_mm256_mask_cvtepi64_pd() {
8170        let a = _mm256_set_epi64x(1, 2, 3, 4);
8171        let b = _mm256_set_pd(5., 6., 7., 8.);
8172        let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
8173        let e = _mm256_set_pd(5., 2., 3., 8.);
8174        assert_eq_m256d(r, e);
8175    }
8176
8177    #[simd_test(enable = "avx512dq,avx512vl")]
8178    unsafe fn test_mm256_maskz_cvtepi64_pd() {
8179        let a = _mm256_set_epi64x(1, 2, 3, 4);
8180        let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
8181        let e = _mm256_set_pd(0., 2., 3., 0.);
8182        assert_eq_m256d(r, e);
8183    }
8184
8185    #[simd_test(enable = "avx512dq")]
8186    unsafe fn test_mm512_cvtepi64_pd() {
8187        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8188        let r = _mm512_cvtepi64_pd(a);
8189        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8190        assert_eq_m512d(r, e);
8191    }
8192
8193    #[simd_test(enable = "avx512dq")]
8194    unsafe fn test_mm512_mask_cvtepi64_pd() {
8195        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8196        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8197        let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
8198        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8199        assert_eq_m512d(r, e);
8200    }
8201
8202    #[simd_test(enable = "avx512dq")]
8203    unsafe fn test_mm512_maskz_cvtepi64_pd() {
8204        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8205        let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
8206        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8207        assert_eq_m512d(r, e);
8208    }
8209
8210    #[simd_test(enable = "avx512dq")]
8211    unsafe fn test_mm512_cvt_roundepi64_ps() {
8212        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8213        let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8214        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8215        assert_eq_m256(r, e);
8216    }
8217
8218    #[simd_test(enable = "avx512dq")]
8219    unsafe fn test_mm512_mask_cvt_roundepi64_ps() {
8220        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8221        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8222        let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8223            b, 0b01101001, a,
8224        );
8225        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8226        assert_eq_m256(r, e);
8227    }
8228
8229    #[simd_test(enable = "avx512dq")]
8230    unsafe fn test_mm512_maskz_cvt_roundepi64_ps() {
8231        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8232        let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8233            0b01101001, a,
8234        );
8235        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8236        assert_eq_m256(r, e);
8237    }
8238
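    // The 128-bit `cvtepi64_ps` result holds two f32 values in the low lanes; the upper two
    // lanes are zeroed, as the expected vectors below show.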
8239    #[simd_test(enable = "avx512dq,avx512vl")]
8240    unsafe fn test_mm_cvtepi64_ps() {
8241        let a = _mm_set_epi64x(1, 2);
8242        let r = _mm_cvtepi64_ps(a);
8243        let e = _mm_set_ps(0., 0., 1., 2.);
8244        assert_eq_m128(r, e);
8245    }
8246
8247    #[simd_test(enable = "avx512dq,avx512vl")]
8248    unsafe fn test_mm_mask_cvtepi64_ps() {
8249        let a = _mm_set_epi64x(1, 2);
8250        let b = _mm_set_ps(3., 4., 5., 6.);
8251        let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
8252        let e = _mm_set_ps(0., 0., 5., 2.);
8253        assert_eq_m128(r, e);
8254    }
8255
8256    #[simd_test(enable = "avx512dq,avx512vl")]
8257    unsafe fn test_mm_maskz_cvtepi64_ps() {
8258        let a = _mm_set_epi64x(1, 2);
8259        let r = _mm_maskz_cvtepi64_ps(0b01, a);
8260        let e = _mm_set_ps(0., 0., 0., 2.);
8261        assert_eq_m128(r, e);
8262    }
8263
8264    #[simd_test(enable = "avx512dq,avx512vl")]
8265    unsafe fn test_mm256_cvtepi64_ps() {
8266        let a = _mm256_set_epi64x(1, 2, 3, 4);
8267        let r = _mm256_cvtepi64_ps(a);
8268        let e = _mm_set_ps(1., 2., 3., 4.);
8269        assert_eq_m128(r, e);
8270    }
8271
8272    #[simd_test(enable = "avx512dq,avx512vl")]
8273    unsafe fn test_mm256_mask_cvtepi64_ps() {
8274        let a = _mm256_set_epi64x(1, 2, 3, 4);
8275        let b = _mm_set_ps(5., 6., 7., 8.);
8276        let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
8277        let e = _mm_set_ps(5., 2., 3., 8.);
8278        assert_eq_m128(r, e);
8279    }
8280
8281    #[simd_test(enable = "avx512dq,avx512vl")]
8282    unsafe fn test_mm256_maskz_cvtepi64_ps() {
8283        let a = _mm256_set_epi64x(1, 2, 3, 4);
8284        let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
8285        let e = _mm_set_ps(0., 2., 3., 0.);
8286        assert_eq_m128(r, e);
8287    }
8288
8289    #[simd_test(enable = "avx512dq")]
8290    unsafe fn test_mm512_cvtepi64_ps() {
8291        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8292        let r = _mm512_cvtepi64_ps(a);
8293        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8294        assert_eq_m256(r, e);
8295    }
8296
8297    #[simd_test(enable = "avx512dq")]
8298    unsafe fn test_mm512_mask_cvtepi64_ps() {
8299        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8300        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8301        let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
8302        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8303        assert_eq_m256(r, e);
8304    }
8305
8306    #[simd_test(enable = "avx512dq")]
8307    unsafe fn test_mm512_maskz_cvtepi64_ps() {
8308        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8309        let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
8310        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8311        assert_eq_m256(r, e);
8312    }
8313
8314    #[simd_test(enable = "avx512dq")]
8315    unsafe fn test_mm512_cvt_roundepu64_pd() {
8316        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8317        let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8318        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8319        assert_eq_m512d(r, e);
8320    }
8321
8322    #[simd_test(enable = "avx512dq")]
8323    unsafe fn test_mm512_mask_cvt_roundepu64_pd() {
8324        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8325        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8326        let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8327            b, 0b01101001, a,
8328        );
8329        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8330        assert_eq_m512d(r, e);
8331    }
8332
8333    #[simd_test(enable = "avx512dq")]
8334    unsafe fn test_mm512_maskz_cvt_roundepu64_pd() {
8335        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8336        let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8337            0b01101001, a,
8338        );
8339        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8340        assert_eq_m512d(r, e);
8341    }
8342
8343    #[simd_test(enable = "avx512dq,avx512vl")]
8344    unsafe fn test_mm_cvtepu64_pd() {
8345        let a = _mm_set_epi64x(1, 2);
8346        let r = _mm_cvtepu64_pd(a);
8347        let e = _mm_set_pd(1., 2.);
8348        assert_eq_m128d(r, e);
8349    }
8350
8351    #[simd_test(enable = "avx512dq,avx512vl")]
8352    unsafe fn test_mm_mask_cvtepu64_pd() {
8353        let a = _mm_set_epi64x(1, 2);
8354        let b = _mm_set_pd(3., 4.);
8355        let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
8356        let e = _mm_set_pd(3., 2.);
8357        assert_eq_m128d(r, e);
8358    }
8359
8360    #[simd_test(enable = "avx512dq,avx512vl")]
8361    unsafe fn test_mm_maskz_cvtepu64_pd() {
8362        let a = _mm_set_epi64x(1, 2);
8363        let r = _mm_maskz_cvtepu64_pd(0b01, a);
8364        let e = _mm_set_pd(0., 2.);
8365        assert_eq_m128d(r, e);
8366    }
8367
8368    #[simd_test(enable = "avx512dq,avx512vl")]
8369    unsafe fn test_mm256_cvtepu64_pd() {
8370        let a = _mm256_set_epi64x(1, 2, 3, 4);
8371        let r = _mm256_cvtepu64_pd(a);
8372        let e = _mm256_set_pd(1., 2., 3., 4.);
8373        assert_eq_m256d(r, e);
8374    }
8375
8376    #[simd_test(enable = "avx512dq,avx512vl")]
8377    unsafe fn test_mm256_mask_cvtepu64_pd() {
8378        let a = _mm256_set_epi64x(1, 2, 3, 4);
8379        let b = _mm256_set_pd(5., 6., 7., 8.);
8380        let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
8381        let e = _mm256_set_pd(5., 2., 3., 8.);
8382        assert_eq_m256d(r, e);
8383    }
8384
8385    #[simd_test(enable = "avx512dq,avx512vl")]
8386    unsafe fn test_mm256_maskz_cvtepu64_pd() {
8387        let a = _mm256_set_epi64x(1, 2, 3, 4);
8388        let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
8389        let e = _mm256_set_pd(0., 2., 3., 0.);
8390        assert_eq_m256d(r, e);
8391    }
8392
8393    #[simd_test(enable = "avx512dq")]
8394    unsafe fn test_mm512_cvtepu64_pd() {
8395        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8396        let r = _mm512_cvtepu64_pd(a);
8397        let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8398        assert_eq_m512d(r, e);
8399    }
8400
8401    #[simd_test(enable = "avx512dq")]
8402    unsafe fn test_mm512_mask_cvtepu64_pd() {
8403        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8404        let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8405        let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
8406        let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8407        assert_eq_m512d(r, e);
8408    }
8409
8410    #[simd_test(enable = "avx512dq")]
8411    unsafe fn test_mm512_maskz_cvtepu64_pd() {
8412        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8413        let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
8414        let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8415        assert_eq_m512d(r, e);
8416    }
8417
8418    #[simd_test(enable = "avx512dq")]
8419    unsafe fn test_mm512_cvt_roundepu64_ps() {
8420        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8421        let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8422        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8423        assert_eq_m256(r, e);
8424    }
8425
8426    #[simd_test(enable = "avx512dq")]
8427    unsafe fn test_mm512_mask_cvt_roundepu64_ps() {
8428        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8429        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8430        let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8431            b, 0b01101001, a,
8432        );
8433        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8434        assert_eq_m256(r, e);
8435    }
8436
8437    #[simd_test(enable = "avx512dq")]
8438    unsafe fn test_mm512_maskz_cvt_roundepu64_ps() {
8439        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8440        let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8441            0b01101001, a,
8442        );
8443        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8444        assert_eq_m256(r, e);
8445    }
8446
8447    #[simd_test(enable = "avx512dq,avx512vl")]
8448    unsafe fn test_mm_cvtepu64_ps() {
8449        let a = _mm_set_epi64x(1, 2);
8450        let r = _mm_cvtepu64_ps(a);
8451        let e = _mm_set_ps(0., 0., 1., 2.);
8452        assert_eq_m128(r, e);
8453    }
8454
8455    #[simd_test(enable = "avx512dq,avx512vl")]
8456    unsafe fn test_mm_mask_cvtepu64_ps() {
8457        let a = _mm_set_epi64x(1, 2);
8458        let b = _mm_set_ps(3., 4., 5., 6.);
8459        let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
8460        let e = _mm_set_ps(0., 0., 5., 2.);
8461        assert_eq_m128(r, e);
8462    }
8463
8464    #[simd_test(enable = "avx512dq,avx512vl")]
8465    unsafe fn test_mm_maskz_cvtepu64_ps() {
8466        let a = _mm_set_epi64x(1, 2);
8467        let r = _mm_maskz_cvtepu64_ps(0b01, a);
8468        let e = _mm_set_ps(0., 0., 0., 2.);
8469        assert_eq_m128(r, e);
8470    }
8471
8472    #[simd_test(enable = "avx512dq,avx512vl")]
8473    unsafe fn test_mm256_cvtepu64_ps() {
8474        let a = _mm256_set_epi64x(1, 2, 3, 4);
8475        let r = _mm256_cvtepu64_ps(a);
8476        let e = _mm_set_ps(1., 2., 3., 4.);
8477        assert_eq_m128(r, e);
8478    }
8479
8480    #[simd_test(enable = "avx512dq,avx512vl")]
8481    unsafe fn test_mm256_mask_cvtepu64_ps() {
8482        let a = _mm256_set_epi64x(1, 2, 3, 4);
8483        let b = _mm_set_ps(5., 6., 7., 8.);
8484        let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
8485        let e = _mm_set_ps(5., 2., 3., 8.);
8486        assert_eq_m128(r, e);
8487    }
8488
8489    #[simd_test(enable = "avx512dq,avx512vl")]
8490    unsafe fn test_mm256_maskz_cvtepu64_ps() {
8491        let a = _mm256_set_epi64x(1, 2, 3, 4);
8492        let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
8493        let e = _mm_set_ps(0., 2., 3., 0.);
8494        assert_eq_m128(r, e);
8495    }
8496
8497    #[simd_test(enable = "avx512dq")]
8498    unsafe fn test_mm512_cvtepu64_ps() {
8499        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8500        let r = _mm512_cvtepu64_ps(a);
8501        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8502        assert_eq_m256(r, e);
8503    }
8504
8505    #[simd_test(enable = "avx512dq")]
8506    unsafe fn test_mm512_mask_cvtepu64_ps() {
8507        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8508        let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8509        let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
8510        let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8511        assert_eq_m256(r, e);
8512    }
8513
8514    #[simd_test(enable = "avx512dq")]
8515    unsafe fn test_mm512_maskz_cvtepu64_ps() {
8516        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8517        let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
8518        let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8519        assert_eq_m256(r, e);
8520    }
8521
8522    #[simd_test(enable = "avx512dq")]
8523    unsafe fn test_mm512_cvt_roundpd_epi64() {
8524        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8525        let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8526        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8527        assert_eq_m512i(r, e);
8528    }
8529
8530    #[simd_test(enable = "avx512dq")]
8531    unsafe fn test_mm512_mask_cvt_roundpd_epi64() {
8532        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8533        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8534        let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8535            b, 0b01101001, a,
8536        );
8537        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8538        assert_eq_m512i(r, e);
8539    }
8540
8541    #[simd_test(enable = "avx512dq")]
8542    unsafe fn test_mm512_maskz_cvt_roundpd_epi64() {
8543        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8544        let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8545            0b01101001, a,
8546        );
8547        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8548        assert_eq_m512i(r, e);
8549    }
8550
8551    #[simd_test(enable = "avx512dq,avx512vl")]
8552    unsafe fn test_mm_cvtpd_epi64() {
8553        let a = _mm_set_pd(1., 2.);
8554        let r = _mm_cvtpd_epi64(a);
8555        let e = _mm_set_epi64x(1, 2);
8556        assert_eq_m128i(r, e);
8557    }
8558
8559    #[simd_test(enable = "avx512dq,avx512vl")]
8560    unsafe fn test_mm_mask_cvtpd_epi64() {
8561        let a = _mm_set_pd(1., 2.);
8562        let b = _mm_set_epi64x(3, 4);
8563        let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
8564        let e = _mm_set_epi64x(3, 2);
8565        assert_eq_m128i(r, e);
8566    }
8567
8568    #[simd_test(enable = "avx512dq,avx512vl")]
8569    unsafe fn test_mm_maskz_cvtpd_epi64() {
8570        let a = _mm_set_pd(1., 2.);
8571        let r = _mm_maskz_cvtpd_epi64(0b01, a);
8572        let e = _mm_set_epi64x(0, 2);
8573        assert_eq_m128i(r, e);
8574    }
8575
8576    #[simd_test(enable = "avx512dq,avx512vl")]
8577    unsafe fn test_mm256_cvtpd_epi64() {
8578        let a = _mm256_set_pd(1., 2., 3., 4.);
8579        let r = _mm256_cvtpd_epi64(a);
8580        let e = _mm256_set_epi64x(1, 2, 3, 4);
8581        assert_eq_m256i(r, e);
8582    }
8583
8584    #[simd_test(enable = "avx512dq,avx512vl")]
8585    unsafe fn test_mm256_mask_cvtpd_epi64() {
8586        let a = _mm256_set_pd(1., 2., 3., 4.);
8587        let b = _mm256_set_epi64x(5, 6, 7, 8);
8588        let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
8589        let e = _mm256_set_epi64x(5, 2, 3, 8);
8590        assert_eq_m256i(r, e);
8591    }
8592
8593    #[simd_test(enable = "avx512dq,avx512vl")]
8594    unsafe fn test_mm256_maskz_cvtpd_epi64() {
8595        let a = _mm256_set_pd(1., 2., 3., 4.);
8596        let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
8597        let e = _mm256_set_epi64x(0, 2, 3, 0);
8598        assert_eq_m256i(r, e);
8599    }
8600
8601    #[simd_test(enable = "avx512dq")]
8602    unsafe fn test_mm512_cvtpd_epi64() {
8603        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8604        let r = _mm512_cvtpd_epi64(a);
8605        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8606        assert_eq_m512i(r, e);
8607    }
8608
8609    #[simd_test(enable = "avx512dq")]
8610    unsafe fn test_mm512_mask_cvtpd_epi64() {
8611        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8612        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8613        let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
8614        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8615        assert_eq_m512i(r, e);
8616    }
8617
8618    #[simd_test(enable = "avx512dq")]
8619    unsafe fn test_mm512_maskz_cvtpd_epi64() {
8620        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8621        let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
8622        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8623        assert_eq_m512i(r, e);
8624    }
8625
8626    #[simd_test(enable = "avx512dq")]
8627    unsafe fn test_mm512_cvt_roundps_epi64() {
8628        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8629        let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8630        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8631        assert_eq_m512i(r, e);
8632    }
8633
8634    #[simd_test(enable = "avx512dq")]
8635    unsafe fn test_mm512_mask_cvt_roundps_epi64() {
8636        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8637        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8638        let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8639            b, 0b01101001, a,
8640        );
8641        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8642        assert_eq_m512i(r, e);
8643    }
8644
8645    #[simd_test(enable = "avx512dq")]
8646    unsafe fn test_mm512_maskz_cvt_roundps_epi64() {
8647        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8648        let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8649            0b01101001, a,
8650        );
8651        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8652        assert_eq_m512i(r, e);
8653    }
8654
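    // `_mm_cvtps_epi64` converts only the two low f32 elements of `a` (4.0 and 3.0 here, since
    // `_mm_set_ps` lists elements from index 3 down to 0).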
8655    #[simd_test(enable = "avx512dq,avx512vl")]
8656    unsafe fn test_mm_cvtps_epi64() {
8657        let a = _mm_set_ps(1., 2., 3., 4.);
8658        let r = _mm_cvtps_epi64(a);
8659        let e = _mm_set_epi64x(3, 4);
8660        assert_eq_m128i(r, e);
8661    }
8662
8663    #[simd_test(enable = "avx512dq,avx512vl")]
8664    unsafe fn test_mm_mask_cvtps_epi64() {
8665        let a = _mm_set_ps(1., 2., 3., 4.);
8666        let b = _mm_set_epi64x(5, 6);
8667        let r = _mm_mask_cvtps_epi64(b, 0b01, a);
8668        let e = _mm_set_epi64x(5, 4);
8669        assert_eq_m128i(r, e);
8670    }
8671
8672    #[simd_test(enable = "avx512dq,avx512vl")]
8673    unsafe fn test_mm_maskz_cvtps_epi64() {
8674        let a = _mm_set_ps(1., 2., 3., 4.);
8675        let r = _mm_maskz_cvtps_epi64(0b01, a);
8676        let e = _mm_set_epi64x(0, 4);
8677        assert_eq_m128i(r, e);
8678    }
8679
8680    #[simd_test(enable = "avx512dq,avx512vl")]
8681    unsafe fn test_mm256_cvtps_epi64() {
8682        let a = _mm_set_ps(1., 2., 3., 4.);
8683        let r = _mm256_cvtps_epi64(a);
8684        let e = _mm256_set_epi64x(1, 2, 3, 4);
8685        assert_eq_m256i(r, e);
8686    }
8687
8688    #[simd_test(enable = "avx512dq,avx512vl")]
8689    unsafe fn test_mm256_mask_cvtps_epi64() {
8690        let a = _mm_set_ps(1., 2., 3., 4.);
8691        let b = _mm256_set_epi64x(5, 6, 7, 8);
8692        let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
8693        let e = _mm256_set_epi64x(5, 2, 3, 8);
8694        assert_eq_m256i(r, e);
8695    }
8696
8697    #[simd_test(enable = "avx512dq,avx512vl")]
8698    unsafe fn test_mm256_maskz_cvtps_epi64() {
8699        let a = _mm_set_ps(1., 2., 3., 4.);
8700        let r = _mm256_maskz_cvtps_epi64(0b0110, a);
8701        let e = _mm256_set_epi64x(0, 2, 3, 0);
8702        assert_eq_m256i(r, e);
8703    }
8704
8705    #[simd_test(enable = "avx512dq")]
8706    unsafe fn test_mm512_cvtps_epi64() {
8707        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8708        let r = _mm512_cvtps_epi64(a);
8709        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8710        assert_eq_m512i(r, e);
8711    }
8712
8713    #[simd_test(enable = "avx512dq")]
8714    unsafe fn test_mm512_mask_cvtps_epi64() {
8715        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8716        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8717        let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
8718        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8719        assert_eq_m512i(r, e);
8720    }
8721
8722    #[simd_test(enable = "avx512dq")]
8723    unsafe fn test_mm512_maskz_cvtps_epi64() {
8724        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8725        let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
8726        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8727        assert_eq_m512i(r, e);
8728    }
8729
8730    #[simd_test(enable = "avx512dq")]
8731    unsafe fn test_mm512_cvt_roundpd_epu64() {
8732        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8733        let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8734        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8735        assert_eq_m512i(r, e);
8736    }
8737
8738    #[simd_test(enable = "avx512dq")]
8739    unsafe fn test_mm512_mask_cvt_roundpd_epu64() {
8740        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8741        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8742        let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8743            b, 0b01101001, a,
8744        );
8745        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8746        assert_eq_m512i(r, e);
8747    }
8748
8749    #[simd_test(enable = "avx512dq")]
8750    unsafe fn test_mm512_maskz_cvt_roundpd_epu64() {
8751        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8752        let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8753            0b01101001, a,
8754        );
8755        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8756        assert_eq_m512i(r, e);
8757    }
8758
8759    #[simd_test(enable = "avx512dq,avx512vl")]
8760    unsafe fn test_mm_cvtpd_epu64() {
8761        let a = _mm_set_pd(1., 2.);
8762        let r = _mm_cvtpd_epu64(a);
8763        let e = _mm_set_epi64x(1, 2);
8764        assert_eq_m128i(r, e);
8765    }
8766
8767    #[simd_test(enable = "avx512dq,avx512vl")]
8768    unsafe fn test_mm_mask_cvtpd_epu64() {
8769        let a = _mm_set_pd(1., 2.);
8770        let b = _mm_set_epi64x(3, 4);
8771        let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
8772        let e = _mm_set_epi64x(3, 2);
8773        assert_eq_m128i(r, e);
8774    }
8775
8776    #[simd_test(enable = "avx512dq,avx512vl")]
8777    unsafe fn test_mm_maskz_cvtpd_epu64() {
8778        let a = _mm_set_pd(1., 2.);
8779        let r = _mm_maskz_cvtpd_epu64(0b01, a);
8780        let e = _mm_set_epi64x(0, 2);
8781        assert_eq_m128i(r, e);
8782    }
8783
8784    #[simd_test(enable = "avx512dq,avx512vl")]
8785    unsafe fn test_mm256_cvtpd_epu64() {
8786        let a = _mm256_set_pd(1., 2., 3., 4.);
8787        let r = _mm256_cvtpd_epu64(a);
8788        let e = _mm256_set_epi64x(1, 2, 3, 4);
8789        assert_eq_m256i(r, e);
8790    }
8791
8792    #[simd_test(enable = "avx512dq,avx512vl")]
8793    unsafe fn test_mm256_mask_cvtpd_epu64() {
8794        let a = _mm256_set_pd(1., 2., 3., 4.);
8795        let b = _mm256_set_epi64x(5, 6, 7, 8);
8796        let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
8797        let e = _mm256_set_epi64x(5, 2, 3, 8);
8798        assert_eq_m256i(r, e);
8799    }
8800
8801    #[simd_test(enable = "avx512dq,avx512vl")]
8802    unsafe fn test_mm256_maskz_cvtpd_epu64() {
8803        let a = _mm256_set_pd(1., 2., 3., 4.);
8804        let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
8805        let e = _mm256_set_epi64x(0, 2, 3, 0);
8806        assert_eq_m256i(r, e);
8807    }
8808
8809    #[simd_test(enable = "avx512dq")]
8810    unsafe fn test_mm512_cvtpd_epu64() {
8811        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8812        let r = _mm512_cvtpd_epu64(a);
8813        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8814        assert_eq_m512i(r, e);
8815    }
8816
8817    #[simd_test(enable = "avx512dq")]
8818    unsafe fn test_mm512_mask_cvtpd_epu64() {
8819        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8820        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8821        let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
8822        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8823        assert_eq_m512i(r, e);
8824    }
8825
8826    #[simd_test(enable = "avx512dq")]
8827    unsafe fn test_mm512_maskz_cvtpd_epu64() {
8828        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8829        let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
8830        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8831        assert_eq_m512i(r, e);
8832    }
8833
8834    #[simd_test(enable = "avx512dq")]
8835    unsafe fn test_mm512_cvt_roundps_epu64() {
8836        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8837        let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8838        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8839        assert_eq_m512i(r, e);
8840    }
8841
8842    #[simd_test(enable = "avx512dq")]
8843    unsafe fn test_mm512_mask_cvt_roundps_epu64() {
8844        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8845        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8846        let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8847            b, 0b01101001, a,
8848        );
8849        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8850        assert_eq_m512i(r, e);
8851    }
8852
8853    #[simd_test(enable = "avx512dq")]
8854    unsafe fn test_mm512_maskz_cvt_roundps_epu64() {
8855        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8856        let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8857            0b01101001, a,
8858        );
8859        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8860        assert_eq_m512i(r, e);
8861    }
8862
8863    #[simd_test(enable = "avx512dq,avx512vl")]
8864    unsafe fn test_mm_cvtps_epu64() {
8865        let a = _mm_set_ps(1., 2., 3., 4.);
8866        let r = _mm_cvtps_epu64(a);
8867        let e = _mm_set_epi64x(3, 4);
8868        assert_eq_m128i(r, e);
8869    }
8870
8871    #[simd_test(enable = "avx512dq,avx512vl")]
8872    unsafe fn test_mm_mask_cvtps_epu64() {
8873        let a = _mm_set_ps(1., 2., 3., 4.);
8874        let b = _mm_set_epi64x(5, 6);
8875        let r = _mm_mask_cvtps_epu64(b, 0b01, a);
8876        let e = _mm_set_epi64x(5, 4);
8877        assert_eq_m128i(r, e);
8878    }
8879
8880    #[simd_test(enable = "avx512dq,avx512vl")]
8881    unsafe fn test_mm_maskz_cvtps_epu64() {
8882        let a = _mm_set_ps(1., 2., 3., 4.);
8883        let r = _mm_maskz_cvtps_epu64(0b01, a);
8884        let e = _mm_set_epi64x(0, 4);
8885        assert_eq_m128i(r, e);
8886    }
8887
8888    #[simd_test(enable = "avx512dq,avx512vl")]
8889    unsafe fn test_mm256_cvtps_epu64() {
8890        let a = _mm_set_ps(1., 2., 3., 4.);
8891        let r = _mm256_cvtps_epu64(a);
8892        let e = _mm256_set_epi64x(1, 2, 3, 4);
8893        assert_eq_m256i(r, e);
8894    }
8895
8896    #[simd_test(enable = "avx512dq,avx512vl")]
8897    unsafe fn test_mm256_mask_cvtps_epu64() {
8898        let a = _mm_set_ps(1., 2., 3., 4.);
8899        let b = _mm256_set_epi64x(5, 6, 7, 8);
8900        let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
8901        let e = _mm256_set_epi64x(5, 2, 3, 8);
8902        assert_eq_m256i(r, e);
8903    }
8904
8905    #[simd_test(enable = "avx512dq,avx512vl")]
8906    unsafe fn test_mm256_maskz_cvtps_epu64() {
8907        let a = _mm_set_ps(1., 2., 3., 4.);
8908        let r = _mm256_maskz_cvtps_epu64(0b0110, a);
8909        let e = _mm256_set_epi64x(0, 2, 3, 0);
8910        assert_eq_m256i(r, e);
8911    }
8912
8913    #[simd_test(enable = "avx512dq")]
8914    unsafe fn test_mm512_cvtps_epu64() {
8915        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8916        let r = _mm512_cvtps_epu64(a);
8917        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8918        assert_eq_m512i(r, e);
8919    }
8920
8921    #[simd_test(enable = "avx512dq")]
8922    unsafe fn test_mm512_mask_cvtps_epu64() {
8923        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8924        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8925        let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
8926        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8927        assert_eq_m512i(r, e);
8928    }
8929
8930    #[simd_test(enable = "avx512dq")]
8931    unsafe fn test_mm512_maskz_cvtps_epu64() {
8932        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8933        let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
8934        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8935        assert_eq_m512i(r, e);
8936    }
8937
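    // The `cvtt*` variants truncate toward zero; for these integral inputs the results match
    // the rounded conversions above.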
8938    #[simd_test(enable = "avx512dq")]
8939    unsafe fn test_mm512_cvtt_roundpd_epi64() {
8940        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8941        let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
8942        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8943        assert_eq_m512i(r, e);
8944    }
8945
8946    #[simd_test(enable = "avx512dq")]
8947    unsafe fn test_mm512_mask_cvtt_roundpd_epi64() {
8948        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8949        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
8950        let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
8951        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
8952        assert_eq_m512i(r, e);
8953    }
8954
8955    #[simd_test(enable = "avx512dq")]
8956    unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() {
8957        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8958        let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
8959        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
8960        assert_eq_m512i(r, e);
8961    }
8962
8963    #[simd_test(enable = "avx512dq,avx512vl")]
8964    unsafe fn test_mm_cvttpd_epi64() {
8965        let a = _mm_set_pd(1., 2.);
8966        let r = _mm_cvttpd_epi64(a);
8967        let e = _mm_set_epi64x(1, 2);
8968        assert_eq_m128i(r, e);
8969    }
8970
8971    #[simd_test(enable = "avx512dq,avx512vl")]
8972    unsafe fn test_mm_mask_cvttpd_epi64() {
8973        let a = _mm_set_pd(1., 2.);
8974        let b = _mm_set_epi64x(3, 4);
8975        let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
8976        let e = _mm_set_epi64x(3, 2);
8977        assert_eq_m128i(r, e);
8978    }
8979
8980    #[simd_test(enable = "avx512dq,avx512vl")]
8981    unsafe fn test_mm_maskz_cvttpd_epi64() {
8982        let a = _mm_set_pd(1., 2.);
8983        let r = _mm_maskz_cvttpd_epi64(0b01, a);
8984        let e = _mm_set_epi64x(0, 2);
8985        assert_eq_m128i(r, e);
8986    }
8987
8988    #[simd_test(enable = "avx512dq,avx512vl")]
8989    unsafe fn test_mm256_cvttpd_epi64() {
8990        let a = _mm256_set_pd(1., 2., 3., 4.);
8991        let r = _mm256_cvttpd_epi64(a);
8992        let e = _mm256_set_epi64x(1, 2, 3, 4);
8993        assert_eq_m256i(r, e);
8994    }
8995
8996    #[simd_test(enable = "avx512dq,avx512vl")]
8997    unsafe fn test_mm256_mask_cvttpd_epi64() {
8998        let a = _mm256_set_pd(1., 2., 3., 4.);
8999        let b = _mm256_set_epi64x(5, 6, 7, 8);
9000        let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
9001        let e = _mm256_set_epi64x(5, 2, 3, 8);
9002        assert_eq_m256i(r, e);
9003    }
9004
9005    #[simd_test(enable = "avx512dq,avx512vl")]
9006    unsafe fn test_mm256_maskz_cvttpd_epi64() {
9007        let a = _mm256_set_pd(1., 2., 3., 4.);
9008        let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
9009        let e = _mm256_set_epi64x(0, 2, 3, 0);
9010        assert_eq_m256i(r, e);
9011    }
9012
9013    #[simd_test(enable = "avx512dq")]
9014    unsafe fn test_mm512_cvttpd_epi64() {
9015        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9016        let r = _mm512_cvttpd_epi64(a);
9017        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9018        assert_eq_m512i(r, e);
9019    }
9020
9021    #[simd_test(enable = "avx512dq")]
9022    unsafe fn test_mm512_mask_cvttpd_epi64() {
9023        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9024        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9025        let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
9026        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9027        assert_eq_m512i(r, e);
9028    }
9029
9030    #[simd_test(enable = "avx512dq")]
9031    unsafe fn test_mm512_maskz_cvttpd_epi64() {
9032        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9033        let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
9034        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9035        assert_eq_m512i(r, e);
9036    }
9037
9038    #[simd_test(enable = "avx512dq")]
9039    unsafe fn test_mm512_cvtt_roundps_epi64() {
9040        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9041        let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
9042        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9043        assert_eq_m512i(r, e);
9044    }
9045
9046    #[simd_test(enable = "avx512dq")]
9047    unsafe fn test_mm512_mask_cvtt_roundps_epi64() {
9048        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9049        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9050        let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9051        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9052        assert_eq_m512i(r, e);
9053    }
9054
9055    #[simd_test(enable = "avx512dq")]
9056    unsafe fn test_mm512_maskz_cvtt_roundps_epi64() {
9057        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9058        let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9059        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9060        assert_eq_m512i(r, e);
9061    }
9062
9063    #[simd_test(enable = "avx512dq,avx512vl")]
9064    unsafe fn test_mm_cvttps_epi64() {
9065        let a = _mm_set_ps(1., 2., 3., 4.);
9066        let r = _mm_cvttps_epi64(a);
9067        let e = _mm_set_epi64x(3, 4);
9068        assert_eq_m128i(r, e);
9069    }
9070
9071    #[simd_test(enable = "avx512dq,avx512vl")]
9072    unsafe fn test_mm_mask_cvttps_epi64() {
9073        let a = _mm_set_ps(1., 2., 3., 4.);
9074        let b = _mm_set_epi64x(5, 6);
9075        let r = _mm_mask_cvttps_epi64(b, 0b01, a);
9076        let e = _mm_set_epi64x(5, 4);
9077        assert_eq_m128i(r, e);
9078    }
9079
9080    #[simd_test(enable = "avx512dq,avx512vl")]
9081    unsafe fn test_mm_maskz_cvttps_epi64() {
9082        let a = _mm_set_ps(1., 2., 3., 4.);
9083        let r = _mm_maskz_cvttps_epi64(0b01, a);
9084        let e = _mm_set_epi64x(0, 4);
9085        assert_eq_m128i(r, e);
9086    }
9087
9088    #[simd_test(enable = "avx512dq,avx512vl")]
9089    unsafe fn test_mm256_cvttps_epi64() {
9090        let a = _mm_set_ps(1., 2., 3., 4.);
9091        let r = _mm256_cvttps_epi64(a);
9092        let e = _mm256_set_epi64x(1, 2, 3, 4);
9093        assert_eq_m256i(r, e);
9094    }
9095
9096    #[simd_test(enable = "avx512dq,avx512vl")]
9097    unsafe fn test_mm256_mask_cvttps_epi64() {
9098        let a = _mm_set_ps(1., 2., 3., 4.);
9099        let b = _mm256_set_epi64x(5, 6, 7, 8);
9100        let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
9101        let e = _mm256_set_epi64x(5, 2, 3, 8);
9102        assert_eq_m256i(r, e);
9103    }
9104
9105    #[simd_test(enable = "avx512dq,avx512vl")]
9106    unsafe fn test_mm256_maskz_cvttps_epi64() {
9107        let a = _mm_set_ps(1., 2., 3., 4.);
9108        let r = _mm256_maskz_cvttps_epi64(0b0110, a);
9109        let e = _mm256_set_epi64x(0, 2, 3, 0);
9110        assert_eq_m256i(r, e);
9111    }
9112
9113    #[simd_test(enable = "avx512dq")]
9114    unsafe fn test_mm512_cvttps_epi64() {
9115        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9116        let r = _mm512_cvttps_epi64(a);
9117        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9118        assert_eq_m512i(r, e);
9119    }
9120
9121    #[simd_test(enable = "avx512dq")]
9122    unsafe fn test_mm512_mask_cvttps_epi64() {
9123        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9124        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9125        let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
9126        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9127        assert_eq_m512i(r, e);
9128    }
9129
9130    #[simd_test(enable = "avx512dq")]
9131    unsafe fn test_mm512_maskz_cvttps_epi64() {
9132        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9133        let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
9134        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9135        assert_eq_m512i(r, e);
9136    }
9137
9138    #[simd_test(enable = "avx512dq")]
9139    unsafe fn test_mm512_cvtt_roundpd_epu64() {
9140        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9141        let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
9142        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9143        assert_eq_m512i(r, e);
9144    }
9145
9146    #[simd_test(enable = "avx512dq")]
9147    unsafe fn test_mm512_mask_cvtt_roundpd_epu64() {
9148        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9149        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9150        let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9151        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9152        assert_eq_m512i(r, e);
9153    }
9154
9155    #[simd_test(enable = "avx512dq")]
9156    unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() {
9157        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9158        let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9159        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9160        assert_eq_m512i(r, e);
9161    }
9162
9163    #[simd_test(enable = "avx512dq,avx512vl")]
9164    unsafe fn test_mm_cvttpd_epu64() {
9165        let a = _mm_set_pd(1., 2.);
9166        let r = _mm_cvttpd_epu64(a);
9167        let e = _mm_set_epi64x(1, 2);
9168        assert_eq_m128i(r, e);
9169    }
9170
9171    #[simd_test(enable = "avx512dq,avx512vl")]
9172    unsafe fn test_mm_mask_cvttpd_epu64() {
9173        let a = _mm_set_pd(1., 2.);
9174        let b = _mm_set_epi64x(3, 4);
9175        let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
9176        let e = _mm_set_epi64x(3, 2);
9177        assert_eq_m128i(r, e);
9178    }
9179
9180    #[simd_test(enable = "avx512dq,avx512vl")]
9181    unsafe fn test_mm_maskz_cvttpd_epu64() {
9182        let a = _mm_set_pd(1., 2.);
9183        let r = _mm_maskz_cvttpd_epu64(0b01, a);
9184        let e = _mm_set_epi64x(0, 2);
9185        assert_eq_m128i(r, e);
9186    }
9187
9188    #[simd_test(enable = "avx512dq,avx512vl")]
9189    unsafe fn test_mm256_cvttpd_epu64() {
9190        let a = _mm256_set_pd(1., 2., 3., 4.);
9191        let r = _mm256_cvttpd_epu64(a);
9192        let e = _mm256_set_epi64x(1, 2, 3, 4);
9193        assert_eq_m256i(r, e);
9194    }
9195
9196    #[simd_test(enable = "avx512dq,avx512vl")]
9197    unsafe fn test_mm256_mask_cvttpd_epu64() {
9198        let a = _mm256_set_pd(1., 2., 3., 4.);
9199        let b = _mm256_set_epi64x(5, 6, 7, 8);
9200        let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
9201        let e = _mm256_set_epi64x(5, 2, 3, 8);
9202        assert_eq_m256i(r, e);
9203    }
9204
9205    #[simd_test(enable = "avx512dq,avx512vl")]
9206    unsafe fn test_mm256_maskz_cvttpd_epu64() {
9207        let a = _mm256_set_pd(1., 2., 3., 4.);
9208        let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
9209        let e = _mm256_set_epi64x(0, 2, 3, 0);
9210        assert_eq_m256i(r, e);
9211    }
9212
9213    #[simd_test(enable = "avx512dq")]
9214    unsafe fn test_mm512_cvttpd_epu64() {
9215        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9216        let r = _mm512_cvttpd_epu64(a);
9217        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9218        assert_eq_m512i(r, e);
9219    }
9220
9221    #[simd_test(enable = "avx512dq")]
9222    unsafe fn test_mm512_mask_cvttpd_epu64() {
9223        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9224        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9225        let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
9226        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9227        assert_eq_m512i(r, e);
9228    }
9229
9230    #[simd_test(enable = "avx512dq")]
9231    unsafe fn test_mm512_maskz_cvttpd_epu64() {
9232        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9233        let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
9234        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9235        assert_eq_m512i(r, e);
9236    }
9237
9238    #[simd_test(enable = "avx512dq")]
9239    unsafe fn test_mm512_cvtt_roundps_epu64() {
9240        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9241        let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
9242        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9243        assert_eq_m512i(r, e);
9244    }
9245
9246    #[simd_test(enable = "avx512dq")]
9247    unsafe fn test_mm512_mask_cvtt_roundps_epu64() {
9248        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9249        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9250        let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9251        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9252        assert_eq_m512i(r, e);
9253    }
9254
9255    #[simd_test(enable = "avx512dq")]
9256    unsafe fn test_mm512_maskz_cvtt_roundps_epu64() {
9257        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9258        let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9259        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9260        assert_eq_m512i(r, e);
9261    }
9262
9263    #[simd_test(enable = "avx512dq,avx512vl")]
9264    unsafe fn test_mm_cvttps_epu64() {
9265        let a = _mm_set_ps(1., 2., 3., 4.);
9266        let r = _mm_cvttps_epu64(a);
9267        let e = _mm_set_epi64x(3, 4);
9268        assert_eq_m128i(r, e);
9269    }
9270
9271    #[simd_test(enable = "avx512dq,avx512vl")]
9272    unsafe fn test_mm_mask_cvttps_epu64() {
9273        let a = _mm_set_ps(1., 2., 3., 4.);
9274        let b = _mm_set_epi64x(5, 6);
9275        let r = _mm_mask_cvttps_epu64(b, 0b01, a);
9276        let e = _mm_set_epi64x(5, 4);
9277        assert_eq_m128i(r, e);
9278    }
9279
9280    #[simd_test(enable = "avx512dq,avx512vl")]
9281    unsafe fn test_mm_maskz_cvttps_epu64() {
9282        let a = _mm_set_ps(1., 2., 3., 4.);
9283        let r = _mm_maskz_cvttps_epu64(0b01, a);
9284        let e = _mm_set_epi64x(0, 4);
9285        assert_eq_m128i(r, e);
9286    }
9287
9288    #[simd_test(enable = "avx512dq,avx512vl")]
9289    unsafe fn test_mm256_cvttps_epu64() {
9290        let a = _mm_set_ps(1., 2., 3., 4.);
9291        let r = _mm256_cvttps_epu64(a);
9292        let e = _mm256_set_epi64x(1, 2, 3, 4);
9293        assert_eq_m256i(r, e);
9294    }
9295
9296    #[simd_test(enable = "avx512dq,avx512vl")]
9297    unsafe fn test_mm256_mask_cvttps_epu64() {
9298        let a = _mm_set_ps(1., 2., 3., 4.);
9299        let b = _mm256_set_epi64x(5, 6, 7, 8);
9300        let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
9301        let e = _mm256_set_epi64x(5, 2, 3, 8);
9302        assert_eq_m256i(r, e);
9303    }
9304
9305    #[simd_test(enable = "avx512dq,avx512vl")]
9306    unsafe fn test_mm256_maskz_cvttps_epu64() {
9307        let a = _mm_set_ps(1., 2., 3., 4.);
9308        let r = _mm256_maskz_cvttps_epu64(0b0110, a);
9309        let e = _mm256_set_epi64x(0, 2, 3, 0);
9310        assert_eq_m256i(r, e);
9311    }
9312
9313    #[simd_test(enable = "avx512dq")]
9314    unsafe fn test_mm512_cvttps_epu64() {
9315        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9316        let r = _mm512_cvttps_epu64(a);
9317        let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9318        assert_eq_m512i(r, e);
9319    }
9320
9321    #[simd_test(enable = "avx512dq")]
9322    unsafe fn test_mm512_mask_cvttps_epu64() {
9323        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9324        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9325        let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
9326        let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9327        assert_eq_m512i(r, e);
9328    }
9329
9330    #[simd_test(enable = "avx512dq")]
9331    unsafe fn test_mm512_maskz_cvttps_epu64() {
9332        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9333        let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
9334        let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9335        assert_eq_m512i(r, e);
9336    }
9337
9338    #[simd_test(enable = "avx512dq,avx512vl")]
9339    unsafe fn test_mm_mullo_epi64() {
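        // vpmullq keeps the low 64 bits of each 64x64-bit product.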
9340        let a = _mm_set_epi64x(1, 2);
9341        let b = _mm_set_epi64x(3, 4);
9342        let r = _mm_mullo_epi64(a, b);
9343        let e = _mm_set_epi64x(3, 8);
9344        assert_eq_m128i(r, e);
9345    }
9346
9347    #[simd_test(enable = "avx512dq,avx512vl")]
9348    unsafe fn test_mm_mask_mullo_epi64() {
9349        let a = _mm_set_epi64x(1, 2);
9350        let b = _mm_set_epi64x(3, 4);
9351        let c = _mm_set_epi64x(5, 6);
9352        let r = _mm_mask_mullo_epi64(c, 0b01, a, b);
9353        let e = _mm_set_epi64x(5, 8);
9354        assert_eq_m128i(r, e);
9355    }
9356
9357    #[simd_test(enable = "avx512dq,avx512vl")]
9358    unsafe fn test_mm_maskz_mullo_epi64() {
9359        let a = _mm_set_epi64x(1, 2);
9360        let b = _mm_set_epi64x(3, 4);
9361        let r = _mm_maskz_mullo_epi64(0b01, a, b);
9362        let e = _mm_set_epi64x(0, 8);
9363        assert_eq_m128i(r, e);
9364    }
9365
9366    #[simd_test(enable = "avx512dq,avx512vl")]
9367    unsafe fn test_mm256_mullo_epi64() {
9368        let a = _mm256_set_epi64x(1, 2, 3, 4);
9369        let b = _mm256_set_epi64x(5, 6, 7, 8);
9370        let r = _mm256_mullo_epi64(a, b);
9371        let e = _mm256_set_epi64x(5, 12, 21, 32);
9372        assert_eq_m256i(r, e);
9373    }
9374
9375    #[simd_test(enable = "avx512dq,avx512vl")]
9376    unsafe fn test_mm256_mask_mullo_epi64() {
9377        let a = _mm256_set_epi64x(1, 2, 3, 4);
9378        let b = _mm256_set_epi64x(5, 6, 7, 8);
9379        let c = _mm256_set_epi64x(9, 10, 11, 12);
9380        let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b);
9381        let e = _mm256_set_epi64x(9, 12, 21, 12);
9382        assert_eq_m256i(r, e);
9383    }
9384
9385    #[simd_test(enable = "avx512dq,avx512vl")]
9386    unsafe fn test_mm256_maskz_mullo_epi64() {
9387        let a = _mm256_set_epi64x(1, 2, 3, 4);
9388        let b = _mm256_set_epi64x(5, 6, 7, 8);
9389        let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
9390        let e = _mm256_set_epi64x(0, 12, 21, 0);
9391        assert_eq_m256i(r, e);
9392    }
9393
9394    #[simd_test(enable = "avx512dq")]
9395    unsafe fn test_mm512_mullo_epi64() {
9396        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9397        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9398        let r = _mm512_mullo_epi64(a, b);
9399        let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128);
9400        assert_eq_m512i(r, e);
9401    }
9402
9403    #[simd_test(enable = "avx512dq")]
9404    unsafe fn test_mm512_mask_mullo_epi64() {
9405        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9406        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9407        let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
9408        let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b);
9409        let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128);
9410        assert_eq_m512i(r, e);
9411    }
9412
9413    #[simd_test(enable = "avx512dq")]
9414    unsafe fn test_mm512_maskz_mullo_epi64() {
9415        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9416        let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9417        let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
9418        let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128);
9419        assert_eq_m512i(r, e);
9420    }
9421
9422    #[simd_test(enable = "avx512dq")]
9423    unsafe fn test_cvtmask8_u32() {
9424        let a: __mmask8 = 0b01101001;
9425        let r = _cvtmask8_u32(a);
9426        let e: u32 = 0b01101001;
9427        assert_eq!(r, e);
9428    }
9429
9430    #[simd_test(enable = "avx512dq")]
9431    unsafe fn test_cvtu32_mask8() {
9432        let a: u32 = 0b01101001;
9433        let r = _cvtu32_mask8(a);
9434        let e: __mmask8 = 0b01101001;
9435        assert_eq!(r, e);
9436    }
9437
9438    #[simd_test(enable = "avx512dq")]
9439    unsafe fn test_kadd_mask16() {
9440        let a: __mmask16 = 27549;
9441        let b: __mmask16 = 23434;
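        // _kadd_mask16 adds the two mask registers as plain integers: 27549 + 23434 = 50983.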
9442        let r = _kadd_mask16(a, b);
9443        let e: __mmask16 = 50983;
9444        assert_eq!(r, e);
9445    }
9446
9447    #[simd_test(enable = "avx512dq")]
9448    unsafe fn test_kadd_mask8() {
9449        let a: __mmask8 = 98;
9450        let b: __mmask8 = 117;
9451        let r = _kadd_mask8(a, b);
9452        let e: __mmask8 = 215;
9453        assert_eq!(r, e);
9454    }
9455
9456    #[simd_test(enable = "avx512dq")]
9457    unsafe fn test_kand_mask8() {
9458        let a: __mmask8 = 0b01101001;
9459        let b: __mmask8 = 0b10110011;
9460        let r = _kand_mask8(a, b);
9461        let e: __mmask8 = 0b00100001;
9462        assert_eq!(r, e);
9463    }
9464
9465    #[simd_test(enable = "avx512dq")]
9466    unsafe fn test_kandn_mask8() {
9467        let a: __mmask8 = 0b01101001;
9468        let b: __mmask8 = 0b10110011;
9469        let r = _kandn_mask8(a, b);
9470        let e: __mmask8 = 0b10010010;
9471        assert_eq!(r, e);
9472    }
9473
9474    #[simd_test(enable = "avx512dq")]
9475    unsafe fn test_knot_mask8() {
9476        let a: __mmask8 = 0b01101001;
9477        let r = _knot_mask8(a);
9478        let e: __mmask8 = 0b10010110;
9479        assert_eq!(r, e);
9480    }
9481
9482    #[simd_test(enable = "avx512dq")]
9483    unsafe fn test_kor_mask8() {
9484        let a: __mmask8 = 0b01101001;
9485        let b: __mmask8 = 0b10110011;
9486        let r = _kor_mask8(a, b);
9487        let e: __mmask8 = 0b11111011;
9488        assert_eq!(r, e);
9489    }
9490
9491    #[simd_test(enable = "avx512dq")]
9492    unsafe fn test_kxnor_mask8() {
9493        let a: __mmask8 = 0b01101001;
9494        let b: __mmask8 = 0b10110011;
9495        let r = _kxnor_mask8(a, b);
9496        let e: __mmask8 = 0b00100101;
9497        assert_eq!(r, e);
9498    }
9499
9500    #[simd_test(enable = "avx512dq")]
9501    unsafe fn test_kxor_mask8() {
9502        let a: __mmask8 = 0b01101001;
9503        let b: __mmask8 = 0b10110011;
9504        let r = _kxor_mask8(a, b);
9505        let e: __mmask8 = 0b11011010;
9506        assert_eq!(r, e);
9507    }
9508
9509    #[simd_test(enable = "avx512dq")]
9510    unsafe fn test_kortest_mask8_u8() {
9511        let a: __mmask8 = 0b01101001;
9512        let b: __mmask8 = 0b10110110;
9513        let mut all_ones: u8 = 0;
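        // kortest ORs the masks: the return value is 1 only if the OR is all zeros,
        // and `all_ones` is set to 1 if the OR is all ones (here 0xff).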
9514        let r = _kortest_mask8_u8(a, b, &mut all_ones);
9515        assert_eq!(r, 0);
9516        assert_eq!(all_ones, 1);
9517    }
9518
9519    #[simd_test(enable = "avx512dq")]
9520    unsafe fn test_kortestc_mask8_u8() {
9521        let a: __mmask8 = 0b01101001;
9522        let b: __mmask8 = 0b10110110;
9523        let r = _kortestc_mask8_u8(a, b);
9524        assert_eq!(r, 1);
9525    }
9526
9527    #[simd_test(enable = "avx512dq")]
9528    unsafe fn test_kortestz_mask8_u8() {
9529        let a: __mmask8 = 0b01101001;
9530        let b: __mmask8 = 0b10110110;
9531        let r = _kortestz_mask8_u8(a, b);
9532        assert_eq!(r, 0);
9533    }
9534
9535    #[simd_test(enable = "avx512dq")]
9536    unsafe fn test_kshiftli_mask8() {
9537        let a: __mmask8 = 0b01101001;
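        // Shift the 8-bit mask left by 3; bits shifted past bit 7 are dropped.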
9538        let r = _kshiftli_mask8::<3>(a);
9539        let e: __mmask8 = 0b01001000;
9540        assert_eq!(r, e);
9541    }
9542
9543    #[simd_test(enable = "avx512dq")]
9544    unsafe fn test_kshiftri_mask8() {
9545        let a: __mmask8 = 0b01101001;
9546        let r = _kshiftri_mask8::<3>(a);
9547        let e: __mmask8 = 0b00001101;
9548        assert_eq!(r, e);
9549    }
9550
9551    #[simd_test(enable = "avx512dq")]
9552    unsafe fn test_ktest_mask8_u8() {
9553        let a: __mmask8 = 0b01101001;
9554        let b: __mmask8 = 0b10010110;
9555        let mut and_not: u8 = 0;
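        // ktest returns 1 when `a & b` is all zeros; `and_not` is set to 1 when
        // `!a & b` is all zeros (here it is not, so it stays 0).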
9556        let r = _ktest_mask8_u8(a, b, &mut and_not);
9557        assert_eq!(r, 1);
9558        assert_eq!(and_not, 0);
9559    }
9560
9561    #[simd_test(enable = "avx512dq")]
9562    unsafe fn test_ktestc_mask8_u8() {
9563        let a: __mmask8 = 0b01101001;
9564        let b: __mmask8 = 0b10010110;
9565        let r = _ktestc_mask8_u8(a, b);
9566        assert_eq!(r, 0);
9567    }
9568
9569    #[simd_test(enable = "avx512dq")]
9570    unsafe fn test_ktestz_mask8_u8() {
9571        let a: __mmask8 = 0b01101001;
9572        let b: __mmask8 = 0b10010110;
9573        let r = _ktestz_mask8_u8(a, b);
9574        assert_eq!(r, 1);
9575    }
9576
9577    #[simd_test(enable = "avx512dq")]
9578    unsafe fn test_ktest_mask16_u8() {
9579        let a: __mmask16 = 0b0110100100111100;
9580        let b: __mmask16 = 0b1001011011000011;
9581        let mut and_not: u8 = 0;
9582        let r = _ktest_mask16_u8(a, b, &mut and_not);
9583        assert_eq!(r, 1);
9584        assert_eq!(and_not, 0);
9585    }
9586
9587    #[simd_test(enable = "avx512dq")]
9588    unsafe fn test_ktestc_mask16_u8() {
9589        let a: __mmask16 = 0b0110100100111100;
9590        let b: __mmask16 = 0b1001011011000011;
9591        let r = _ktestc_mask16_u8(a, b);
9592        assert_eq!(r, 0);
9593    }
9594
9595    #[simd_test(enable = "avx512dq")]
9596    unsafe fn test_ktestz_mask16_u8() {
9597        let a: __mmask16 = 0b0110100100111100;
9598        let b: __mmask16 = 0b1001011011000011;
9599        let r = _ktestz_mask16_u8(a, b);
9600        assert_eq!(r, 1);
9601    }
9602
9603    #[simd_test(enable = "avx512dq")]
9604    unsafe fn test_load_mask8() {
9605        let a: __mmask8 = 0b01101001;
9606        let r = _load_mask8(&a);
9607        let e: __mmask8 = 0b01101001;
9608        assert_eq!(r, e);
9609    }
9610
9611    #[simd_test(enable = "avx512dq")]
9612    unsafe fn test_store_mask8() {
9613        let a: __mmask8 = 0b01101001;
9614        let mut r = 0;
9615        _store_mask8(&mut r, a);
9616        let e: __mmask8 = 0b01101001;
9617        assert_eq!(r, e);
9618    }
9619
9620    #[simd_test(enable = "avx512dq,avx512vl")]
9621    unsafe fn test_mm_movepi32_mask() {
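        // Each mask bit is the sign bit of the corresponding 32-bit lane
        // (lane 0 = lowest bit).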
9622        let a = _mm_set_epi32(0, -2, -3, 4);
9623        let r = _mm_movepi32_mask(a);
9624        let e = 0b0110;
9625        assert_eq!(r, e);
9626    }
9627
9628    #[simd_test(enable = "avx512dq,avx512vl")]
9629    unsafe fn test_mm256_movepi32_mask() {
9630        let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
9631        let r = _mm256_movepi32_mask(a);
9632        let e = 0b01101001;
9633        assert_eq!(r, e);
9634    }
9635
9636    #[simd_test(enable = "avx512dq")]
9637    unsafe fn test_mm512_movepi32_mask() {
9638        let a = _mm512_set_epi32(
9639            0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
9640        );
9641        let r = _mm512_movepi32_mask(a);
9642        let e = 0b0110100100111100;
9643        assert_eq!(r, e);
9644    }
9645
9646    #[simd_test(enable = "avx512dq,avx512vl")]
9647    unsafe fn test_mm_movepi64_mask() {
9648        let a = _mm_set_epi64x(0, -2);
9649        let r = _mm_movepi64_mask(a);
9650        let e = 0b01;
9651        assert_eq!(r, e);
9652    }
9653
9654    #[simd_test(enable = "avx512dq,avx512vl")]
9655    unsafe fn test_mm256_movepi64_mask() {
9656        let a = _mm256_set_epi64x(0, -2, -3, 4);
9657        let r = _mm256_movepi64_mask(a);
9658        let e = 0b0110;
9659        assert_eq!(r, e);
9660    }
9661
9662    #[simd_test(enable = "avx512dq")]
9663    unsafe fn test_mm512_movepi64_mask() {
9664        let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
9665        let r = _mm512_movepi64_mask(a);
9666        let e = 0b01101001;
9667        assert_eq!(r, e);
9668    }
9669
9670    #[simd_test(enable = "avx512dq,avx512vl")]
9671    unsafe fn test_mm_movm_epi32() {
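        // Each set mask bit expands to an all-ones 32-bit lane, clear bits to zero.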
9672        let a = 0b0110;
9673        let r = _mm_movm_epi32(a);
9674        let e = _mm_set_epi32(0, -1, -1, 0);
9675        assert_eq_m128i(r, e);
9676    }
9677
9678    #[simd_test(enable = "avx512dq,avx512vl")]
9679    unsafe fn test_mm256_movm_epi32() {
9680        let a = 0b01101001;
9681        let r = _mm256_movm_epi32(a);
9682        let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
9683        assert_eq_m256i(r, e);
9684    }
9685
9686    #[simd_test(enable = "avx512dq")]
9687    unsafe fn test_mm512_movm_epi32() {
9688        let a = 0b0110100100111100;
9689        let r = _mm512_movm_epi32(a);
9690        let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
9691        assert_eq_m512i(r, e);
9692    }
9693
9694    #[simd_test(enable = "avx512dq,avx512vl")]
9695    unsafe fn test_mm_movm_epi64() {
9696        let a = 0b01;
9697        let r = _mm_movm_epi64(a);
9698        let e = _mm_set_epi64x(0, -1);
9699        assert_eq_m128i(r, e);
9700    }
9701
9702    #[simd_test(enable = "avx512dq,avx512vl")]
9703    unsafe fn test_mm256_movm_epi64() {
9704        let a = 0b0110;
9705        let r = _mm256_movm_epi64(a);
9706        let e = _mm256_set_epi64x(0, -1, -1, 0);
9707        assert_eq_m256i(r, e);
9708    }
9709
9710    #[simd_test(enable = "avx512dq")]
9711    unsafe fn test_mm512_movm_epi64() {
9712        let a = 0b01101001;
9713        let r = _mm512_movm_epi64(a);
9714        let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
9715        assert_eq_m512i(r, e);
9716    }
9717
9718    #[simd_test(enable = "avx512dq")]
9719    unsafe fn test_mm512_range_round_pd() {
9720        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9721        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
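        // imm8 = 0b0101: bits 1:0 = 01 select max(a, b) per lane and bits 3:2 = 01 take
        // the sign from the compare result; with positive inputs this is the pairwise max.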
9722        let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
9723        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
9724        assert_eq_m512d(r, e);
9725    }
9726
9727    #[simd_test(enable = "avx512dq")]
9728    unsafe fn test_mm512_mask_range_round_pd() {
9729        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9730        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
9731        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
9732        let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b);
9733        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
9734        assert_eq_m512d(r, e);
9735    }
9736
9737    #[simd_test(enable = "avx512dq")]
9738    unsafe fn test_mm512_maskz_range_round_pd() {
9739        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9740        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
9741        let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
9742        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
9743        assert_eq_m512d(r, e);
9744    }
9745
9746    #[simd_test(enable = "avx512dq,avx512vl")]
9747    unsafe fn test_mm_range_pd() {
9748        let a = _mm_set_pd(1., 2.);
9749        let b = _mm_set_pd(2., 1.);
9750        let r = _mm_range_pd::<0b0101>(a, b);
9751        let e = _mm_set_pd(2., 2.);
9752        assert_eq_m128d(r, e);
9753    }
9754
9755    #[simd_test(enable = "avx512dq,avx512vl")]
9756    unsafe fn test_mm_mask_range_pd() {
9757        let a = _mm_set_pd(1., 2.);
9758        let b = _mm_set_pd(2., 1.);
9759        let c = _mm_set_pd(3., 4.);
9760        let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b);
9761        let e = _mm_set_pd(3., 2.);
9762        assert_eq_m128d(r, e);
9763    }
9764
9765    #[simd_test(enable = "avx512dq,avx512vl")]
9766    unsafe fn test_mm_maskz_range_pd() {
9767        let a = _mm_set_pd(1., 2.);
9768        let b = _mm_set_pd(2., 1.);
9769        let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
9770        let e = _mm_set_pd(0., 2.);
9771        assert_eq_m128d(r, e);
9772    }
9773
9774    #[simd_test(enable = "avx512dq,avx512vl")]
9775    unsafe fn test_mm256_range_pd() {
9776        let a = _mm256_set_pd(1., 2., 3., 4.);
9777        let b = _mm256_set_pd(2., 1., 4., 3.);
9778        let r = _mm256_range_pd::<0b0101>(a, b);
9779        let e = _mm256_set_pd(2., 2., 4., 4.);
9780        assert_eq_m256d(r, e);
9781    }
9782
9783    #[simd_test(enable = "avx512dq,avx512vl")]
9784    unsafe fn test_mm256_mask_range_pd() {
9785        let a = _mm256_set_pd(1., 2., 3., 4.);
9786        let b = _mm256_set_pd(2., 1., 4., 3.);
9787        let c = _mm256_set_pd(5., 6., 7., 8.);
9788        let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b);
9789        let e = _mm256_set_pd(5., 2., 4., 8.);
9790        assert_eq_m256d(r, e);
9791    }
9792
9793    #[simd_test(enable = "avx512dq,avx512vl")]
9794    unsafe fn test_mm256_maskz_range_pd() {
9795        let a = _mm256_set_pd(1., 2., 3., 4.);
9796        let b = _mm256_set_pd(2., 1., 4., 3.);
9797        let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
9798        let e = _mm256_set_pd(0., 2., 4., 0.);
9799        assert_eq_m256d(r, e);
9800    }
9801
9802    #[simd_test(enable = "avx512dq")]
9803    unsafe fn test_mm512_range_pd() {
9804        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9805        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
9806        let r = _mm512_range_pd::<0b0101>(a, b);
9807        let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
9808        assert_eq_m512d(r, e);
9809    }
9810
9811    #[simd_test(enable = "avx512dq")]
9812    unsafe fn test_mm512_mask_range_pd() {
9813        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9814        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
9815        let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
9816        let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b);
9817        let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
9818        assert_eq_m512d(r, e);
9819    }
9820
9821    #[simd_test(enable = "avx512dq")]
9822    unsafe fn test_mm512_maskz_range_pd() {
9823        let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9824        let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
9825        let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
9826        let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
9827        assert_eq_m512d(r, e);
9828    }
9829
9830    #[simd_test(enable = "avx512dq")]
9831    unsafe fn test_mm512_range_round_ps() {
9832        let a = _mm512_set_ps(
9833            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
9834        );
9835        let b = _mm512_set_ps(
9836            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
9837        );
9838        let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b);
9839        let e = _mm512_set_ps(
9840            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
9841        );
9842        assert_eq_m512(r, e);
9843    }
9844
9845    #[simd_test(enable = "avx512dq")]
9846    unsafe fn test_mm512_mask_range_round_ps() {
9847        let a = _mm512_set_ps(
9848            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
9849        );
9850        let b = _mm512_set_ps(
9851            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
9852        );
9853        let c = _mm512_set_ps(
9854            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
9855        );
9856        let r =
9857            _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b);
9858        let e = _mm512_set_ps(
9859            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
9860        );
9861        assert_eq_m512(r, e);
9862    }
9863
9864    #[simd_test(enable = "avx512dq")]
9865    unsafe fn test_mm512_maskz_range_round_ps() {
9866        let a = _mm512_set_ps(
9867            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
9868        );
9869        let b = _mm512_set_ps(
9870            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
9871        );
9872        let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b);
9873        let e = _mm512_set_ps(
9874            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
9875        );
9876        assert_eq_m512(r, e);
9877    }
9878
9879    #[simd_test(enable = "avx512dq,avx512vl")]
9880    unsafe fn test_mm_range_ps() {
9881        let a = _mm_set_ps(1., 2., 3., 4.);
9882        let b = _mm_set_ps(2., 1., 4., 3.);
9883        let r = _mm_range_ps::<0b0101>(a, b);
9884        let e = _mm_set_ps(2., 2., 4., 4.);
9885        assert_eq_m128(r, e);
9886    }
9887
9888    #[simd_test(enable = "avx512dq,avx512vl")]
9889    unsafe fn test_mm_mask_range_ps() {
9890        let a = _mm_set_ps(1., 2., 3., 4.);
9891        let b = _mm_set_ps(2., 1., 4., 3.);
9892        let c = _mm_set_ps(5., 6., 7., 8.);
9893        let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b);
9894        let e = _mm_set_ps(5., 2., 4., 8.);
9895        assert_eq_m128(r, e);
9896    }
9897
9898    #[simd_test(enable = "avx512dq,avx512vl")]
9899    unsafe fn test_mm_maskz_range_ps() {
9900        let a = _mm_set_ps(1., 2., 3., 4.);
9901        let b = _mm_set_ps(2., 1., 4., 3.);
9902        let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
9903        let e = _mm_set_ps(0., 2., 4., 0.);
9904        assert_eq_m128(r, e);
9905    }
9906
9907    #[simd_test(enable = "avx512dq,avx512vl")]
9908    unsafe fn test_mm256_range_ps() {
9909        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9910        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
9911        let r = _mm256_range_ps::<0b0101>(a, b);
9912        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
9913        assert_eq_m256(r, e);
9914    }
9915
9916    #[simd_test(enable = "avx512dq,avx512vl")]
9917    unsafe fn test_mm256_mask_range_ps() {
9918        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9919        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
9920        let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
9921        let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b);
9922        let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.);
9923        assert_eq_m256(r, e);
9924    }
9925
9926    #[simd_test(enable = "avx512dq,avx512vl")]
9927    unsafe fn test_mm256_maskz_range_ps() {
9928        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9929        let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
9930        let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
9931        let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.);
9932        assert_eq_m256(r, e);
9933    }
9934
9935    #[simd_test(enable = "avx512dq")]
9936    unsafe fn test_mm512_range_ps() {
9937        let a = _mm512_set_ps(
9938            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
9939        );
9940        let b = _mm512_set_ps(
9941            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
9942        );
9943        let r = _mm512_range_ps::<0b0101>(a, b);
9944        let e = _mm512_set_ps(
9945            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
9946        );
9947        assert_eq_m512(r, e);
9948    }
9949
9950    #[simd_test(enable = "avx512dq")]
9951    unsafe fn test_mm512_mask_range_ps() {
9952        let a = _mm512_set_ps(
9953            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
9954        );
9955        let b = _mm512_set_ps(
9956            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
9957        );
9958        let c = _mm512_set_ps(
9959            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
9960        );
9961        let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b);
9962        let e = _mm512_set_ps(
9963            17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
9964        );
9965        assert_eq_m512(r, e);
9966    }
9967
9968    #[simd_test(enable = "avx512dq")]
9969    unsafe fn test_mm512_maskz_range_ps() {
9970        let a = _mm512_set_ps(
9971            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
9972        );
9973        let b = _mm512_set_ps(
9974            2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
9975        );
9976        let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b);
9977        let e = _mm512_set_ps(
9978            0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
9979        );
9980        assert_eq_m512(r, e);
9981    }
9982
9983    #[simd_test(enable = "avx512dq")]
9984    unsafe fn test_mm_range_round_sd() {
9985        let a = _mm_set_sd(1.);
9986        let b = _mm_set_sd(2.);
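        // Scalar form: only the low f64 lane is computed (max of 1.0 and 2.0);
        // the upper lane is copied from `a`.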
9987        let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
9988        let e = _mm_set_sd(2.);
9989        assert_eq_m128d(r, e);
9990    }
9991
9992    #[simd_test(enable = "avx512dq")]
9993    unsafe fn test_mm_mask_range_round_sd() {
9994        let a = _mm_set_sd(1.);
9995        let b = _mm_set_sd(2.);
9996        let c = _mm_set_sd(3.);
9997        let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
9998        let e = _mm_set_sd(3.);
9999        assert_eq_m128d(r, e);
10000    }
10001
10002    #[simd_test(enable = "avx512dq")]
10003    unsafe fn test_mm_maskz_range_round_sd() {
10004        let a = _mm_set_sd(1.);
10005        let b = _mm_set_sd(2.);
10006        let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
10007        let e = _mm_set_sd(0.);
10008        assert_eq_m128d(r, e);
10009    }
10010
10011    #[simd_test(enable = "avx512dq")]
10012    unsafe fn test_mm_mask_range_sd() {
10013        let a = _mm_set_sd(1.);
10014        let b = _mm_set_sd(2.);
10015        let c = _mm_set_sd(3.);
10016        let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b);
10017        let e = _mm_set_sd(3.);
10018        assert_eq_m128d(r, e);
10019    }
10020
10021    #[simd_test(enable = "avx512dq")]
10022    unsafe fn test_mm_maskz_range_sd() {
10023        let a = _mm_set_sd(1.);
10024        let b = _mm_set_sd(2.);
10025        let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
10026        let e = _mm_set_sd(0.);
10027        assert_eq_m128d(r, e);
10028    }
10029
10030    #[simd_test(enable = "avx512dq")]
10031    unsafe fn test_mm_range_round_ss() {
10032        let a = _mm_set_ss(1.);
10033        let b = _mm_set_ss(2.);
10034        let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10035        let e = _mm_set_ss(2.);
10036        assert_eq_m128(r, e);
10037    }
10038
10039    #[simd_test(enable = "avx512dq")]
10040    unsafe fn test_mm_mask_range_round_ss() {
10041        let a = _mm_set_ss(1.);
10042        let b = _mm_set_ss(2.);
10043        let c = _mm_set_ss(3.);
10044        let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
10045        let e = _mm_set_ss(3.);
10046        assert_eq_m128(r, e);
10047    }
10048
10049    #[simd_test(enable = "avx512dq")]
10050    unsafe fn test_mm_maskz_range_round_ss() {
10051        let a = _mm_set_ss(1.);
10052        let b = _mm_set_ss(2.);
10053        let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
10054        let e = _mm_set_ss(0.);
10055        assert_eq_m128(r, e);
10056    }
10057
10058    #[simd_test(enable = "avx512dq")]
10059    unsafe fn test_mm_mask_range_ss() {
10060        let a = _mm_set_ss(1.);
10061        let b = _mm_set_ss(2.);
10062        let c = _mm_set_ss(3.);
10063        let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b);
10064        let e = _mm_set_ss(3.);
10065        assert_eq_m128(r, e);
10066    }
10067
10068    #[simd_test(enable = "avx512dq")]
10069    unsafe fn test_mm_maskz_range_ss() {
10070        let a = _mm_set_ss(1.);
10071        let b = _mm_set_ss(2.);
10072        let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
10073        let e = _mm_set_ss(0.);
10074        assert_eq_m128(r, e);
10075    }
10076
10077    #[simd_test(enable = "avx512dq")]
10078    unsafe fn test_mm512_reduce_round_pd() {
10079        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
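        // imm8 = 16 | _MM_FROUND_TO_ZERO: round each lane toward zero to a multiple of
        // 2^-1 = 0.5 and keep the remainder, e.g. 0.75 -> 0.25 and 1.0 -> 0.0.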
10080        let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10081        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10082        assert_eq_m512d(r, e);
10083    }
10084
10085    #[simd_test(enable = "avx512dq")]
10086    unsafe fn test_mm512_mask_reduce_round_pd() {
10087        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10088        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10089        let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10090            src, 0b01101001, a,
10091        );
10092        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10093        assert_eq_m512d(r, e);
10094    }
10095
10096    #[simd_test(enable = "avx512dq")]
10097    unsafe fn test_mm512_maskz_reduce_round_pd() {
10098        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10099        let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10100            0b01101001, a,
10101        );
10102        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10103        assert_eq_m512d(r, e);
10104    }
10105
10106    #[simd_test(enable = "avx512dq,avx512vl")]
10107    unsafe fn test_mm_reduce_pd() {
10108        let a = _mm_set_pd(0.25, 0.50);
10109        let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10110        let e = _mm_set_pd(0.25, 0.);
10111        assert_eq_m128d(r, e);
10112    }
10113
10114    #[simd_test(enable = "avx512dq,avx512vl")]
10115    unsafe fn test_mm_mask_reduce_pd() {
10116        let a = _mm_set_pd(0.25, 0.50);
10117        let src = _mm_set_pd(3., 4.);
10118        let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
10119        let e = _mm_set_pd(3., 0.);
10120        assert_eq_m128d(r, e);
10121    }
10122
10123    #[simd_test(enable = "avx512dq,avx512vl")]
10124    unsafe fn test_mm_maskz_reduce_pd() {
10125        let a = _mm_set_pd(0.25, 0.50);
10126        let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
10127        let e = _mm_set_pd(0., 0.);
10128        assert_eq_m128d(r, e);
10129    }
10130
10131    #[simd_test(enable = "avx512dq,avx512vl")]
10132    unsafe fn test_mm256_reduce_pd() {
10133        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10134        let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10135        let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
10136        assert_eq_m256d(r, e);
10137    }
10138
10139    #[simd_test(enable = "avx512dq,avx512vl")]
10140    unsafe fn test_mm256_mask_reduce_pd() {
10141        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10142        let src = _mm256_set_pd(3., 4., 5., 6.);
10143        let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10144        let e = _mm256_set_pd(3., 0., 0.25, 6.);
10145        assert_eq_m256d(r, e);
10146    }
10147
10148    #[simd_test(enable = "avx512dq,avx512vl")]
10149    unsafe fn test_mm256_maskz_reduce_pd() {
10150        let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10151        let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10152        let e = _mm256_set_pd(0., 0., 0.25, 0.);
10153        assert_eq_m256d(r, e);
10154    }
10155
10156    #[simd_test(enable = "avx512dq")]
10157    unsafe fn test_mm512_reduce_pd() {
10158        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10159        let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10160        let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10161        assert_eq_m512d(r, e);
10162    }
10163
10164    #[simd_test(enable = "avx512dq")]
10165    unsafe fn test_mm512_mask_reduce_pd() {
10166        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10167        let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10168        let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
10169        let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10170        assert_eq_m512d(r, e);
10171    }
10172
10173    #[simd_test(enable = "avx512dq")]
10174    unsafe fn test_mm512_maskz_reduce_pd() {
10175        let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10176        let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
10177        let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10178        assert_eq_m512d(r, e);
10179    }
10180
10181    #[simd_test(enable = "avx512dq")]
10182    unsafe fn test_mm512_reduce_round_ps() {
10183        let a = _mm512_set_ps(
10184            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10185            4.0,
10186        );
10187        let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10188        let e = _mm512_set_ps(
10189            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
10190        );
10191        assert_eq_m512(r, e);
10192    }
10193
10194    #[simd_test(enable = "avx512dq")]
10195    unsafe fn test_mm512_mask_reduce_round_ps() {
10196        let a = _mm512_set_ps(
10197            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10198            4.0,
10199        );
10200        let src = _mm512_set_ps(
10201            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
10202        );
10203        let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10204            src,
10205            0b0110100100111100,
10206            a,
10207        );
10208        let e = _mm512_set_ps(
10209            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
10210        );
10211        assert_eq_m512(r, e);
10212    }
10213
10214    #[simd_test(enable = "avx512dq")]
10215    unsafe fn test_mm512_maskz_reduce_round_ps() {
10216        let a = _mm512_set_ps(
10217            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10218            4.0,
10219        );
10220        let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10221            0b0110100100111100,
10222            a,
10223        );
10224        let e = _mm512_set_ps(
10225            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
10226        );
10227        assert_eq_m512(r, e);
10228    }
10229
10230    #[simd_test(enable = "avx512dq,avx512vl")]
10231    unsafe fn test_mm_reduce_ps() {
10232        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10233        let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10234        let e = _mm_set_ps(0.25, 0., 0.25, 0.);
10235        assert_eq_m128(r, e);
10236    }
10237
10238    #[simd_test(enable = "avx512dq,avx512vl")]
10239    unsafe fn test_mm_mask_reduce_ps() {
10240        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10241        let src = _mm_set_ps(2., 3., 4., 5.);
10242        let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10243        let e = _mm_set_ps(2., 0., 0.25, 5.);
10244        assert_eq_m128(r, e);
10245    }
10246
10247    #[simd_test(enable = "avx512dq,avx512vl")]
10248    unsafe fn test_mm_maskz_reduce_ps() {
10249        let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10250        let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10251        let e = _mm_set_ps(0., 0., 0.25, 0.);
10252        assert_eq_m128(r, e);
10253    }
10254
10255    #[simd_test(enable = "avx512dq,avx512vl")]
10256    unsafe fn test_mm256_reduce_ps() {
10257        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10258        let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10259        let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10260        assert_eq_m256(r, e);
10261    }
10262
10263    #[simd_test(enable = "avx512dq,avx512vl")]
10264    unsafe fn test_mm256_mask_reduce_ps() {
10265        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10266        let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
10267        let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
10268        let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10269        assert_eq_m256(r, e);
10270    }
10271
10272    #[simd_test(enable = "avx512dq,avx512vl")]
10273    unsafe fn test_mm256_maskz_reduce_ps() {
10274        let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10275        let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
10276        let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10277        assert_eq_m256(r, e);
10278    }
10279
10280    #[simd_test(enable = "avx512dq")]
10281    unsafe fn test_mm512_reduce_ps() {
10282        let a = _mm512_set_ps(
10283            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10284            4.0,
10285        );
10286        let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10287        let e = _mm512_set_ps(
10288            0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
10289        );
10290        assert_eq_m512(r, e);
10291    }
10292
10293    #[simd_test(enable = "avx512dq")]
10294    unsafe fn test_mm512_mask_reduce_ps() {
10295        let a = _mm512_set_ps(
10296            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10297            4.0,
10298        );
10299        let src = _mm512_set_ps(
10300            5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
10301        );
10302        let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a);
10303        let e = _mm512_set_ps(
10304            5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
10305        );
10306        assert_eq_m512(r, e);
10307    }
10308
10309    #[simd_test(enable = "avx512dq")]
10310    unsafe fn test_mm512_maskz_reduce_ps() {
10311        let a = _mm512_set_ps(
10312            0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10313            4.0,
10314        );
10315        let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a);
10316        let e = _mm512_set_ps(
10317            0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
10318        );
10319        assert_eq_m512(r, e);
10320    }
10321
10322    #[simd_test(enable = "avx512dq")]
10323    unsafe fn test_mm_reduce_round_sd() {
10324        let a = _mm_set_pd(1., 2.);
10325        let b = _mm_set_sd(0.25);
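        // Scalar reduce: the low lane is reduce(b[0]) = 0.25, the upper lane is copied
        // from `a`.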
10326        let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10327        let e = _mm_set_pd(1., 0.25);
10328        assert_eq_m128d(r, e);
10329    }
10330
10331    #[simd_test(enable = "avx512dq")]
10332    unsafe fn test_mm_mask_reduce_round_sd() {
10333        let a = _mm_set_pd(1., 2.);
10334        let b = _mm_set_sd(0.25);
10335        let c = _mm_set_pd(3., 4.);
10336        let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10337            c, 0b0, a, b,
10338        );
10339        let e = _mm_set_pd(1., 4.);
10340        assert_eq_m128d(r, e);
10341    }
10342
10343    #[simd_test(enable = "avx512dq")]
10344    unsafe fn test_mm_maskz_reduce_round_sd() {
10345        let a = _mm_set_pd(1., 2.);
10346        let b = _mm_set_sd(0.25);
10347        let r =
10348            _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10349        let e = _mm_set_pd(1., 0.);
10350        assert_eq_m128d(r, e);
10351    }
10352
10353    #[simd_test(enable = "avx512dq")]
10354    unsafe fn test_mm_reduce_sd() {
10355        let a = _mm_set_pd(1., 2.);
10356        let b = _mm_set_sd(0.25);
10357        let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10358        let e = _mm_set_pd(1., 0.25);
10359        assert_eq_m128d(r, e);
10360    }
10361
10362    #[simd_test(enable = "avx512dq")]
10363    unsafe fn test_mm_mask_reduce_sd() {
10364        let a = _mm_set_pd(1., 2.);
10365        let b = _mm_set_sd(0.25);
10366        let c = _mm_set_pd(3., 4.);
10367        let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10368        let e = _mm_set_pd(1., 4.);
10369        assert_eq_m128d(r, e);
10370    }
10371
10372    #[simd_test(enable = "avx512dq")]
10373    unsafe fn test_mm_maskz_reduce_sd() {
10374        let a = _mm_set_pd(1., 2.);
10375        let b = _mm_set_sd(0.25);
10376        let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10377        let e = _mm_set_pd(1., 0.);
10378        assert_eq_m128d(r, e);
10379    }
10380
10381    #[simd_test(enable = "avx512dq")]
10382    unsafe fn test_mm_reduce_round_ss() {
10383        let a = _mm_set_ps(1., 2., 3., 4.);
10384        let b = _mm_set_ss(0.25);
10385        let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10386        let e = _mm_set_ps(1., 2., 3., 0.25);
10387        assert_eq_m128(r, e);
10388    }
10389
10390    #[simd_test(enable = "avx512dq")]
10391    unsafe fn test_mm_mask_reduce_round_ss() {
10392        let a = _mm_set_ps(1., 2., 3., 4.);
10393        let b = _mm_set_ss(0.25);
10394        let c = _mm_set_ps(5., 6., 7., 8.);
10395        let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10396            c, 0b0, a, b,
10397        );
10398        let e = _mm_set_ps(1., 2., 3., 8.);
10399        assert_eq_m128(r, e);
10400    }
10401
10402    #[simd_test(enable = "avx512dq")]
10403    unsafe fn test_mm_maskz_reduce_round_ss() {
10404        let a = _mm_set_ps(1., 2., 3., 4.);
10405        let b = _mm_set_ss(0.25);
10406        let r =
10407            _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10408        let e = _mm_set_ps(1., 2., 3., 0.);
10409        assert_eq_m128(r, e);
10410    }
10411
10412    #[simd_test(enable = "avx512dq")]
10413    unsafe fn test_mm_reduce_ss() {
10414        let a = _mm_set_ps(1., 2., 3., 4.);
10415        let b = _mm_set_ss(0.25);
10416        let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10417        let e = _mm_set_ps(1., 2., 3., 0.25);
10418        assert_eq_m128(r, e);
10419    }
10420
10421    #[simd_test(enable = "avx512dq")]
10422    unsafe fn test_mm_mask_reduce_ss() {
10423        let a = _mm_set_ps(1., 2., 3., 4.);
10424        let b = _mm_set_ss(0.25);
10425        let c = _mm_set_ps(5., 6., 7., 8.);
10426        let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10427        let e = _mm_set_ps(1., 2., 3., 8.);
10428        assert_eq_m128(r, e);
10429    }
10430
10431    #[simd_test(enable = "avx512dq")]
10432    unsafe fn test_mm_maskz_reduce_ss() {
10433        let a = _mm_set_ps(1., 2., 3., 4.);
10434        let b = _mm_set_ss(0.25);
10435        let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10436        let e = _mm_set_ps(1., 2., 3., 0.);
10437        assert_eq_m128(r, e);
10438    }
10439
10440    #[simd_test(enable = "avx512dq,avx512vl")]
10441    unsafe fn test_mm_fpclass_pd_mask() {
10442        let a = _mm_set_pd(1., f64::INFINITY);
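        // imm8 = 0x18 classifies +infinity (0x08) and -infinity (0x10); only the lane
        // holding f64::INFINITY sets its mask bit.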
10443        let r = _mm_fpclass_pd_mask::<0x18>(a);
10444        let e = 0b01;
10445        assert_eq!(r, e);
10446    }
10447
10448    #[simd_test(enable = "avx512dq,avx512vl")]
10449    unsafe fn test_mm_mask_fpclass_pd_mask() {
10450        let a = _mm_set_pd(1., f64::INFINITY);
10451        let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
10452        let e = 0b00;
10453        assert_eq!(r, e);
10454    }
10455
10456    #[simd_test(enable = "avx512dq,avx512vl")]
10457    unsafe fn test_mm256_fpclass_pd_mask() {
10458        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10459        let r = _mm256_fpclass_pd_mask::<0x18>(a);
10460        let e = 0b0110;
10461        assert_eq!(r, e);
10462    }
10463
10464    #[simd_test(enable = "avx512dq,avx512vl")]
10465    unsafe fn test_mm256_mask_fpclass_pd_mask() {
10466        let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10467        let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
10468        let e = 0b0010;
10469        assert_eq!(r, e);
10470    }
10471
10472    #[simd_test(enable = "avx512dq")]
10473    unsafe fn test_mm512_fpclass_pd_mask() {
10474        let a = _mm512_set_pd(
10475            1.,
10476            f64::INFINITY,
10477            f64::NEG_INFINITY,
10478            0.0,
10479            -0.0,
10480            -2.0,
10481            f64::NAN,
10482            1.0e-308,
10483        );
10484        let r = _mm512_fpclass_pd_mask::<0x18>(a);
10485        let e = 0b01100000;
10486        assert_eq!(r, e);
10487    }
10488
10489    #[simd_test(enable = "avx512dq")]
10490    unsafe fn test_mm512_mask_fpclass_pd_mask() {
10491        let a = _mm512_set_pd(
10492            1.,
10493            f64::INFINITY,
10494            f64::NEG_INFINITY,
10495            0.0,
10496            -0.0,
10497            -2.0,
10498            f64::NAN,
10499            1.0e-308,
10500        );
10501        let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a);
10502        let e = 0b00100000;
10503        assert_eq!(r, e);
10504    }
10505
10506    #[simd_test(enable = "avx512dq,avx512vl")]
10507    unsafe fn test_mm_fpclass_ps_mask() {
10508        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
10509        let r = _mm_fpclass_ps_mask::<0x18>(a);
10510        let e = 0b0110;
10511        assert_eq!(r, e);
10512    }
10513
10514    #[simd_test(enable = "avx512dq,avx512vl")]
10515    unsafe fn test_mm_mask_fpclass_ps_mask() {
10516        let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
10517        let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
10518        let e = 0b0010;
10519        assert_eq!(r, e);
10520    }
10521
10522    #[simd_test(enable = "avx512dq,avx512vl")]
10523    unsafe fn test_mm256_fpclass_ps_mask() {
10524        let a = _mm256_set_ps(
10525            1.,
10526            f32::INFINITY,
10527            f32::NEG_INFINITY,
10528            0.0,
10529            -0.0,
10530            -2.0,
10531            f32::NAN,
10532            1.0e-38,
10533        );
10534        let r = _mm256_fpclass_ps_mask::<0x18>(a);
10535        let e = 0b01100000;
10536        assert_eq!(r, e);
10537    }
10538
10539    #[simd_test(enable = "avx512dq,avx512vl")]
10540    unsafe fn test_mm256_mask_fpclass_ps_mask() {
10541        let a = _mm256_set_ps(
10542            1.,
10543            f32::INFINITY,
10544            f32::NEG_INFINITY,
10545            0.0,
10546            -0.0,
10547            -2.0,
10548            f32::NAN,
10549            1.0e-38,
10550        );
10551        let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a);
10552        let e = 0b00100000;
10553        assert_eq!(r, e);
10554    }
10555
10556    #[simd_test(enable = "avx512dq")]
10557    unsafe fn test_mm512_fpclass_ps_mask() {
10558        let a = _mm512_set_ps(
10559            1.,
10560            f32::INFINITY,
10561            f32::NEG_INFINITY,
10562            0.0,
10563            -0.0,
10564            -2.0,
10565            f32::NAN,
10566            1.0e-38,
10567            -1.,
10568            f32::NEG_INFINITY,
10569            f32::INFINITY,
10570            -0.0,
10571            0.0,
10572            2.0,
10573            f32::NAN,
10574            -1.0e-38,
10575        );
10576        let r = _mm512_fpclass_ps_mask::<0x18>(a);
10577        let e = 0b0110000001100000;
10578        assert_eq!(r, e);
10579    }
10580
10581    #[simd_test(enable = "avx512dq")]
10582    unsafe fn test_mm512_mask_fpclass_ps_mask() {
10583        let a = _mm512_set_ps(
10584            1.,
10585            f32::INFINITY,
10586            f32::NEG_INFINITY,
10587            0.0,
10588            -0.0,
10589            -2.0,
10590            f32::NAN,
10591            1.0e-38,
10592            -1.,
10593            f32::NEG_INFINITY,
10594            f32::INFINITY,
10595            -0.0,
10596            0.0,
10597            2.0,
10598            f32::NAN,
10599            -1.0e-38,
10600        );
10601        let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a);
10602        let e = 0b0010000000100000;
10603        assert_eq!(r, e);
10604    }
10605
10606    #[simd_test(enable = "avx512dq")]
10607    unsafe fn test_mm_fpclass_sd_mask() {
10608        let a = _mm_set_pd(1., f64::INFINITY);
10609        let r = _mm_fpclass_sd_mask::<0x18>(a);
10610        let e = 0b1;
10611        assert_eq!(r, e);
10612    }
10613
10614    #[simd_test(enable = "avx512dq")]
10615    unsafe fn test_mm_mask_fpclass_sd_mask() {
10616        let a = _mm_set_sd(f64::INFINITY);
10617        let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
10618        let e = 0b0;
10619        assert_eq!(r, e);
10620    }
10621
10622    #[simd_test(enable = "avx512dq")]
10623    unsafe fn test_mm_fpclass_ss_mask() {
10624        let a = _mm_set_ss(f32::INFINITY);
10625        let r = _mm_fpclass_ss_mask::<0x18>(a);
10626        let e = 0b1;
10627        assert_eq!(r, e);
10628    }
10629
10630    #[simd_test(enable = "avx512dq")]
10631    unsafe fn test_mm_mask_fpclass_ss_mask() {
10632        let a = _mm_set_ss(f32::INFINITY);
10633        let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
10634        let e = 0b0;
10635        assert_eq!(r, e);
10636    }
10637}